6.68KiB; Perl | 2017-11-07 16:11:22+01 | Statements 150 | SLOC 235
1 1
use Ged2site::Utils;
2
3
# See https://github.com/nigelhorne/CGI-Allow
4
5
package Ged2site::Allow;
6
7
# Ged2site is licensed under GPL2.0 for personal use only
8
# njh@bandsman.co.uk
9
10
use strict;
11
use warnings;
12
use File::Spec;
13
use Carp;
14
use Error;
15
16
# Countries from which allow() refuses connexions.
# Keys are upper-case two-letter country codes (allow() looks up
# uc($lingua->country()) here); only the truth of the value matters.
our %blacklist_countries = map { $_ => 1 } qw(
	BY MD RU CN BR UY TR MA VE
	SA CY CO MX IN RS PK UA
);
35
36
# Robots and scanners that allow() turns away.
# The keys are what allow() matches against the User-Agent request header;
# the value of a matching entry is the text reported in the log message and
# in the exception.
our %blacklist_agents = (
	masscan     => q{Masscan},
	WBSearchBot => q{Warebay},
	MJ12        => q{Majestic},
	zgrab       => q{Mozilla/5.0 zgrab/0.x},
	iodc        => q{Mozilla/5.0 (compatible; IODC-Odysseus Survey 21796-100-051215155936-107; +https://iodc.co.uk)},

	# This entry is keyed on a complete User-Agent string
	q{Mozilla/4.0 (compatible; Vagabondo/4.0; webcrawler at wise-guys dot nl; http://webagent.wise-guys.nl/; http://www.wise-guys.nl/)}
		=> q{wise-guys},
);
44
45
# Verdicts already reached in this process, keyed by REMOTE_ADDR:
# 1 = allowed, 0 = blocked.  allow() consults this before running any checks
# and returns the stored value when one exists.
our %status;
46
47
# allow - decide whether the current CGI request should be served.
#
# Arguments, given as a hash or as a single hash reference:
#	info	=> object describing the request.  The methods called on it
#		   here are is_search_engine(), tmpdir(), params(),
#		   as_string() and get_cookie().  When it is missing a
#		   warning is issued and the request is allowed.
#	lingua	=> optional object whose country() method gives the client's
#		   country code, which is checked against %blacklist_countries
#	logger	=> optional object with trace/debug/info/warn methods
#	cache	=> optional object with get/set/remove methods, used to hold
#		   the DShield address list
#	config	=> handed to ::create_memory_cache() when no cache is given
#
# Returns 1 when the request is allowed (always the case when REMOTE_ADDR is
# unset, i.e. not running as a CGI), or the remembered verdict (0 or 1) for
# an address that has already been judged in this process.
#
# Throws Error::Simple the first time an address is rejected, and when
# neither cache nor config has been supplied by the time the DShield list
# is needed.
sub allow {
	my $addr = $ENV{'REMOTE_ADDR'};
	if(!defined($addr)) {
		# Not running as a CGI
		return 1;
	}

	my %args = (ref($_[0]) eq 'HASH') ? %{$_[0]} : @_;
	my $logger = $args{'logger'};

	if(defined($status{$addr})) {
		# This address has already been judged, reuse the verdict
		if($logger) {
			$logger->debug("$addr: cached value " . $status{$addr});
		}
		return $status{$addr};
	}
	if($logger) {
		$logger->trace('In ', __PACKAGE__);
	}

	if(my $agent = $ENV{'HTTP_USER_AGENT'}) {
		# Most keys of %blacklist_agents are short tokens such as
		# 'MJ12' which only appear somewhere inside the User-Agent
		# header, so an exact lookup alone would hardly ever match.
		# Try the exact match first, then look for each key as a
		# substring (sorted so that the reported name is stable).
		my $blocked = $blacklist_agents{$agent};
		if(!$blocked) {
			foreach my $token (sort keys %blacklist_agents) {
				next unless(length($token));
				if(index($agent, $token) >= 0) {
					$blocked = $blacklist_agents{$token};
					last;
				}
			}
		}
		if($blocked) {
			if($logger) {
				$logger->info("$blocked blacklisted");
			}
			$status{$addr} = 0;
			Error::Simple->throw("$addr: $blocked is blacklisted", 1);
		}
	}

	my $info = $args{'info'};
	if(!defined($info)) {
		# Nothing more can be checked without the request details
		if($logger) {
			$logger->warn('Info not given');
		} else {
			carp('Info not given');
		}
		$status{$addr} = 1;
		return 1;
	}

	# Search engines are exempt from the per-request checks
	unless($info->is_search_engine()) {
		if(_throttled($info, $addr, $logger)) {
			if($logger) {
				# Recommend you send HTTP 429 at this point
				$logger->warn("$addr throttled");
			}
			$status{$addr} = 0;
			# This must be thrown outside the eval that guards the
			# throttler, otherwise it would be swallowed there
			Error::Simple->throw("$addr has been throttled");
		}

		# Addresses in 192.168.0.0/16 skip the country check
		unless($addr =~ /^192\.168\./) {
			my $lingua = $args{'lingua'};
			my $country = defined($lingua) ? $lingua->country() : undef;
			if(defined($country) && $blacklist_countries{uc($country)}) {
				if($logger) {
					$logger->warn("$addr blocked connexion from ", $country);
				}
				$status{$addr} = 0;
				Error::Simple->throw("$addr: blocked connexion from " . $country, 0);
			}
		}

		if(defined($ENV{'REQUEST_METHOD'}) && ($ENV{'REQUEST_METHOD'} eq 'GET')) {
			my $params = $info->params();
			if(defined($params) && keys(%{$params})) {
				# Run the GET parameters, keys included, through the
				# intrusion detection rules
				require CGI::IDS;
				CGI::IDS->import();

				my $ids = CGI::IDS->new();
				$ids->set_scan_keys(scan_keys => 1);
				if($ids->detect_attacks(request => $params) > 0) {
					if($logger) {
						$logger->warn("$addr: IDS blocked connexion for ", $info->as_string());
					}
					$status{$addr} = 0;
					Error::Simple->throw("$addr: IDS blocked connexion for " . $info->as_string());
				}
			}
		}

		if(defined(my $referer = $ENV{'HTTP_REFERER'})) {
			# Protect against Shellshocker
			require Data::Validate::URI;
			Data::Validate::URI->import();

			unless(Data::Validate::URI->new()->is_uri($referer)) {
				if($logger) {
					$logger->warn("$addr: Blocked shellshocker for $referer");
				}
				$status{$addr} = 0;
				Error::Simple->throw("$addr: Blocked shellshocker for $referer");
			}

			# Known referer spammers.  The dots are escaped so that
			# only these exact host names match.
			if($referer =~ m{^http://(?:keywords-monitoring-your-success\.com/try\.php|www\.tcsindustry\.com/|free-video-tool\.com/)}) {
				if($logger) {
					$logger->warn("$addr: Blocked trawler");
				}
				$status{$addr} = 0;
				Error::Simple->throw("$addr: Blocked trawler");
			}
		}
	}

	require DateTime;
	DateTime->import();

	my $today = DateTime->today()->ymd();

	my $cache = $args{'cache'};
	if(!defined($cache)) {
		Error::Simple->throw('Either cache or config must be given') unless($args{'config'});
		$cache = ::create_memory_cache(config => $args{'config'}, namespace => __PACKAGE__, logger => $logger);
	}

	my @ips = _dshield_addresses($cache, $today, $logger);

	# The DShield addresses have had their leading zeros removed, so a
	# plain string comparison with REMOTE_ADDR is enough as long as the
	# web server reports the address without zero padding
	if(grep { $_ eq $addr } @ips) {
		if($logger) {
			$logger->warn("Dshield blocked connexion from $addr");
		}
		$status{$addr} = 0;
		Error::Simple->throw("Dshield blocked connexion from $addr");
	}

	if($info->get_cookie(cookie_name => 'mycustomtrackid')) {
		if($logger) {
			$logger->warn('Blocking possible jqic');
		}
		$status{$addr} = 0;
		Error::Simple->throw('Blocking possible jqic');
	}

	if($logger) {
		$logger->trace("Allowing connexion from $addr");
	}

	$status{$addr} = 1;
	return 1;
}

# Record this request in the throttle database kept in $info->tmpdir() and
# return true when $addr has exceeded 15 requests in 90 seconds.
sub _throttled {
	my ($info, $addr, $logger) = @_;

	require Data::Throttler;
	Data::Throttler->import();

	my $db_file = File::Spec->catfile($info->tmpdir(), 'throttle');
	my $throttled = 0;

	# Handle YAML Errors: anything that dies in here is a problem with
	# the throttler or its database, not a verdict on the client
	my $ok = eval {
		my $throttler = Data::Throttler->new(
			max_items => 15,
			interval => 90,
			backend => 'YAML',
			backend_options => {
				db_file => $db_file
			}
		);

		$throttled = 1 unless($throttler->try_push(key => $addr));
		1;
	};
	if(!$ok) {
		# Remove the database that could not be used; the next
		# request constructs the throttler again from scratch
		if($logger) {
			$logger->debug("removing $db_file");
		}
		unlink($db_file);
	}

	return $throttled;
}

# Return the list of DShield attack source addresses, read from $cache
# under the key $today when present, otherwise downloaded and then cached
# for one day.  Returns an empty list when the download or parse fails.
sub _dshield_addresses {
	my ($cache, $today, $logger) = @_;

	my @ips;

	if(defined($cache)) {
		if(my $cachecontent = $cache->get($today)) {
			if($logger) {
				$logger->debug("read from cache $cachecontent");
			}
			@ips = split(/,/, $cachecontent);
			return @ips if($ips[0]);

			if($logger) {
				$logger->info("DShield cache for $today is empty, deleting to force reread");
			}
			$cache->remove($today);
			@ips = ();	# Start afresh so the download below can be cached
		} elsif($logger) {
			$logger->debug("Can't find $today in the cache");
		}
	} elsif($logger) {
		$logger->warn('Couldn\'t create the DShield cache');
	}

	require LWP::Simple;
	LWP::Simple->import();
	require XML::LibXML;
	XML::LibXML->import();

	if($logger) {
		$logger->trace('Downloading DShield signatures');
	}

	# NOTE(review): the date in this URL is fixed at 2012-03-08, confirm
	# against the DShield API that this is intended
	my $xml = eval {
		XML::LibXML->load_xml(string => LWP::Simple::get('https://secure.dshield.org/api/sources/attacks/100/2012-03-08'));
	};
	return @ips unless(defined($xml));	# Download or parse failed

	# FIXME: consider keeping only entries whose lastseen is today or
	#	yesterday.  The filter that used to be here could never run.
	foreach my $source ($xml->findnodes('/sources/data')) {
		my $ip = $source->findnodes('./ip')->to_literal();
		# Remove leading zeros from each octet (001.002.003.004 becomes
		# 1.2.3.4) so the address can be compared as a string
		$ip =~ s/0*(\d+)/$1/g;
		push @ips, $ip;
	}

	if(defined($cache) && $ips[0]) {
		my $cachecontent = join(',', @ips);
		if($logger) {
			$logger->info("Setting DShield cache for $today to $cachecontent");
		}
		$cache->set($today, $cachecontent, '1 day');
	}

	return @ips;
}
265
266
1;