6.76KiB; Perl | 2020-04-29 15:38:45+02 | Statements 150 | SLOC 236
1 1
use Ged2site::Utils;
2
3
# See https://github.com/nigelhorne/CGI-Allow
4
5
package Ged2site::Allow;
6
7
# Ged2site is licensed under GPL2.0 for personal use only
8
# njh@bandsman.co.uk
9
10
use strict;
11
use warnings;
12
use File::Spec;
13
use Carp;
14
use Error;
15
16
# Two-letter country codes whose visitors are refused.
# allow() upper-cases the value of $lingua->country() and looks it up here;
# any true value means "blocked".
our %blacklist_countries = map { $_ => 1 } qw(
	BY MD RU CN BR UY TR MA VE
	SA CY CO MX IN RS PK UA
);
35
36
# Robots and scanners that are refused.
# The key is what allow() looks up from $ENV{'HTTP_USER_AGENT'}; the value is
# the text reported in the log entry and in the Error::Simple message when
# the request is rejected.
our %blacklist_agents = (
	'masscan' => 'Masscan',
	'WBSearchBot' => 'Warebay',
	'MJ12' => 'Majestic',
	'Mozilla/4.0 (compatible; Vagabondo/4.0; webcrawler at wise-guys dot nl; http://webagent.wise-guys.nl/; http://www.wise-guys.nl/)' => 'wise-guys',
	'zgrab' => 'Mozilla/5.0 zgrab/0.x',
	'Mozilla/5.0 (compatible; SemrushBot/6~bl; +http://www.semrush.com/bot.html)' => 'SemrushBot',
	'iodc' => 'Mozilla/5.0 (compatible; IODC-Odysseus Survey 21796-100-051215155936-107; +https://iodc.co.uk)',
);
45
46
# Verdicts already reached by allow() in this process, keyed by REMOTE_ADDR:
# 1 = allowed, 0 = rejected.  allow() returns the stored value on a repeat
# call from the same address instead of running the checks again.
our %status;
47
48
# allow - decide whether the current CGI request may be served.
#
# Arguments are named, given either as a list or as a single hash reference:
#	info	- request object; is_search_engine(), tmpdir(), params(),
#		  as_string() and get_cookie() are called on it.  When it is
#		  missing a warning is issued and the request is allowed.
#	lingua	- optional; its country() is checked against %blacklist_countries
#	logger	- optional; trace(), debug(), info() and warn() are called on it
#	cache	- optional; get(), set() and remove() are used to hold the
#		  DShield address list for the day
#	config	- handed to ::create_memory_cache() when no cache is given
#
# Returns 1 when the request is allowed, or when REMOTE_ADDR is not set
# (i.e. not running as a CGI).  If the address has already been judged in
# this process the stored verdict (1 or 0) from %status is returned.
#
# Throws Error::Simple when the request is rejected, and when neither
# 'cache' nor 'config' is supplied.
sub allow {
	my $addr = $ENV{'REMOTE_ADDR'};

	if(!defined($addr)) {
		# Not running as a CGI
		return 1;
	}

	my %args = (ref($_[0]) eq 'HASH') ? %{$_[0]} : @_;

	my $logger = $args{'logger'};

	if(defined($status{$addr})) {
		# This address has already been judged by this process
		if($logger) {
			$logger->debug("$addr: cached value ", $status{$addr});
		}
		return $status{$addr};
	}
	if($logger) {
		$logger->trace('In ', __PACKAGE__);
	}

	if(my $agent = $ENV{'HTTP_USER_AGENT'}) {
		my $blocked = $blacklist_agents{$agent};
		if(!$blocked) {
			# Several keys ('masscan', 'MJ12', 'zgrab', ...) are only a
			# fragment of the agent string a robot really sends, so an
			# exact lookup alone would never match them
			foreach my $fragment (sort keys %blacklist_agents) {
				next unless(length($fragment));
				next if(index($agent, $fragment) < 0);
				$blocked = $blacklist_agents{$fragment};
				last;
			}
		}
		if($blocked) {
			if($logger) {
				$logger->info("$blocked blacklisted");
			}
			$status{$addr} = 0;
			Error::Simple->throw("$addr: $blocked is blacklisted", 1);
		}
	}

	my $info = $args{'info'};
	if(!defined($info)) {
		if($logger) {
			$logger->warn('Info not given');
		} else {
			carp('Info not given');
		}
		$status{$addr} = 1;
		return 1;
	}

	unless($info->is_search_engine()) {
		require Data::Throttler;
		Data::Throttler->import();

		my $db_file = File::Spec->catfile($info->tmpdir(), 'throttle');
		my $throttled = 0;

		# The eval is only here to survive errors from the YAML backend.
		# The verdict is carried out in a flag and acted on afterwards:
		# throwing from inside the eval would be swallowed by it and
		# would be mistaken for a YAML error
		eval {
			my $throttler = Data::Throttler->new(
				max_items => 15,
				interval => 90,
				backend => 'YAML',
				backend_options => {
					db_file => $db_file
				}
			);

			$throttled = 1 unless($throttler->try_push(key => $addr));
		};
		if($@) {
			# Handle YAML Errors
			if($logger) {
				$logger->debug("removing $db_file");
			}
			unlink($db_file);
		} elsif($throttled) {
			if($logger) {
				# Recommend you send HTTP 429 at this point
				$logger->warn("$addr throttled");
			}
			$status{$addr} = 0;
			Error::Simple->throw("$addr has been throttled");
		}

		unless($addr =~ /^192\.168\./) {
			my $lingua = $args{'lingua'};
			# country() is only called once, and may give undef
			my $country = defined($lingua) ? $lingua->country() : undef;
			if(defined($country) && $blacklist_countries{uc($country)}) {
				if($logger) {
					$logger->warn("$addr blocked connexion from ", $country);
				}
				$status{$addr} = 0;
				Error::Simple->throw("$addr: blocked connexion from $country", 0);
			}
		}

		if(defined($ENV{'REQUEST_METHOD'}) && ($ENV{'REQUEST_METHOD'} eq 'GET')) {
			my $params = $info->params();
			if(defined($params) && keys(%{$params})) {
				require CGI::IDS;
				CGI::IDS->import();

				my $ids = CGI::IDS->new();
				$ids->set_scan_keys(scan_keys => 1);
				if($ids->detect_attacks(request => $params) > 0) {
					if($logger) {
						$logger->warn("$addr: IDS blocked connexion for ", $info->as_string());
					}
					$status{$addr} = 0;
					Error::Simple->throw("$addr: IDS blocked connexion for " . $info->as_string());
				}
			}
		}

		if(defined(my $referer = $ENV{'HTTP_REFERER'})) {
			# Protect against Shellshocker
			require Data::Validate::URI;
			Data::Validate::URI->import();

			unless(Data::Validate::URI->new()->is_uri($referer)) {
				if($logger) {
					$logger->warn("$addr: Blocked shellshocker for $referer");
				}
				$status{$addr} = 0;
				Error::Simple->throw("$addr: Blocked shellshocker for $referer");
			}
			# Dots are escaped so that they only match a literal '.'
			if(($referer =~ m{^http://keywords-monitoring-your-success\.com/try\.php}) ||
			   ($referer =~ m{^http://www\.tcsindustry\.com/}) ||
			   ($referer =~ m{^http://free-video-tool\.com/})) {
				if($logger) {
					$logger->warn("$addr: Blocked trawler");
				}
				$status{$addr} = 0;
				Error::Simple->throw("$addr: Blocked trawler");
			}
		}
	}

	require DateTime;
	DateTime->import();

	my @ips;
	my $today = DateTime->today()->ymd();

	my $cache = $args{'cache'};
	if(!defined($cache)) {
		Error::Simple->throw('Either cache or config must be given') unless($args{config});
		$cache = ::create_memory_cache(config => $args{'config'}, namespace => __PACKAGE__, logger => $logger);
	}
	if(defined($cache)) {
		my $cachecontent = $cache->get($today);
		if($cachecontent) {
			if($logger) {
				$logger->debug("read from cache $cachecontent");
			}
			@ips = split(/,/, $cachecontent);
			unless($ips[0]) {
				if($logger) {
					$logger->info("DShield cache for $today is empty, deleting to force reread");
				}
				$cache->remove($today);
				# Don't mix the discarded entry with the fresh download
				@ips = ();
			}
		} elsif($logger) {
			$logger->debug("Can't find $today in the cache");
		}
	} elsif($logger) {
		$logger->warn('Couldn\'t create the DShield cache');
	}

	unless($ips[0]) {
		# Nothing usable in the cache, so fetch the list
		require LWP::Simple;
		LWP::Simple->import();
		require XML::LibXML;
		XML::LibXML->import();

		if($logger) {
			$logger->trace('Downloading DShield signatures');
		}
		my $xml;
		eval {
			# NOTE(review): the date in this URL is fixed rather than
			# derived from $today - confirm that this is intended
			$xml = XML::LibXML->load_xml(string => get('https://secure.dshield.org/api/sources/attacks/100/2012-03-08'));
		};
		unless($@ || !defined($xml)) {
			# FIXME: entries are not filtered on <lastseen>; ideally only
			#	those seen today or yesterday would be kept
			foreach my $source ($xml->findnodes('/sources/data')) {
				my $ip = $source->findnodes('./ip')->to_literal();
				$ip =~ s/0*(\d+)/$1/g;	# Perl interprets numbers leading with 0 as octal
				push @ips, $ip;
			}
			if(defined($cache) && $ips[0]) {
				my $cachecontent = join(',', @ips);
				if($logger) {
					$logger->info("Setting DShield cache for $today to $cachecontent");
				}
				$cache->set($today, $cachecontent, '1 day');
			}
		}
	}

	# FIXME: Doesn't realise 1.2.3.4 is the same as 001.002.003.004
	if(grep($_ eq $addr, @ips)) {
		if($logger) {
			$logger->warn("Dshield blocked connexion from $addr");
		}
		$status{$addr} = 0;
		Error::Simple->throw("Dshield blocked connexion from $addr");
	}

	if($info->get_cookie(cookie_name => 'mycustomtrackid')) {
		if($logger) {
			$logger->warn('Blocking possible jqic');
		}
		$status{$addr} = 0;
		Error::Simple->throw('Blocking possible jqic');
	}

	if($logger) {
		$logger->trace("Allowing connexion from $addr");
	}

	$status{$addr} = 1;
	return 1;
}
267
268
1;