6.91KiB; Perl | 2020-10-21 15:47:28+02 | Statements 150 | SLOC 239
1 1
use Ged2site::Utils;
2
3
# See https://github.com/nigelhorne/CGI-Allow
4
5
package Ged2site::Allow;
6
7
# Ged2site is licensed under GPL2.0 for personal use only
8
# njh@bandsman.co.uk
9
10
use strict;
11
use warnings;
12
use File::Spec;
13
use Carp;
14
use Error;
15
16
# Two-letter country codes whose visitors allow() refuses.  allow() looks up
# uc($lingua->country()) here, so the keys must be upper case.
# (The codes look like ISO 3166-1 alpha-2 - confirm against what
# the lingua object's country() returns.)
our %blacklist_countries = map { $_ => 1 } qw(
	BY MD RU CN BR UY
	TR MA VE SA CY CO
	MX IN RS PK UA
);
35
36
# User agents that allow() refuses.  Key: the value of the User-Agent header,
# value: a short name used in the log and in the exception text.
# NOTE(review): allow() does a plain hash lookup on the whole header, so the
# short keys (e.g. 'MJ12') only match when the header is exactly that string -
# confirm whether a substring match was intended.
our %blacklist_agents = (
	# Short tokens
	'Barkrowler' => 'Barkrowler',
	'masscan' => 'Masscan',
	'WBSearchBot' => 'Warebay',
	'MJ12' => 'Majestic',

	# Complete header values
	'Mozilla/4.0 (compatible; Vagabondo/4.0; webcrawler at wise-guys dot nl; http://webagent.wise-guys.nl/; http://www.wise-guys.nl/)' => 'wise-guys',
	'Mozilla/5.0 zgrab/0.x' => 'zgrab',
	'Mozilla/5.0 (compatible; IODC-Odysseus Survey 21796-100-051215155936-107; +https://iodc.co.uk)' => 'iodc',
	'Mozilla/5.0 (compatible; adscanner/)' => 'adscanner',
	'Mozilla/5.0 (compatible; SemrushBot/6~bl; +http://www.semrush.com/bot.html)' => 'SemrushBot',
	'ZoominfoBot (zoominfobot at zoominfo dot com)' => 'zoominfobot',
);
48
49
# Verdicts already reached by allow(), keyed by the client's address:
# 1 = allowed, 0 = refused.  allow() returns the stored value for an address
# it has seen before instead of running its checks again.
our %status;
50
51
# Decide whether the client behind the current CGI request may be served.
#
# Arguments (a hash, or a reference to a hash):
#	info	- request object; is_search_engine(), tmpdir(), params(),
#		  as_string() and get_cookie() are called on it.  Without it a
#		  warning is issued, the remaining checks are skipped and the
#		  client is allowed.
#	lingua	- optional; its country() is looked up in %blacklist_countries
#	logger	- optional; trace(), debug(), info() and warn() are called on it
#	cache	- optional; get(), set() and remove() hold the DShield list
#	config	- handed to ::create_memory_cache() when no cache is given
#
# Returns 1 when the client is allowed, which includes the case where
# REMOTE_ADDR is not set (not running as a CGI).
#
# Throws Error::Simple the first time an address is refused, and when neither
# cache nor config is given.  The verdict is remembered in %status, so a later
# call for the same address returns the remembered value (0 for a refused
# address) rather than throwing again.
sub allow {
	my $addr = $ENV{'REMOTE_ADDR'};

	if(!defined($addr)) {
		# Not running as a CGI
		return 1;
	}

	my %args = (ref($_[0]) eq 'HASH') ? %{$_[0]} : @_;

	my $logger = $args{'logger'};

	if(defined($status{$addr})) {
		# This address has already been judged
		if($logger) {
			$logger->debug("$addr: cached value ", $status{$addr});
		}
		return $status{$addr};
	}
	if($logger) {
		$logger->trace('In ', __PACKAGE__);
	}

	if($ENV{'HTTP_USER_AGENT'}) {
		# NOTE(review): exact match on the whole header - confirm that
		# a substring match was not intended for the short keys
		my $blocked = $blacklist_agents{$ENV{'HTTP_USER_AGENT'}};
		if($blocked) {
			if($logger) {
				$logger->info("$blocked blacklisted");
			}
			$status{$addr} = 0;
			Error::Simple->throw("$addr: $blocked is blacklisted", 1);
		}
	}

	my $info = $args{'info'};
	if(!defined($info)) {
		if($logger) {
			$logger->warn('Info not given');
		} else {
			carp('Info not given');
		}
		$status{$addr} = 1;
		return 1;
	}

	unless($info->is_search_engine()) {
		require Data::Throttler;
		Data::Throttler->import();

		# The eval is only there to handle YAML errors from the throttle
		# file.  The refusal itself is raised after the eval so that it
		# reaches the caller: raised inside, it would be caught by the
		# eval and mistaken for a broken throttle file.
		my $db_file = File::Spec->catfile($info->tmpdir(), 'throttle');
		my $throttled = 0;
		eval {
			my $throttler = Data::Throttler->new(
				max_items => 15,
				interval => 90,
				backend => 'YAML',
				backend_options => {
					db_file => $db_file
				}
			);

			$throttled = 1 unless($throttler->try_push(key => $addr));
			1;
		} or do {
			# Throw the throttle file away so that it is recreated
			if($logger) {
				$logger->debug("removing $db_file");
			}
			unlink($db_file);
		};
		if($throttled) {
			if($logger) {
				# Recommend you send HTTP 429 at this point
				$logger->warn("$addr throttled");
			}
			$status{$addr} = 0;
			Error::Simple->throw("$addr has been throttled");
		}

		# Addresses in 192.168.0.0/16 are exempt from the country check
		unless($addr =~ /^192\.168\./) {
			my $lingua = $args{'lingua'};
			my $country = defined($lingua) ? $lingua->country() : undef;
			if(defined($country) && $blacklist_countries{uc($country)}) {
				if($logger) {
					$logger->warn("$addr blocked connexion from ", $country);
				}
				$status{$addr} = 0;
				Error::Simple->throw("$addr: blocked connexion from " . $country, 0);
			}
		}

		if(defined($ENV{'REQUEST_METHOD'}) && ($ENV{'REQUEST_METHOD'} eq 'GET')) {
			my $params = $info->params();
			if(defined($params) && keys(%{$params})) {
				require CGI::IDS;
				CGI::IDS->import();

				my $ids = CGI::IDS->new();
				$ids->set_scan_keys(scan_keys => 1);
				if($ids->detect_attacks(request => $params) > 0) {
					if($logger) {
						$logger->warn("$addr: IDS blocked connexion for ", $info->as_string());
					}
					$status{$addr} = 0;
					Error::Simple->throw("$addr: IDS blocked connexion for " . $info->as_string());
				}
			}
		}

		my $referer = $ENV{'HTTP_REFERER'};
		if(defined($referer)) {
			# Protect against Shellshocker
			require Data::Validate::URI;
			Data::Validate::URI->import();

			unless(Data::Validate::URI->new()->is_uri($referer)) {
				if($logger) {
					$logger->warn("$addr: Blocked shellshocker for $referer");
				}
				$status{$addr} = 0;
				Error::Simple->throw("$addr: Blocked shellshocker for $referer");
			}
			# Known referrer spammers; the dots are escaped so that
			# only these exact host names match
			if($referer =~ m{^http://(?:keywords-monitoring-your-success\.com/try\.php|www\.tcsindustry\.com/|free-video-tool\.com/)}) {
				if($logger) {
					$logger->warn("$addr: Blocked trawler");
				}
				$status{$addr} = 0;
				Error::Simple->throw("$addr: Blocked trawler");
			}
		}
	}

	require DateTime;
	DateTime->import();

	# Addresses listed by DShield, cached under today's date
	my @ips;
	my $today = DateTime->today()->ymd();

	my $cache = $args{'cache'};
	if(!defined($cache)) {
		Error::Simple->throw('Either cache or config must be given') unless($args{config});
		$cache = ::create_memory_cache(config => $args{'config'}, namespace => __PACKAGE__, logger => $logger);
	}
	if(defined($cache)) {
		my $cachecontent = $cache->get($today);
		if($cachecontent) {
			if($logger) {
				$logger->debug("read from cache $cachecontent");
			}
			@ips = split(/,/, $cachecontent);
			unless($ips[0]) {
				if($logger) {
					$logger->info("DShield cache for $today is empty, deleting to force reread");
				}
				$cache->remove($today);
				# Start afresh, otherwise the stale first element
				# would stop the downloaded list from being cached
				@ips = ();
			}
		} elsif($logger) {
			$logger->debug("Can't find $today in the cache");
		}
	} elsif($logger) {
		$logger->warn('Couldn\'t create the DShield cache');
	}

	unless($ips[0]) {
		# Nothing usable in the cache, so fetch the list
		require LWP::Simple;
		LWP::Simple->import();
		require XML::LibXML;
		XML::LibXML->import();

		if($logger) {
			$logger->trace('Downloading DShield signatures');
		}
		# NOTE(review): the date in the URL is fixed rather than $today -
		# confirm that this is intended
		# get() gives undef when the download fails, which makes
		# load_xml() die; either way $xml stays undefined
		my $xml = eval {
			XML::LibXML->load_xml(string => get('https://secure.dshield.org/api/sources/attacks/100/2012-03-08'));
		};
		if(defined($xml)) {
			foreach my $source ($xml->findnodes('/sources/data')) {
				my $ip = $source->findnodes('./ip')->to_literal();
				$ip =~ s/0*(\d+)/$1/g;	# Perl interprets numbers leading with 0 as octal
				push @ips, $ip;
			}
			if(defined($cache) && $ips[0]) {
				my $cachecontent = join(',', @ips);
				if($logger) {
					$logger->info("Setting DShield cache for $today to $cachecontent");
				}
				$cache->set($today, $cachecontent, '1 day');
			}
		} elsif($logger) {
			$logger->warn('Unable to download or parse the DShield signatures');
		}
	}

	# FIXME: Doesn't realise 1.2.3.4 is the same as 001.002.003.004
	if(grep { $_ eq $addr } @ips) {
		if($logger) {
			$logger->warn("Dshield blocked connexion from $addr");
		}
		$status{$addr} = 0;
		Error::Simple->throw("Dshield blocked connexion from $addr");
	}

	if($info->get_cookie(cookie_name => 'mycustomtrackid')) {
		if($logger) {
			$logger->warn('Blocking possible jqic');
		}
		$status{$addr} = 0;
		Error::Simple->throw('Blocking possible jqic');
	}

	if($logger) {
		$logger->trace("Allowing connexion from $addr");
	}

	$status{$addr} = 1;
	return 1;
}
270
271
1;