#!/usr/bin/perl
#$Id: referrerspam,v 1.5 2004/06/24 05:06:30 emile Exp $#
#
# Reads access-log lines from the files named on the command line (or from
# STDIN) and prints the client address of requests that look like referrer
# spam: the same non-local referrer seen at least $max_referrers times
# within $max_cachetime seconds, with the reporting client itself already
# responsible for more than one of the cached hits.

use strict;
use warnings;
use Getopt::Std;
use Time::Local;

$| = 1;

####CONFIG
our @localnames    = qw/ example.com 1.2.3.4 /;
our $max_cachetime = 120;    #seconds in cache
our $max_referrers = 3;      #amount of times the same referrer is allowed within the cache-time
###END CONFIG

our %opts;
our @referrercache;    # list of { time, referrer, ip } records still inside the cache window
our %offenders;        # only used by the disabled "report in the end" variant below

# Local names are matched against the host part of the referrer only.
# They are escaped (a "." must be a literal dot) and anchored, but may be
# preceded by a subdomain, so "www.example.com" is local as well.
my $local_regex = join "|", map { quotemeta($_) } @localnames;
$local_regex = qr/(?:\A|\.)(?:$local_regex)\z/i;

# used in convert_time function
# (month numbers are 0-based, as expected by Time::Local)
our %months = (
    'Jan' => 0, 'Feb' => 1, 'Mar' => 2,  'Apr' => 3,
    'May' => 4, 'Jun' => 5, 'Jul' => 6,  'Aug' => 7,
    'Sep' => 8, 'Oct' => 9, 'Nov' => 10, 'Dec' => 11,
);

# Run the scan only when executed as a script, so that the helper subs can
# be loaded (e.g. by a test) without consuming input.
main() unless caller;

# Parse the command line, then scan the input line by line and print one
# line per offending request (plus the referrer when -v is given).
sub main {
    getopts('vh', \%opts);

    if ($opts{'h'}) {
        print "Usage:\n $0 [-v]\n\n";
        print "-v : print the referred sites as well\n";
        print "\n\n";
        exit(0);
    }

    print "Offender IP-addresses:\n";

    LINE:
    while (my $line = <>) {
        #ugly regex,but gets the job done
        # $1 = client address, $2 = bracketed timestamp,
        # $3 = contents of the second double-quoted field (the referrer)
        next LINE unless $line =~ /^(\S+)\s+-.*\[(.*)\]\s+\".*?\".*?\"(.*?)\"/;
        my ($ip, $stamp, $referrer) = ($1, $2, $3);

        $referrer =~ s#^http://##;
        # NOTE(review): the next statement carried a #TEST# marker and is
        # treated as disabled; enabling it groups referrers by hostname only.
        #TEST# $referrer =~ s#/.*$##;

        #don't do anything if referrer is empty
        next LINE if $referrer eq "-" || $referrer eq "";

        #don't do anything if the referrer is a locally known hostname
        next LINE if is_local_referrer($referrer);

        my $time = convert_apachetime_to_epoch($stamp);
        unless (defined $time) {
            warn "skipping line $.: cannot parse timestamp '$stamp'\n";
            next LINE;
        }

        #OK, we found a new referrer
        # lets clean out the referrercache a bit and see if this $referrer
        # is still in there
        # (only the expired records are dropped; younger ones are kept)
        @referrercache =
            grep { $time - $_->{'time'} <= $max_cachetime } @referrercache;

        # the current request counts as the first occurrence
        my $referrercount =
            1 + scalar grep { $_->{'referrer'} eq $referrer } @referrercache;

        if ($referrercount >= $max_referrers) {
            #false positive detection
            #doublecheck checks if the same referrer is used by this
            # IP address
            my $hits_from_ip = grep {
                $_->{'ip'} eq $ip && $_->{'referrer'} eq $referrer
            } @referrercache;

            if ($hits_from_ip > 1) {
                #$offender{$ip}++;
                print "$ip\n";
                print "$referrer\n" if $opts{'v'};
            }
            ##if ($offenders{$ip} == 1) {
            ##    print "New offender found:\n";
            ##    print " IP: $ip\n";
            ##    print " referrer: $referrer\n\n";
            ##}
        }

        # and push the record we found on the cache
        push @referrercache,
            { 'time' => $time, 'referrer' => $referrer, 'ip' => $ip };
    }

    return;
}

# Returns 1 when the host part of $referrer is one of @localnames (or a
# subdomain of one), 0 otherwise. An optional scheme, port, path, query
# and fragment are ignored for the comparison.
sub is_local_referrer {
    my ($referrer) = @_;

    return 0 unless @localnames;

    # every part of this pattern is optional, so it always matches
    my ($host) = $referrer =~ m{\A(?:[a-z][a-z0-9+.\-]*://)?([^/?\#:]*)}i;

    return $host =~ $local_regex ? 1 : 0;
}

# Converts a log timestamp of the form "24/Jun/2004:05:06:30 +0200" to
# epoch seconds, interpreting the clock fields in the local timezone.
# Returns undef when the string does not have that form, the month name
# is unknown, or the fields do not form a valid date.
sub convert_apachetime_to_epoch {
    my $apachetime = shift;

    return unless defined $apachetime;

    #this disregards the timezone ...
    my ($mday, $monname, $year, $hour, $min, $sec) =
        $apachetime =~ m#^(\d+)/(\w+)/(\d+):(\d+):(\d+):(\d+)\s#
        or return;

    my $mon = $months{$monname};
    return unless defined $mon;

    # The four-digit year is passed through unchanged; timelocal() dies on
    # out-of-range fields, which is turned into an undef result here.
    my $epoch = eval { timelocal($sec, $min, $hour, $mday, $mon, $year) };

    return $epoch;
}

#alternative setup: report in the end
#END {
#    print "Referrer spam attempts detected for these IP-addresses:\n";
#    foreach my $offender(sort {$offenders{$a} <=> $offenders{$b}} keys %offenders){
#        print "$offender\n";
#    }
#}