#!/usr/bin/perl # # Quick filter to detect junk mail. # # Criterion 1: the Received: line domain does not match the # domain of the From: line. # # Criterion 2: the Received: line domain is an open mail relay # as defined by ordb.org. We check it by seeing if the address # (in reverse order) is included in the nameserver at relays.ordb.org. # # Criterion 3: my name must be on the To: or Cc: line # # Criterion 4: the mail has a Content-Type: text/html or multipart tag # # Exception 1: If the address is on the EvilFucks list, then they are # an annoying and evil fuck and are summarily ruled as junk. # # Exception 2: If the address is on the GoodFroms list, then it's a # friend or colleague who sends mail through a relay (e.g., Cornell # mail with a cable-modem at home) and in unconditionally ruled as not-junk. # Addition: All alias names in my .mailrc are automatically imported # into the GoodFroms list. # # Exception 3: If the address is on the GoodTos list, then it's ok, # such as a mailing list name. # # Added logging (via the -l flag), so that we can check (externally) # how we're doing on accuracy. FNA 04/04/03 # $logfile = $ENV{'HOME'} . "/bin/junkmail.log"; $Logging = 0; # logging is off by default $me = 'frank@mycause.com'; # Configure here!!! $command="/usr/bin/host"; $sawReceived = 0; $Received = ""; $From = ""; $To = ""; $ContentType= ""; $into = 0; $mailrc = $ENV{'HOME'} . "/.mailrc"; ############ # **** GoodTos **** # List of good To addresses we'll accept (e.g., mailing lists) # # Configure here!!! # ############ @GoodTos = ( 'Cayuga Camera Club', # local photography mailing list 'mailinglist-owner@blarg.com', # mailing list example '.rr.com', # cable modem (road runner) ); ############ # *** GoodFroms **** # List of exceptions, known good addresses that use relays # that we'll automatically accept # # Configure here!!! # ############ @GoodFroms = ( 'still@large.com', # exception #1 'blah@blahblahblah.com', # another exception ); # process the command line options (only one curently) do 'getopts.pl' || die "can't find YAGRIP library, stopped"; Getopts('l') || die ("usage: $0 [-l] where -l logs the result; program checks if stdin is junkmail"); if (defined($opt_l)) { $Logging = 1; } open (MAILRC, "$mailrc") || die "can't open $mailrc for read"; while () { # look for alias lines in the .mailrc if (/^alias\s+[^\s]+\s+([^\s]+)$/) { # add aliased names to our GoodFroms list # note: aliases must NOT have spaces after them! push @GoodFroms, $1; } } ############ # **** EvilFucks **** # Known junk mail patterns. I don't really want to just have # lists, but there are some places that seem to bombard me and # still pass the previous rules AND use different accounts within # that domain. # # Configure here!!! # ############ @EvilFucks = ( 'ink..@', # ink-selling fuckers 'offers@', # various named fuckers 'offer@', # various named fuckers 'offerz@', # various mizspelled named fuckers 'clicknow@', # clicking fuckers 'cosmicoffers@', # various named fuckers 'LuckyDeals@', # lucky fuckers 'coffer@migada.com', # more fuckers 'ombramarketing.com', # fuckers 'opt-in.emailsvc.net', # fuckers 'db.offermonkey.com', # monkey fuckers 'azogle.com', # google fuckers 'azoogle.com', # google fuckers 'reply.mb00.net', # fucking pricks 'justforyounewsletter.com', # just for fuckers 'link2buy.com', # link fuckers 'lists.regtoday1.net', # list fuckers 'dailypromo', # daily fuckers 'daliypromo', # generic daily misspelled fuckers 'zoodak.com', # zoo fuckers 'zudak.com', # zoo fuckers (alternate spelling) 'hispeedmediaoffers.com', # high speed fuckers 'hsmedia-offer.com', # renamed high speed fuckers 'handpickeddeals', # hand picked fuckers 'hispeed-direct.com', # still more high speed fuckers 'maktoob.com', # penis enlargment fuckers 'easilythebestonline.com', # easily the worst fuckers 'ghty5@bluemail.dk', # blue fuckers '@list.*com' # generic, name changing, list fuckers ); while (<>) { if (/^$/) { # End of headers. Check if: # 1. relay, # 2. I'm listed in the To:. # 3. the Received and From lines match, # 4. the ContentType: $rc = system("$command $relay"); if (!$rc) { # non-error exit code (host IS a relay) print "host is relay\njunk\n"; QuitIt (-1); } # end if (!$rc) # error exit code (i.e., host not found => NOT a relay) print "not relay...\n"; print "To: [$To]\n"; if ($To !~ /$me/i) { print "Message not To: me..."; # check the good list foreach $f (@GoodTos) { # check if this comes from the exception list of good addresses if ($To =~ /${f}/i) { print "To [$To] is on our list\ngood\n"; QuitIt (0); } } print "\njunk\n"; # I'm not in the To: list, it's junk QuitIt (-1); } print "...is ok.\n"; print "Received: [$Received] From: [$From] \n"; if (CheckIfEqual(lc $Received, lc $From)) { # non-error exit code print "headers match\n"; #QuitIt(0); } else { # error exit code (junk mail) print "false From\njunk\n"; QuitIt(-1); } if ($ContentType =~ m|text/html| || $ContentType =~ m|multipart/alternative| || $ContentType =~ m|multipart/mixed|) { print "HTML or multipart content\njunk\n"; QuitIt (-1); } else { print "Content type is ok.\ngood\n"; QuitIt (0); } } # end if (^$) # note: we're looking at just the From: headers if (/From: (.*)/i) { $From = $1; if ($From =~ /\<.*\>/) { # From: line of the form: Name name ... # print "<> form\n"; # $From =~ s/.*\@\.(\w+\.\w+)$/$1/; # extract the host after the @ $From =~ s/.*<(.*>)$/$1/; # extract the name between the <>'s $From =~ s/>//; # strip any trailing ">" } elsif ($From =~ /.*\.* \(.*\)/) { # From: line of the form: adddress@site.domain (Name name...) # print "() form\n"; $From =~ s/.*\@(\w+\.\w+) \(.*/$1/; # extract the host after the @ } foreach $f (@EvilFucks) { # check if this comes from the list of known # (personal) offender (sites) if ($From =~ /${f}/i) { print "From [$From] is on our shit list\njunk\n"; QuitIt (-1); } } foreach $f (@GoodFroms) { # check if this comes from the exception list of good addresses if ($From =~ /${f}/i) { print "From [$From] is on our list\ngood\n"; QuitIt (0); } } } # To: and Cc: headers can span lines, just keep appending if so if ($into) { if (/^\s+(.*)/) { # line starts with whitespace, it's a continuation $To .= $1; } else { # else no longer in a To: header $into = 0; } } # match a To: or Cc: header if (/^To: (.*)/i || /^Cc: (.*)/i) { $into = 1; $To .= $1; } if (/^Content-Type: (.*)/i) { $ContentType = $1; } if (!$sawReceived && /Received: from (.*)$/) { $relay = $1; $sawReceived = 1; $Received = $1; #$Received =~ s|^([^ ]+) .*|$1|; # grab the first word # Received: line is of the form: # Received: from club.uec.ac.jp (pa116.sdi.tpnet.pl [213.25.241.116]) # Grab the word between the "(" and the "[" or " " $Received =~ s|^.*\(([^[ ]+) .*|$1|; # grab the last two parts of that word # i.e., the top 2 domain levels, e.g., tpnet.pl in the example above $Received =~ s/.*\.(\w+\.\w+)$/$1/; # extract the IP number of the most recent sender # and set relay to be the numbers in reverse order with # .relays.ordb.org as a suffix. $relay =~ m|\(.*\[([0-9]+)\.([0-9]+)\.([0-9]+)\.([0-9]+)\]\)$|; $relay = "$4.$3.$2.$1.relays.ordb.org"; print "relay is $relay \n"; } } # end while() # Shouldn't get here unless it's a mal-formed mail message. # Exit with non-error code, just in case. print "blork!\n"; QuitIt (0); sub CheckIfEqual { my ($r, $f) = @_; my (@r, @f, $max, $i); $r =~ s/.*@//; # delete everything before the @ if present $f =~ s/.*@//; # delete everything before the @ if present @r = split(/\./, $r); @f = split(/\./, $f); $max = ($#r > $#f)?$#f:$#r; for ($i = 0; $i <= $max; $i++) { if (pop @r ne pop @f) { return 0; } else { } } return 1; } # # A function to exit the program. If Logging is enabled # it'll write an entry to our junkmail tracking logfile. # Exits with the error code it is passed as a parameter. # sub QuitIt { if ($Logging) { # make an entry in our mail tracking log file open (LOG, ">>$logfile") || die "Can't open logfile $logfile for append"; # log the current time and whether it was # junk or not (-1 or 0, respectively) printf LOG "%d %d\n", time(), $_[0]; close LOG; } # and finally exit for real exit $_[0]; }