#!/usr/bin/perl
#
# a program to print out statistics on our junkmail filter
#
# usage: jstat startdate [starttime] enddate [endtime]
#
# where a date is MM/DD/YYYY and a time is HH:MM[:SS]
# (a missing time, or missing time fields, default to 0)
#
# jstat looks at all log entries whose timestamp lies in that
# interval (both ends inclusive) and prints:
#
#   the start and end of the interval as epoch seconds
#   # of messages in the interval
#   # of messages filed as good / filed as junk
#   # of messages improperly filed as good / as junk
#   # of messages that are actually good / actually junk
#   % of total messages that are actually junk
#   % of false positives (good mail filed as junk)
#   % of false negatives (junk mail filed as good)
#
# Each log line is matched against "TIMESTAMP VERDICT [FLAG]":
#   TIMESTAMP  is compared against the timelocal() values of the interval
#   VERDICT    -1 means filed as junk, 0 means filed as good
#   FLAG       1 means the message was filed improperly
# Lines that do not match this shape are skipped.
#

use strict;
use warnings;
use Time::Local;

# Print the usage message and terminate with a failure status.
sub usage {
    print "usage: $0 startdate [starttime] enddate [endtime]\n";
    print "\twhere date is MM/DD/YYYY and time is HH:MM[:SS]\n";
    exit -1;
}

# Split "HH:MM[:SS]" into (hour, minute, second).
# Fields that are missing or empty come back as 0, so "12:30" never
# hands an undefined seconds value to timelocal().
sub parse_time {
    my ($text) = @_;
    my ($hour, $minute, $second) = split(/:/, $text, 3);
    return map { defined $_ && length $_ ? $_ : 0 } ($hour, $minute, $second);
}

# Return $part as a percentage of $whole.
# A zero $whole yields 0 instead of dying with a division-by-zero error,
# so an empty interval still produces a complete report.
sub percent {
    my ($part, $whole) = @_;
    return $whole ? 100.0 * $part / $whole : 0;
}

my $logfile = "$ENV{'HOME'}/bin/junkmail.log";

usage() if @ARGV < 2 || @ARGV > 4;

my ($hour1, $minute1, $second1) = (0, 0, 0);
my ($hour2, $minute2, $second2) = (0, 0, 0);

# grab the first argument, the starting date
my $arg = shift @ARGV;
my ($mon1, $day1, $year1) = split(/\//, $arg);

# grab the second argument: either the starting time or the ending date
$arg = shift @ARGV;
if ($arg =~ /:/) {
    # starting time is specified
    ($hour1, $minute1, $second1) = parse_time($arg);

    # the next argument must then be the ending date
    $arg = shift @ARGV;
    usage() unless defined $arg;
}

# grab the ending date
my ($mon2, $day2, $year2) = split(/\//, $arg);

$arg = shift @ARGV;
if (defined $arg) {
    # end time is specified
    ($hour2, $minute2, $second2) = parse_time($arg);
}

# timelocal() wants a zero-based month
my $starttime = timelocal($second1, $minute1, $hour1, $day1, $mon1 - 1, $year1);
my $endtime   = timelocal($second2, $minute2, $hour2, $day2, $mon2 - 1, $year2);

print "$starttime $endtime \n";

# step through the logfile
my ($good, $junk, $badgood, $badjunk) = (0, 0, 0, 0);

open(my $log, '<', $logfile)
    or die "can't open $logfile for read: $!";

while (my $line = <$log>) {
    # skip lines that are not log entries; without this check the
    # captures of the previous line would be counted a second time
    next unless $line =~ /(\d+) (-?\d+) ?(\d+)?/;
    my ($stamp, $verdict, $flag) = ($1, $2, $3);

    next unless $starttime <= $stamp && $stamp <= $endtime;

    # the flag field is optional, so it may be undefined
    my $misfiled = defined $flag && $flag == 1;

    if ($verdict == -1) {
        $junk++;
        $badjunk++ if $misfiled;
    }
    elsif ($verdict == 0) {
        $good++;
        $badgood++ if $misfiled;
    }
}

close $log;

# good is actually good + misfiled junk - misfiled good
# junk is actually junk - misfiled junk + misfiled good
my $comp        = $badjunk - $badgood;
my $total       = $junk + $good;
my $actual_good = $good + $comp;
my $actual_junk = $junk - $comp;

# print the statistics
printf("%d total messages\n", $total);
printf("%d filed as good, %d filed as junk\n", $good, $junk);
printf("%d misfiled as good, %d misfiled as junk\n", $badgood, $badjunk);
printf("%d actual good, %d actual junk\n", $actual_good, $actual_junk);
printf("Junkmail is %5.2f%% of total mail\n", percent($actual_junk, $total));
printf("False positive is: %5.2f%%, false negative is: %5.2f%%\n",
    percent($badjunk, $actual_good), percent($badgood, $actual_junk));