#!/usr/bin/perl -w
#
# Original Version
# ----------------
#	http://www.UCEAS.net/Scripts/AntiSpam.Collector.pl
#
# Release License
# ---------------
#	GPL
#
#
# Purpose
# -------
#	Separate the incoming spam into different files
#
# Process
# -------
#	Collect incoming spam at:  SpamCollector@UCEAS.net
#	Send an acknowledgement status email back to the submitter of the unwanted spam
#	 - majordomo config vs mailman
#	Process the new spam collected
#	 - update /etc/mail/spam_domains
#	 - update /etc/mail/spammers
#	Save original incoming spam submission for the day for "regeneration"
#
#
# Spam Prevention
# ---------------
#	use http://Spam.uceas.net  -- to view the daily statistics
#	download/install our "spam db files" into your mail server
#	( sendmail or exim or ?? )
#
#
# To Do
# -------
#	- watch out for From ( >From ) at the beginning of a sentence in the body
#	  of the emails
#	  - separator lines are now validated ( sender, day, month, date, time, year ),
#	    a body line that happens to look exactly like a separator is still not solved
#	- Fix "submitting user" as NOT the spammer
#	- error checking is still minimal
#	- folders for ~/Mail/1999, ~/Mail/2000 ~/Mail/customers, ~/Mail/friends
#	- watch out for additional incoming emails ( file locking )
#	  - the *.lock file is only advisory, it is NOT an atomic dot-lock
#
#	- Collect data from other "spam collection" servers ( distributed database )
#
#
# Client Usage ( for submitting their unwanted spam )
# -------------
#	submit.spam.sh
#	  - aka -
#	AntiSpam.Collector.pl -update -submit -user /var/spool/mail/your-mailbox-with-spam
#
#
# SpamCollector Usage ( for processing )
# -------------------
#	mailbox2file.sh
#	  - aka -
#	AntiSpam.Collector.pl -update -process -user /var/spool/mail/spamcollector -web ww -orig xx -head yy
#		ww = top of the html directory ( /home/httpd/html )
#		xx = html directory of Original spam ( SpamCollector.Orig )
#		yy = html directory of spam headers  ( SpamCollector.Head )
#
#	http://your-site.com/SpamCollector.Head
#	http://your-site.com/SpamCollector.Orig
#
#
# Setup & Permissions
# --------------------
#	chown root.mj html/Scripts/AntiSpam.Collector.pl
#	chmod 754     html/Scripts/AntiSpam.Collector.pl
#
#	crontab -e
#	...
#	#
#	# mailbox2file.sh invokes AntiSpam.Collector.pl to process /var/spool/mail/spamcoll
#	#
#	05,35 * * * * /home/httpd/html/Scripts/mailbox2file.sh
#	08,38 * * * * /home/httpd/html/Scripts/mailbox2file.sh test
#
#
# Create the Working directories
# ------------------------------
#	cd /home/httpd/html
#
#	mkdir SpamCollector.Head SpamCollector.Orig SpamCollector.Test SpamCollector.Test.Orig
#	chmod 770 SpamCollector.Head SpamCollector.Orig SpamCollector.Test SpamCollector.Test.Orig
#	chown WebMaster.mj SpamCollector.Head SpamCollector.Orig SpamCollector.Test SpamCollector.Test.Orig
#
#
#
# 18-Oct-02 amo Date-of-Birth
# 23-Dec-02 amo Added real code and comments - Ver 0.2 release
# 29-Dec-02 amo Ver-0.3 Fixed -orig and -head
# 02-Jun-03 amo Ver-0.4 Added SpamCollectorTest or SpamCollector
# 13-May-03 amo Added chmod 644 *.orig
#
#
use strict;
use warnings;

use File::Copy qw( copy move );
use File::Path qw( make_path );

#
# --------------------- User Defined Variables/Options -------------------------
#
my $DEBUG = 0;
#
# Allow users to submit the unwanted spam left in their /var/spool/mailbox
#
my $SubmitSpam  = 0;		# user sending in their spam into the repository
my $ProcessSpam = 0;		# server processing the collected spam
my $Update      = 0;		# update the script before processing
#
# if ~/Mail/Family/xxx, ~/Mail/Friends/yyy and ~/Mail/Customers/zzz...
#	- use those names ( family, friends, .. ) as folders
#
# The default spam-filled mailbox to convert and save as files
#
#y $SRC = "/var/spool/mail/root";
#y $SRC = "/var/spool/mail/your-left-over-spam";	# your mailbox with the spam
my $SRC = "/var/spool/mail/spamcollector";		# incoming spam mailbox
#
my $WEB = "/home/httpd/html";		# top of the web tree
my $DST = "SpamCollector.Head";		# Spam database for web browsing
my $ORG = "SpamCollector.Orig";		# original spam report
#
# --------------------- End of user config -------------------------------------
#
my $NM  = "AntiSpam.Collector.pl";
my $Ver = "0.5test";			# Release Version
#
# Where to Find the binaries on your System
# -----------------------------------------
# ( date, mv, mkdir, touch, chmod and cp are no longer needed, perl does that itself )
#
my $WGET = "/usr/bin/wget";
#
# Email address to submit your Unwanted spam
#
my $SpamCollectorEmail = "SpamCollector\@UCEAS.net";
#
# State shared between process_spam, check_from and spam_done
#
my $SpamSubmitorFile   = "Undefined";	# yr/mon/date/time.Submitor, assigned once
my $SpamSubmitorHeader = "Undefined";	# true while the submitter's header is being saved
my $SpamHandle;				# output handle of the spam file being written
#
# Month name to two digit month number
#
my %MONTH_NUMBER = (
    Jan => "01", Feb => "02", Mar => "03", Apr => "04",
    May => "05", Jun => "06", Jul => "07", Aug => "08",
    Sep => "09", Oct => "10", Nov => "11", Dec => "12",
);

#
# Run only when executed as a script, so that the helper subs can be loaded
# ( require'd ) by a test without touching any mailbox
#
main() unless caller;

#
# =================================================================
#
# Main driver: parse the options, check for updates, then submit and/or process
#
sub main {
    cmd_opt();

    print "..$NM-$Ver..\n";		# which version

    check_updates();

    submit_spam()  if $SubmitSpam;
    process_spam() if $ProcessSpam;

    print "Done\n\n";
    exit 0;
}    # main

#
# Current local time in the same shape year_mon_date_time returns:
#	( "2002", "12", "29", "09.00.06" )
# Uses localtime instead of parsing the locale dependent output of date(1)
#
sub current_ymdt {
    my ( $sec, $min, $hour, $mday, $mon, $year ) = localtime;

    return (
        sprintf( "%04d", $year + 1900 ),
        sprintf( "%02d", $mon + 1 ),
        sprintf( "%02d", $mday ),
        sprintf( "%02d.%02d.%02d", $hour, $min, $sec ),
    );
}    # current_ymdt

#
# Feed the content of $file to the stdin of @command, without any shell
#	returns 1 if everything was written and the command exited with 0
#	returns 0 ( after printing the reason ) otherwise
#
sub send_file_to_command {
    my ( $file, @command ) = @_;

    my $in;
    if ( !open( $in, '<', $file ) ) {
        print "ERROR: Cannot read $file: $!\n";
        return 0;
    }

    # a command that exits early must not kill us with SIGPIPE
    local $SIG{PIPE} = 'IGNORE';

    my $out;
    if ( !open( $out, '|-', @command ) ) {
        print "ERROR: Cannot run $command[0]: $!\n";
        close $in;
        return 0;
    }

    my $ok = 1;
    while ( my $line = <$in> ) {
        if ( !print {$out} $line ) {
            $ok = 0;
            last;
        }
    }
    close $in;

    # close() on a pipe waits for the command and reflects its exit status
    close $out or $ok = 0;

    return $ok;
}    # send_file_to_command

#
# Just Submit the Spam ( just send it in )
# --------------------
#	mails the whole mailbox $SRC to $SpamCollectorEmail and, only when that
#	worked, renames the mailbox to $SRC.mon.date.hh.mm.ss
#
sub submit_spam {
    #
    # Spam to submit to the repository for analysis
    #
    if ( !-f $SRC ) {
        print "\nERROR: Aborting, No such mailbox=$SRC\n\n";
        exit 1;
    }

    # shell equivalent, for display only
    my $cmd = "mail -v -s 'incoming spam' $SpamCollectorEmail < $SRC ";

    my @mail = ( "mail", "-v", "-s", "incoming spam", $SpamCollectorEmail );
    if ( !send_file_to_command( $SRC, @mail ) ) {
        print "\nERROR: Aborting, could not submit $SRC, the mailbox was left untouched\n";
        print "\t$cmd\n\n";
        exit 1;
    }

    print "\n";
    print "Finished Submitting the unwanted spam in $SRC\n";
    print "You should receive an automated confirmation reply in a few minutes\n";
    print "\t$cmd\n\n";

    # keep the submitted mailbox around under a time stamped name
    my ( $yr, $mid, $date, $uniqtime ) = current_ymdt();
    my $saved = "$SRC.$mid.$date.$uniqtime";
    rename( $SRC, $saved )
        or print "WARNING: Could not rename $SRC to $saved: $!\n";

    return;
}    # submit_spam

#
# Write to the spam file currently open, if any
#	lines seen before the first From separator have no file to go to and
#	are dropped
#
sub write_spam {
    my (@text) = @_;

    return 0 unless defined $SpamHandle;
    return print {$SpamHandle} @text;
}    # write_spam

#
# Where to put the Processed Spam
# -------------------------------
#	Splits the mailbox $SRC at every From / >From separator, saves the header
#	of each message ( the body is truncated ) and finally moves the whole
#	mailbox to WEB/ORG/<first From time>.Submitor.orig
#
#	From  -- will be the user submitting the unwanted spam
#	>From -- will be the start of the Spam
#
sub process_spam {
    # only warnings, check_spam_dir creates the tree when it is needed
    print "WARNING: Spam directory=$DST does NOT exist\n\n" if !-d "$WEB/$DST";
    print "WARNING: Spam directory=$ORG does NOT exist\n\n" if !-d "$WEB/$ORG";

    if ( !-f $SRC ) {
        print "\nERROR: Aborting, No such mailbox=$SRC\n\n";
        exit 1;
    }

    if ($DEBUG) {
        printf "Processing incoming Spam=%s..\n\t%d bytes..\n", $SRC, ( -s $SRC ) || 0;
    }

    if ( -z $SRC ) {
        print "\nWARNING: No un-processed incoming spam=$SRC\n\n";
        return 0;
    }

    #
    # Need to lock it to prevent other incoming emails,
    # since we are gonna move the mailbox away after processing
    # NOTE: this only creates the file ( like touch ), it is advisory at best
    #
    my $LOCK = "${SRC}.lock";
    if ( open( my $lock_fh, '>>', $LOCK ) ) {
        close $lock_fh;
    }
    else {
        print "WARNING: Could not create lock=$LOCK: $!\n";
    }

    print "Processing incoming Spam=$SRC..lock=$LOCK..\n";

    my $mailbox;
    if ( !open( $mailbox, '<', $SRC ) ) {
        print "ERROR: Cannot read $SRC..\n";
        unlink $LOCK;		# do not leave a stale lock behind
        exit 1;
    }

    my $header   = 1;		# true while inside the header of a message
    my $spamtype = "";		# "Submitor" or "spam"
    my $spam     = "";		# relative name of the file being written

    #
    # Read the Incoming Emails
    # ------------------------
    #
    while ( my $line = <$mailbox> ) {
        chomp $line;

        # a valid separator closes the previous file and starts a new one
        if ( $line =~ /^>?From / ) {
            my @started = check_from($line);
            ( $header, $spamtype, $spam ) = @started if @started;
        }

        # so that an email reader can read the spam
        $line =~ s/^>From /From /;

        # Process/Save only the header, original content is intact in *.orig
        write_spam( $line, "\n" ) if $header;

        #
        # Blank line between email header and email body
        #
        if ( $header && $line eq "" ) {
            $header = 0;

            my $msg = "This is the spammer header info, the spam itself was truncated ";
            $msg = "This user submited their unwanted Spam" if $spamtype eq "Submitor";

            write_spam("-- $msg \n-- See $ORG/$SpamSubmitorFile.orig for the spam in its entirty\n");
            write_spam("\n");
        }
    }

    spam_done( 1, $spam );	# Close the last spam
    close $mailbox;

    if ( $SpamSubmitorFile eq "Undefined" ) {
        # nothing was recognised, so there is no time stamp to file it under
        print "WARNING: No From lines found in $SRC, mailbox left in place\n";
        unlink $LOCK;
        return 0;
    }

    #
    # use the First From time as its unique Date/Time stamp
    #
    my $orig = "$WEB/$ORG/$SpamSubmitorFile.orig";
    print "mv $SRC $orig\n";

    # move() also works across filesystems ( /var/spool -> /home )
    if ( !move( $SRC, $orig ) ) {
        print "ERROR: Could not move $SRC to $orig: $!\n";
        unlink $LOCK;
        exit 1;
    }

    # 13-May-03 amo Added chmod 644 *.orig
    chmod( 0644, $orig ) or print "WARNING: Could not chmod 644 $orig: $!\n";

    unlink $LOCK;

    return;
}    # process_spam

#
# Pick the date out of an mbox separator line
#	From user@Their-Domain.com Mon Dec 23 11:36:36 2002
#	>From user@Their-Domain.com Mon Dec 23 11:36:36 PST 2002
#
# returns ( year, Mon, date, hh:mm:ss ), or an empty list when the line is
# not a separator ( e.g. "From the desk of ..." inside the body of an email )
#
sub parse_from_line {
    my ($line) = @_;

    return unless defined $line;
    return
        unless $line =~ m{
            \A >? From \s+
            \S+ \s+                             # envelope sender
            [A-Za-z]{3} \s+                     # day of the week
            ([A-Za-z]{3}) \s+                   # month name
            (\d{1,2}) \s+                       # day of the month
            (\d{1,2}:\d{2}(?::\d{2})?) \s+      # time
            (?: (?: [A-Za-z]{2,5} | [+-]\d{4} ) \s+ )?   # optional time zone
            (\d{4}) \b                          # year
        }x;

    my ( $mon, $date, $time, $year ) = ( ucfirst( lc $1 ), $2, $3, $4 );
    return unless exists $MONTH_NUMBER{$mon};

    return ( $year, $mon, $date, $time );
}    # parse_from_line

#
# Use the Date stamp of the spam as its unique filename, ( From-Date.Process-Date )
#
#	From  -- will be the user submitting the unwanted spam
#	>From -- will be the start of the Spam
#
# returns ( 1, spamtype, relative spam file name ) after opening the new
# spam file, or an empty list when $line is not a valid separator
#
sub check_from {
    my ($line) = @_;

    my ( $Year, $Mon, $Date, $Time ) = parse_from_line($line)
        or return;

    print "..From=$line..\n";

    #
    # Check the Spam Directory tree
    # -----------------------------
    my $ymdt = check_spam_dir( $Year, $Mon, $Date, $Time );	# yr/mon/date/hh.mm.ss

    #
    # Close the previous spam and start the new one
    # ---------------------------------------------
    my ( $spamtype, $spam ) = spam_done( 0, $ymdt );	# relative to WEB/DST or WEB/ORG

    return ( 1, $spamtype, $spam );	# start new spam
}    # check_from

#
# Done extracting the headers of the submittor or the unwanted Spam
# -----------------------------------------------------------------
#	$done = 1 : only close the spam file currently open
#	$done = 0 : close it and open the file for the next message
#	$spam     : year/mon/date/hh.mm.ss
#
# The first file opened belongs to the submitter and goes to
# WEB/ORG/$spam.Submitor, every later one is a spam header in WEB/DST/$spam
#
# returns ( "Submitor" or "spam", the final relative file name )
#
sub spam_done {
    my ( $done, $spam ) = @_;

    my $SpamType = "spam";

    # close previous spam
    if ( defined $SpamHandle ) {
        close $SpamHandle
            or print "WARNING: Could not finish writing the previous spam: $!\n";
        undef $SpamHandle;
    }

    # Nothing left to do
    return ( $SpamType, $spam ) if $done;

    #
    # Check for existing spam filenames
    #
    if ( -f "$WEB/$DST/$spam" || -f "$WEB/$ORG/$spam.Submitor" ) {
        print "WARNING: spam=$DST/$spam already exists\n" if $DEBUG;

        # Add local processing time for unique spamFileName, plus a counter
        # since several collisions can happen within the same second
        my $uniqtime  = ( current_ymdt() )[3];
        my $candidate = "$spam,$uniqtime";
        my $count     = 0;
        while ( -e "$WEB/$DST/$candidate" || -e "$WEB/$ORG/$candidate.Submitor" ) {
            $count += 1;
            $candidate = "$spam,$uniqtime.$count";
        }
        $spam = $candidate;
    }

    #
    # First (From) time is the submittor of the unwanted spam
    #
    if ( $SpamSubmitorFile eq "Undefined" ) {
        $SpamSubmitorHeader = 1;	# about to process the submitors header info
        $spam .= ".Submitor";
        $SpamSubmitorFile = $spam;	# assign it once
        $SpamType         = "Submitor";
    }
    else {
        $SpamSubmitorHeader = 0;	# rest of the file is all spammers
    }

    #
    # Full path to spam file only where needed
    # -----------------------------------------
    my $path = $SpamType eq "Submitor" ? "$WEB/$ORG/$spam" : "$WEB/$DST/$spam";

    if ( !open( $SpamHandle, '>', $path ) ) {
        print "ERROR: Could not open file=$spam..\n";
        undef $SpamHandle;	# write_spam skips the output for this message
    }

    print ".... new $SpamType: $spam\n";

    return ( $SpamType, $spam );	# submitor or spam
}    # spam_done

#
# Create $dir ( and its parents ) when it does not exist yet
#	$label is "Head" or "Orig", used in the message only
#
sub ensure_dir {
    my ( $label, $dir ) = @_;

    return 1 if -d $dir;

    print "WARNING: Creating new SpamDirectory.$label=$dir..\n";
    make_path( $dir, { error => \my $errors } );
    if ( $errors && @{$errors} ) {
        print "ERROR: Could not create directory=$dir..\n";
        return 0;
    }

    return 1;
}    # ensure_dir

#
# Check where to Save all the incoming Spams
# ------------------------------------------
#	arguments: 2002, Dec, 29, hh:mm:ss
#	makes sure WEB/DST/yr/mon/date and WEB/ORG/yr/mon/date exist
#	returns "yr/mon/date/hh.mm.ss"
#
sub check_spam_dir {
    my ( $y, $m, $d, $t ) = @_;

    #
    # Create unique filename for each incoming spam
    #
    my ( $yr, $mid, $date, $time ) = year_mon_date_time( $y, $m, $d, $t );

    ensure_dir( "Head", "$WEB/$DST/$yr/$mid/$date" );
    ensure_dir( "Orig", "$WEB/$ORG/$yr/$mid/$date" );

    return "$yr/$mid/$date/$time";	# ymdt
}    # check_spam_dir

#
# Convert ( 2002, Dec, 29, 09:00:06 ) into ( 2002, 12, 29, 09.00.06 )
#	- the month name becomes a two digit number ( "00" if it is not a month )
#	- the date is zero padded
#	- the colons of the time become dots, so it can be part of a filename
#
# Multiple people could have gotten a spam at the same mon/day/hh.mm.ss,
# spam_done adds the distinction
#
sub year_mon_date_time {
    my ( $y, $m, $d, $t ) = @_;

    $m = "" unless defined $m;
    $t = "" unless defined $t;

    my $mid = $MONTH_NUMBER{ ucfirst lc $m };
    $mid = "00" unless defined $mid;

    my $date = sprintf "%02d", $d || 0;	# 01 for the 1st of the month

    ( my $time = $t ) =~ s/:/./g;	# use time as part of the filename

    return ( $y, $mid, $date, $time );
}    # year_mon_date_time

#
# Check for any updates
# ---------------------
#	Downloads the master copy and compares it with this script ( $0 )
#	 - no differences        : just reports that
#	 - differences, -update  : installs the new copy and re-executes it
#	 - differences otherwise : prints them and exits with 1
#
# NOTE(security): the master copy comes over plain http and lands in a
# predictable /tmp file before replacing this script, which is typically run
# from cron; nothing authenticates the download
#
sub check_updates {
    if ( !-x $WGET ) {
        print "\n";
        print "WARNING: Could NOT find $WGET for updating $0..\n";
        return 0;		# just return, pretend its up to date
    }

    my $url      = "http://www.UCEAS.net/Scripts/$NM";
    my $download = "/tmp/$NM";

    print "\n";
    print "Checking against the master copy: http://www.UCEAS.net/Scripts \n";
    print "\t $WGET -q -O $download $url \n" if $DEBUG;

    #
    # Get the Latest copy and compare it
    # ----------------------------------
    unlink $download;
    if ( system( $WGET, "-q", "-O", $download, $url ) != 0 || !-s $download ) {
        print "WARNING: Could NOT download $url, skipping the update check\n";
        return 0;
    }

    my @diffs;
    if ( open( my $diff_fh, '-|', "diff", $download, $0 ) ) {
        @diffs = <$diff_fh>;
        close $diff_fh;		# diff exits with 1 when the files differ, not an error
    }
    else {
        print "WARNING: Could NOT run diff, skipping the update check\n";
        return 0;
    }

    if ( !@diffs ) {
        print "\t $0 is up to date\n";
        return 0;
    }

    #
    # Found Differences
    #
    my $update = "chmod 754 $download ; cp $download $0 ";

    print "\n";
    print "WARNING: Differences encountered, Please update first\n";
    print "\t $0 -update \n";
    print "\t -- or --\n";
    print "\t $update \n\n";

    print " diff $download $0 \n";
    print join( " ", @diffs ), "\n";
    print "\n";

    exit 1 if $Update != 1;

    # the freshly installed copy must not differ again, do not loop forever
    if ( $ENV{ANTISPAM_COLLECTOR_RESPAWNED} ) {
        print "ERROR: $0 still differs from the master copy after updating\n";
        exit 1;
    }

    print "WARNING: Updating: $update\n";
    if ( !copy( $download, $0 ) ) {
        print "ERROR: Could not update $0: $!\n";
        exit 1;
    }
    chmod( 0754, $0 ) or print "WARNING: Could not chmod 754 $0: $!\n";

    #
    # now need to respawn with the new updated script
    # ------------------------------------------------
    # replaces this process and keeps the original command line options
    #
    $ENV{ANTISPAM_COLLECTOR_RESPAWNED} = 1;
    exec { $^X } $^X, $0, @ARGV
        or die "ERROR: Could not respawn $0: $!\n";
}    # check_updates

#
# Help File
# ------------
#	prints the options and exits with 1
#
sub usage {
    print "\n";
    print "$NM\n";
    print "\t -h \t these help files\n";
    print "\t -v \t which version is this\n";
    print "\t -update \t check for latest version and updates before executing\n";
    print "\n";
    print " For Users:\n";
    print "\t -submit \t\t\t Submit your unwanted spam to the SpamCollector Servers \n";
    print "\t -test \t\t\t Submit your unwanted spam to the 'test' SpamCollector Servers \n";
    print "\t -user /var/spool/mail/you \t your mailbox with spam in it\n";
    print "\n";
    print "\n";
    print " For SpamCollector Servers:\n";
    print "\t -process \t\t\t Processing the incoming unwanted spam from submitters \n";
    print "\t -user /var/spool/mail/spamcoll \t incoming spam to process \n";
    print "\t -web /home/httpd/html/SpamCollector \t Deposit Processed Spam here\n";
    print "\t -head SpamCollector.Head \t\t directory ( below -web ) for the spam headers\n";
    print "\t -orig SpamCollector.Orig \t\t directory ( below -web ) for the original spam\n";
    print "\n";
    print "\n";

    exit 1;
}    # usage

#
# Value of an option that needs one ( -user, -web, -head, -orig )
#	takes the next word off @{$args}, shows the usage when there is none
#
sub option_value {
    my ( $option, $args ) = @_;

    if ( !@{$args} ) {
        print "\nERROR: Option $option needs a value\n";
        usage();
    }

    return shift @{$args};
}    # option_value

#
# Process command line options
# -----------------------------
#	sets the file scoped configuration from @ARGV ( which is left intact,
#	check_updates needs it to respawn )
#
#	An explicit -user always wins over the default mailbox implied by
#	-process / -submit / -test, no matter in which order they are given
#
sub cmd_opt {
    my @args = @ARGV;

    my $explicit_src;		# from -user
    my $default_src;		# implied by -process or -submit

    while (@args) {
        my $argv = shift @args;

        if ( $argv eq "-h" ) {
            usage();		# help file

        }
        elsif ( $argv eq "-v" ) {
            print "$NM-$Ver\n\n";	# which version
            exit 0;

        }
        elsif ( $argv eq "-update" ) {
            $Update = 1;	# check for updates before executing

        }
        elsif ( $argv eq "-process" ) {
            $ProcessSpam = 1;	# Process the incoming unwanted Spam
            $default_src = "/var/spool/mail/spamcoll";

        }
        elsif ( $argv eq "-submit" || $argv eq "-test" ) {
            $SubmitSpam  = 1;	# Submit your unwanted Spam
            $default_src = "/var/spool/mail/your-mailbox-with-unwanted-spam";

            $SpamCollectorEmail = "SpamCollectorTest\@UCEAS.net" if $argv eq "-test";

        }
        elsif ( $argv eq "-user" ) {
            $explicit_src = option_value( $argv, \@args );	# use /var/spool/mail/USER

        }
        elsif ( $argv eq "-web" ) {
            $SubmitSpam = 0;	# we are NOT submitting it, but are processing the incoming spam
            $WEB = option_value( $argv, \@args );	# use /home/httpd/html

        }
        elsif ( $argv eq "-head" ) {
            $SubmitSpam = 0;	# we are NOT submitting it, but are processing the incoming spam
            $DST = option_value( $argv, \@args );	# use WEB/SpamCollector.Head

        }
        elsif ( $argv eq "-orig" ) {
            $SubmitSpam = 0;	# we are NOT submitting it, but are processing the incoming spam
            $ORG = option_value( $argv, \@args );	# use WEB/SpamCollector.Orig

        }
        else {
            print "WARNING: Ignoring unknown option=$argv\n";
        }
    }

    if ( defined $explicit_src ) {
        $SRC = $explicit_src;
    }
    elsif ( defined $default_src ) {
        $SRC = $default_src;
    }

    return;
}    # cmd_opt
#
# End of file