#############################################################
#
# Sample .procmailrc for semi-automatic whitelist/greenlist
# See http://www.panix.com/~rbean/procmail/
#
#############################################################

SHELL=/bin/sh

# LINEBUF value copied from /etc/procmailrc
LINEBUF=16384

# Where mail is stored, and where procmail's own files live
MAILDIR=$HOME/.maildir
PMDIR=$HOME/.procmail

# Logging
LOGFILE=$PMDIR/log
LOGABSTRACT=all
VERBOSE=no
COMSAT=no

## NL is a newline used to terminate log entries.
## Note that the quoted value ends on the next line.
NL="
"

# WLDIR is the directory that holds the whitelist files
WLDIR=$PMDIR

# TRASH is used by SpamAssassin
#TRASH=$MAILDIR/.Trash

# ENVELOPE is the header line that contains the "envelope address"
ENVELOPE="X-Original-To:"

# DOMAIN is borrowed from Spambouncer.
# It is used here as a sanity check.
DOMAIN=panix.com

# MYEMAIL is a file listing all of my own email addresses.
# The name is borrowed from Spambouncer, but the list is
# generated from the subaddress data file by a shell script.
# It is used to make sure I don't accidentally add one of
# my own addresses to the whitelist.
MYEMAIL=$HOME/.myemail

# SUBRC is an INCLUDERC file generated by a shell script from
# the information in the subaddress data file.  It is a way
# to get data out of a file without grepping it (see below).
SUBRC=$PMDIR/subdata.rc

## $AWORD is, in theory, every character that might ever
## show up in an email address (from Mike Peeler).
## (note that it has a "+" tacked onto the end of it)
AWORD="[]A-Za-z_0-9.*=#|$?!/&~[^+-]+"

####################################################################
#
# Extract the subaddress from $ENVELOPE
#
# Based on a recipe by Dallman Ross
#
# This will match either $LOGNAME+subaddress@ or
# subaddress@$LOGNAME.  If there's no subaddress then
# SUBADDR=$LOGNAME
#
# Note that "\/" only marks the left side of $MATCH, so
# the last condition removes any trailing characters.
#
# The number "9876543210" has no special meaning, it's just
# "a large number that's easy to type".
#
# See "man procmailsc" to understand what "^0" does.
#
# If your ISP uses "-" instead of "+" as the subaddress
# separator, change "$\LOGNAME\+" to "$\LOGNAME\-" in the
# second condition line of the recipe below.

# Unset any previous value of MATCH, just in case
MATCH

:0
* $ 9876543210^0 ^$ENVELOPE \/.+@$\LOGNAME\.
* $ 9876543210^0 ^$ENVELOPE $\LOGNAME\+\/[^@]+
* $ 9876543210^0 ^$ENVELOPE \/$\LOGNAME@
* MATCH ?? ^^\/[^@]+
{
    SUBADDR=$MATCH
}

####################################################################
#
# If there's a subaddress, figure out what it means...
#
# I need to know the corresponding foldername, whitelistname, and
# whether $SUBADDR is the add-code for the whitelist.  I could just
# grep the whole line from the subaddress data file, like this:
#
#   SUBDATA=`fgrep -i "$SUBADDR" $SUBFILE`
#
# But there's a catch-- procmail wants to feed the message to STDIN,
# and there's nothing to receive it, which can cause spurious error
# messages.  This is discussed at:
# http://pm-doc.sourceforge.net/pm-tips-body.html#how_to_run_an_extra
#
# So instead I've used a shell script to generate an INCLUDERC
# file with the data hard-coded into it.  When I edit the
# subaddress data file, I can re-run the script to generate a
# new INCLUDERC file.  For ease of maintenance, it just stuffs the
# data into variables and returns here.  As a bonus, loading an
# INCLUDERC file is more efficient than launching fgrep.
#
# NOTE: The subaddress is interpreted as a regular expression,
# so don't use any characters that might be misinterpreted.
# Specifically that means don't use ^*+?@\|()[]<>$
#
# [ -s $SUBRC ] checks to see if $SUBRC exists and has a
# non-zero length.  This avoids error messages by making
# sure we don't attempt the impossible...

:0
* $ SUBADDR ?? $AWORD
* $ ! SUBADDR ?? $LOGNAME
* $ ? [ -s $SUBRC ]
{
    INCLUDERC=$SUBRC

    ## If WLNAME is empty at this point, then
    ## we have a subaddress but it's not valid.
    ## If it's one that's been "retired" because
    ## it's attracting too much spam, this is a
    ## good opportunity to dump it.  If it's one
    ## that's never been used, then someone is
    ## guessing...
    :0
    * $ ! WLNAME ?? $AWORD
    {
        COMSAT=no
        LOG="**Non-valid subaddress**"$NL

        :0
        /dev/null
    }
}

####################################################################
#
# If $SUBADDR is $ADDCODE, add the addressee to the appropriate
# whitelist, and discard the message.
#
# Optionally, you could deliver the message to $FOLDERNAME if you
# want to keep it as a file copy, but most mail clients have
# other ways to do that.  Note that in either case, the Bcc: or
# X-Original-To: header will contain your secret $ADDCODE, so
# be careful what you do with those filed copies.
#
# The recipe for extracting the address comes from Mike Peeler.
# I'm not using ^TO because I specifically want the first
# address on the To: line, ignoring any other recipients.
# (actually if one address is in angle brackets and one isn't,
# it will favor the one in brackets even if it's not first).
#
# Additional sanity check: make sure it's from me
# (I always use Reply-To, spammers seldom do)
#
# Note that this part only runs when I *send* email, so
# some inefficiencies here are probably OK.
#
# The first two conditions make sure $ADDCODE and $WLNAME were
# actually loaded from $SUBRC.  Without them an empty $ADDCODE
# would expand to an empty pattern in the third condition, which
# presumably matches any subaddress, and an empty $WLNAME would
# point the whitelist path at the $WLDIR directory itself.

:0
* $ ADDCODE ?? $AWORD
* $ WLNAME ?? $AWORD
* $ SUBADDR ?? $ADDCODE
* $ ^Reply-To:.*$LOGNAME.*$DOMAIN
{
    # Unset any previous value of ADDRESSEE, so a stale value
    # can never end up in the whitelist.
    ADDRESSEE

    :0
    * $ ^To:.*<\/$AWORD@$AWORD>
    * $ MATCH ?? ^^\/$AWORD@$AWORD
    {
        ADDRESSEE = $MATCH
    }

    # This only runs if the above recipe failed (E="else")
    :0 E
    * $ ^To:.*\/$AWORD@$AWORD
    {
        ADDRESSEE = $MATCH
    }

    ## At this point I want to make sure that $ADDRESSEE is not
    ## in either $WLNAME or $MYEMAIL before I add it to $WLNAME.
    ## This could be done with a single grep of two files, but
    ## only if both files exist, and I don't want to assume that.
    ## Checking each file separately allows things to fail
    ## gracefully.

    ## Make sure $ADDRESSEE contains something first, so I don't
    ## accidentally add a blank line to the whitelist...
    ##
    ## The second condition here should catch any attempt to add
    ## one of my own addresses to the whitelist, but if you're
    ## worried about partial matches on someone else's address,
    ## you could remove it (see below).
    :0
    * $ ADDRESSEE ?? $AWORD
    * $ ! ADDRESSEE ?? $LOGNAME.*$DOMAIN
    {
        ## If $WLNAME doesn't exist the append (below) will create it,
        ## so it's OK to continue, but if it does exist
        ## I want to avoid duplicates that would have to be
        ## removed later.
        ##
        ## grep -F uses exact matches only (no regular expressions)
        ##   -i means ignore case (email addresses might get downcased)
        ##   -q means don't return the result, just success or failure
        ##   -x means match complete lines only
        ##   -e keeps an address that starts with "-" from being
        ##      taken for an option
        ##
        ## The conditions that pass $ADDRESSEE to a program do NOT
        ## use the "$" flag: with "$", procmail would paste the
        ## address into the command line and the result would be
        ## parsed a second time, so a "$" inside the address
        ## ($AWORD allows one) would be expanded again.  Without
        ## the flag the variable is expanded exactly once, inside
        ## the double quotes.
        :0
        * $ ? [ -s $WLDIR/$WLNAME ]
        * ? grep -Fiqx -e "$ADDRESSEE" "$WLDIR/$WLNAME"
        {
            # Address already in whitelist-- discard silently
            COMSAT=no
            LOGABSTRACT=no

            :0
            /dev/null
        }

        ## Make sure it's not one of my own addresses.
        ## This should be caught by:
        ##   * $ ! ADDRESSEE ?? $LOGNAME.*$DOMAIN
        ## above, but checking $MYEMAIL may be preferable
        ## if you're worried about partial matches.
        :0
        * $ ? [ -s $MYEMAIL ]
        * ? grep -Fiqx -e "$ADDRESSEE" "$MYEMAIL"
        {
            COMSAT=no
            LOGABSTRACT=no

            :0
            /dev/null
        }

        ## If I get here then it's OK to add the address.
        ## The append to $WLNAME is a side-effect of the
        ## condition line (which returns "true" as long as
        ## the file can be written).
        ## This idea came from:
        ## http://pm-doc.sourceforge.net/pm-tips-body.html#how_to_run_an_extra
        :0
        * ? echo "$ADDRESSEE" >>"$WLDIR/$WLNAME"
        {
            COMSAT=no
            LOG="@@Add to Whitelist: $ADDRESSEE@@"$NL

            :0
            /dev/null
        }
    }
}

####################################################################
#
# This would be a good place to check for viruses, because they
# attack address book files, so you might get one from someone
# in your whitelist.
#
# Some good virus checkers are listed at:
# http://www.ii.com/internet/robots/procmail/qs/#viruses

####################################################################
#
# Whitelist:
# If $FROM is in $WLNAME, then deliver to $FOLDERNAME, otherwise
# fall through.
#
# If $FOLDERNAME doesn't exist, procmail will create it.
#
# Getting $FROM is a bit of a hack, but a lot of people do it
# this way-- it's telling formail to generate a reply header,
# and then extracting the "To:" address from that header.
# This will choose the "best" address from among "Reply-To:",
# "From:", "Sender:", etc (note that leaving out the -t flag
# would change its priorities in ways that you might not want).
#
# I could check for both "From:" and "Reply-To:" but this seems
# to Do The Right Thing, and it's simpler.
#
# The grep condition deliberately has no "$" flag.  $FROM comes
# straight from the message headers, so it is untrusted; with the
# "$" flag procmail would paste its text into the command line and
# the result would be parsed a second time, letting quotes or
# backquotes in a crafted address run commands.  Without the flag
# the variable is expanded exactly once, inside the double quotes.
#
# grep -F uses exact matches only (no regular expressions)
#   -i means ignore case, -q means just report success or failure,
#   -x means match complete lines only, and -e keeps an address
#   that starts with "-" from being taken for an option.

:0
* $ WLNAME ?? $AWORD
* $ ? [ -s $WLDIR/$WLNAME ]
{
    FROM=`formail -rtzxTo:`

    :0
    * ? grep -Fiqx -e "$FROM" "$WLDIR/$WLNAME"
    {
        LOG="==Whitelist OK=="$NL

        :0
        $MAILDIR/$FOLDERNAME
    }
}

####################################################################
#
# Spam filtering goes here
#
# Before I call the spam filter, I do a few simple checks for things
# that are pretty much guaranteed to be spam.  For example, I only
# accept Bcc's from people who are on my whitelist, so any that get
# this far are spam (this used to catch almost all spam, and it still
# catches quite a bit).  I also delete anything with two "hi-bit"
# characters in a row in the Subject line (there shouldn't be
# *any*, but I check for two in case someone gets carried away
# with fancy punctuation marks-- many people don't realise those
# are non-standard).  Finally, I've never seen a real person
# use my login name in the Subject line, but a surprising number
# of spammers still do that.
#
# These checks aren't really necessary, but they save the
# overhead of running a big complicated spam filter on every
# message.

################
## Bcc: mail

:0
* $ ! ^TO$LOGNAME
{
    COMSAT=no
    LOG="**Bcc Mail**"$NL

    :0
    /dev/null
}

################
## Hi-bit characters in subject
## In the brackets is caret-tab-space-dash-tilde
## (space through tilde is all printable characters,
## tab is also acceptable, caret negates the set).
## Be careful when editing: the character right after
## each caret must be a literal TAB, followed by a space.

:0
* ^Subject.*[^	 -~][^	 -~]
{
    COMSAT=no
    LOG="**Hi-Bit**"$NL

    :0
    /dev/null
}

################
## $LOGNAME in subject

:0
* $ ^Subject.*$LOGNAME
{
    COMSAT=no
    LOG="**$LOGNAME in Subject**"$NL

    :0
    /dev/null
}

## INCLUDERC=/net/local/filters/rc.spamassassin

####################################################################
#
# Now that we're past the spam filter, deliver to the appropriate
# folder.

:0
* $ FOLDERNAME ?? $AWORD
{
    LOG="==Passed Spam Filter=="$NL

    :0
    $MAILDIR/$FOLDERNAME
}

####################################################################
#
# Anything left?  Deliver it somewhere.
#
# The only thing that should get this far is mail that has no
# subaddress, and therefore no $FOLDERNAME.  (Mail with a
# non-valid subaddress was discarded earlier.)

LOG="==No Subaddress=="$NL

:0:
$DEFAULT

# EOF