#!/usr/bin/perl
#
# clamnailer - convert Julian Field's anti-phishing address list into
# a ClamAV database. Note that a little adaptation is required to
# install the scamnailer.ndb file into clamd's database directory.
#
# See http://www.scamnailer.info/
# http://www.jules.fm/Logbook/files/anti-phishing-v2.html
#
# Written by Tony Finch <dot@dotat.at> <fanf2@cam.ac.uk>
# at the University of Cambridge Computing Service.
# You may do anything with it, at your own risk.
# http://creativecommons.org/publicdomain/zero/1.0/
#
# $Cambridge: hermes/conf/clamav/sbin/clamnailer,v 1.9 2009/10/14 15:32:30 fanf2 Exp $

use warnings;
use strict;

use POSIX;

# Where clamd keeps its signature databases.
my $db = "/var/db/clamav";
# Where the ScamNailer downloads and our working files are kept.
my $work = "/var/db/clamnailer";
# Base URL the ScamNailer files are fetched from.
my $url = "http://cdn.mailscanner.info/";

# Every file name used below is relative to the working directory.
chdir $work or die "chdir $work: $!\n";

# The name of the newest list is looked up as a DNS TXT record.
my $latest = `dig +short TXT emails.msupdate.greylist.bastionmail.com.`;
$? == 0 or die "$0: failed to get latest version from DNS\n";

# The record must be a quoted string of the form
# "emails.<digits and dashes>.<number>"; the leading part is kept as the
# base name and the trailing number as the latest patch level.
my ($latest_base, $latest_patch) =
    $latest =~ /^"(emails[.][0-9-]+)[.]([0-9]+)"$/
    or die "$0: could not parse latest version string $latest\n";

# "current" is a symlink whose target is the name of the base file in use.
# A missing link (ENOENT) just means there is no base yet; any other
# readlink failure is fatal.
my $base = readlink "current";
die "readlink $work/current: $!" unless defined $base or $! == ENOENT;

# Fetch a new base file when we have none or a different one is announced.
if (!defined $base or $base ne $latest_base) {
	system "ftp $url/$latest_base > /dev/null" and die "ftp failed\n";
	if (defined $base) {
		# Remove the old link, the old base and its patch files, but
		# never the file we have just fetched: its name may begin with
		# the old base name and would otherwise match the glob too.
		my @stale = grep { $_ ne $latest_base } glob("$base*");
		unlink 'current', @stale;
	}
	symlink $latest_base, 'current' or die "symlink $work/current: $!\n";
	$base = $latest_base;
}

# Set of listed addresses: the keys are the addresses, the values are unused.
# It is seeded from the base file, one address per line.
my %addr;
open my $in, '<', $base
    or die "open $work/$base: $!\n";
while (<$in>) {
	# lines starting with '#' are comments
	/^#/ and next;
	# drop the newline and any other whitespace
	s/\s//g;
	# A blank line would otherwise be recorded as an empty address and
	# later be written out as a signature with no address bytes in it.
	next if $_ eq '';
	$addr{$_} = 1;
}
close $in;

# Apply the numbered patch files, in order, on top of the base list.
for my $patch (1 .. $latest_patch) {
	my $file = "$base.$patch";
	# Patch files already on disk are reused rather than fetched again.
	if (!-f $file and system("ftp $url/$file") != 0) {
		# Do not leave a partial download behind: the -f test above
		# would accept it as a complete patch on every later run.
		unlink $file;
		die "ftp failed\n";
	}
	open my $in, '<', $file
	    or die "open $work/$file: $!\n";
	while (<$in>) {
		# "> address" adds an address and "< address" removes one;
		# lines of any other shape are ignored.
		/^([<>]) (\S+)$/ or next;
		if ($1 eq '>') {
			$addr{$2} = 1;
		} else {
			delete $addr{$2};
		}
	}
	close $in;
}

# Characters accepted immediately before and after an address. Each one is
# hex-encoded and the two-digit codes are joined with '|' so the result can
# be used as an alternation inside (...) in a signature.
my $delim_hex = unpack "H*", "?!#\$%&~^*+=,:;()<>[]{}/|\\\`\'\"\t ";
my $delim = join '|', $delim_hex =~ m/../g;

open my $out, '>', 'scamnailer.tmp'
    or die "open > $work/scamnailer.tmp: $!\n";
# Write the addresses in sorted order. Hash order can differ from one run
# to the next, which would make the generated file differ from the previous
# one even when the set of addresses has not changed.
for my $addr (sort keys %addr) {
	# The crypt output, reduced to alphanumerics, is only used to give
	# the signature a name.
	# NOTE(review): traditional crypt() implementations look at only the
	# first 8 bytes of the input, so distinct addresses may end up with
	# the same name - confirm whether that matters to clamd.
	my $hash = crypt $addr, "..";
	$hash =~ s/[^0-9A-Za-z]//g;
	# 4 indicates a mail file
	# * indicates any position
	printf $out "ScamNailer.Phish.%s:4:*:(%s)%s(%s)\n",
	    $hash, $delim, unpack("H*", $addr), $delim;
}
# buffered write errors only show up at close time, so check it
close $out or die "write > $work/scamnailer.tmp: $!\n";

# only install if file has changed
# (a non-zero status from diff is treated as "changed"; that includes the
# case where diff fails because there is no previous scamnailer.ndb)
if (system "diff -q scamnailer.tmp scamnailer.ndb > /dev/null 2>&1") {
	# keep a local copy for future comparisons
	system "cp scamnailer.tmp scamnailer.ndb" and die "cp failed\n";
	# install where clamd will pick it up
	unless (rename("scamnailer.tmp", "$db/scamnailer.ndb")) {
		# remember the reason before unlink can overwrite $!
		my $err = "$!";
		# The install did not happen, so drop the local copy again;
		# otherwise later runs would find no difference and never
		# retry the install.
		unlink "scamnailer.ndb";
		die "rename $work/scamnailer.tmp -> $db/scamnailer.ndb: $err\n";
	}
} else {
	# nothing changed: discard the freshly generated file
	unlink "scamnailer.tmp";
}

# end
