#!/usr/bin/perl
# 
# usage: script [-n] https://ruleqa.spamassassin.org/reporturl freqsfile
#
# to be run at the top-level of the SVN checkout

use strict;
use warnings;

# Command line: [-n] REPORT_URL FREQS_FILE
#   -n          print the message to stdout instead of piping it to sendmail
#   REPORT_URL  link appended to the message on the "More:" line
#   FREQS_FILE  file the per-rule freqs lines are read from
my $sendmail = 1;
my $rurl = shift @ARGV;
if (defined $rurl && $rurl eq '-n') {
  $sendmail = 0;
  $rurl = shift @ARGV;
}
my $freqsfile = shift @ARGV;

# fail early with a usage hint instead of tripping over undef values later on
die "usage: $0 [-n] https://ruleqa.spamassassin.org/reporturl freqsfile\n"
        unless defined $rurl && defined $freqsfile;

# ---------------------------------------------------------------------------
# get the last rev
#
# Ask svn for the "Last Changed Rev" of the rulesrc directory.  stdin is
# redirected from /dev/null so svn never waits on input from our own stdin.
my $svn_info_cmd = 'svn info rulesrc < /dev/null';
open (my $info_fh, '-|', $svn_info_cmd)
        or die "cannot run '$svn_info_cmd': $!";

my $lcrev;
while (my $line = <$info_fh>) {
  $lcrev = $1 if $line =~ /^Last Changed Rev: (\d+)/;
}
close $info_fh or die "svn info failed";

die "no 'Last Changed Rev:' found in 'svn info rulesrc' output" unless $lcrev;
# the diff further down runs against revision $lcrev - 1, so anything
# at or below r1 cannot be used
die "non-numeric LCRev $lcrev" unless ($lcrev > 1);

# ---------------------------------------------------------------------------
# see if we've already mailed about that rev
#
# Only the final line of /tmp/mail_freqs.log is consulted.  If it carries an
# "r=N" marker and N is at or beyond the current revision, there is nothing
# new to report and the script stops here.  A missing or unreadable log
# simply means "nothing mailed yet".
if (open (my $seen_fh, '<', '/tmp/mail_freqs.log')) {
  my $newest_entry;
  while (my $entry = <$seen_fh>) {
    $newest_entry = $entry;
  }
  close $seen_fh;

  # list-context match: $donerev stays undef when there is no marker
  my ($donerev) = ($newest_entry || '') =~ /r=(\d+)/;
  if (defined $donerev && $lcrev <= $donerev) {
    print "already mailed about rules in r$lcrev, exiting.\n";
    exit;
  }
}

# ---------------------------------------------------------------------------
# we have the revision; now find the rule names in that diff
#
# Walks the output of "svn diff" for rulesrc between the previous revision
# and $lcrev.  Produces:
#   @files       the changed .cf files that were scanned
#   %poss_rules  candidate rule names seen on added ("+") lines
#   @rules       sorted keys of %poss_rules
#   $files       @files joined with spaces, for the mail body
my $prevrev = $lcrev - 1;

my $svn_diff_cmd = "svn diff -r $prevrev:$lcrev rulesrc < /dev/null";
open (my $diff_fh, '-|', $svn_diff_cmd)
        or die "cannot run '$svn_diff_cmd': $!";

my @files = ();
my %poss_rules = ();
FILE: while (!eof $diff_fh) {
  # skip ahead to the next "+++ path (revision N)" file header
  my $file;
  while (my $line = <$diff_fh>) {
    if ($line =~ /^\s*\+\+\+\s+(.*?)\s+\(revision/) {
      $file = $1;
      last;
    }
  }

  last FILE unless $file;        # EOF

  if ($file !~ /\.cf$/i) {
    print "no rules in non-rule file $file\n";
    next FILE;
  }

  # ignore nightly rule score generation output
  if ($file eq 'rulesrc/scores/72_scores.cf') {
    print "only scores in file: $file\n";
    next FILE;
  }

  # ok, we're reading the diff for a rules file.
  push @files, $file;
  while (my $line = <$diff_fh>) {
    last if $line =~ /^Index: /;        # next file header

    # strip comments; "." does not match the newline, so the line ending
    # stays in place for the trailing \s in the pattern below
    $line =~ s/#.*$//;

    # just look for "+anything NAME_OF_RULE anything"; if we get
    # a little overexcited, it won't cause any harm
    if ($line =~ /^\+\s*\S+\s+(\S+)\s/) {
      # could be a rule, let's add it to the list
      $poss_rules{$1}++;
    }
  }
}
close $diff_fh or die "svn diff failed";

my @rules = sort keys %poss_rules;
if (!@rules) {
  print "no rules found in checkin, ignoring.\n";
  exit 0;
}

# print "mailing about changed/new rules: ".join(' ', @rules)."\n";
my $files = join (' ', @files);

# ---------------------------------------------------------------------------
# get the SVN log entry
#
# Pulls the author and the log message for r$lcrev out of the XML form of
# "svn log".  Produces $svn_author and $svn_msg for the mail body.
my $svn_log_cmd = "svn log --xml -r $lcrev rulesrc < /dev/null";
open (my $svnlog_fh, '-|', $svn_log_cmd)
        or die "cannot run '$svn_log_cmd': $!";

my $xml = join('', <$svnlog_fh>);
close $svnlog_fh or die "svn log failed";

# Match in list context so a failed match yields undef; reading $1 after an
# unchecked match would silently reuse the capture of an earlier match.
my ($svn_author) = $xml =~ /<author>\s*(.*?)\s*<\/author>/s;
my ($svn_msg)    = $xml =~ /<msg>\s*(.*?)\s*<\/msg>/s;

# the author becomes the mail recipient, so there is no point going on without
die "no <author> found in 'svn log' output for r$lcrev"
        unless defined $svn_author && length $svn_author;

# a commit without a log message is fine
$svn_msg = '' unless defined $svn_msg;

# the message is XML-escaped; turn it back into plain text for the mail
for ($svn_msg) {
  s/&lt;/</g;
  s/&gt;/>/g;
  s/&quot;/"/g;
  s/&apos;/'/g;
  s/&amp;/&/g;      # last, so "&amp;lt;" ends up as the literal text "&lt;"
}

# ---------------------------------------------------------------------------
# and the freqs we just built
#
# %freqs maps the last whitespace-delimited token of each line in the freqs
# file to that line (minus trailing whitespace); the lookup later in the
# script is done by rule name.  Lines containing " (all messages" or " SPAM%"
# are additionally collected verbatim into $freqs_hdr.
#
# An unreadable freqs file is not fatal: we warn and carry on with no freqs.
my %freqs = ();
my $freqs_hdr = '';
if (open (my $freqs_fh, '<', $freqsfile)) {
  while (my $line = <$freqs_fh>) {
    $freqs_hdr .= $line if $line =~ / (?:\(all messages|SPAM\%)/;
    $line =~ /(.* )(\S+)\s*$/ or next;
    $freqs{$2} = $1.$2;
  }
  close $freqs_fh;
}
else {
  warn "cannot open '$freqsfile': $!";
}

# Name of the mass-check: the first "mc-*" directory component of the current
# working directory, or the whole path if there is none.
my $mcname = `pwd`;
chomp $mcname;      # keep pwd's newline out of the quoted name in the mail
if ($mcname =~ /\/(mc-[^\/]*)\//) {
  $mcname = $1;
}

# ---------------------------------------------------------------------------
# output each of the found rules' freqs
#
# $tomail is the complete message: mail headers, a blank line, the commit
# summary, the freqs header lines, one freqs line per changed rule, and
# finally the report URL.
my $tomail = join ('',
  "From: jm\@apache.org (SVN Freqs Daemon)\n",
  "To: $svn_author\@apache.org\n",
  "Subject: svn freqs: r$lcrev - your recently-modified rules\n",
  "\n",
  "Author: $svn_author\n",
  "Files: $files\n",
  "Log:\n",
  "$svn_msg\n",
  "\n",
  "Freqs for those rules in '$mcname' mass-check:\n",
  $freqs_hdr,
);

for my $rule (@rules) {
  # a rule may be listed under its own name or with a "T_" prefix
  my $freq_line = $freqs{$rule} || $freqs{"T_$rule"};
  $tomail .= "$freq_line\n" if $freq_line;
}

$tomail .= "\nMore: $rurl\n";

# ---------------------------------------------------------------------------
# and mail
#
# With -n the message only goes to stdout; otherwise it is piped to sendmail,
# which takes the recipient from the message headers (-t).

if (!$sendmail) {
  print $tomail;
}
else {
  open (my $mail_fh, '|-', '/usr/sbin/sendmail -oi -t')
        or die "cannot run sendmail";
  print {$mail_fh} $tomail;
  close $mail_fh or die "sendmail failed";
}

# ---------------------------------------------------------------------------
# log it, so we don't mail about that rev again
#
# Appends an "r=N" marker to /tmp/mail_freqs.log; the check near the top of
# the script reads the last such line.  Failing to record the revision only
# produces a warning, since the mail itself has already been dealt with.
#
# NOTE(review): this is also reached in -n mode, so a dry run marks the
# revision as done -- confirm that is intended.

if (open (my $donelog_fh, '>>', '/tmp/mail_freqs.log')) {
  print {$donelog_fh} "r=$lcrev\n";
  # buffered write errors surface at close
  close $donelog_fh or warn "failed to write to /tmp/mail_freqs.log";
}
else {
  warn "cannot open /tmp/mail_freqs.log for appending: $!";
}

exit;

