#!/usr/bin/perl -w
# and once again for Emacs: -*-perl-*-

# NoCeM in the spool for C News and INN - grand unified program
# Written by Olaf Titz <olaf@bigred.inka.de>, May 1997 - Jan 1999.
# This program is in the public domain.
$ID='$Id: c-nocem.in,v 1.9 1999/02/19 17:43:31 olaf Exp $ %1999-02-19 18:39:38%';

# For C News:
# set up a batched feed like this:
#   process-nocem:alt.nocem.misc/all:f:
# with a batchparms entry:
#   process-nocem  N  100000  -  c-nocem -b -s
# and make sure to call "sendbatches -c N" regularly.

# For INN:
# set up a channel feed like this:
#   process-nocem:alt.nocem.misc:Tc,Wn:/path/to/c-nocem -c30

# The program will emit some logging on stdout unless -s is given.

# ----------------------------------------------------------------------------
# Configuration

# not used
#$pathnews="/var/lib/news";
# c-nocem configuration files
$pathetc='/etc/news/ncm';
# active file directory
$pathdb="/var/lib/news";
# news spool
$patharticles="/var/spool/news";
$tmp="/var/lib/news/tmp"; # create this directory, do not use world writable /tmp
$logfile="/var/log/news/log";
$ENV{'PATH'}="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/lib/news/bin";

require "getopts.pl";
$opt_t=86400; $opt_l=$opt_a="";
&Getopts("bc:t:nsd:klraz:");
$storageapi=0;
if ($opt_c) {
    $pathlog="";
    eval `innconfval -p 2>/dev/null`;
    if ($storageapi=~/^(1|yes|true|on)/i) {
	$storageapi=1;
    } else {
	$storageapi=0;
    }
    $logfile="$pathlog/news" if ($pathlog);
}
chdir $patharticles || die "chdir $patharticles: $!";
undef $opt_s if ($opt_n);

$pubring="$pathetc/ncmring.pgp";
$perm="$pathetc/ncmperm";
$groups="$pathetc/ncmgroups";
$inputfile="$tmp/nocem.input";
$batchfile="$tmp/nocem";
$PGP="/usr/bin/gpg --batch -q --keyring $pubring";
$ENV{'PGPPATH'}="/etc/news/pgp";
$ENV{'LANG'}="en";

# ----------------------------------------------------------------------------
if ($ID=~/Id:\s+(\S+\s+\S+\s+\S+)[^%]*%([^%]+)/) {
    $ID="$1 ($2)";
} elsif ($ID=~/Id:\s+(\S+\s+\S+\s+\S+)/) {
    $ID="$1";
}
select STDOUT; $|=1;
@Month=("Jan", "Feb", "Mar", "Apr", "May", "Jun",
	"Jul", "Aug", "Sep", "Oct", "Nov", "Dec");
if ($opt_s) {
    open(STDOUT, ">/dev/null") || die "open /dev/null: $!";
}
if ($opt_l) {
    $logformat="";
} elsif ($opt_c) {
    $logformat="%s %2d %02d:%02d:%02d.000 + (NoCeM) #";  # INN log format
} else {
    $logformat="%s %2d %02d:%02d:%02d.000 (NoCeM) + #";  # C News log format
}
$opt_k=($opt_k) ? "c" : "";
$opt_r=($opt_r) ? "r" : "";
if ($opt_d) {
    &compdelay($opt_d);
    $SIG{'HUP'}="nodelay";
} else {
    $del_one=$del_run=0;
    $SIG{'HUP'}="shutdown";
}
$LOCK_EX=2; # for flock
$LOCK_UN=8; # for flock
eval { $x="";select($x, $x, $x, 0); }; $HaveSelect=($@ eq "");
srand(time^$$);

$cal_sec=$cal_no=$sleeptime=$runcnt=0;
&deletethem unless ($opt_n); # process leftover batches

close P;
if ($opt_b) {
    $m=99999999;
} elsif ($opt_c) {
    $m=$opt_c;
}
$t=time;
$rtime=$t+$opt_t;

$SIG{"TERM"}="shutdown";
$SIG{"INT"}="shutdown";
print STDOUT "Starting $ID\n\n" unless ($opt_s);
&readconfig;
undef @input;
# Read the notice(s)
if ($opt_b || $opt_c) {
    $xdel=($del_one<$opt_t ? $del_one : $opt_t);
    while ($file=&readline($xdel/2-1)) {
	$ttt=time;
	$cal_sec+=$ttt-$t;
	$t=$ttt;
	if ($file=~/\n/) {
	    # batch up input
	    push(@input, $file);
	    ++$cal_no;
	}
	if (--$m<0 || $t>$rtime) {
	    &deletethem;
	    $m=($opt_c?$opt_c:99999999);
	    $rtime=$t+$opt_t;
	}
	if (-s $inputfile && open(I, $inputfile)) {
	    # read input side-channel
	    flock(I, $LOCK_EX);
	    push(@input, <I>);
	    unlink $inputfile;
	    close I;
	}
	@s=stat($perm);
	if ($ptime<$s[9]) {
	    &readconfig; # permissions file was changed
	}
	if (-s $groups) {
	    @s=stat(_);
	    if ($gtime<$s[9]) {
		&spawngc;
	    }
	}
	if ($t>=$ptime && $#input>=0) {
	    # process one item of batched input
	    @_=split(/[ \t\r\n]+/, shift(@input));
	    $pfile=$_[0];
	    print STDOUT "File $pfile:\n" unless ($opt_s);
	    &procfile("$pfile");
	    print STDOUT "\n" unless ($opt_s);
	    $ptime=$t+$del_one;
	}
	if ($opt_d && $cal_sec>21600) {
	    # Recompute delay times by averaging real article rate
	    &compdelay(86400*$cal_no/$cal_sec);
	    $cal_sec/=4; $cal_no/=4;
	}
    }
} else {
    print STDOUT "Processing stdin:\n" unless ($opt_s);
    &procfile("");
    print STDOUT "\n" unless ($opt_s);
}
unless ($opt_c && !$opt_n) {
    # channel closing could mean server shutdown, so just batch & exit
    &deletethem;
}
&shutdown("");
exit 0;

# ----------------------------------------------------------------------------
# Cancel all the articles in the batch file.

sub deletethem
{
    open(B, $batchfile) || return;
    if (!-s B) {
	close B; return;
    }
    ++$runcnt;
    printf STDOUT "Starting delete run \#%d\n\n", $runcnt unless ($opt_s);
    flock(BATCH, $LOCK_UN);
    close BATCH;
    if ($opt_n) {
	rename($batchfile, "$batchfile.$$");
	&spawngc;
    } else {
	while ($pid=fork, !defined($pid)) { &sleeep; }
	if ($pid) {
	    # master: $pid==first child
	    close B;
	    waitpid($pid, 0);
	    &spawngc;
	    return;
	}
	# first child: fork twice to avoid zombies
	while ($pid=fork, !defined($pid)) { &sleeep; }
	if ($pid) {
	    # first child: $pid==second child
	    rename($batchfile, "$batchfile.$pid");
	    open(B, ">>$batchfile.$pid"); # make sure its there (??)
	    exit;
	}
    }
    $tmpa="$batchfile.$$";
    $tmpb="$batchfile.$$.done";
    do {
	sleep $del_run;
    } while (!-f $tmpa || !-f $batchfile);
    undef %c;
    while (<B>) {
	($a,$b)=split;
	$c{$a}=1;
    }
    close B;

    $fcadd=$opt_c ? "-a3 3>>$tmpb" : "";
    if ($opt_n) {
	@_=localtime($TT=$ltime=time);
	$lf=sprintf($logformat, $Month[$_[4]], $_[3], $_[2], $_[1], $_[0]);
	open(C, "|fastcancel -di$opt_k$opt_r -f '$lf' $fcadd")
	    || die "invoke fastcancel: $!";
    } else {
	&lock || die "can't set the lock: $!";
	$SIG{"HUP"}="finish";
	$SIG{"TERM"}="finish";
	$SIG{"INT"}="finish";
	$SIG{"PIPE"}="IGNORE";
	@_=localtime($TT=$ltime=time);
	$lf=sprintf($logformat, $Month[$_[4]], $_[3], $_[2], $_[1], $_[0]);
	open(C,
          "|fastcancel -li$opt_k$opt_r -f '$lf' $fcadd >>$logfile")
	    || do { &unlock; die "invoke fastcancel: $!"; };
    }
    $i=0;
    while (($a,$b)=each %c, $a) {
	print C "$a\n";
	++$i;
    }
    close C;
    $ltime=time-$TT;
    unless ($opt_n) {
	&unlock;
	if ($opt_c && -s $tmpb) {
	    if ($storageapi) {
		system "fastrm -e -u -s $patharticles <$tmpb";
	    } else {
		system "sort $tmpb | fastrm -e -u -s $patharticles";
	    }
	}
	if ($opt_z && open(B, $tmpb) && open(D, ">>$opt_z")) {
	    flock(D, $LOCK_EX);
	    while (<B>) {
		print D;
	    }
	    close B; close D;
	}
	unlink $tmpa, $tmpb;
    }
    unless ($opt_s) {
	@_=times;
	printf STDOUT
	"Delete run #%d done, %d IDs; times u=%d, s=%d, w=%d, l=%d, r=%d\n\n",
	    $runcnt, $i, $_[0]+$_[2], $_[1]+$_[3],
	    $sleeptime, $ltime, time-$TT;
    }
    exit 0;
}


# ----------------------------------------------------------------------------
# Read one NoCeM notice. Collect Message-ID to be deleted in batch file.

sub procfile
{
    local($f)=@_;
    pipe(ER, EW);
    $tf="$tmp/noc.a$$";
    while ($pid=fork, !defined($pid)) { &sleeep; }
    # Start PGP child process
    if (!$pid) {
	open(STDERR, ">&EW");
	close(ER); close(EW);
	if ($storageapi) {
	    exec "sm $f | $PGP >$tf" || exit 99;
	} else {
	    if ($f) {
		exec "$PGP <$f >$tf" || exit 99;
	    } else {
		exec "$PGP >$tf" || exit 99;
	    }
	}
    }
    $s=$u="";
    close(EW);
    while(<ER>) {
	# read pgp's stderr
	/(\w+) signature from( user)?\s+(.*)$/ && do {
	    ($s,$u)=($1,$3);
	    # note: one file, one message
	};
    }
    close(ER); wait;
    if ($s=~/bad/i) {
	print STDOUT "  bad signature from $u\n";
	unlink($tf);
	return;
    }
    if (!$u) {
	print STDOUT "  missing/unknown signature\n";
	unlink($tf);
	return;
    }
    print STDOUT "  file signed by $u\n";

    # Now process the message
    open(F, $tf) || die "open $tf: $!";
    $i=$mids=0; undef @line;
    while (<F>) {
	tr/\r\n//d;
	/^\#/ && next;
	/^\s*$/ && next;
	/^\@\@\s*BEGIN\s+NCM\s+HEADERS/i && do {
	    $i=1; undef %H; next;
	};
	/^\@\@\s*BEGIN\s+NCM\s+BODY/i && do {
	    $i=&checkhdr($u);
	    next;
	};
	/^\@\@\s*END\s+NCM\s+BODY/i && do {
	    &nocmid(@line); undef @line; $i=0; next;
	};
	if ($i==1) {
	    # read a header line
	    ($hh,$h)=split(/:\s+/, $_, 2);
	    $hh=~tr/A-Z/a-z/;
	    $H{$hh}=$h;
	}
	elsif ($i==2) {
	    @l=split;
	    if ($l[0]=~/^\<[^>]+\>/) {
		&nocmid(@line);
		++$mids;
		@line=@l;
	    } else {
		push(@line, @l);
	    }
	}
    }
    &nocmid(@line);
    print STDOUT "    found $mids Message-ID tags\n";
    if ($i) {
	print STDOUT "    warning: NCM has no END tag\n";
    }
    close F;
    unlink($tf);
}

# ----------------------------------------------------------------------------
# Check if headers are okay. Argument is signator.
# Return: 2=okay, 3=skip

sub checkhdr
{
    local($u)=@_;

    if (!defined($H{"version"}) || $H{"version"}!~/^0\.9/) {
	printf STDOUT "    unknown version: %s\n", $H{"version"};
	return 3;
    }
    if (defined($H{"action"}) && $H{"action"}!~/^hide$/i) {
	printf STDOUT "    unsupported action: %s\n", $H{"action"};
	return 3;
    }
    printf STDOUT "    Type: %s, Notice-ID: %s\n", $H{"type"}, $H{"notice-id"};
    foreach $i (0..$#Pi) {
	if (($H{"issuer"}=~/$Pi[$i]/i) &&
	    (($Pt[$i] eq "*") || ($H{"type"}=~/$Pt[$i]/i))) {
	    if ($Py[$i]=~/^Y/i) {
		return 2;
	    } else {
		printf STDOUT "    disallowed by config\n";
		return 3;
	    }
	}
    }
    printf STDOUT "    disallowed by default\n";
    return 3;
}

# ----------------------------------------------------------------------------
# Process a line from NoCeM notice: <mid> group group... given as array

sub nocmid {
    local(@l)=@_;
    return if ($#l<0);
    if ($opt_a) {
	return unless (scalar grep($A{$_}, @l[1..$#l]));
    }
    print BATCH join(" ", @l), "\n";
}

# ----------------------------------------------------------------------------
# Read config

sub readconfig {
    local($i,$t,$y);
    print STDOUT "Reading permissions file\n" unless ($opt_s);
    open(P, $perm) || die "open $perm: $!";
    undef @Pi; undef @Pt; undef @Py;
    @s=stat(P);
    $ptime=$s[9];
    while (<P>) {
	s/^\s+//; s/\s+$//; /^\#/ && next; /^$/ && next;
	($i,$t,$y)=split;
	push(@Pi, $i); push(@Pt, $t); push(@Py, $y);
    }
    close P;
    &spawngc;
}

# Set up BATCH. Spawn groupcheck process if needed, read active if needed.
sub spawngc {
    if ($opt_a) {
	undef %A;
	open(A, "$pathdb/active") || die "open active: $!";
	while (<A>) {
	    ($a,$b,$c,$d)=split(/ /, $_);
    	    next if $d eq 'x';
	    $A{$a}=1;
	}
	close A;
    }
    if (-s $groups) {
	@s=stat(_);
	$gtime=$s[9];
	if ($opt_c) {
	    $gcp="groupcheck";
	    $gca=$groups;
	} else {
	    $gcp="gngp";
	    $gca="";
	    open(G, $groups) || die "open $groups";
	    # preprocess pattern list: kill whitespace etc
	    while (<G>) { $gca.=$_; }
	    close G;
	    $gca=~s/[ \t\n,\\]+/,/g;
	    $gca=~s/^,//; $gca=~s/,$//;
	}
	while ($pid=open(BATCH, "|-"), !defined($pid)) { &sleeep; }
	if (!$pid) {
	    close BATCH;
	    open(STDOUT, ">>$batchfile") || die "open $batchfile: $!";
	    flock(STDOUT, $LOCK_EX);
	    exec $gcp, $gca || die "exec $gcp: $!"
	}
    } else {
	open(BATCH, ">>$batchfile") || die "open $batchfile: $!";
	    flock(BATCH, $LOCK_EX);
    }
    select BATCH; $|=1;
}

# ----------------------------------------------------------------------------
# Acquire the news system master lock.

sub lock
{
    if ($opt_c) {
	# running under INN
	while (!&ctlinnd("pause", "processing NoCeM [$$]")) {
	    &sleeep;
	}
    } else {
	local($tmp,$lock) = ("$newsctl/L.$$", "$newsctl/LOCK");
	open(LOCK, ">$tmp") || return 0;
	print LOCK "$$\n"; close LOCK;
	while (!link($tmp, $lock)) {
	    (-f $tmp) || return 0;
	    &sleeep;
	}
	unlink($tmp);
    }
    return 1;
}

sub unlock
{
    if ($opt_c) {
	&ctlinnd("go", "processing NoCeM [$$]");
    } else {
	unlink "$newsctl/LOCK";
    }
}

# ----------------------------------------------------------------------------
# Read a line from STDIN without buffering, with timeout.
# Return undef for error, empty string for EOF, "\0" for timeout,
# \n-terminated string otherwise.

sub readline
{
    local($t)=@_;
    if ($HaveSelect) {
	local($r,$n)=("",0);
	vec($r, fileno(STDIN), 1)=1;
	($n,$t)=select($r, "", "", $t);
	return "\0" if (!$n);
    }
    local($x,$c)=("", "");
    do {
	$_=sysread(STDIN, $c, 1);
	return undef if (!defined($_));
	$x.=$c;
    } while ($c ne "" && $c ne "\n");
    return $x;
}

# ----------------------------------------------------------------------------
# Misc routines

sub finish
{
    close C;
    do {} while (wait>0); # make sure no child exists which would need lock
    &unlock unless ($opt_n);
    exit 0;
}

sub ctlinnd
{
    local($c,$r)=@_;
    local($m);
    $m=`ctlinnd -t0 -s $c $r 2>&1`;
    $m=~tr/\r\n//d;
    unless (($m eq "") || ($m=~/Already paused/i)) {
	print STDERR "lock: $m\n";
    }
    return (($?>>8)==0);
}

sub sleeep
{
    local($a)=rand(30)+10;
    sleep $a;
    $sleeptime+=$a;
}

sub compdelay
{
    local($d)=@_;
    $del_one=21600/$d;
    $del_one=300 if ($del_one>300);
    $del_run=$del_one*2/3;
    printf STDOUT ("Delay time: %d (-d %d)\n", $del_one, $d) unless ($opt_s);
}

sub nodelay
{
    $opt_d=$del_one=$del_run=0;
    print STDOUT "Delay mode cancelled by signal\n" unless ($opt_s);
}

sub shutdown
{
    local($s)=@_;
    if ($s) {
	print STDOUT "SIG$s\n";
    }
    if ($#input>=0 && open(I, ">>$inputfile")) {
	flock(I, $LOCK_EX);
	print I @input;
	close I;
    }
    unless ($opt_s) {
	@_=times;
	printf STDOUT
	    "c-nocem done; times u=%d, s=%d, w=%d, r=%d\n\n",
	    $_[0]+$_[2], $_[1]+$_[3], $sleeptime, time-$^T;
    }
    exit 0;
}

# ----------------------------------------------------------------------------

# Local variables:
# time-stamp-start:  "ID='[^%]*%"
# time-stamp-end:    "%"
# time-stamp-format: "%04y-%02m-%02d %02H:%02M:%02S"
# End:
