#/usr/local/bin/perl
'di';
'ig00';
#
# $Header: /tmp_mnt/home/netlabs1/lwall/pl/RCS/clip,v 1.1 92/07/13 12:37:09 lwall Exp Locker: lwall $
#
# $Log:	clip,v $
# Revision 1.1  92/07/13  12:37:09  lwall
# Initial revision
# 

$HOME = $ENV{HOME}
     || $ENV{LOGDIR}
     || (getpwuid($<))[7]
     || die "No home directory!!!\n";

# Configurable parameters, may be overridden in .cliprc

$SPOOLDIR = "/usr/spool/news";		# Where news articles are stored.
$NEWSLIB = "/usr/lib/news";		# Where Cnews keeps history file.
$HOMETMP = "$HOME/tmp";			# Where clip should put output.
$HOMEBIN = "$HOME/bin";			# Where clip should install N script.
$MAXLOAD = 3;				# What load average to suspend at.
$NICE = 16;				# What priority to run at.
$DEBUG = 0;				# Whether to be noisy.

$CLIPRC = "$HOME/.cliprc";
require $CLIPRC;

# Everything from here on should be machine independent.

open(N0, "$HOMETMP/n0");
$oldpid = <N0> + 0;
close N0;
if ($oldpid) {
    die "Already a clip process running ($oldpid)\n" if kill 0, $oldpid;
}

die "You must call both &NGSKIP and &SCANNER in $CLIPRC\n"
    unless defined &ngskip && defined &scanner;

sub fixmsg {
    local($_, $file, $line) = @_;
    $line -= ($Preamble =~ y/\n//);
    s/ file \(eval\) at line (\d+)/" $file at line " .  ($1 + $line)/eg;
    s/ at \(eval\) line (\d+)/" in $file at line " .  ($1 + $line)/eg;
    # $* = 1;s/^/$0: /g; # too noisy to have this
    die;
}

sub NGSKIP {
    local($userstuff) = @_;
    if ($userstuff =~ tr/\n// >= 3 && $userstuff !~ /study/) {
	$study = "\t\tstudy;\n";
    }
    else {
	$study = "";
    }
    $eval = (($Preamble = <<'END1' . $study) . $userstuff . <<'END2');
	    sub ngskip {
		local($_) = $nglist;
		eval {
		    &skip if /^cancelled$/;
END1
		};
		if ($@) {
		    $@ = "" if $@ eq "You should never see this\n";
		    die $@ if $@;
		}
	    }
END2
    print STDERR $eval if $DEBUG & 1;
    eval $eval;
    &fixmsg($@, (caller)[1,2]) if $@;
    1;
}

sub SCANNER {
    local($userstuff) = @_;
    $eval = (($Preamble=<<'END1') . $userstuff . <<'END2');
	    sub scanner {
		while (<ART>) {
		    $totalhits = 0;
		    do {
			study;
			$hits = 0;
END1
			$totalhits += $hits;
		    } while $hits;
		    &printhit if $totalhits;
		}
	    }
END2
    print STDERR $eval if $DEBUG & 1;
    eval $eval;
    &fixmsg($@, (caller)[1,2]) if $@;
    1;
}

require "timelocal.pl";

fork && exit;			# avoid nohup behavior

$pid = $$;

$pmeter = fork;
defined $pmeter || die "can't fork: $!";
if ($pmeter == 0) {
    &pmeter($MAXLOAD, $pid, $DEBUG & 2);
    die "Not reached";
}

if ($HOMEBIN) {
    system "echo kill -HUP $pid >$HOMEBIN/N";
    system "(echo ps $pid; echo ps $pmeter) >$HOMEBIN/P";
    chmod 0755, "$HOMEBIN/N";
    chmod 0755, "$HOMEBIN/P";
}

setpriority(0, 0, $NICE);		# set very slow priority

$date = shift;
chop($date = `cat $HOME/.lastclip`) if !$date && -f "$HOME/.lastclip";
$date = &lidate($date);

chdir $SPOOLDIR || die "Can't cd: $!\n";

select(STDERR); $| = 1;
select(STDOUT); $| = 1;

&CATCH;
$SIG{HUP} = CATCH;		# send SIGHUP to switch to new history file
$SIG{USR1} = RESTART;		# send SIGUSR1 to re-execute clip
$SIG{ALRM} = IGNORE;		# so we can send ourselves an alarm safely

$r = "\r" if -t STDOUT;

($dev,$ino,$mode,$nlink,$uid) = stat STDOUT;
$origuid = $uid;
for (;;) {
  LOGLINE:
    while (<LOG>) {
	$pos = tell(LOG);
	chop;
	($messid,$date,$nglist) = split(/\t/);
	$0 = "clip at $messid" if $DEBUG & 1;
	$wanted = 1;
	&ngskip;
	next LOGLINE unless $wanted;
	$date = &gidate($date);
	($ng,$art) = split(m![ /]!,$nglist);
	next unless $art;
	$ng =~ y!.!/!;
	open(ART,"$ng/$art") || next;
	next if -s ART > 100_000;
	$count = 0;
	++$slept;		# to force quick update after big batch
	$/ = '';
	$header = <ART>;
	$_ = '';
	&scanner;
	close ART;
	$sleep = 5;
    }
    continue {
	$/ = "\n";
    }
    print STDERR "clip: caught up$r\n" unless $tailing++;
    sleep $sleep;
    $slept += $sleep;
    if ($slept > 300) {
	$slept = 0;
	if ($date != $lastdate) {
	    ($dev,$ino,$mode,$nlink,$uid,$gid) = stat STDOUT;
	    exit unless $uid == $origuid;
	    open(LASTDATE,">$HOME/.lastclip");
	    print LASTDATE &cdate($date),"\n";
	    close LASTDATE;
	    $lastdate = $date;
	}
	($dev,$ino) = stat("$NEWSLIB/history");
	if ($dev != $logdev || $ino != $logino) {
	    print STDERR "************* NEW HISTORY FILE *************$r\n";
	    open(LOG,"$NEWSLIB/history") || die "Can't open log: $!\n";
	    ($logdev, $logino) = stat LOG;
	    &seekdate($date);
	    $tailing = 0;
	    $slept = 300;		# force immediate update of lastclip.
	}
    }
    $sleep++ if $sleep < 120;
    seek(LOG,$pos,0);
}

sub hit {
    ($before,$match,$after) = ($`,$&,$');
    $match =~ s/(.)/_\b$1/g;
    $_ = "$before$match$after";
    study;
    $hits++;
    unless ($count++) {
	$header =~ /From: (.*)/;
	($from = $1) =~ s/.*\((.*)\).*/$1/;
	print("$r\n\@$ng/$art  \tFrom: $from$r\n") || exit;
	$header =~ /Subject: (.*)/ && print "Subject: $1$r\n";
	$0 = "clip " . &cdate($date);
    }
}

sub skip {
    $wanted = 0;
    die "You should never see this\n";
}

sub printhit {
    if (length() > 500) {
	$wanted = '';
	for ($line = 0; /.*\n/g; $line++) {
	    if ($& =~ /_\010/) {
		for ($w = $line - 3; $w <= $line + 3; $w++) {
		    vec($wanted,$w,1) = 1;
		}
	    }
	}
	$unlines = 0;
	for ($line = 0; /.*\n/g; $line++) {
	    if (vec($wanted,$line,1)) {
		print($&,$r) || exit;
		$unlines = 0;
	    }
	    else {
		print("...\n$r") || exit unless $unlines++;
	    }
	}
    }
    else {
	s/\n/\n$r/g if $r;
	print() || exit;
    }
}

sub seekdate {
    local($start) = shift;
    if ($start == 9_999_999_999) {
	seek(LOG,0,2);
	$pos = tell(LOG);
	print STDERR "$pid starting at eof...$r\n";
	return;
    }
    ($st_dev,$st_ino,$st_mode,$st_nlink,$st_uid,$st_gid,$st_rdev,$st_size,
	$st_atime,$st_mtime,$st_ctime,$st_blksize,$st_blocks) = stat(LOG);
    for ($offset = $st_size - 100_000; $offset > 0; $offset -= 100_000) {
	if (seek(LOG,$offset,0)) {
	    $_ = <LOG>;            # probably starts in middle of a line
	    $_ = <LOG>;
	    ($messid,$date,$nglist) = split(/\t/);
	    $date = &gidate($date);
	    last if $date < $start;
	}
	else {
	    $offset = -1;
	}
    }
    seek(LOG,0,0) if $offset < 0;
    while (<LOG>) {
	($messid,$date,$nglist) = split(/\t/);
	$date = &gidate($date);
	last if $date >= $start;
    }
    $pos = tell(LOG);
    $pct = int($pos * 100 / $st_size);
    print STDERR "$pid starting at $pct% for $start...$r\n";
    $0 = "clip start at $messid" if $DEBUG & 1;
}

sub CATCH {
    &openout;
    open(LOG,"$NEWSLIB/history") || die "Can't open log: $!\n";
    ($logdev, $logino) = stat LOG;
    $date = 9_999_999_999 unless $date;
    &seekdate($date);
    $tailing = 0;
    $slept = 300;		# force immediate update of lastclip.
    $SIG{HUP} = CATCH;
    kill $$, ALRM;
}

sub RESTART {
    kill 9, $pmeter if $pmeter;
    exec "clip";
    die "Couldn't exec clip: $!\n";
}

sub cdate {
    ($sec,$min,$hr,$mday,$mon,$year) = localtime($date);
    sprintf("%02d/%02d/%02d %02d:%02d",$mon+1,$mday,$year,$hr,$min);
}

sub lidate {
    $_[0] =~ m#(\d+)/(\d+)/(\d+) (\d+):(\d+)#
      ? &timelocal(0, $5, $4, $2, $1-1, $3)
      : $_[0];
}

sub gidate {
    $_[0] =~ m#(\d+)/(\d+)/(\d+) (\d+):(\d+)#
      ? &timegm(0, $5, $4, $2, $1-1, $3)
      : $_[0];
}

sub openout {
    rename("$HOMETMP/n8", "$HOMETMP/n9");
    rename("$HOMETMP/n7", "$HOMETMP/n8");
    rename("$HOMETMP/n6", "$HOMETMP/n7");
    rename("$HOMETMP/n5", "$HOMETMP/n6");
    rename("$HOMETMP/n4", "$HOMETMP/n5");
    rename("$HOMETMP/n3", "$HOMETMP/n4");
    rename("$HOMETMP/n2", "$HOMETMP/n3");
    rename("$HOMETMP/n1", "$HOMETMP/n2");
    rename("$HOMETMP/n0", "$HOMETMP/n1");
    open(STDOUT, ">$HOMETMP/n0");
    open(STDERR, ">&STDOUT");
}

sub pmeter {
    # ($loadavg, $pid, $debug) = @ARGV;
    local($loadavg, $pid, $debug) = @_;
    $running = 1;
    $0 = "pmeter @_";

    while (kill 0, $pid) {
	`uptime` =~ /load average:\s+([\d.]+)/
	    || die "Can't run uptime: $!\n";;

	if ($1 > $loadavg) {
	    kill 'STOP', $pid;
	    if ($running) {
		print STDERR "stopping at $1\r\n" if $debug;
		$0 = "pmeter (stopped $pid at $loadavg)";
		$running = 0;
	    }
	}
	else {
	    kill 'CONT', $pid;
	    if (!$running) {
		print STDERR "starting at $1\r\n" if $debug;
		$0 = "pmeter (started $pid at $loadavg)";
		$running = 1;
	    }
	}
	sleep 120;
    }
}
###############################################################

    # These next few lines are legal in both Perl and nroff.

.00;                       # finish .ig
 
'di           \" finish diversion--previous line must be blank
.nr nl 0-1    \" fake up transition to first page again
.nr % 0         \" start at page 1
'; __END__ ##### From here on it's a standard manual page #####

.TH CLIP 1 "July 14, 1992"
.de M           \" man page reference
\\fI\\$1\\fR\\|(\\$2\)\\$3
..
.AT 3
.SH NAME
clip \- personal news clipping service
.br
pmeter \- start and stop a process according to load average
.SH SYNOPSIS
.B clip
[
.I date
]
.SH DESCRIPTION
.I Newsclip
scans incoming news by following the C news history file as it grows
and examining each article listed there for patterns of interest.
.PP
If a date argument is supplied on the command line (in the form 
.IR "``mm/dd/yy hh:mm''" , 
including the embedded space)
.I clip
will read articles since that date.  With no arguments, it examines
the user's ~/.lastclip file to know when it left off scanning;
if no such file exists, every news article will be scanned.
.PP
Patterns and options are specified in a .cliprc file situated
in your home directory.  The .cliprc file is simply a gob of Perl
code that will be evaluated by clip after it has set its default
options but before it actually goes out to do anything.  The only
mandatory items are a call to each of two subroutines: &NGSKIP and
&SCANNER.  A typical .cliprc file looks like this:
.nf

.ne 6
    # Options I want to override.
    $MAXLOAD = 4;

    # Newsgroups I always read anyway, so don't bother.
    &NGSKIP( <<'END' );
        &skip if /comp\e.lang\e.perl/;
        &skip if /rec\e.humor\e.funny/;
    END

.ne 8
    # Patterns I'm interested in scanning for.
    &SCANNER( <<'END' );
        &hit if /betty[^\e0]boop/i;
        &hit if /roger[^\e0]rabbit/i;
        &hit if /\ebw.*coyote\eb/i;

        if (/\ebacme\eb/i) {{
            next if $` =~ /roadrunner\e@$/;
            &hit;
        }}

        if (/\eblooney\eb/i) {{
            next if $' =~ /^\es*bin/;
            next if $` =~ /!$/;
            next if $` =~ /fudd\e@/i;
            next if $nglist =~ /alt\e.crazy\e.people/;
            &hit;
        }}
    END

.fi
The argument to the &NGSKIP routine is a sequence of zero or more Perl
statements that call &skip if the current article is crossposted to a
newsgroup that we don't want to scan (generally because we'll read the
newsgroup anyway).  Before your code is called, the $_ variable is
automatically set to the list of newsgroups from the history file.
You can invert the logic and just pick the newsgroups you want by
saying:
.nf


.ne 9
    &NGSKIP(<<'END');
        study;
        {
            last if /comp\e.lang\e.perl/;
            last if /comp\e.org\e.usenix/;
            last if /comp\e.unix\e.bsd/;
            &skip;
        }
    END

.fi
.PP
The argument to the &SCANNER routine is a sequence of one or more Perl
statements that call &hit if an interesting pattern has been spotted.
Before your code is called, the $_ variable is set to the current paragraph
of the current article (articles are read and scanned paragraph by paragraph).
Newsclip will snip excerpts from any paragraph containing patterns,
underlining the patterns it found.  Note that some patterns are
unqualified hits, while others are hits only if some other pattern doesn't
match.  (It's important to the underlining algorithm that the value
of $& continue to contain the value of the last 
.I successful
match \(em all
of the exceptions are expected to fail.)  For the purposes of exception
scanning, the variables $` and $' are automatically set to the preceding
and subsequent text by the successful initial pattern match.  In addition,
.I clip 
sets the variable $nglist to the current list of newsgroups, and
the variable $header to be the header of the current article.
.PP
The use of double curlies is merely to allow the ``next'' command to fall
through to the next test, since ``next'' will simply fall out of an ordinary
block.  The ``last'' command could also have been used, but would be less
intuitive.  Other special thingies you might want to use include \eb
to assert a word boundary, [^\e0] to match any character but a null,
including a newline, and any other regular expression goodies you can
think up to detect spelling errors and variants.  Tom Christiansen
searches for his name with:
.nf

.ne 4
    if (/\ebtchrist\eb/i || /tom[^\e0]christ(ia|e)ns[eo]n/i) {{ # grr...
        next if /\en\es*Tom Christiansen\es+tchrist\e@convex.com/;
        &hit;
    }}

.fi
.PP
.I Newsclip
forks another copy of itself,
called
.IR pmeter ,
which runs 
.M uptime 1
every two minutes to make sure the load hasn't gone too
high.  If it has, 
.I pmeter
suspends 
.I clip 
with a SIGSTOP,
waiting until the load average goes back down before
allowing 
.I clip 
to continue.
Both 
.I pmeter
and 
.I clip
continually
muck with their own externally-visible argument list
to keep folks running
.M ps 1
amused.  This has no effect if your operating system doesn't
support changes to argv being noticed by ps.
.PP
Output by default goes to ~/tmp/n0.  When 
.I clip 
is hit by a SIGHUP,
it renames n8 to n9, n7 to n8, ..., n0 to n1, and creates a new n0
file.  For handiness, when 
.I clip 
starts up, it creates a tiny script
called ``N'' that will send the SIGHUP to the correct process.  Typically
one runs N in the morning to set up a new n0 file, and then reads ~/tmp/n1
to see what the scanner found since the last time you ran N.  A companion
script, named ``P'', runs 
.I ps
on the 
.I clip
and 
.I pmeter
processes.  (This assumes a BSD-style 
.I ps 
program.)
.PP
The first line of each article reference begins with an @, in case you
want to cut and paste that line to something that looks up the article
for you.  I use the following rn macro.  Unfortunately I have to paste
the line twice because of how rn eats typeahead, but hey, it beats a
kick in the head.  This macro should all be on one line \(em it's broken in
two so that nroff doesn't get upset:
.nf

.ne 12
@ %(%m=n?:%(%m!=a?q:)q)%(%"^Jng/art: "=\e([^ @]*\e)/\e([^ ]*\e)?
g%`perl -e '($_="%1")=~tr#/#.#;print'`^J.%2^J:%(%m=n?^L:))

.fi
.PP
Interesting variables to set in your .cliprc file include the following:
.TP 15
.B $SPOOLDIR
Where news articles are stored; defaults to /usr/spool/news.
.TP 
.B $NEWSLIB 
Where Cnews keeps its history file; defaults to /usr/lib/news.
.TP 
.B $HOMETMP 
Where clip should put its output files;
defaults to ~/tmp.
.TP 
.B $HOMEBIN 
Where clip should install N script;
defaults to ~/bin.
.TP 
.B 
$MAXLOAD 
What load average to suspend at; defaults to 3.
.TP 
.B $NICE 
What priority to run at; defaults to 16.
.TP 
.B $DEBUG
Whether to be noisy; defaults to 0.  A value
with the 1 bit set causes 
.I clip
to be noisy;
a value with the 2 bit set causes
.I pmeter
to be noisy.
.SH ENVIRONMENT
HOME or LOGDIR	
.SH SIGNALS
.nf 
SIGHUP	Cycle log files.
SIGUSR1	Re-exec oneself in case .cliprc changes.
SIGALRM	Make clip wake up early.
.fi
.SH FILES
.nf
.ta \w'$HOME/.cliprc   'u
$HOME/.cliprc	Your clipping preferences.
$HOME/.lastclip	Date and time of last article scanned.
$HOMETMP/n[0-9]	Log files.
$HOMEBIN/N	Cycle log files: n8 -> n9, n7 -> n8, etc.
.fi 
.SH AUTHOR
Larry Wall, with heckling by Tom Christiansen.
.SH "SEE ALSO"
.M perl 1 ,
.M rn 1 ,
.M ps 1 ,
.M uptime 1 ,
.M newsmaint 8 .
.SH DIAGNOSTICS
Obscure diagnostics are an obsolete concept rooted in the notion that
programs must be shoehorned into memory.
.SH BUGS
There ought to be a way for multiple users to share a single clip process.
.PP
It doesn't work on NNTP-only systems.
.ex
