

/*
 *  This is a template program for feeding a site using NewsClip
 *  Use your filter program in "batch" mode:
 *  First create a .newsrc file with the groups you wish to feed to
 *  the remote site.  No "las" file is needed.   Adjust the distributions
 *  below, or put them in a file and change the init code.
 *  Create the .newsrc with:
 *		mknewsrc p=0 control '^comp' '^rec' ....   >newsrcfile
 *
 *  Now run your filter program as follows, regularly from the cron, when
 *  it is time to batch news for the remote site:
 *
 *		nclip mode=batch n=newsrcfile >/usr/spool/batch/sitename
 *
 *  On the standard output, it lists the files to be fed.  This can then
 *  be fed to the news sendbatch program.
 *
 *  Be sure to change the "feedsite" string in the article() procedure,
 *  and the distributions below.
 */

/*
 *  How this program works:
 *  New articles from groups in the site's .newsrc file are processed through
 *  the "article()" procedure.  No articles from outside those groups will
 *  be passed through.  The xrefs line (or a call to reject_xref()) makes
 *  sure each article is processed once.
 *
 *  First we check to see if this article is in a distribution that we wish
 *  to get.  We scan the "Rdistribution" header, and for each element, we
 *  take the leftmost components (parsing with dots).  If that component is
 *  in our database of distributions, we accept the article.  (This only
 *  supports top and 2nd-level distributions.  To support more levels, either
 *  define the distributions as regular expressions and use 'has', or
 *  search multiple components.)
 *
 *  If we're in the 'control' group, we check for newgroup messages.  If they
 *  already passed our distribution check, we want to subscribe to such
 *  groups.  We do this now so that it gets done even if the control article
 *  came from our feed site.
 *
 *  Next, we check to see that this article hasn't visited the site we are
 *  feeding.  We check for that site name in the first N-1 components of
 *  the Path: header.  We ignore the last component, which for ancient reasons
 *  is not a site name.
 *
 *  After that, we can do any particular checks to filter the articles.
 *  Otherwise, we accept all articles.  They get printed to the input file
 *  used by the news batching program.
 */

/* Set of distribution names the remote site is fed.  Filled in by init()
   and consulted by check_dist(). */
database distfeed;		/* distributions to be fed */

/* User-declared header variable for the "control" header line.  The " "
   argument is presumably the delimiter used to split the line into array
   elements -- confirm against the NewsClip manual.  do_control() reads
   element 0 as the command and element 1 as its argument. */
header string array control : "control", " ";	/* parse control messages */

/* Not referenced by any procedure in this file; it is declared only so
   that cross-posted articles are eliminated. */
extern string array xref;		/* to eliminate crossposts */
					/* Or use an alternate crosspost
					   eliminator -- see below */

/* The effective distribution list scanned by check_dist(). */
extern newsgroup array Rdistribution;

/* Components of the path line; article() scans all but the last one. */
extern string array path;		/* the path line */

/* Builds the distfeed database of distributions that will be fed to the
   remote site.  Presumably invoked by NewsClip before any article is
   processed -- confirm against the NewsClip manual. */
procedure init()
{
	/* the argument 30 is presumably an initial size hint -- confirm */
	distfeed = fresh_database( 30 );
	/* alternatively, use read_database to keep this in a file */
	/*	distfeed = read_database( "distfile" ); */


	/* Note in the following cases that the distribution must be coded
	   or the article will not be sent.  The 'world' distribution is
	   there only for articles truly posted to that distribution.  It
	   need not be present for general articles.  In order to feed an
	   article, it must be posted to one of the distributions listed,
	   *and* be in the .newsrc file controlling the feeding.  If you
	   put "news.misc" in the .newsrc, it will not be fed in its
	   entirety unless "news" is a distribution listed below.

	   BUT: You can change the code as you like.  Take out the distribution
	   filter altogether and get the entire contents of the groups in
	   the .newsrc, for example.
	  */

	/* hard code the desired distributions  1st & 2nd level only */
	/* each ++ creates/bumps the entry so that a later 'in distfeed'
	   test in check_dist() succeeds for that name */
	distfeed["comp"]++;
	distfeed["rec"]++;
	distfeed["news"]++;
	distfeed["misc"]++;
	distfeed["soc"]++;
	distfeed["talk"]++;
	distfeed["sci"]++;
	distfeed["alt"]++;
	distfeed["biz"]++;
	/* distfeed["clari"]++; */	/* Only to paid ClariNet subscribers */
	distfeed["world"]++;
	distfeed["na"]++;		/* pick your favourite continent */
	/* distfeed["can"]++; */	/* pick your country */
	distfeed["usa"]++;
	distfeed["to"]++;		/* for the to.sitename group */
	distfeed["my state"]++;		/* placeholder: substitute your state's distribution */
	distfeed["my region"]++;	/* placeholder: substitute your regional distribution */
	/* add your favourites here below */
	distfeed["dummy"]++;		/* placeholder entry: replace or remove */
	/* if you feed local groups, you have to include them here as
	   distributions, too.  Or you can modify the code that checks for
	   proper distribution to pass all names in the .newsrc with no dot
	   in them.
	 */
}

/* Distribution gate: the article survives only if at least one entry of
   its distribution list belongs to a distribution we feed.  For every
   entry, the leftmost one and the leftmost two dot-separated components
   are looked up in the distfeed database; the first hit lets the article
   continue.  If no entry matches, the article is rejected.  You can alter
   this to reject articles that aren't entirely for your distribution, if
   you like.

   The Rdistribution variable is special.  It is the Distribution header
   if that is defined, or the Newsgroups header if it isn't -- ie. the
   true distribution list.  It is thus always defined.
 */

procedure check_dist()
{
	int n;
	int ndist;
	extern string left( string, int );

	ndist = count( Rdistribution );

	for( n = 0; n < ndist; n++ ) {
		/* top-level name, e.g. "comp" */
		if( left( Rdistribution[n], 1 ) in distfeed )
			return;
		/* two-level name, e.g. "comp.lang" */
		if( left( Rdistribution[n], 2 ) in distfeed )
			return;
	}

	/* no element belonged to a distribution we feed */
	reject;
}

/*
 * Here is an alternate distribution check:
 * It accepts all messages with no specific distributions, plus ones that
 * have a specific distribution matching the regular expression.
	extern newsgroup array distribution;

	if( distribution != nilarray &&
			distribution !has "^(world|na|usa|state|city|company)" )
		reject;
 */
			


/* Control-message hook.  When the parsed control header is a 'newgroup'
 * command that names a group, subscribe to that group.  The article
 * itself is not accepted or rejected here; it continues through the
 * normal accept/reject code in the caller.
 * This does not catch netwide groups created locally by you.
 */

procedure do_control()
{
	extern procedure subscribe( string );

	/* no control header on this article */
	if( control == nilarray )
		return;

	/* need both the command word and its argument */
	if( count(control) < 2 )
		return;

	/* look for newgroup control messages */
	if( control[0] == "newgroup" )
		subscribe( control[1] );
}

/* Per-article entry point, and the place to add your own filter code.
   Order of checks: distribution, control-message handling, feed-site
   loop check, custom filters, then accept. */

procedure article()
{
	extern newsgroup main_newsgroup;
	int hop;
	int nsites;

	/* reject_xref(); */		/* if Xrefs line not defined */

	/* drop anything outside the distributions the site receives */
	check_dist();

	/* control articles get the special subscribing code; this runs
	   before the feed-site check below */
	if( main_newsgroup == #control )
		do_control();

	/* Reject any article that has already passed through the feed
	   site.  The site name is hard coded here, but you could pass it
	   as a command line option to avoid having multiple programs for
	   similar sites.

	   Only the first N-1 path components are compared.  We don't just
	   say 'reject if "feedsite" in path;' because the last component
	   is not a site name, and a user whose userid matches the site
	   would be rejected.  In B news, a person with a userid of "uunet"
	   would have articles blocked from that site.
	 */
	nsites = count(path) - 1;
	for( hop = 0; hop < nsites; hop++ )
		reject if path[hop] == "feedsite";

	/* You might want to be clever here, and also reject articles that
	   have been to other feeds of the site you are feeding.  Chances
	   are, if an article's been there, your destination already has
	   it.  This makes really short loops more efficient, although some
	   risk is involved.
	 */

	/* now insert code here to filter out undesired articles */

	/* This rather extreme example rejects anything crossposted to
	   talk.bizarre, even if it was also posted to a group you otherwise
	   wanted to get! (ie. impossible with B news) */
	reject if is talk.bizarre;

	/* everything that got this far is fed */
	accept;
}

/* The procedure below, called at the start of "article", provides
 * an alternate means for rejection of cross-posted articles, even if
 * your site doesn't properly support the Xrefs line.
 *
 * It returns normally when the current group is the first subscribed
 * group on the Newsgroups line, and rejects the article otherwise.  If
 * none of the listed groups is subscribed it falls off the end and the
 * caller carries on.
 */

 /* DON'T USE THIS PROCEDURE AND DEFINE THE XREFS HEADER AT THE SAME TIME */

procedure reject_xref()
{
	int i;
	extern newsgroup main_newsgroup;
	/* the Newsgroups line; must be declared before it can be used,
	   like the other header variables in this file */
	extern newsgroup array newsgroups;
	extern int newsrc_group( newsgroup );


	/* Loop through the groups on the Newsgroups line.  Find the
	 * first one that is a subscribed group.  If that's the current
	 * group, get on with processing the article.  If it's another group,
	 * reject the article, as we will process it when we get to that
	 * first subscribed group.  ie. the article is processed in exactly
	 * one of the groups on the Newsgroups line -- the first one of
	 * those that we subscribe to.
	 */

	for( i = 0; i < count(newsgroups); i++ ) {
		if( newsrc_group( newsgroups[i] ) ) {
			/* braces make it explicit that the else belongs
			   to the inner test */
			if( newsgroups[i] == main_newsgroup )
				return;		/* do nothing, first valid grp*/
			else
				reject;		/* skip the article */
		}
	}
}

/*
	The B news "sys" file contains some selection options.  You should be
	able to code many of these easily in Newsclip, plus a bunch more.

	The 'L' option, which only sends local articles, can be implemented
	by accepting only articles where domain(Rsender) == my_domain, or
	articles where count(path) <= 2 (or some other number, if you want
	to broaden the definition of 'local').

	Use of Newsclip for such a feed, unfortunately, is less efficient than
	the sys file.  Such feeds rarely need filtering.

	Newsclip lets you do a lot more than this of course.
 */
