#!/usr/bin/perl
use strict;
use warnings;

# Disable output buffering on the currently selected handle (STDOUT).
$| = 1;

my $VERSION = '0.02';

#----------------------------------------------------------------------------

=head1 NAME

articles-verify - script to verify the contents of the articles database.

=head1 SYNOPSIS

  perl articles-verify --config=<file>          \
    [--file=<file> | --start=<id> [--end=<id>] ]  \
    [--update | --verify | --check]

=head1 DESCRIPTION

Reads the articles database and verifies the contents.

=head1 OPTIONS

=over 4

=item --config

Configuration file containing the database access details.

=item --file

File containing one NNTP ID per line, where each line can be CSV formatted, 
provided the ID is the first field.

=item --start --end

Start and end NNTP ids. If the end option is missing, the range runs up to the
highest id currently stored in the articles database.

=item --update

For the given list of ids, update the articles database by retrieving renewed
copies from the NNTP server.

=item --verify

For the given list of ids, verify the articles in the database.

=item --check

For the given list of ids, find missing or invalid entries in the database.

=back

=cut

# -------------------------------------
# Library Modules

use Config::IniFiles;
use CPAN::Testers::Common::DBUtils;
use Email::Simple;
use Getopt::ArgvFile default=>1;
use Getopt::Long;
use IO::File;
use Net::NNTP;

# -------------------------------------
# Variables

# NNTP connection handle, plus the three values returned when the
# newsgroup is selected (both assigned in update_articles).
my $nntp;
my ($num, $first, $last);

# %options is filled from the command line and $dbi holds the database
# handle (both set in init_options).
my %log;
my %options;
my $dbi;

my $PROGRESS = 0;

use constant NNTPSTART => 1;

# -------------------------------------
# Program

##### INITIALISE #####

init_options();

##### MAIN #####

# Exactly one action is run; the first of update, verify, check that was
# requested wins. Without any action there is nothing to do, so say so
# rather than exiting silently.
if($options{update})    { update_articles() }
elsif($options{verify}) { verify_articles() }
elsif($options{check})  { check_articles()  }
else                    { help(1,"Must specify one of --update, --verify or --check") }

# -------------------------------------
# Subroutines

=head1 FUNCTIONS

=over 4

=item check_articles

Report on the database entries which are either missing, or have reported bad
processing.

=cut

# Print "<id>,missing" for every requested id that has no row in the
# articles table, and "<id>,invalid" for every id whose stored article
# does not contain an Xref header naming that id in perl.cpan.testers.
# Ids with a valid entry produce no output.
sub check_articles {
    for my $id (get_list()) {
        # Bind the id as a placeholder instead of interpolating it into the
        # SQL text, as the UPDATE/INSERT statements in this file already do.
        my @rows = $dbi->get_query('array',"SELECT * FROM articles WHERE id=?",$id);

        unless(@rows) {
            print "$id,missing\n";
            next;
        }

        # Column index 1 is expected to be the article text (the INSERT in
        # update_articles stores id first, article second).
        my ($xref) = ($rows[0]->[1] =~ /Xref:.*perl\.cpan\.testers:($id)\b/is);
        print "$id,invalid\n"   unless($xref);
    }
}

=item verify_articles

Report on the requested database entries as to whether they are found or 
missing.

=cut

# Print one line per requested id: "<id>,found - <subject>" when a row
# exists in the articles table, "<id>,missing" otherwise.
sub verify_articles {
    for my $id (get_list()) {
        # Bind the id as a placeholder instead of interpolating it into the
        # SQL text, as the UPDATE/INSERT statements in this file already do.
        my @rows = $dbi->get_query('array',"SELECT * FROM articles WHERE id=?",$id);

        if(@rows) {
            # Column index 1 is expected to be the article text.
            my $subject = get_subject($rows[0]->[1]);

            # An article without a Subject header is still reported as found.
            $subject = ''   unless(defined $subject);
            print "$id,found - $subject\n";
        } else {
            print "$id,missing\n";
        }
    }
}

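=item update_articles

For the given list of ids, retrieve each article from the NNTP server and
store it in the articles database, replacing any copy already held.

=cut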

# Fetch each requested article from nntp.perl.org (group perl.cpan.testers)
# and store it in the articles table: existing rows are updated, new ids are
# inserted. Prints "<id>,updated" for every article stored; ids for which
# no article could be retrieved are skipped without output.
#
# Dies if the server cannot be reached or the group cannot be selected.
sub update_articles {
    $nntp = Net::NNTP->new("nntp.perl.org")
        || die "Cannot connect to nntp.perl.org";

    # group() returns an empty list when the group cannot be selected; no
    # article could be retrieved after that, so stop here instead of
    # silently skipping every id.
    ($num, $first, $last) = $nntp->group("perl.cpan.testers");
    unless(defined $num) {
        $nntp->quit;
        die "Cannot select group perl.cpan.testers on nntp.perl.org\n";
    }

    my @list = get_list();
    for my $id (@list) {
        my $article = get_article($id);
        next    unless($article);

        # Placeholders throughout, so the id is never pasted into SQL text.
        my @rows = $dbi->get_query('array',"SELECT id FROM articles WHERE id=?",$id);
        if(@rows) {
            $dbi->do_query("UPDATE articles SET article=? WHERE id=?",$article,$id);
        } else {
            $dbi->do_query("INSERT INTO articles VALUES (?,?)",$id,$article);
        }
        print "$id,updated\n";
    }

    # Close the NNTP session cleanly once all ids have been processed.
    $nntp->quit;
}


=item get_subject

Extract the subject from the given article.

=cut

# Return the Subject header of the given raw article text, as parsed by
# Email::Simple.
sub get_subject {
    my ($text) = @_;

    # let Email::Simple split the headers from the body
    return Email::Simple->new($text)->header("Subject");
}

=item get_article

Access the NNTP server to retrieve the article with the given id. Returns the
complete article text as a single string, or an empty string if the article
could not be retrieved.

=cut

# Fetch article $id over the NNTP connection held in $nntp and return it
# as one string. An empty string is returned when nothing usable came back.
sub get_article {
    my ($id) = @_;

    # A false result from article() is replaced by an empty list of lines.
    my $lines = $nntp->article($id) || [];
    my $text  = join "", @$lines;

    return $text ? $text : "";
}

=item get_list

Returns the list of NNTP ids to process: either the range given by the start
and end options, or the ids read from the named file.

=cut

# Build the list of NNTP ids to process from the command line options.
#
# With --start, the list is the range from start to --end, or to the value
# of get_lastid() when no end was given. Otherwise, with --file, the
# leading number of each line of the file is used; lines that do not begin
# with a number are ignored. A summary is written to STDERR.
#
# Dies if neither option is given, or if the file is missing or unreadable.
sub get_list {
    my @list;

    if($options{start}) {
        my $end = $options{end} || get_lastid();
        @list = ($options{start} .. $end);

        # Report the end actually used; it may have come from the database
        # rather than from the command line.
        print STDERR "START: $options{start}\nEND:   $end\n";

    } elsif($options{file}) {
        die "file [$options{file}] not found"    unless(-f $options{file});

        my $fh = IO::File->new($options{file})   or die "Cannot open file [$options{file}]: $!";
        while(my $line = <$fh>) {
            # Only the leading digits matter, so CSV lines work as long as
            # the id is the first field. Blank lines, headers and the like
            # must not end up in the list as undefined ids.
            my ($id) = ($line =~ m/^(\d+)/);
            next    unless(defined $id);
            push @list, $id;
        }
        $fh->close;
        printf STDERR "FILE: %d ids found\n", scalar(@list);

    } else {
        die "No start/end or file list specified\n"
    }

    return @list;
}

=item get_lastid

Returns the last NNTP id recorded in the database.

=cut

# Return the highest id currently stored in the articles table; the result
# is undefined when the query yields no value.
sub get_lastid {
    my $sql    = "SELECT MAX(id) FROM articles";
    my @result = $dbi->get_query('array',$sql);

    # first column of the first row
    return $result[0][0];
}

=item init_options

Determine command line options and initialise any defaults.

=cut

# Parse the command line into %options, load the configuration file and
# create the database handle in $dbi.
#
# Shows the help/version text and exits when requested, when the command
# line is invalid, or when the configuration file is not given or not
# found. Dies if the configuration cannot be loaded, has no LITEARTS
# section, or the database handle cannot be created.
sub init_options {
    # GetOptions returns false after reporting unknown or malformed
    # options; stop there rather than running with a partial option set.
    GetOptions( \%options,
        'config=s',
        'file=s',
        'start=i',
        'end=i',
        'update',
        'verify',
        'check',
        'help|h',
        'version|v'
    ) or help(1,"Invalid command line options");

    help(1) if($options{help});
    help(0) if($options{version});

    help(1,"Must specify the configuration file")              unless($options{config});
    help(1,"Configuration file [$options{config}] not found")   unless(-f $options{config});

    # load configuration; new() returns undef when the file cannot be
    # parsed, leaving the reasons in @Config::IniFiles::errors
    my $cfg = Config::IniFiles->new( -file => $options{config} );
    die "Cannot load configuration file [$options{config}]: "
        . join('; ', @Config::IniFiles::errors) . "\n"          unless($cfg);

    # configure databases
    my $db = 'LITEARTS';
    die "No configuration for $db database\n"   unless($cfg->SectionExists($db));
    my %opts = map {$_ => $cfg->val($db,$_);} qw(driver database dbfile dbhost dbport dbuser dbpass);
    $dbi = CPAN::Testers::Common::DBUtils->new(%opts);
    die "Cannot configure $db database\n" unless($dbi);
}

# Print an optional message, the usage text (when $full is true) and the
# program version, then exit.
#
#   $full - true to include the full usage text
#   $mess - optional message, printed first
#
# The exit status is 1 when a message is given, since every caller that
# passes one is reporting a command line error, and 0 otherwise (plain
# --help / --version requests).
sub help {
    my ($full,$mess) = @_;

    print "\n$mess\n\n" if($mess);

    if($full) {
        print <<HERE;

Usage: $0 \\
     --config=<file>					\\
     [ ( --start=n [--end=n] | --file=<file> ) ]	\\
     [ --update | --verify | --check ]			\\
     [-h] [-v]

  --config=<file>   - configuration file

  --start           - verify from id
  --end             - verify to id
  --file=<file>     - verify these ids (1 per line)
  
  --update	    - update articles
  --verify	    - verify articles
  --check	    - check articles

  -h                - this help screen
  -v                - program version

HERE

    }

    print "$0 v$VERSION\n";
    exit($mess ? 1 : 0);
}

__END__

=back

=head1 BUGS, PATCHES & FIXES

There are no known bugs at the time of this release. However, if you spot a
bug or are experiencing difficulties that are not explained within the POD
documentation, please send bug reports and patches to the RT Queue (see below).

Fixes are dependent upon their severity and my availability. Should a fix not
be forthcoming, please feel free to (politely) remind me.

RT Queue -
http://rt.cpan.org/Public/Dist/Display.html?Name=CPAN-Testers-Data-Generator

=head1 SEE ALSO

L<CPAN::WWW::Testers>,
L<CPAN::Testers::WWW::Statistics>

F<http://www.cpantesters.org/>,
F<http://stats.cpantesters.org/>,
F<http://wiki.cpantesters.org/>

=head1 AUTHOR

  Barbie, <barbie@cpan.org>
  for Miss Barbell Productions <http://www.missbarbell.co.uk>.

=head1 COPYRIGHT AND LICENSE

  Copyright (C) 2005-2008 Barbie for Miss Barbell Productions.

  This module is free software; you can redistribute it and/or
  modify it under the same terms as Perl itself.

=cut

