#!/usr/bin/perl
use strict;
$|++;

my $VERSION = '0.05';

#----------------------------------------------------------------------------

=head1 NAME

cpanreps-verify - Verify that the CPAN Testers Reports website pages are up to date.

=head1 SYNOPSIS

  perl cpanreps-verify --config=<file>

=head1 DESCRIPTION

Given the website directory and a local cpanstats database, this program will
verify that each page contains the latest updates. Outputs in a format that can
then be passed to cpanreps-update.

=cut

# -------------------------------------
# Library Modules

use Config::IniFiles;
use CPAN::Testers::Common::DBUtils;
use File::Find::Rule;
use File::Path;
use File::Slurp;
use Getopt::ArgvFile default=>1;
use Getopt::Long;
use IO::File;
use Parse::CPAN::Authors;


# -------------------------------------
# Variables

my (%options);

# -------------------------------------
# Program

##### INITIALISE #####

# parse the command line, load the configuration file and create the uploads
# database handle; everything is stored in the file-level %options hash
init_options();

# report problems with the distribution pages first, then the author pages;
# both subs print their findings to STDOUT
check_dists();
check_authors();


# -------------------------------------
# Subroutines

# check_dists()
#
# Verify the distribution pages held in "$options{directory}/show/".
#
# Takes no arguments. All findings are printed to STDOUT, one per line, in
# the form "dist:<name>: <file> [<reason>]".
#
# Three reports are produced, in this order:
#   1. for each <dist>.html found on disk: a missing <dist>.js file, a page
#      without the "Find A Tester" text, or a .js file that does not mention
#      the most recently released "<dist>-<version>" from the uploads table;
#   2. [MISSING HTML] for each well-formed distribution name in the uploads
#      table for which no page was found;
#   3. [OLD STATS] for each page, not already reported with a tester-link or
#      version error, that has no <link> element referencing "<dist>.rss".
sub check_dists {
    my (%distros,%errors);

    # every well-formed distribution name starts out flagged as 1 (= no page
    # seen yet); the flag is cleared once the matching .html file is found
    my @rows = $options{uploads}->get_query('array',"SELECT DISTINCT(dist) FROM uploads");
    for my $row (@rows) {
        next    unless($row->[0] =~ /^[A-Za-z0-9][A-Za-z0-9\-_]*$/);
        $distros{$row->[0]} = 1;
    }

    # list the pages once; both passes below walk the same sorted list
    my @files = File::Find::Rule->file()->name( '*.html' )->in( "$options{directory}/show/" );
    @files = sort @files;

    # check distribution pages
    for my $file (@files) {
        my ($dist) = $file =~ m!/([^/]+)\.html$!;
        next    unless($dist);
        next    if($dist =~ /[^-\w\.]/);    # illegal chars

        $distros{$dist} = 0;

        next    if($dist =~ /^index$/);     # tough luck!

        my $js = "$options{directory}/show/$dist.js";
        unless(-f $js) {
            print "dist:$dist: $js [MISSING JAVASCRIPT]\n";
            next;
        }

        # latest release of this distribution according to the uploads table
        my @vers = $options{uploads}->get_query('array',"SELECT version FROM uploads WHERE dist=? ORDER BY released desc LIMIT 1",$dist);
        next    unless(@vers);

        my $version = $vers[0]->[0];
        next    unless($version);
        next    if($version =~ /[^-\w\.]/);    # illegal chars

        my $content = read_file($file);

        if($content !~ m!Find A Tester!si) {
            print "dist:$dist: $file [MISSING TESTER LINK]\n";
            $errors{$dist} = 1;
            next;
        }

        # \Q..\E: names and versions contain '.', which must match literally
        # and not as "any character"
        $content = read_file($js);
        next    if($content =~ m!"\Q$dist-$version\E"!si);

        print "dist:$dist: $js [$version]\n";
        $errors{$dist} = 1;
    }

    # sorted so that the report is identical from one run to the next
    print "dist:$_: $options{directory}/show/$_.html [MISSING HTML]\n"
        for(sort grep {$distros{$_} == 1} keys %distros);

    # check stats pages
    for my $file (@files) {
        my ($dist) = $file =~ m!/([^/]+)\.html$!;
        next    unless($dist);
        next    if($errors{$dist});         # already reported above
        next    if($dist =~ /^index$/);     # tough luck!
        next    if($dist =~ /[^-\w\.]/);    # illegal chars

        my $content = read_file($file);

        if($content !~ m!Find A Tester!si) {
            print "dist:$dist: $file [MISSING TESTER LINK]\n";
            next;
        }

        # pages carrying a <link> to "<dist>.rss" are not reported
        next    if($content =~ m!<link.*href="\Q$dist\E\.rss"!si);

        print "dist:$dist: $file [OLD STATS]\n";
    }
}

# check_authors()
#
# Verify the author pages held in "$options{directory}/author/".
#
# Takes no arguments. All findings are printed to STDOUT, one per line, in
# the form "author:<id>: <file> [<reason>]".
#
# For each <author>.html found on disk this reports a missing <author>.js
# file, a page without the "Find A Tester" text, or the first distribution
# (in name order) whose latest version appears neither in the page heading
# nor, by name, in the .js file. Finally, [MISSING HTML] is reported for each
# PAUSE id listed in data/01mailrc.txt.gz for which no page was found.
sub check_authors {
    # every known author starts out flagged as 1 (= no page seen yet)
    my $p = Parse::CPAN::Authors->new("data/01mailrc.txt.gz");
    my %authors = map {$_->pauseid => 1} $p->authors;

    my @files = File::Find::Rule->file()->name( '*.html' )->in( "$options{directory}/author/" );
    for my $file (sort @files) {
        my ($author) = $file =~ m!/([^/]+)\.html$!;
        next    unless($author);
        next    if($author =~ /^index$/);     # tough luck!

        $authors{$author} = 0;

        my $js = "$options{directory}/author/$author.js";
        unless(-f $js) {
            print "author:$author: $js [MISSING JAVASCRIPT]\n";
            next;
        }

        # rows arrive newest first, so the first usable version seen for a
        # distribution is its latest; a defined, non-empty value is kept even
        # when it is "0", which a plain ||= would have thrown away
        my (%dists);
        my @rows = $options{uploads}->get_query('array',"SELECT dist,version FROM uploads WHERE author=? ORDER BY released desc",$author);
        for my $row (@rows) {
            my $name = $row->[0];
            next unless($name =~ /^[A-Za-z0-9][A-Za-z0-9\-_]*$/);
            next if(defined $dists{$name} && length $dists{$name});
            $dists{$name} = $row->[1];
        }

        my $content = read_file($file);
        if($content !~ m!Find A Tester!si) {
            print "author:$author: $file [MISSING TESTER LINK]\n";
            next;
        }

        my $jscontent = read_file($js);

        # sorted: only the first mismatch is reported (see 'last' below), so
        # a fixed order makes the report repeatable
        for my $dist (sort keys %dists) {
            my $version = $dists{$dist};

            # versions containing '+' have always been skipped here; the
            # guard is retained so that no additional lines are reported
            next    if($version =~ /[+]/);

            # \Q..\E makes the name and version match literally ('.' etc.)
            #<h2><a name="Games-LogicPuzzle" /><a href="../show/Games-LogicPuzzle.html">Games-LogicPuzzle</a> 0.20
            next    if($content =~ m!<h2><a name="\Q$dist\E" /><a\s+href="\.\./show/\Q$dist\E\.html">\Q$dist\E</a>\s+\Q$version\E!si);
            next    if($jscontent =~ m!"\Q$dist\E"!si);

            print "author:$author: $file [$dist-$version]\n";
            last;
        }
    }

    # sorted so that the report is identical from one run to the next
    print "author:$_: $options{directory}/author/$_.html [MISSING HTML]\n"
        for(sort grep {$authors{$_} == 1} keys %authors);
}

# init_options()
#
# Parse the command line and populate the file-level %options hash.
#
# Recognised options: --config|-c <file>, --help|-h and --version|-v.
# After parsing, the 'directory' value is read from the [MASTER] section of
# the configuration file and a CPAN::Testers::Common::DBUtils object, built
# from the [UPLOADS] section, is stored in $options{uploads}.
#
# Does not return on any failure: help() is called, which prints a message
# and exits the program.
sub init_options {
    GetOptions( \%options,
        'config|c=s',
        'help|h',
        'version|v'
    ) or help(1,"Invalid command line options");

    help(1) if($options{help});
    help(0) if($options{version});

    help(1,"Must specify the configuration file")               unless($options{config});
    help(1,"Configuration file [$options{config}] not found")   unless(-f $options{config});

    # load configuration; new() returns undef when the file cannot be parsed
    my $cfg = Config::IniFiles->new( -file => $options{config} );
    help(1,"Cannot load configuration file [$options{config}]") unless($cfg);
    $options{$_} = $cfg->val('MASTER',$_)  for(qw(directory));

    # validate entries
    help(1,"No directory found: $options{directory}")           unless(-d $options{directory});

    # configure upload DB
    my %opts = map {$_ => $cfg->val('UPLOADS',$_);} qw(driver database dbfile dbhost dbport dbuser dbpass);
    $options{uploads} = CPAN::Testers::Common::DBUtils->new(%opts);

    # the database name lives in %opts, it is never copied into %options
    my $dbname = defined $opts{database} ? $opts{database} : '';
    help(1,"Cannot configure '$dbname' database")               unless($options{uploads});
}

# help($full, $mess)
#
# Print an optional message, optional usage text and the program version to
# STDOUT, then exit the program. Never returns.
#
#   $full - when true, the usage text is printed
#   $mess - optional error message, printed first; when given the exit
#           status is 1, otherwise it is 0
sub help {
    my ($full,$mess) = @_;

    if($mess) {
        print "\n$mess\n\n";
    }

    # the options listed here are the ones declared in init_options()
    if($full) {
        print <<HERE;

Usage: $0 \\
         -c config [-h] [-v]

  -c config     configuration file (INI format)
  -h            this help screen
  -v            program version

HERE

    }

    print "$0 v$VERSION\n\n";

    # a message is only ever passed on an error path
    exit($mess ? 1 : 0);
}


__END__

=head1 BUGS, PATCHES & FIXES

There are no known bugs at the time of this release. However, if you spot a
bug or are experiencing difficulties that are not explained within the POD
documentation, please send bug reports and patches to the RT Queue (see below).

Fixes are dependent upon their severity and my availability. Should a fix not
be forthcoming, please feel free to (politely) remind me.

RT: http://rt.cpan.org/Public/Dist/Display.html?Name=CPAN-WWW-Testers

=head1 SEE ALSO

L<CPAN::WWW::Testers::Generator>
L<CPAN::Testers::WWW::Statistics>

F<http://www.cpantesters.org/>,
F<http://stats.cpantesters.org/>

=head1 AUTHOR

  Barbie <barbie@cpan.org>

=head1 COPYRIGHT AND LICENSE

  Copyright (C) 2008      Barbie <barbie@cpan.org>

  This module is free software; you can redistribute it and/or
  modify it under the same terms as Perl itself.

=cut
