#!/usr/bin/perl

# Index and preparse the contents of a minicpan checkout

use 5.008;
use strict;
use warnings;
use File::Find::Rule       ();
use File::Find::Rule::VCS  ();
use File::Find::Rule::Perl ();
use PPI::Cache             ();
use Perl::Metrics2         -DEBUG;
use CPAN::Mini::Visit      ();

our $VERSION = '0.01';

# Check params
unless ( $ARGV[0] and -d $ARGV[0] ) {
	die("Missing or invalid minicpan directory");
}
unless ( $ARGV[1] and -d $ARGV[1] ) {
	die("Missing or invalid PPI::Cache directory");
}

my $flush = !! ( $ARGV[2] and $ARGV[2] eq 'flush' );

# Initialise the metrics instance
my $metrics = Perl::Metrics2->new(
	cache => $ARGV[1],
	study => $flush ? 0 : 1,
);

# Drop indexes for speed
Perl::Metrics2->do('DROP INDEX IF EXISTS cpan_file__dist');
Perl::Metrics2->do('DROP INDEX IF EXISTS cpan_file__file');
Perl::Metrics2->do('DROP INDEX IF EXISTS cpan_file__md5' );

# Flush all entries if we get a flush option
if ( $flush ) {
	print STDERR "# Flushing all cpan_file entries...\n";
	Perl::Metrics2->do('DELETE FROM cpan_file');
}

# Identify the distributions we have already seen
my $dists = Perl::Metrics2->selectcol_arrayref(
	'SELECT DISTINCT dist FROM cpan_file'
);
my %index = ();
foreach my $dist ( @$dists ) {
	my $one = substr($dist, 0, 1);
	my $two = substr($dist, 0, 2);
	$index{"$one/$two/$dist"} = 1;
}
my $ignore = [ sub { $index{$_[0]} }, qr/PDF-API/ ];

Perl::Metrics2->begin;
CPAN::Mini::Visit->new(
	minicpan => $ARGV[0],
	acme     => 1,
	ignore   => $ignore,
	callback => sub {
		my $the = shift;
		print STDERR "# $the->{counter} - $the->{dist}\n";
		my @files = File::Find::Rule->ignore_svn->perl_file->relative->in($the->{tempdir});
		foreach my $file ( @files ) {
			print "# $file\n";
			my $path = File::Spec->catfile($the->{tempdir}, $file);
			my $md5  = PPI::Util::md5hex_file($file);
			Perl::Metrics2::CpanFile->create(
				dist => $the->{dist},
				file => $path,
				md5  => $md5,
			);
		};
		return 1 if $the->{counter} % 100;
		Perl::Metrics2->commit_begin;
	},
)->run;
Perl::Metrics2->commit;

# Restore indexes
Perl::Metrics2->do('CREATE INDEX IF NOT EXISTS cpan_file__dist on cpan_file ( dist )');
Perl::Metrics2->do('CREATE INDEX IF NOT EXISTS cpan_file__file on cpan_file ( file )');
Perl::Metrics2->do('CREATE INDEX IF NOT EXISTS cpan_file__md5  on cpan_file ( md5  )');
