#!/usr/bin/env perl

use strict;
use warnings;

use Pinto;
use Pinto::Initializer;

use File::Find;
use File::Temp;
use Path::Class;
use List::Util qw(sum);
use Getopt::Long::Descriptive;

#-----------------------------------------------------------------------------

# Parse the command line.  Batch numbers are 1-based (the default starting
# batch is 1) and a batch count of 0 places no limit on the number of batches.
my ($opt, $usage) = describe_options(
	"$0 %o TARGETS",
    [ 'root|r=s',          "Root of repository",                             ],
    [ 'batch-size|s=i',    "Distributions per batch",    { default => 100 } ],
    [ 'batch-offset|o=i',  "Batch number to start at",   { default => 1   } ],
    [ 'batch-count|n=i',   "Number of batches to run",   { default => 0   } ],
);

my $batch_size   = $opt->batch_size;
my $batch_offset = $opt->batch_offset;
my $batch_count  = $opt->batch_count;

# A batch size below 1 can never drain the list of distributions, so the
# loading loop would never terminate.
die "--batch-size must be a positive integer\n" if $batch_size < 1;

# Without --root the repository lives in a temporary directory.  The
# File::Temp::Dir object is kept in $root so the directory survives until the
# script exits; it is stringified wherever a plain path is required.
my $root_is_temp = !$opt->root;
my $root  = $opt->root || File::Temp->newdir;
my $cpan  = shift or die 'Must specify path to CPAN directory';

#-----------------------------------------------------------------------------

# Create the repository when its root does not exist yet.  File::Temp->newdir
# has already created the temporary directory, so an existence test alone
# would skip initialization for it; a temporary root is always initialized.
# NOTE(review): assumes the initializer accepts an existing, empty directory
# -- confirm against the installed Pinto version.
Pinto::Initializer->new(root => "$root")->init(sources => "file://$cpan")
    if $root_is_temp or not -e $root;

#-----------------------------------------------------------------------------
printf "Searching for distributions beneath %s\n", $cpan;

# Collect every archive file beneath <cpan>/authors/id.  Because no_chdir is
# set, $_ holds the full path of each file, so the /perl/ and /BadExample/
# exclusions are applied to the whole path and not only to the file name.
# NOTE(review): a CPAN directory whose own path contains "perl" would exclude
# everything -- confirm whether the filter should look at the file name only.
my @dists;
my $cb = sub {push @dists, $_ if /[.](gz|tgz|zip|bz2)$/i and not /perl/ and not /BadExample/};
File::Find::find( {no_chdir => 1, wanted => $cb}, dir($cpan)->subdir( qw(authors id) ));

# Order the archives by modification time, oldest first, calling stat only
# once per file.
@dists = map {$_->[0]} sort {$a->[1] <=> $b->[1]} map { [$_ => (stat $_)[9]] } @dists;

# Batches are numbered from 1, so starting at batch N means skipping the
# first N - 1 batches.
if ($batch_offset > 1) {
	splice @dists, 0, ($batch_offset - 1) * $batch_size;
}

#-----------------------------------------------------------------------------
printf "Loading %i distributions into repository at %s\n", scalar @dists, $root;

# $n is the batch number shown in progress messages (it continues from the
# requested starting batch); $batches_done counts the batches completed by
# this run and is what the --batch-count limit is compared against.
my $n = $batch_offset || 1;
my $batches_done = 0;

while (@dists) {

	# Take the next batch off the front of the list and reduce each path to
	# the part that follows authors/id/<x>/<xx>/.
	my @batch = splice @dists, 0, $batch_size;
	s|^.*/authors/id/./../|| for @batch;

	# Pull the whole batch through a fresh Pinto instance and report the
	# elapsed wall-clock time in seconds.
	my $start = time;
	my $pinto = Pinto->new(root => "$root");
	$pinto->run(Pull => (targets => \@batch, no_recurse => 1, no_fail => 1, message => ''));
	printf "Batch %i loaded in %i seconds\n", $n, time - $start;

	# Run database maintenance whenever the batch number is a multiple of 10.
	if ($n % 10 == 0) {
		print "Vacuuming database\n";
		$pinto->repo->db->schema->storage->dbh->do('VACUUM;');
		$pinto->repo->db->schema->storage->dbh->do('ANALYZE;');
	}

	$n++;
	$batches_done++;

	# Stop once the requested number of batches has run; a count of 0 means
	# keep going until no distributions remain.
	last if $batch_count and $batches_done >= $batch_count;
}
