#!/usr/bin/env perl

# This scrit is meant to guess gentooisms by looking into the portage tree.
# A really good one would use the CPANPLUS API to check if the dist name
# candidates are really on CPAN.

use strict;
use warnings;

use Fatal;
use Cwd qw/cwd/;
use List::Util qw/max/;
use File::Copy qw/copy/;

use constant PORTAGE => '/usr/portage';
use constant TARGET  => 'lib/CPANPLUS/Dist/Gentoo/Maps.pm';
use constant BACKUP  => TARGET . '.bak';

my %gentooism;

my %where = (
 'Audio-CD-disc-cover' => 1,
 'aww'                 => 0,
 'frontier-rpc'        => 1,
 'gimp-perl'           => 1,
 'gnome2-wnck'         => 1,
 'HTML-Object'         => 0,
 'JKFlow'              => 0,
 'PDF-Create'          => 0,
);

my $cwd = cwd();

for my $category (qw/perl-core dev-perl/) {
 my $dir = PORTAGE . '/' . $category;
 chdir $dir;
 for my $name (<*>) {
  next unless -d $name;
  my $eb = (sort glob "$dir/$name/$name-*")[-1];
  open my $fh, '<', $eb;
  my ($pn, $on_cpan);
  $on_cpan = $where{$name} if exists $where{$name};
  while (<$fh>) {
   $on_cpan = 1 if  not defined $on_cpan
                and /(?:MODULE_AUTHOR|SRC_URI=.*?(?i:cpan))/;
   if (not defined $pn and /_PN?=(.*)/) {
    $pn = $1;
    if ($pn =~ /^\s*["']?\s*\$\{PN?\}/) {
     undef $pn;
     next;
    }
    $pn =~ s!\$[{(][^/]*?[})]!!g;
    $pn =~ s!\$\{P?V.*?\}!!g;
    $pn =~ s/^\s*["']?\s*-*\s*//;
    $pn =~ s/\s*-*\s*["']?\s*$//;
    $pn =~ s/-\d+\..*//;
    if ($pn =~ m!\$\{PN?(/.*?/(?:.*/?)?)\}!) {
     my $s = $1;
     $s .= '/' if $s =~ tr!/!! <= 2;
     eval "(\$pn = \$name) =~ s$s";
    }
   }
  }
  if ($pn and $pn ne $name) {
   if ($on_cpan) {
    $gentooism{$pn} = $name;
   } elsif (not defined $on_cpan) {
    print STDERR "'$pn' => '$name' may not be on CPAN\n";
   }
  }
 }
}

chdir $cwd;

copy TARGET, BACKUP or die "copy failed: $!";

open my $src, '<', BACKUP;
open my $dst, '>', TARGET;

my $max = max map length, keys %gentooism;

SRC: while (<$src>) {
 print $dst $_;
 if (/^__DATA__$/) {
  printf $dst "%s%s %s\n", $_, (' ' x ($max - length)), $gentooism{$_}
                                                       for sort keys %gentooism;
  last SRC;
 }
}

print STDERR +(keys %gentooism) . " gentooisms found\n";

