#!/usr/bin/perl -s
use XML::DT;
use Data::Dumper;
use locale;

our ($latin1,$html,$show_att,$expand_att_id);

mkdtdskel (@ARGV);

sub mkdtdskel {
  my @files = @_;
  my %handler=(
    '-outputenc' => 'ISO-8859-1',
    '-default'   => sub{ 
          $c =~ s/,$//;
          push(@{$type{$q}}, (eval("[$c]") || "?$c"));
          $elel{$q}++;
          $root = $q unless ctxt(1);
          $ele{ctxt(1)}{$q} ++;
          for(keys(%v)){$att{$q}{$_}{tipo($v{$_})||$v{$_}} ++ } ;
          "'$q',";
        },
    '-pcdata'    => sub{ if ($c =~ /[^ \t\n]/){ $ele{ctxt(1)}{"#PCDATA"}=1;
"'#PCDATA'," } else {""}},
  );

  if  ($html) { $handler{'-html'} = 1;} 
  if($latin1) { $handler{'-inputenc'}='ISO-8859-1';}

  for $filename (@files){
  dt($filename,%handler); 
  }

  print "# $root " . localtime(time) ."\n";
  delete $elel{$root};

  for (keys %type){
     @tipo=();
     for my $lista (@{$type{$_}}){ push (@tipo, processa($lista)) }
     $resumofinal{$_}=resumele(processa2([@tipo])).resumeatt($att{$_});
  }
  print Dumper \%resumofinal;
#  print Dumper \%type;
}

sub resumeatt{
 my $a=shift;
 my $r="";
 for (keys(%{$a})) {
   if($expand_att_id){ $r .= "\n * $_:(".join(",",keys %{$a->{$_}}) . ")" }
   else              { $r .= " * $_" }
 }
 $r
}

sub processa{
 my $a=shift;
 if( @$a == 0 ) {            +{ _isa =>"empty"} }
 elsif( @$a == 1 && $a->[0] eq '#PCDATA') { 
                             +{ _isa =>"text",$a->[0] =>[1,1]} }
 elsif( @$a == 1 )       {   +{ _isa =>"singleton",$a->[0] =>[1,1]} }
 else{ my %f = (); 
       for (@$a){$f{$_}[0]++,$f{$_}[1]++} 
       my $dif = scalar keys %f;
       if($dif == 1) {       +{ _isa =>"seq", %f} ; }
       elsif($dif == @$a) {  +{ _isa =>"tup", %f}; }
       elsif($f{'#PCDATA'}){ +{ _isa =>"mixed", %f }; }
       else {                +{ _isa =>"mtup", %f } }
 }
}

sub processa2{
 my $a=shift;
 if   ( @$a == 0 ) { "?" }
 elsif( @$a == 1 ) { $a->[0] }
 else{ 
   my %f = (); 
   my %maybe = ();
   for (@$a){$f{suns2str($_)}++;
             $maybe{$_->{_isa}}++ } 
   my $dif = scalar keys %f;
   if   ($dif == 1)    { $a->[0]; }
   elsif($maybe{mixed} || $maybe{text}){ +{%{join_suns($a)}, _isa=> "mixed"} }
       else                            { +{%{join_suns($a)}, _isa=> "mtup"} }
 }
}

sub resumele{
 my $a=shift;
 my $i = $a->{_isa};
 delete $a->{_isa};
 if    ($i eq "text")      {"text"}
 elsif ($i eq "empty")     {"empty"}
 elsif ($i eq "singleton") {join(", ", keys %{$a}) }
 elsif ($i eq "mixed")     {delete $a->{'#PCDATA'};
                            "mixed(".join(", ", keys %{$a}).")"}
 elsif ($i eq "tup")       {"tup(".join(", ", keys %{$a}).")"}
 elsif ($i eq "seq")       {"seq(".join(", ", keys %{$a}).")"}
 else { my $r= "mtup(";
        for(sort keys %$a){ 
          $r .= "$_, "  if ( $a->{$_}[0] == 1 && $a->{$_}[1] == 1 );
          $r .= "$_?, " if ( $a->{$_}[0] == 0 && $a->{$_}[1] == 1 );
          $r .= "$_*, " if ( $a->{$_}[0] == 0 && $a->{$_}[1] > 1 );
          $r .= "$_+, " if ( $a->{$_}[0] > 0  && $a->{$_}[1] > 1 );
          $r .= Dumper($a) if($r =~ /\($/ );
        }
        $r =~ s/, $//;
        $r.=")";
      }
}

sub join_suns{
 my $a = shift;
 my %final = ( map { ($_ => [$a->[0]{$_}, $a->[0]{$_}])}   keys %{$a->[0]});
 for (@$a){
   for my $k (keys %{$_}){ 
          next if $k =~ /_isa/;
          $final{$k}[0]=0  unless  $final{$k}[0];
          $final{$k}[1]=$_->{$k}  if $_->{$k} > $final{$k}[1];
          $final{$k}[0]=$_->{$k}  if $_->{$k} < $final{$k}[0];
   }
 }
 \%final
}

sub suns2str{
 my $a = shift;
 join(' ',($a->{_isa},map { $_ . ($a->{$_}==1 ? "" : "+") } sort keys %$a));
}

sub tipo{
 my $a=shift;
 for ($a){
     if(/^\s*\d+\s*$/)                      {return "_int" }
  elsif(/^\s*\d+\.\d+\s*$/)                 {return "_real" }
  elsif(!$expand_att_id && /^\w+$/)         {return "_id" }
  elsif(m!^(http|ftp|file)://\w[\w_:/.-]+$!){return "_url" }
  elsif(m!^[\w.-]+\@\w[\w_:/.-]+$!)         {return "_email" }
  else                                      {return undef } 
 }
}

__END__

=head1 NAME

mkdtskel - camila generator using XML::DT

=head1 SYNOPSIS

  mkcamilaskel <xmlfile>

=head1 DESCRIPTION

This command tries to infer camila for a specific XML file;

=head1 SEE ALSO

XML::DT(1), mkdtskel(1), mkdtdskel and perl(1)

=cut
