#!/usr/bin/perl -w
#TODO: when losing BECAUSE child thru merge, become {SELF}BECAUSE
use strict;

# $Id: makeppgraph,v 1.7 2007/01/05 21:39:44 pfeiffer Exp $

our $VERSION = '@VERSION@';
use Config;
use POSIX ();

our $datadir;
BEGIN {
#@@setdatadir
#
# Find the location of our data directory that contains the auxiliary files.
# This is normally built into the program by install.pl, but if makepp hasn't
# been installed, then we look in the directory we were run from.
#
  $datadir = $0;		# Assume it's running from the same place that
				# we're running from.
  unless ($datadir =~ s@/[^/]+$@@) { # No path specified?
				# See if we can find ourselves in the path.
    foreach (split(/:/, $ENV{'PATH'}), '.') {
				# Add '.' to the path in case the user is
				# running it with "perl makeppgraph" even if
				# . is not in his path.
      if (-f "$_/FileInfo.pm") { # Found something we need?
	$datadir = $_;
	last;
      }
    }
  }
  $datadir or die "makepp: can't find library files\n";

  $datadir = eval "use Cwd; cwd . '/$datadir'"
    if $datadir =~ /^\./;	# Make it absolute, if it's a relative path.
#@@
  unshift @INC, $datadir;
}

use Utils;
use Glob ();
use FileInfo qw(absolute_filename file_info $CWD_INFO);
use FileInfo_makepp;
use TextSubs ();

sub eval_or_die($) {
  my $result = eval $_[0];
  &maybe_die;
  $result;
}

BEGIN {
  *DEPEND = \&TextSubs::CONST1;
  *INCL = \&TextSubs::CONST2;
  *PHONY = \&TextSubs::CONST2;
  *BECAUSE = \&TextSubs::CONST4;
}
sub SELF() { '' }		# Impossible node name used for own attributes.

my $home = absolute_filename FileInfo::dereference file_info $ENV{HOME} || (getpwuid $<)[7];
$datadir = absolute_filename FileInfo::dereference file_info $datadir;


package Rewrite;

# Reduce any file (execept dirs) to the containing directory.
sub dir() { defined and $_ ne '' and -d || s!/[^/]+$!! || s!^(\|\w+\|).*!$1! }

# Replace your home directory with '~'
sub home() { defined and s/^$home/~/ }

# Replace makepp dir with abbreviation |m|.
sub makepp() { defined and s!^$::datadir(?:/|$|(?=:))!|m|! }

# Replace basename including dir or optionally only part of dir with '*'
sub suf(;$) {
  no warnings 'uninitialized';
  if( !defined $_[0] ) {
    s!.+ (\.[^/.]+) $!*$1!x;
  } elsif( !$_[0] ) {
    s!^ ([/~|]?) .+ (\.[^/.]+) $!$1*$2!x;
  } elsif( $_[0] > 0 ) {
    my $n_1 = $_[0] - 1;
    s!^ (\|\w+\| | [/~]?[^/]*/ )? ((?:[^/]+/){0,$n_1}) .+ (\.[^/.]+) $!$1$2*$3!x;
  } else {
    my $n_1 = -1 - $_[0];
    s! (?: ^\|\w+\| | ^/(?:[^/]+/)? | ^~[^/]*/ | [^|/]+/ )? (?:[^/]+/){0,$n_1} [^/]+ (\.[^/.]+) $!*$1!x;
  }
}

# Replace common system dirs with abbreviation.
sub usr() {
  no warnings 'uninitialized';
  s!^/(?:(?=(.))(?:opt|usr)/(?:(?=(.))(?:local|X11R?[67]?)/)?)?(?=(.))(?:bin|etc|include|lib|share)(?:/|$)!|$1$2$3|!;
}



sub merge($$$$) {
  return if !/([^\/]+)$_[2]$/;
  my $basename = $1;
  for( keys %{$_[0]} ) {
    if( /(?:.*\/)?\Q$basename\E$_[3]$/ ) {
      delete $_[0]{$_};
      return ["$_$_[1]", $_];
    }
  }
}

# .o in any dir depending on a C or C++ source
sub c2o($) { merge $_[0], '>o', qr/\.o/, qr/\.(?:c(|[xp+])\1|cc|CC?)/ }

# basename or .exe in any dir depending on same .o file
sub exe($) { merge $_[0], '*', qr/(?:\.exe)?/, qr/\.o/ }

# Same name in different directories, like headers published to a central include dir.
sub x2($) { merge $_[0], '*2', '', '' }



package main;



# Styles which can be overridden.
our %head = ( dot => <<EOS, udg => "[\n" );
// Generated by makeppgraph
digraph a {
rankdir=LR
node [shape=box style=filled fillcolor=white]
edge [dir=back]
EOS


our %include =
 (dot => 'style=dotted',
  udg => 'a("EDGEPATTERN","dotted")');

our %because =
 (dot => 'style=bold color=red',
  udg => 'a("EDGEPATTERN","thick"),a("EDGECOLOR","#ff0000")');

our %because_self =
 (dot => 'style="filled,bold" color=red',
  udg => 'a("BORDER","double")');

our %phony =
 (dot => 'shape=ellipse',
  udg => 'a("_GO","ellipse")');

our @file_attr =
 (
  qr/\.(?:c(|[xp+])\1|cc|CC?)(?:>o)?$/ =>
    {dot => 'fillcolor="#f8a808"',
     udg => 'a("COLOR","#f8a808")'},
  qr/\.(?:h(|[xp+])\1|hh|HH?)(?:\*2)?$/ =>
    {dot => 'fillcolor="#f8e800"',
     udg => 'a("COLOR","#f8e800")'},
  qr/\.(?:[ep]c)$/ =>
    {dot => 'fillcolor="#98e800"',
     udg => 'a("COLOR","#98e800")'},
  qr/\.(?:[ao]|so(?:\.[\d.]+)?)$/ => # objects and libs
    {dot => 'fillcolor="#e8e8e8"',
     udg => 'a("COLOR","#e8e8e8")'},
  qr/(?:\.(?:p[ml]c?|mk|makepp)|[Mm]akep*file)(?:\.in)?$/ =>
    {dot => 'fillcolor="#0090e0"', # makepp color from css, which is supposed to come from the camelbook
     udg => 'a("COLOR","#0090e0")'}
 );

our %foot = ( dot => '}', udg => ']' );


my $type = 'udg';
my( $because, $dependencies, $includes );
my( $down, $up, $rename, $merge );
my( @logfiles, $outfile );

{
  my $tmp;
  TextSubs::getopts
    ['b', qr/because|build[-_]?reasons?/, \$because],
    ['D', qr/dependenc(?:ies|y)/, \$dependencies],
    ['d', qr/down(?:wards?)/, \$down],
    ['g', qr/graphviz|dot/, \$type, 0, 'dot'],
    ['I', qr/include(?:[-_]?dir)?/, \$tmp, 1, sub { unshift @INC, $tmp }],
    ['i', qr/includes/, \$includes],
    ['l', qr/log(?:[-_]?file)?/, \$tmp, 1, sub { push @logfiles, $tmp }],
    [qw(M module), \$tmp, 1, sub { eval_or_die "use $tmp" }],
    [qw(m merge), \$merge, 1],
    [qw(o output), \$outfile, 1],
    [qw(r rename), \$rename, 1],
    [0, 'version', undef, 0, \&FileInfo::version],
    ['u', qr/up(?:wards?)/, \$up];
}

$dependencies = 1 if !$includes;
my $build_re = $because ?
  qr/(?!NOT)(?:PHON(Y)|(CHANGED|MARK_NEW|OLD))?/ :
  qr/PHON(Y)/;

for( $rename, $merge ) {
  $_ or next;
  $_ = eval_or_die "package Rewrite; sub { $_ }";
}
$rename ||= \&Rewrite::cwd;

find_logfiles @logfiles;
for( $outfile ) {
  $_ = $logfiles[0] if !defined;
  last if $_ eq '-';
  s!^\.makepp/+!! || s!/+\.makepp/+!/!;
  s!(?:\.\w+)?$!.$type!;
  open STDOUT, '>', $_;
}



# Digest the contents of the log file.
my %graph;
for( @logfiles ) {
  open my $log, $_ or die "$::progname: can't open `$_'--$!\n";

  <$log>;			# 1st line irrelevant here

  my( %dir_name, %file_name );
  while( <$log> ) {
    chop;

    s/^[\02\03]//s;		# Graphs don't care about indentation.

    if( /\01/ ) {		# A key/finfos line?
      chop( $_ .= <$log> ) while !/\01$/s;
      # Extract the name definitions
      while( s/([\da-f]+)\03([^\01-\03]+)(?:\03([^\01-\03]+)(?:\03([^\01-\03]+))?)?/$1/ ) {
	#my( $key, $name, $dirkey, $dirname ) = ( $1, $2, $3, $4 ) -- expensive copy op
	if( defined $3 ) {	# With dirname
	  if( defined $4 ) {	# Dirname not yet known
	    $dir_name{$3} = $4; # Save orig for concatenating
	    for( "$4" ) {
	      &$rename();
	      $file_name{$3} = $_ if defined && $_ ne '';
	    }
	  }
	  for( "$dir_name{$3}/$2" ) {
	    $dir_name{$1} = $_;	# Might be a dir.
	    &$rename();
	    $file_name{$1} = $_ if defined && $_ ne '';
	  }
	} else {
	  for( "$2" ) {
	    &$rename();
	    $file_name{$1} = $_ if defined && $_ ne '';
	  }
	}
      }
      my( $key, @args ) = split /\01/, "$_-";
      pop @args;		# Remove the - we added above to work around
				# the stupid end handling of split.

      if( $includes ) {
	if( $key eq 'INCL' ) {
	  $graph{$file_name{$args[0]}}{$file_name{$args[1]}} |= INCL
	    if exists $file_name{$args[1]} && exists $file_name{$args[0]};
	  next;
	} elsif( $key eq 'LOAD_INCL' ) {
	  for( $args[1] ) {
	    if( s/:\d+$//	 ) {	# Strip the line number.
	      &$rename();
	    } elsif( exists $file_name{$_} ) {
	      $_ = $file_name{$_};
	    } else {
	      next;
	    }
	    $graph{$_}{$file_name{$args[0]}} |= INCL
	      if defined && $_ ne '' && exists $file_name{$args[0]};
	  }
	  next;
	}
      }

      next if !$because && !$dependencies;
      if( $key =~ /^BUILD_$build_re/o ) {
	if( exists $file_name{$args[0]} ) {
	  if( $2 ) {		# built because of other files?
	    exists $file_name{$_} and
	      $graph{$file_name{$args[0]}}{$file_name{$_}} |= BECAUSE
		for split /\02/, $args[1];
	  } else {		# built because it's phony or other reason?
	    $graph{$file_name{$args[0]}}{+SELF} |= $1 ? PHONY : BECAUSE;
	  }
	}
      } elsif( $args[1] && $key eq 'DEPEND' ) { # Anything these depend on?
	my @dependencies;
	for my $start ( split /\02/, $args[0] ) {
	  next if !exists $file_name{$start};
	  @dependencies = map { exists $file_name{$_} ? $file_name{$_} : () } split /\02/, $args[1]
	    or last
	    if !@dependencies;	# Calculate list when 1st needed.
	  $graph{$file_name{$start}}{$_} |= DEPEND
	    for @dependencies;
	}
      }
    }
  }
}



# Selection from cammand line.
if( @ARGV ) {			# Start with the args and follow them up and/or downwards.
  @ARGV =
    grep { $_ = absolute_filename $_; &$rename; $_ ne '' } map Glob::zglob_fileinfo( $_ ), @ARGV;
  my %tmp;

  &$::both_up_down( \$outfile, \$up, \$down ) if $::both_up_down; # Hack to speed up the regression
				# test, by doing all before here only once for selection.

  $up = $down = 1
    if !$up && !$down;		# Neither chosen means go both ways.

  if( $down ) {
    my @list = @ARGV;
    while( @list ) {
      my $elt = shift @list;
      for( keys %{$graph{$elt}} ) {
	push @list, $_
	  if !exists $tmp{$elt}{$_};
	$tmp{$elt}{$_} |= $graph{$elt}{$_};
      }
    }
  }

  if( $up ) {
    my %origin;			# Prepare an inverted graph of edge origins.
    for my $start ( keys %graph ) {
      $_ ne SELF && undef $origin{$_}{$start}
	for keys %{$graph{$start}};
    }

    while( @ARGV ) {		# Walk upwards iteratively.
      my $elt = shift;
      for( keys %{$origin{$elt}} ) {
	push @ARGV, $_;
	delete $origin{$elt}{$_};
	$tmp{$_}{$elt} |= $graph{$_}{$elt};
	$tmp{$elt}{+SELF} ||= 0; # Having leaves as keys (pseudo inner nodes) makes next loop simpler.
      }
    }

    for my $start ( keys %graph ) { # Ensure that the new lower border loses no info.
      if( $because && exists $tmp{$start} ) {
	for( keys %{$graph{$start}} ) {
	  if( $_ ne SELF && !exists $tmp{$start}{$_} && $graph{$start}{$_} & BECAUSE ) {
	    $tmp{$start}{+SELF} |= BECAUSE; # We eliminated a BECAUSE edge, put info on SELF.
	    last;
	  }
	}
      }

      $tmp{$start}{+SELF} |= $graph{$start}{+SELF} # Didn't copy these on the way up.
	if exists $tmp{$start} && exists $graph{$start}{+SELF};
    }
  }

  %graph = %tmp;		# This benchmarks as more efficient than a
				# normal copy.  Maybe perl just moves the
				# pointer, since %tmp is no longer needed.
}



sub apply_rename(\%) {
  {
    my %tmp;
    for( keys %graph ) {
      if( !$_[0]{$_} ) {
	$tmp{$_} = $graph{$_};
      } elsif( $tmp{$_[0]{$_}} ) {
	%{$tmp{$_[0]{$_}}} = (%{$tmp{$_[0]{$_}}}, %{$graph{$_}});
      } else {
	$tmp{$_[0]{$_}} = $graph{$_};
      }
    }
    %graph = %tmp;
  }

  for my $node ( keys %graph ) {
    for( keys %{$graph{$node}} ) {
      if( $_[0]{$node} && $_[0]{$node} eq ($_[0]{$graph{$node}{$_}} || '') ) {
	delete $graph{$node}{$_};
      }
    }
  }
}

if( $merge ) {
  my %rename;

  # Find pairs to merge.
  for( keys %graph ) {
    next if $rename{$_};	# May already have been renamed below via an
				# edge from another node.
    my $new = &$merge( $graph{$_} );
    next if !$new;
    $rename{$_} = $rename{$new->[1]} = $new->[0];
  }

  # Merge them in a 2nd step, because had we done it in the 1st we might have
  # left some as they were, before discovering a new name for them.
  apply_rename %rename;
}


&$::udg_dot( $outfile, \$type ) if $::udg_dot;	# Hack to speed up the
				# regression test, by doing all before
				# here only once for both file types.


my $sep = $type eq 'dot' ? ' ' : ',';
my $back = $type eq 'dot' ? '' : 'a("_DIR","first")';
my $both = $type eq 'dot' ? 'dir=both' : 'a("_DIR","both")';
my %node_extra;			# What kinds of extra attributes we get.
sub node_attr() {
  $node_extra{$graph{$_}{+SELF} || ''} ||= # Remember this combination for reuse.
    ($because && exists $graph{$_}{+SELF} && $graph{$_}{+SELF} & BECAUSE ? "$sep$because_self{$type}" : '') .
    (exists $graph{$_}{+SELF} && $graph{$_}{+SELF} & PHONY ? "$sep$phony{$type}" : '');
  for( my $i = 0; $i < @file_attr; $i += 2 ) {
    return $sep . $file_attr[$i+1]{$type} . $node_extra{$graph{$_}{+SELF} || ''}
      if /$file_attr[$i]/;
  }
  $node_extra{$graph{$_}{+SELF} || ''};
}

sub edge($$$$$;$) {
  my( $start ) = @_;
  if( $dependencies ) {
    # Unlike the include edge below, this one is rather convoluted, because it
    # checks for both-ended arrows if either both are BECAUSE, or else if
    # neither is but both are DEPEND.
    my $style;
    my $found_both = $because && $_ ne $start && $graph{$_}{$start} && $graph{$_}{$start} & BECAUSE;
				# Assume it might go both ways, e.g. from &dir.
    if( $because && $graph{$start}{$_} & BECAUSE ) {
      $style = $because{$type};
    } else {
      $found_both = $found_both ? 0 : # Misassumed above.
	($_ ne $start && $graph{$_}{$start} && $graph{$_}{$start} & DEPEND);
				# Again assume it might go both ways.
      if( $graph{$start}{$_} & DEPEND ) {
	$style = '';
      } else {
	$found_both = 0;	# Misassumed again.
      }
    }
    if( defined $style ) {	# It's a dependency, maybe "because".
      if( $found_both ) {
	$style = $style ? "$style$sep$both" : $both;
	$graph{$_}{$start} &= ~(DEPEND | BECAUSE); # Don't do it again later.
      } elsif( $back ) {
	$style = $style ? "$style$sep$back" : $back;
      }
      print $style ? "$_[1]$_[2]$style$_[3]$_[4]" : "$_[1]$_[4]";
      &{$_[5]} if $_[5];
    }
  }
  if( $includes && $graph{$start}{$_} & INCL ) {
    my $style = $include{$type};
    if( $_ ne $start && $graph{$_}{$start} && $graph{$_}{$start} & INCL ) {
      $style .= "$sep$both";
      $graph{$_}{$start} &= ~INCL; # Don't do it again later.
    } elsif( $back ) {
      $style .= "$sep$back";
    }
    print "$_[1]$_[2]$style$_[3]$_[4]";
    &{$_[5]} if $_[5];
  }
}

my( $id, %id ) = 'a';
sub DOTid {
  #$id++ if $id =~ /^(?:edge|node|(?:|di|sub)graph|strict)$/; # "edge" would be reached after 90000 nodes...
  print "$id [label=\"$_\"" . &node_attr . "]\n";
  $id++;
}

sub UDGid {
  print qq!l("$id",n("",[a("OBJECT","$_")! . &node_attr . "],[\n";
  $id{$_} = $id++;
  my $start = $_;
  for( reverse sort keys %{$graph{$_}} ) {
    next if $_ eq SELF;
    edge $start, q!e("",[!, '', '', '],', sub {
      if( $id{$_} ) {
	print qq!r("$id{$_}")),\n!;
      } else {
	&UDGid;
	print "),\n";
      }
    }
  }
  print "]))\n";
}

print $head{$type};
if( $type eq 'dot' ) {
  for( sort keys %graph ) {
    next if $_ eq SELF;
    $a = $id{$_} ||= &DOTid;
    my $start = $_;
    for( sort keys %{$graph{$_}} ) {
      next if $_ eq SELF;
      $b = $id{$_} ||= &DOTid;
      edge $start, "$a -> $b", ' [', ']', "\n";
    }
  }
} else {
  $id{$_} or &UDGid, print ',' for reverse sort keys %graph;
}
print $foot{$type};
