#!/usr/local/bin/perl

use File::Basename;
use File::Spec 0.82;
use Getopt::Long;

use constant MSWIN => $^O =~ /MSWin32|Windows_NT/i;

use strict;

my $prog = basename($0, qw(.pl));

# Print the usage summary and terminate the program.
#   usage(0) or usage('')  - a defined but false argument: the summary goes
#                            to the currently selected output handle and
#                            the exit status is 0.
#   usage($text)           - "$prog: Error: $text" and the summary go to
#                            STDERR; the exit status is 2.
#   usage()                - the summary alone goes to STDERR; exit status 2.
sub usage {
    my ($complaint) = @_;
    my $status = 2;
    $status = 0 if defined($complaint) && !$complaint;
    if ($status != 0) {
	# Everything printed from here on is diagnostic output.
	select STDERR;
	if ($complaint) {
	    print "$prog: Error: $complaint\n\n";
	}
    }
    print <<EOF;
Usage: $prog [flags] pattern ...
Flags:
   -help		Print this message and exit
   -backwards		Work from objects back towards sources
   -check		(with -cr) Check specified DO's for consistency
   -cr DO,...		Comma-separated list of DO's to dump in the CRDB
   -db file,...		Comma-separated list of *.crdb files to read in
   -dir	<dir>		Dir to search for *.crdb files (default: the cwd)
   -exact		Treat <pattern> as an absolute path, not a pattern
   -indent <str>	String to indent recursive output (default: 3 spaces)
   -recurse		Chase down dependencies recursively
   -save		(with -cr) Save DB resulting from DO 'foo' as 'foo.crdb'
   -terminals		Like -recurse but print only terminal targets
   -verbose		Provide informational messages
Note:
    All flags may be abbreviated to their shortest unique name.
Examples:
    $prog -cr /vobs_foo/solaris/.AUDIT libxyz.a
    $prog -backwards -rec foo.o
EOF
    exit $status;
}

# Parse the command line into %opt.  GetOptions returns false when it
# meets an unknown or malformed flag (after warning about it on STDERR);
# treat that as a usage error rather than carrying on with a partially
# parsed command line.
my %opt;
GetOptions(\%opt, qw(backwards check cr=s@ db=s@ dir=s exact fmt=s help
		     indent=s macro recurse save terminals verbose))
    or usage('invalid command-line flags');

# An explicit request for help is not an error: a defined but false
# argument makes usage() skip the switch to STDERR and exit with status 0.
usage(0) if $opt{help};

# Report progress on STDERR, prefixed with the program name.  The
# arguments are joined with spaces.  Silent unless -verbose was given.
sub note {
    if ($opt{verbose}) {
	print STDERR "$prog: @_\n";
    }
}

$opt{recurse} ||= $opt{terminals};	# one implies the other

# Characters accepted as separators in list-valued flags and environment
# variables.  ':' is left out on Windows, presumably because it occurs
# in drive-letter paths there.
my $delim = MSWIN ? ',;' : ',;:';
my $ext = 'crdb';

# Use whichever storage format is requested, providing a default.
# The name becomes part of a class name (and hence of a file name to be
# loaded), so insist that it looks like one before going any further.
my $fmt_name = $opt{fmt} || 'Dumper';
die "$prog: Error: invalid storage format name '$fmt_name'"
				unless $fmt_name =~ /\A\w+(?:::\w+)*\z/;
my $fmt = "ClearCase::CRDB::$fmt_name";

# Load the class by file name inside a block eval, which needs no string
# eval, and fail right here with the real reason if it cannot be loaded
# instead of letting the constructor call below die with an obscure
# "Can't locate object method" message.
(my $fmt_file = "$fmt.pm") =~ s%::%/%g;
eval { require $fmt_file; 1 }
    or die "$prog: Error: cannot load storage class $fmt: $@";
my $cr = $fmt->new;

# Decide where the CRDB data comes from.  In order of precedence:
#   1. derived objects named via -cr or $CRDB_DO (their CRs are translated,
#      optionally checked first and optionally written out afterwards);
#   2. database files named via -db or $CRDB_DB;
#   3. every *.crdb file, hidden or not, found in the directories named
#      via -dir or $CRDB_PATH, defaulting to the current directory.
if ($opt{cr} || $ENV{CRDB_DO}) {
    my $crlist = $opt{cr} ? join(',', @{$opt{cr}}) : $ENV{CRDB_DO};
    my @crs = split /[$delim]/, $crlist;
    $cr->crdo(@crs);
    if ($opt{check}) {
	note "Checking @crs CR for internal consistency ...";
	$cr->check;
    }
    note "Translating @crs CR to CRDB format ...";
    $cr->catcr;
    if ($opt{db}) {
	die "$prog: Error: can write to only one database file"
							if @{$opt{db}} != 1;
	# Store under the name a later "-db <name>" run will look for,
	# i.e. with the extension appended unless it is already there.
	# This also keeps the message in step with the file actually
	# handed to store().
	my $dbfile = $opt{db}->[0];
	$dbfile .= ".$ext" unless $dbfile =~ /\.\Q$ext\E\z/;
	note "Writing CRDB data to $dbfile ...";
	$cr->store($dbfile);
    } elsif ($opt{save}) {
	# With several DOs the file is named after the first one.
	note "Writing CRDB data to $crs[0].$ext ...";
	$cr->store("$crs[0].$ext");
    }
} elsif ($opt{db} || $ENV{CRDB_DB}) {
    my $dblist = $opt{db} ? join(',', @{$opt{db}}) : $ENV{CRDB_DB};
    my @dbs = split /[$delim]/, $dblist;
    for my $db (@dbs) {
	# Require a literal ".crdb" suffix; a name that merely ends in
	# the letters "crdb" still gets the extension appended.
	$db .= ".$ext" unless $db =~ /\.\Q$ext\E\z/;
    }
    $cr->load(@dbs);
} else {
    my $path = $opt{dir} || $ENV{CRDB_PATH} || '.';
    my @found;
    for my $dir (split /[$delim]/, $path) {
	# Pick up hidden (dot-prefixed) databases as well as visible ones.
	push(@found, glob("$dir/.*.$ext"), glob("$dir/*.$ext"));
    }
    die "$prog: Error: no CRDB databases found" unless @found;
    my $s = @found > 1 ? 's' : '';
    note "Using @found database$s ...";
    $cr->load(@found);
}

# Default indentation is three spaces.  Test for definedness rather than
# truth so that an explicit -indent '' or -indent 0 is honored.
$opt{indent} = '   ' unless defined $opt{indent};

my @matches;

{
    my %seen;		# targets already printed; consulted only with -terminals
    my %on_path;	# nodes currently being expanded, used to break cycles
    my $direction = $opt{backwards} ? 'needs' : 'makes';
    my $type = $opt{macro} ? 'var' : 'do';
    # Name of the CRDB method used to step from one node to its neighbors.
    my $query = "${direction}_$type";
    if ($opt{exact}) {
	# Arguments are literal paths; make them absolute relative to the
	# directory reported by the CRDB object's iwd method.
	@matches = map {File::Spec::Unix->rel2abs($_, $cr->iwd)} @ARGV;
    } else {
	# Arguments are patterns; let the CRDB object expand them.
	my $match_method = "matches_$type";
	@matches = $cr->$match_method(@ARGV);
    }

    # whouses($level, $node)
    # Print the nodes reached from $node via the $query method, each one
    # prefixed with $level copies of the indent string.  With -recurse
    # the walk continues from every node printed, one level deeper.  With
    # -terminals a node that itself leads to further non-.AUDIT nodes is
    # not printed; the walk continues through it at the same level, and
    # each remaining node is printed only once.  A node that is already
    # being expanded further up the call chain is not expanded again, so
    # a cyclic graph cannot cause endless recursion; output for acyclic
    # graphs is unaffected by that guard.
    sub whouses {
	my($level, $do) = @_;
	return if $on_path{$do};
	$on_path{$do} = 1;
	my $pad = $opt{indent} x $level;
	for my $used ($cr->$query($do)) {
	    next if $used =~ m%/\.AUDIT%i; # HACK - see PODs
	    if ($opt{terminals} && grep !m%/\.AUDIT%i, $cr->$query($used)) {
		whouses($level, $used);
	    } else {
		print "$pad$used\n" unless $opt{terminals} && $seen{$used};
		$seen{$used} = 1;
		whouses($level+1, $used) if $opt{recurse};
	    }
	}
	delete $on_path{$do};
    }
}

# Report on each matched target in turn: a header line consisting of the
# target and an arrow showing the direction of the analysis, followed by
# whatever whouses() prints for it starting at indent level 1.
for my $target (@matches) {
    my $arrow = $opt{backwards} ? ' <=' : ' =>';
    print $target, $arrow, "\n";
    whouses(1, $target);
}

exit 0;

__END__

=head1 NAME

whouses - impact analysis in a clearmake build environment

=head1 MOTTO

I<You give me a clean CR, I'll give you a clean impact analysis.>

=head1 SYNOPSIS

Run this script with the C<-help> option for usage details. Here are
some additional sample usages with explanations:

  whouses foobar.h

Shows all DO's that make use of any file matching /foobar.h/.

  whouses -recurse foobar.h

Same as above but follows the chain of derived files recursively.

  whouses -exact /vobs_xyz/include/foobar.h

Shows all DO's that make use of the specified file (which must be given
as an absolute pathname).

=head1 DESCRIPTION

I<Whouses> provides a limited form of "impact analysis" in a clearmake
build environment. This is different from traditional impact analysis
(see B<CLASSIC CODE ANALYSIS COMPARED> below for details) and in
particular operates at the granularity of files rather than language
elements.

I<Whouses> is best described by example. Imagine you have a VOB
'/vobs_sw' in which you build the incredibly simple application I<foo>
from I<foo.c>.  You have a Makefile which compiles I<foo.c> to I<foo.o>
and then links it to produce I<foo>. And let's further assume you've
just done a build using clearmake.

Now, I<foo> is a DO which has a config record ("CR") showing how it was
made. I<Whouses> analyzes that CR and prints the data in easy-to-read
indented format.  For instance:

	% whouses -cr foo foo.c
	/vobs_sw/src/foo.c =>
	  /vobs_sw/src/foo.o

The "-cr foo" points to the derived object from which to extract and
analyze the CR; it will be implicit in the remaining examples.  The
output indicates that I<foo.o B<uses> foo.c>, or in other words that
I<foo.c> is a contributor to I<foo.o>. If we add the I<-recurse> flag:

	% whouses -r foo.c
	/vobs_sw/src/foo.c =>
	  /vobs_sw/src/foo.o
	    /vobs_sw/src/foo

We see all files to which I<foo.c> contributes, indented according to
how many generations removed they are. If we now add I<-terminals>:

	% whouses -r -t foo.c
	/vobs_sw/src/foo.c =>
	  /vobs_sw/src/foo

Intermediate targets such as I<foo.o> are suppressed so we see only the
"final" targets descended from I<foo.c>.

We can also go in the other direction using I<-backwards>:

	% whouses -b -e foo
	/vobs_sw/src/foo <=
	  /vobs_sw/src/foo.o

Which shows I<foo.o> as a progenitor of I<foo>. Note that the arrow
(B<E<lt>=>) is turned around to indicate I<-backwards> mode. We also
introduced the I<-exact> flag here. By default arguments to I<whouses>
are treated as patterns, not file names, and since I<foo> has no
extension it would have matched I<foo.c> and I<foo.o> as well. We can
combine recursion with backwards mode:

	% whouses -b -e -r foo
	/vobs_sw/src/foo <=
	  /vobs_sw/src/foo.o
	    /vobs_sw/src/foo.c
	    /vobs_sw/src/foo.h
	    /vobs_sw/src/bar.h

And discover that I<foo.h> and I<bar.h> were also used.

When used recursively in the forward direction, this script answers the
question "if I change file X, which derived files will need to be
rebuilt?" This is the classic use, the one for which it was written.

Because extracting a recursive CR can be quite slow, I<whouses>
provides ways of dumping the CR data to a file representation for
speed. Use of I<-save>:

	% whouses -cr foo -save ...

will write out the data to I<foo.crdb>. Subsequently, if I<foo.crdb>
exists and is newer than I<foo>, data will be taken from it rather
than re-generating the recursive I<catcr> report. See also the I<-db>
and I<-fmt> flags.

The default save format is that of I<Data::Dumper>. It was chosen
because it results in a nicely indented, human-readable text format
file. Provision is made for subclassing to alternate storage formats,
and subclasses for I<Storable> and I<Data::Denter> are provided as
examples. Although the I<Storable> format may be somewhat faster than
the default, the real reason for the override capability is to allow an
actual, fast database to be used.

=head1 .AUDIT FILES

Derived objects matching the Perl RE /\.AUDIT/i are ignored. These are
presumed to be "meta-DO's" by convention, which aren't part of the
build per se but rather pseudo-targets whose only purpose is to hold
CR's which refer back to all real deliverables.

=head1 CLASSIC CODE ANALYSIS COMPARED

I<Whouses> can best be described by comparison with "real" impact
analysis products. There are a number of full-scale tools on the
market.  Typically, these work by parsing the source code into some
representation which they can then analyze. It's a powerful technique
but entails some tradeoffs:

=head3 MINUSES

=over 4

=item *

A true code analysis tool must have knowledge of each programming
language in use. I.e. to add support for Java, a Java parser must be
added.

=item *

A corollary of the above is that such a tool requires a lot of work
by expert programmers. Thus the tools tend to be large, complex
and expensive.

=item *

Another corollary is that the tool must track each advance
in each language, usually with significant lag time.

=item *

Also, since analysis basically entails compiling the code,
analysis of a large code base can take a long time,
potentially as long or longer than actually building it.

=item *

If some part of your application is written in a language the tool
doesn't know (say Python or Visual Basic or Perl or an IDL), no
analysis of that area can take place.

=back

=head3 PLUSES

=over 4

=item *

The analysis can be as granular and as language-knowledgeable as its
developers can make it. If you change the signature of a C function, it
can tell you how many uses of that function, in what files and on what
lines, will need to change.

=item *

A code analysis tool may be tied to a set of languages but by the same
token it's NOT tied to a particular SCM or build system.

=back

The minuses above are not design flaws but inherent tradeoffs.  For
true code impact analysis you must buy one of these tools and accept
the costs.

Whouses doesn't attempt code analysis per se.  As noted above, true
code analysis programs are tied to language but not to an SCM system.
Whouses flips this around; it doesn't care about language but it only
works with build systems that use clearmake, which is part of
ClearCase.

=head1 AUTHOR

David Boyce <dsb@boyski.com>

=head1 COPYRIGHT

Copyright (c) 2000-2001 David Boyce. All rights reserved.  This Perl
program is free software; you may redistribute and/or modify it under
the same terms as Perl itself.

=head1 STATUS

This is currently ALPHA code and thus I reserve the right to change the
UI incompatibly. At some point I'll bump the version suitably and
remove this warning, which will constitute an (almost) ironclad promise
to leave the interface alone.

=head1 PORTING

I've tried to write this in a platform independent style but it has not
been heavily tested on Windows (actually it hasn't been all that
heavily tested anywhere). It does pass "make test" on Windows and
appears to work fine in limited testing.

=head1 SEE ALSO

perl(1), ClearCase::CRDB(3)

=cut
