#!/usr/local/bin/perl
#
# cfman v2.0: man page cross-referencer
# author: Tom Christiansen <tchrist@convex.com>
# date: 15 November 89
# 
# updated to v2.1: 19 May 91 by author
#   now understands dbm database
#
# usage: cfman [ -d debug-devel ] [ -s sub-sections ] 
#	       [ -p manpath ] [ -x xrefpath ] 

($iam = $0) =~ s%.*/%%;
 
$] =~ /(\d+\.\d+).*\nPatch level: (\d+)/;
die "$iam: requires at least perl version 4.0 to run correctly\n" if $1 < 4.0;

#eval <<'EOF';

require 'getopts.pl';

&Getopts('fd:s:p:P:x:') || &usage;


$manpath = $opt_p if defined $opt_p;
$manpath = $opt_P if defined $opt_P;
$manpath = $ENV{'MANPATH'} unless $manpath;
$manpath = "/usr/man" unless $manpath;
@manpath = split(/:/,$manpath);

$opt_x =~ /^:/ && ( $opt_x = $manpath . $opt_x );
@xrefpath = $opt_x ? split(/:/,$opt_x) : @manpath;

$debug = $opt_d;

unless ($opt_s) {
    $all_sections++;
} else {
    $delim = ':' if $opt_s =~ /:/;
    for (split(/$delim/o, $opt_s)) { $sections{$_} = 1; }
} 

if ($debug) {
    $" = ':';
    print "manpath is @manpath\n";
    print "xrefpath is @xrefpath\n";
    $" = ' ';
} 

&open_databases;
if (@ARGV) {
    local($file);
    $root = $manpath[0]; # punt
    for $manpage (@ARGV) {
	undef %checked;
	&read_manpages($manpage);
	($catpage = $manpage) 
	    =~ s,^(.*)/man([^\.]*)(\.Z)?/([^/]*)$,$1/cat$2/$4,;
	&read_catpage($catpage) if $opt_f && -e $catpage;
    } 
} else { 
    for $root (@manpath) {
	if (!defined $dbase{$root}) {
	    warn "no dbase for $root\n";
	    next;
	} 
	local(*manroot) = $dbase{$root};
	local($key, $value);
	while (($key,$value) = each %manroot) {
	    for (split(/\002/, $value)) {
		if (/\001/) {  # don't do indirects
		    ($cmd, $page, $section, $desc) = split(/\001/);
		    undef %checked;
		    $manpage =  "$root/man$section/$page";
		    next unless $all_sections || defined $sections{$section};
		    &read_manpages($manpage);
		    ($catpage = $manpage) 
			=~ s,^(.*)/man([^\.]*)(\.Z)?/([^/]*)$,$1/cat$2/$4,;
		    &read_catpage($catpage) if $opt_f && -e $catpage;
		} else {
		    #print "skipping $key, indirect to $_\n" if $debug;
		} 
	    }
	}
    } 
}


############################################################################
#
# read_manpages()
#
#	passed a list of filename, which are man pages.  opens each one
#	verifying that the file really is in the place that the .TH line.
#	skips to SEE ALSO section and then verifies existence of each 
#	referenced man page.
############################################################################


sub read_manpages {
    local (@pages) = @_;

    local ($junk, $sopage, $basename, $line, $page, $pname, $pext, $gotTH);
    local(%seen);


page:
    foreach $page ( @pages ) {
	next page if $page =~ /\.(BAK|OLD)$/i;

	if ($seen{$page}++) {
	    print "already saw $page\n" if $debug & 1;
	    next page;
	}

	$page = "gzip $page |" if $page =~ /\.z$/;

	if (!open page) {
	    warn "couldn't open $page: $!\n";
	    next page;
	}

	$0 = "cfman (checking $page)";
	print "checking $page\n" if $debug & 1;

	$gotTH = 0;
	$line = 0;
	$sopage = '';

line:   while (<page>) {
	    print if $debug & 16;
	    next line if /^'''/ || /^\.\\"/;

	    # deal with .so's on the first line.
	    # /usr/ucb/man uses this instead of links.
	    if (!($line++) && /^\.so\s+(.*)/) {
		$sopage = $1;
		next if $sopage =~ m#mh/lib/tmac.h#;
		print "$page -> $sopage\n" if $debug & 1;
		($basename = $sopage) =~ s%.*/%%;
		if ($seen{$basename}++) {
		    print "already saw $basename\n" if $debug & 1;
		    next page;
		} 
		$sopage = "$root/$sopage" unless $sopage =~ m#^/#;
		if (!open(page,$sopage)) {
		    print "$page: cannot open $sopage: $!\n";
		    next page;
		} 
		$page = $basename;
		next line;
	    } 

	    # check for internally consistent .TH line
	    if ( /^\.(TH|SC)/ ) { # SC is for mh
		 $gotTH++;
		 printf STDERR "TH checking %s", $_ if $debug & 4;
		 do flush();
		 s/"+//g;
		 ($junk, $pname, $pext) = split;
		 if (&macro($pname)) {
			printf STDERR "%s: can't resolve troff macro in .TH: %s\n",
			    $page, $pname;
			next line;
		 } 
		 $pext =~ y/A-Z/a-z/;
		 $pname =~ s/\\-/-/g;
		 $pname =~ y/A-Z/a-z/ if $pname =~ /^[\$0-9A-Z_\055]+$/;
		 ($pexpr = $page) =~ s/([.+])/\\$1/g;
		 $pexpr =~ s%.*/%%;
		 if ( "$pname.$pext" !~ /^$pexpr$/i) {
		      printf "%s: thinks it's in %s(%s) [OK]\n", 
			  $page, $pname, $pext;
		 } 
		 next line;
	    }

	    next line unless /^\.S[Hh]\s+"?SEE ALSO"?/
		|| /^\.S[Hh]\s+REFERENCES/	# damn posix
		|| /^\.Sa\s*$/; 		# damn mh

	    # finally found the cross-references
xref:       while (<page>) {
		print if $debug & 16;
		last line if /^\.(S[Hh]|Co|Hi|Bu)/; # i really hate mh macros
		next xref unless /\(/;
		next xref if /^.PP/;
		chop;
		s/\\f[RIPB]//g;
		s/\\\|//g;
		s/\\-/-/g;
entry:          foreach $entry ( split(/,/) ) {
		    next entry unless $entry =~ /\(.*\)/;
		    $pname = ''; $pext = '';
		    $1 = ''; $2 = '';
		    ($pname, $pext) = 
			($entry =~ /([A-Za-z0-9\$._\-]+)\s*\(([^)]+)\).*$/); 
		    if (&macro($pname)) {
			printf "%s: can't resolve troff macro in SEE ALSO: %s\n",
			    $page, $pname;
			next entry;
		    } 
		    next entry if !$pname || !$pext || $pext !~ /^\w+$/;
		    $pext =~ y/A-Z/a-z/;
		    do check_xref($page,$pname,$pext);

		}	# entry: foreach $entry ( split(/,/) ) 
	    }		# xref:  while (<page>)
	}		# line:  while (<page>) 
	printf "%s: missing .TH\n", $page if (!$gotTH);
    }  			# page:  foreach $page ( @pages )
}     			# sub    read_manapages


###########################################################################
#
# check_xref()
#
#	given the name of the page we're looking for, check for a
#	cross reference of a given man page and its assumed subsection
#
###########################################################################

sub check_xref {
    local ($name, $target, $section) = @_;
    local(@names);
    local($wantname);
    local($ok);

    $wantname  = "$target.$section";

    print " $name has xref of $target($section)\n" if $debug & 2;

    if ($checked{$wantname}++) {
	print "  (already checked)\n" if $debug & 2;
	return;
    } 

    unless (@names = &pathcheck($target,$section)) {
	print "$name: $target($section) missing\n";
	return;
    } 

    print "   $target -> @names\n" if $debug & 8;

    return if grep($wantname eq $_, @names);

    $wantname  =~ s/(\W)/\\$1/g;
    $ok =  " [OK]" if grep(/^$wantname/, @names);

    grep(s/\.([^.]+)$/($1)/, @names);

    print "$name: $target($section) really in ",
	join(", ", @names);

    print $ok;

    print "\n";

    do flush();
}

###########################################################################
#
# pathcheck()
#
#	takes a name (like 'tty') and a section (like '1d')
#	and looks for 'tty.1d' first in the current root, 
#	then in all other elements of @xrefpath.  
#
#	returns name and section if found, otherwise all pages
#	where found.
#
###########################################################################

sub pathcheck {
    local ($name, $section) = @_;
    local($cmd, $page, $mysect, $sect, $desc);
    local($entry, @entries);
    local($wantname) = "$name.$section";
    local(%recursed) = ($name, 1);

    ($pat = $name) =~ s/(\W)/\\$1/g;

    for $path ( @xrefpath ) {
	for $entry (&quick_fetch($name, $dbase{$path})) {
	    ($cmd, $page, $sect, $desc) = split(/\001/, $entry);
	    ($mysect) = $page =~ /\.([^.]+$)/;
	    return $wantname if $cmd =~ /\b$pat\b/ && $mysect eq $section;
	    push(@entries, $page); #  if $target eq $cmd;
	}
    } 
    return @entries;
} 


sub flush {
    $| = 1; 
    print ''; 
    $| = 0;
}

sub macro {
    @_[0] =~ /^\\\*\(/;
} 


sub usage {
    die "usage: $iam [-d debug-level] [-s sub-sections] [-p manpath] 
    	[-x xrefpath] [pattern ...] \n";
}


############################################################################
#
# open_databases
#
#	open all databases in manpath.  the %dbase array contains
#	the pointers to each individual dbm file, keyed on pathname.
#	yes, this is recursive structures in perl -- caveat emptor.
#	no, you aren't expected to understand this.
#
############################################################################
sub open_databases {
    local($gensym) = 'dbm0000'; 
    local($path);

    for $path (@xrefpath) {
	$dbase{$path} = $gensym++;
	local(*dbase) = $dbase{$path};
	$dbase = $path;  # rude, but useful
	die "couldn't dbmopen $path/whatis: $!\n"
	    unless dbmopen(dbase, "$path/whatis", undef);
    } 
} 


# --------------------------------------------------------------------------
# do a quick fetch of a key in the dbm file, recursing on indirect references
# --------------------------------------------------------------------------
sub quick_fetch {
    local($key,*array) = @_;
    local(@retlist) = ();
    local(@tmplist) = ();
    local($_, $entry);
    local($name, $ext);
    local(@newlist);
    local($entry);

    print "fetching on $key from $array\n" if $debug & 32;

    return () unless $entry = $array{$key};

    @tmplist = split(/\002/, $entry);
    for (@tmplist) {
	if (/\001/) {
	    push(@retlist, $_);
	} else {
	    ($name,$ext) = /(.+)\.([^.]+)$/;
	    print "recursing on $name\n" if $debug & 32;
	    @newlist = &quick_fetch($name, *array) unless
		$recursed{$name}++; # true/false, explain/diction idiocy
	    print "got @newlist on recurse\n" if $debug & 32;
	    push(@retlist,@newlist);
	}
    }
    print "return @retlist from fetch from $array\n" if $debug & 32;
    return @retlist;
}

sub read_catpage {
    local($FILE) = @_;
    local($i, $_);
    local($page, $ext);

    open FILE || die "can't open $FILE: $!";
    $frag = '';
    while (<FILE>) {
	$i = 0;
	next unless /\010/;
	if ($frag) {
	    s/^ +//;
	    $_ = $frag . $_;
	    $frag = '';
	} 
	if (s/((_\010.)+_\010-)$//) {
	    ($frag = $1) =~ s/_\010-//;
	} 
	for (grep($i++ % 3 == 1, split(/((_\010.|\.)+\([1-8nlpo]\w?\))/))) {
	    s/_\010//g;
	    print "   cat xref: $_\n" if $debug & 64;
	    ($page, $ext) = /(.+)\((.+)\)/;

	    $page = "\l$page" if $page =~ /^[A-Z][._a-z]+/ &&
				 $page !~ /^L[-.]/; # L.sys, etc.
	    $ext = "\L$ext";
	    next if length($page) == 1 && $ext eq '3b'; # equations
	    &check_xref($FILE, $page, $ext);
	} 
    } 
	
    } 

#EOF
#die $@ if $@;
