#! /usr/bin/perl 
#
#  SGMLTools.pm
#
#  $Id: SGMLTools.pm,v 1.40 1998/10/13 11:54:59 cg Exp $
#
#  SGML-Tools driver core. This contains all the basic functionality
#  we need to control all other components.
#
#   Copyright 1996, Cees de Groot.
#
package SGMLTools;

require 5.004;
use strict;

=head1 NAME

SGMLTools - SGML conversion utilities

=head1 SYNOPSIS

  use SGMLTools;
  SGMLTools::init;
  @files = SGMLTools::process_options ($0, @ARGV);
  for $curfile (@files) {
    SGMLTools::process_file ($curfile);
  }

=head1 DESCRIPTION

The SGMLTools package encapsulates all the functionality offered by
SGML-Tools. It is used, of course, by SGML-Tools; but the encapsulation
should provide for a simple interface for other users as well. 

=head1 FUNCTIONS

=over 4

=cut

use DirHandle;
use File::Basename;
use File::Find;
use File::Copy;
use FileHandle;
use IPC::Open2;
use Cwd;
use SGMLTools::Lang;
use SGMLTools::Utils qw(process_options usage cleanup trap_signals remove_tmpfiles);
use SGMLTools::Vars;

BEGIN
{
  #
  #  Put both backend directories on the front of the module search
  #  path. "use lib" prepends its arguments in the order given, so
  #  listing site before dist means site-local files are found first
  #  and can override the distributed ones.
  #
  use lib ("$main::LibDir/site", "$main::LibDir/dist");
}

=item SGMLTools::init

Takes care of initialization of package-global variables (which are actually
defined in L<SGMLTools::Vars>). The package-global variables are I<$global>,
a reference to a hash containing numerous settings, I<%Formats>, a hash
containing all the formats, and I<%FmtList>, a hash containing the currently
active formats for help texts. 

Apart from this, C<SGMLTools::init> also finds all distributed and site-local
formatting backends and C<require>s them.

=cut

#
#  init ()
#
#  Installs the signal handlers, registers the "global" pseudoformat
#  (with its option table and default settings) in %Formats and
#  %FmtList, and then require()s every fmt_*.pl backend found in
#  $main::LibDir/dist and $main::LibDir/site.
#
#  Takes no arguments. Dies if one of the two backend directories
#  cannot be entered or read, or if the original working directory
#  cannot be restored afterwards.
#
sub init
{
  trap_signals;

  #
  #  Register the ``global'' pseudoformat. Apart from the global settings,
  #  we also use $global to keep the global variable name space clean; 
  #  everything that we need to provide to other modules is stuffed
  #  into $global.
  #
  $global = {};
  $global->{NAME} = "global";
  $global->{HELP} = "";
  $global->{OPTIONS} = [
    { option => "papersize", type => "l",
      'values' => [ "a4", "letter" ], short => "p" },
    { option => "language",  type => "l",
      'values' => [ @SGMLTools::Lang::Languages ], short => "l" },
    { option => "charset",   type => "l",
      'values' => [ "latin", "ascii" ], short => "c" },
    { option => "style",     type => "s", short => "S" },
    { option => "tabsize",   type => "i", short => "t" },
    { option => "verbose",   type => "f", short => "v" },
    { option => "debug",     type => "f", short => "d" },
    { option => "define",    type => "s", short => "D" },
    { option => "include",   type => "s", short => "i" },
    { option => "pass",      type => "s", short => "P" }
  ];
  $global->{papersize} = "a4";
  $global->{language}  = "en";
  $global->{charset}   = "ascii";
  $global->{style}     = "";
  $global->{tabsize}   = 8;
  $global->{verbose}   = 0;
  $global->{define}    = "";
  $global->{debug}     = 0;
  $global->{include}   = "";
  $global->{pass}      = "";
  $global->{InFiles}   = [];
  $Formats{$global->{NAME}} = $global;	# All formats we know.
  $FmtList{$global->{NAME}} = $global;  # List of formats for help msgs.

  #
  #  Used when the format is "global" (from sgmlcheck).
  #
  $global->{preNSGMLS} = sub { $global->{NsgmlsOpts} .= " -s " };

  #
  #  Build up the list of formatters. We chdir into each directory
  #  (dist first, then site) and record every fmt_*.pl file name; a
  #  name present in both ends up marked "site". The chdir result is
  #  checked: if it failed we would otherwise scan whatever directory
  #  happens to be current and never notice.
  #
  my $savdir = cwd;
  my %Locs;
  foreach my $loc ("dist", "site")
  {
    my $path = "$main::LibDir/$loc";
    chdir $path or die "Unable to read directory $path: $!";
    my $dir = DirHandle->new (".");
    die "Unable to read directory $path: $!" unless defined($dir);
    foreach my $fmt (grep(/^fmt_.*\.pl$/, $dir->read()))
    {
      $Locs{$fmt} = $loc;
    }
    $dir->close();
  }

  #
  #  Load the backends by bare file name, so that the actual file is
  #  picked through @INC. The names are sorted so that the load order
  #  is the same on every run.
  #
  foreach my $fmt (sort keys %Locs)
  {
    require $fmt;
  }
  chdir $savdir or die "Unable to return to directory $savdir: $!";
}

=item SGMLTools::process_options ($0, @ARGV)

This function contains all initialization that is bound to the current
invocation of SGMLTools. It looks in C<$0> to deduce the backend that
should be used (sgml2txt activates the I<txt> backend) and parses the
options array. It returns an array of filenames it encountered during
option processing.

As a side effect, the environment variables I<SGMLDECL> and 
I<SGML_CATALOG_FILES> are modified.

=cut

#
#  process_options ($progname, @args)
#
#  Picks the backend from the name the program was invoked under,
#  hands @args to SGMLTools::Utils::process_options, extends the SGML
#  environment variables and returns the list of file names that the
#  option parser gave back.
#
sub process_options
{
  my ($progname, @args) = @_;

  #
  #  The base name of the caller selects the format: "sgml2xxx" means
  #  format "xxx", and a few names are aliases for another format.
  #
  my ($format) = fileparse ($progname, "");
  $global->{myname} = $format;
  $format =~ s/sgml2(.*)/$1/;

  my %alias = ("sgmlcheck" => "global", "latex" => "latex2e");
  $format = $alias{$format} if exists $alias{$format};

  $FmtList{$format} = $Formats{$format};
  usage ("$global->{myname}: unknown format") unless $FmtList{$format};
  $global->{format} = $format;

  #
  #  Let the option parser do its work; whatever it returns is the
  #  list of input files, which must not be empty.
  #
  my @files = SGMLTools::Utils::process_options (@args);
  usage ("no filenames given") unless @files;
  $global->{language} = Any2ISO ($global->{language});

  #
  #  Extend the catalog search list. A separating colon is only needed
  #  in front of the first addition when the variable was already set.
  #
  my $sep = defined $ENV{SGML_CATALOG_FILES} ? ":" : "";
  $ENV{SGML_CATALOG_FILES} .= $sep .
     "$main::prefix/lib/sgml/iso-entities-8879.1986/iso-entities.cat";
  $ENV{SGML_CATALOG_FILES} .= ":$main::LibDir/dtd/catalog";

  #
  #  Pick the SGML declaration: the first of <format>.dcl, <style>.dcl
  #  and sgml.dcl that exists as a plain file wins. When none exists,
  #  SGMLDECL is left untouched.
  #
  foreach my $name ($format, $global->{style}, "sgml")
  {
    next unless -f "$main::LibDir/dtd/$name.dcl";
    $ENV{SGMLDECL} = "$main::LibDir/dtd/$name.dcl";
    last;
  }

  return @files;
}

=item SGMLTools::process_file

With all the configuration done, this routine will take a single filename
and convert it to the currently active backend format. The conversion is
done in a number of steps in tight interaction with the currently active
backend (see also L<SGMLTools::BackEnd>):

=over

=item 1. Backend: set NSGMLS options and optionally create a pre-NSGMLS pipe.

=item 2. Here: Run the preprocessor to handle conditionals.

=item 3. Here: Run NSGMLS.

=item 4. Backend: run pre-ASP conversion.

=item 5. Here: Run SGMLSASP.

=item 6. Backend: run post-ASP conversion, generating the output.

=back

All stages are influenced by command-line settings, currently active format,
etcetera. See the code for details.

=cut

#
#  ISO-8859-1 (Latin-1) character => SGML entity name, used by
#  process_file when the charset is "latin". The characters are written
#  as hex escapes so that this source stays plain ASCII and the table
#  cannot be damaged when the file is re-encoded.
#
#  Originally by Farzad Farid, adapted by Greg Hankins.
#
my %Latin1Entity = (
  "\xC0" => "Agrave", "\xC1" => "Aacute", "\xC2" => "Acirc",
  "\xC3" => "Atilde", "\xC4" => "Auml",   "\xC5" => "Aring",
  "\xC6" => "AElig",  "\xC7" => "Ccedil", "\xC8" => "Egrave",
  "\xC9" => "Eacute", "\xCA" => "Ecirc",  "\xCB" => "Euml",
  "\xCC" => "Igrave", "\xCD" => "Iacute", "\xCE" => "Icirc",
  "\xCF" => "Iuml",   "\xD1" => "Ntilde", "\xD2" => "Ograve",
  "\xD3" => "Oacute", "\xD4" => "Ocirc",  "\xD5" => "Otilde",
  "\xD6" => "Ouml",   "\xD8" => "Oslash", "\xD9" => "Ugrave",
  "\xDA" => "Uacute", "\xDB" => "Ucirc",  "\xDC" => "Uuml",
  "\xDD" => "Yacute", "\xDE" => "THORN",  "\xDF" => "szlig",
  "\xE0" => "agrave", "\xE1" => "aacute", "\xE2" => "acirc",
  "\xE3" => "atilde", "\xE4" => "auml",   "\xE5" => "aring",
  "\xE6" => "aelig",  "\xE7" => "ccedil", "\xE8" => "egrave",
  "\xE9" => "eacute", "\xEA" => "ecirc",  "\xEB" => "euml",
  "\xEC" => "igrave", "\xED" => "iacute", "\xEE" => "icirc",
  "\xEF" => "iuml",   "\xB5" => "mu",     "\xF0" => "eth",
  "\xF1" => "ntilde", "\xF2" => "ograve", "\xF3" => "oacute",
  "\xF4" => "ocirc",  "\xF5" => "otilde", "\xF6" => "ouml",
  "\xF8" => "oslash", "\xF9" => "ugrave", "\xFA" => "uacute",
  "\xFB" => "ucirc",  "\xFC" => "uuml",   "\xFD" => "yacute",
  "\xFE" => "thorn",  "\xFF" => "yuml",
);

#
#  _latin1_to_sgml ($text)
#
#  Returns a copy of $text in which every character listed in
#  %Latin1Entity is replaced by its "&name;" entity reference.
#  Characters that are not in the table are passed through unchanged.
#
sub _latin1_to_sgml
{
  my $text = shift;
  $text =~ s{([\xB5\xC0-\xFF])}{exists $Latin1Entity{$1} ? "&$Latin1Entity{$1};" : $1}ge;
  return $text;
}

#
#  process_file ($file)
#
#  Converts one SGML file with the currently active format
#  ($global->{format}). $file may be given with or without its ".sgml"
#  suffix; "<name>.sgml" and "<name>.SGML" are tried when the name as
#  given does not exist.
#
#  Side effects: prints a progress line, sets filename, file, filepath,
#  tmpbase, NsgmlsOpts, NsgmlsPrePipe and dtd in $global, appends the
#  file's directory to $ENV{SGML_SEARCH_PATH}, and creates temporary
#  files "<tmpbase>.1" to "<tmpbase>.3", which are removed at the end
#  unless the debug flag is set.
#
#  Dies when the input file cannot be found, when an intermediate file
#  cannot be opened or is empty, or when one of the conversion stages
#  reports an error.
#
sub process_file
{
  my $file = shift (@_);

  print "Processing file $file\n";

  #
  #  The suffix pattern is a regular expression, so it is single-quoted
  #  to keep the backslash: only a literal ".sgml" is split off.
  #
  my ($filename, $filepath, $filesuffix) = fileparse ($file, '\.sgml');
  my $tmpnam = $filepath . '/' . $filename;
  $file = $tmpnam . $filesuffix;
  -f $file || $file =~ /\.sgml$/ || ($file .= '.sgml');
  -f $file || ($file = $tmpnam . '.SGML');
  -f $file || die "Cannot find $file\n";
  $global->{filename} = $filename;
  $global->{file} = $file;
  $global->{filepath} = $filepath;

  #
  #  NOTE(review): the temporary file names are predictable (file name
  #  plus process id in a shared directory) - worth revisiting.
  #
  my $tmpdir = $ENV{'TMPDIR'} || '/tmp';
  my $tmpbase = $global->{tmpbase} = $tmpdir . '/sgmltmp.' . $filename . $$;
  $ENV{"SGML_SEARCH_PATH"} .= ":$filepath";

  #
  # Set up the preprocessing command.  Conditionals have to be
  # handled here until they can be moved into the DTD, otherwise
  # a validating SGML parser will choke on them.
  #
  my($precmd) = "|sgmlpre output=$global->{format} $global->{define}";

  #
  #  You can hack $NsgmlsOpts here, etcetera. The include option gets a
  #  leading blank so that it cannot run into whatever is already in
  #  the option string.
  #
  $global->{NsgmlsOpts} .= " -i$global->{include}" if ($global->{include});
  $global->{NsgmlsPrePipe} = "NOTHING";
  my $prensgmls = $Formats{$global->{format}}{preNSGMLS};
  $prensgmls->() if defined $prensgmls;

  #
  #  Run the preprocessor and nsgmls. The input is either the file
  #  itself or the output of the pipe the backend asked for.
  #
  my ($ifile, $writensgmls);
  if ($global->{NsgmlsPrePipe} eq "NOTHING")
    {
      $ifile = FileHandle->new ($file);
      defined $ifile or die "Cannot open $file: $!\n";
    }
  else
    {
      $ifile = FileHandle->new ("$global->{NsgmlsPrePipe}|");
      defined $ifile or die "Cannot run $global->{NsgmlsPrePipe}: $!\n";
    }
  $writensgmls = FileHandle->new (
      "$precmd|$main::progs->{NSGMLS} $global->{NsgmlsOpts} $ENV{SGMLDECL} >$tmpbase.1");
  defined $writensgmls or die "Cannot run $main::progs->{NSGMLS}: $!\n";

  #
  #  Feed the input to the pipe line by line, changing Latin-1
  #  characters into SGML entities when the charset asks for it.
  #
  my $latin = ($global->{charset} eq "latin");
  while (defined (my $line = <$ifile>))
    {
      $line = _latin1_to_sgml ($line) if $latin;
      print $writensgmls $line;
    }
  $ifile->close;
  $writensgmls->close;

  #
  #  Special case: if format is global, we're just checking.
  #
  cleanup () if $global->{format} eq "global";

  #
  #  If the output file is empty, something went wrong.
  #
  -z "$tmpbase.1" and die "SGML parsing error - exiting";

  #
  #  If a preASP stage is defined, let the format handle it.
  #  
  #  preASP ($inhandle, $outhandle);
  #
  #  Without a preASP stage the parser output is copied unchanged.
  #
  my $inpreasp = FileHandle->new ("<$tmpbase.1")
    or die "Cannot read $tmpbase.1: $!\n";
  my $outpreasp = FileHandle->new (">$tmpbase.2")
    or die "Cannot write $tmpbase.2: $!\n";
  if (defined $Formats{$global->{format}}{preASP})
    {
      &{$Formats{$global->{format}}{preASP}}($inpreasp, $outpreasp) == 0 or
       die "error pre-processing $global->{format}.\n";
    }  
  else
    {
      copy ($inpreasp, $outpreasp) or
       die "Cannot copy $tmpbase.1 to $tmpbase.2: $!\n";
    }
  $inpreasp->close;
  $outpreasp->close;

  #
  #  Run sgmlsasp, with an optional style if specified.
  #
  #  Search order:
  #  - libdir/site/<dtd>/<format>
  #  - libdir/dist/<dtd>/<format>
  #  So we need to fetch the doctype from the intermediate.
  #
  #  Note: this is a very simplistic check - but as far as I know,
  #  it is correct. Am I right?
  #
  my $tmp = FileHandle->new ("<$tmpbase.2")
    or die "Cannot read $tmpbase.2: $!\n";
  my $dtd = <$tmp>;
  $tmp->close;
  defined $dtd or die "error pre-processing $global->{format}: no output.\n";
  $dtd =~ s/^\(//;
  $dtd =~ tr/A-Z/a-z/;
  chomp $dtd;			# only strip a line end, never a real character
  $global->{dtd} = $dtd;

  my $style = "";
  if ($global->{style})
    {
      $style = "$main::LibDir/site/$dtd/$global->{format}/$global->{style}mapping";
      -r $style or
         $style = "$main::LibDir/dist/$dtd/$global->{format}/$global->{style}mapping";
    }
  my $mapping = "$main::LibDir/site/$dtd/$global->{format}/mapping";
  -r $mapping or $mapping = "$main::LibDir/dist/$dtd/$global->{format}/mapping";

  system ("$main::progs->{SGMLSASP} $style $mapping <$tmpbase.2|
      expand -$global->{tabsize} >$tmpbase.3") == 0 or
    die "error running $main::progs->{SGMLSASP}.\n";

  #
  #  If a postASP stage is defined, let the format handle it.
  #  It should leave whatever it thinks is right based on $file.
  #
  #  postASP ($inhandle)
  #
  my $inpostasp = FileHandle->new ("<$tmpbase.3")
    or die "Cannot read $tmpbase.3: $!\n";
  if (defined $Formats{$global->{format}}{postASP})
    {
      &{$Formats{$global->{format}}{postASP}}($inpostasp) == 0 or
	die "error post-processing $global->{format}.\n";
    }
  $inpostasp->close;

  #
  #  All done, remove the temporaries.
  #
  if( !$global->{debug} ) {
      remove_tmpfiles($tmpbase);
  }
}

=pod

=back

=head1 SEE ALSO

Documentation for various sub-packages of SGMLTools.

=head1 AUTHOR

Cees de Groot, C<E<lt>cg@pobox.comE<gt>>, and various SGML-Tools contributors as
listed in C<CONTRIBUTORS>.

=cut
1;
