# 	$rcs = ' $Id: TeX.pm,v 1.3 1996/01/29 21:53:20 ilya Exp ilya $ ' ;	
package TeX;

# Does not deal with verbatims
# Spaces are treated bad.

$notusualtoks="\\\\" . '\${}^_~&@%';
$notusualtokenclass="[$notusualtoks]";
$usualtokenclass="[^$notusualtoks]";
$macro='\\\\(?:[^a-zA-Z]|([a-zA-Z]+)\s*)'; # Why \\\\? double interpretation!
				# Contains one level of grouping
$active="$macro|\\\$\\\$|\\^\\^.|$notusualtokenclass";
$tokenpattern="$usualtokenclass|$active";
$multitokenpattern="($usualtokenclass+)|$active"; # Two levels of grouping
$commentpattern="(?:%.*\n\s*)+";
$whitespaceAndComment="\s*(%.*\n[ \t]*)+";
$optionalArgument="(?:\\[([^]]*)\\])?";	# Contains one level of grouping

for (qw(TeX::ArgToken TeX::BegArgsToken TeX::EndArgsToken )) {
  $pseudo{$_} = 1;
}


{
  package TeX::Comment;
  $ignore = 1;
}

{
  package TeX::Token;
  @ISA=('TeX::Chunk');

  sub refine {
    my $self = shift;
    return undef unless defined $self->[0];
    my $txt = shift;
    my $type;
    if (defined ($tok = $txt->{tokens}->{$self->[0]}) 
	and defined $tok->{type}) {
      bless $self, $tok->{type};
    }
  }
}

@TeX::Text::ISA=('TeX::Chunk');
@TeX::ArgToken::ISA=('TeX::Chunk');
@TeX::BegArgsToken::ISA=('TeX::Chunk');
@TeX::EndArgsToken::ISA=('TeX::Chunk');
@TeX::Paragraph::ISA=('TeX::Chunk');

{
  package TeX::Chunk;
  sub refine {}
  sub digest {}
  sub collect {$_[0]->[0]}
  sub new {
    my $class = shift;
    bless [@_], $class;
  }
  sub print {$_[0]->[0]}
}

{
  package TeX::Group;
  sub new {shift; my $in=shift; bless $in}
  sub print {
    local @arr;
    foreach (@{ $_[0] }) {
      push(@arr, $_->print);
    }
    "`" . join("',`", @arr) . "'";
  }
}

{
  package TeX::End::Group;
  @ISA=('TeX::Chunk');
  sub new {shift; my $in = shift; bless \$in}
  sub digest {
    my $wa = $_[1]->curwaitforaction;
    my $w = $_[1]->popwait;
    warn "Expecting `$w', got $_[0]->[0]" if $w ne $_[0]->[0];
    &$wa if defined $wa;
  }
}

{
  package TeX::Begin::Group;
  @ISA=('TeX::Chunk');
  sub digest {$_[1]->pushwait($_[0]->[0])}
}

{
  package TeX::SelfMatch;
  @ISA = ('TeX::Chunk');
  sub refine {
    if ($_[1]->curwait eq $_[0]->[0]) { 
      bless $_[0], TeX::End::Group;
    } else {
      bless $_[0], TeX::Begin::Group;
    }
  }
  sub digest {			# XXXX Should not be needed?
    if ($_[1]->curwait eq $_[0]->[0]) { 
      bless $_[0], TeX::End::Group;
      $_[0]->TeX::End::Group::digest($_[1]);
    } else {
      bless $_[0], TeX::Begin::Group;
      $_[1]->pushwait($_[0]->[0]);
    }
  }
}

{
  package TeX::GetParagraph;
  sub new {
    shift; 
    my $file = shift;
    my $fh;
    $fh = $ {$file->{fhs}}[-1] if @{$file->{fhs}};
    return undef if (not defined $fh or eof($fh)) and $file->{readahead} eq "";
    my $string = $file->{readahead};
    if (defined $fh) {
      while (($in = <$fh>) =~ /\S/) {
	$string .= $in;
      }
      while ( (($in = <$fh>) !~ /\S/) && !eof($fh)) {
	$string .= $in;
      }
      $file->{readahead}=$in;
    } else {
      $file->{readahead} = '';
    }
    bless \$string;
  }
}


{
  package TeX::OpenFile;

  $refgen="TeXOpenFile0000";

  sub new {
    shift; my $file=shift; my %opt = @_;
    if (defined $file) {
       ++$refgen;
       open("::$refgen",$file) || die "Cannot open $file: $!";
       die "End of file `$file' during opening" if eof("::$refgen");
    }
    my $fhs = defined $file ? ["::$refgen"] : [];
    bless {fhs => $fhs, readahead => ($opt{string} || ""), 
	     files => [$file],
	     "paragraph" => undef, 
	     "tokens" => ($opt{tokens} || \%TeX::Tokens),
	     waitfors => [], options => \%opt,
	     waitforactions => [],
	     defaultacts => [$opt{defaultact}],	# The last element is
                                                # the default action
                                                # for next deeper
                                                # level
	     actions => [defined $opt{action} ? 
			 $opt{action} : 
			 $opt{defaultact}],
	     waitargcounts => [0],
	     pending => [],	# Pseudotokens
	   };
  }
  sub DESTROY {
    my $in=shift; my $i = 0;
    for (@{$in->{fhs}}) {
      close($_)
	|| die "Cannot close $ {$in->{files}}[$i]: $!";
      $i++;
    }
  }

  sub paragraph {
    my $in=shift;
    #print "ep.in=$in\n";
    if ($in->{"paragraph"} and $ {$in->{"paragraph"}} ne "") {
      $in->{"paragraph"};
    } elsif (@{$in->{fhs}} and eof($ {$in->{fhs}}[-1])) {
      undef;
    } elsif (!@{$in->{fhs}} and $in->{readahead} eq '') {
      undef;
    } else {
      #warn "getting new\n";
      $in->{"paragraph"} = new TeX::GetParagraph $in;
      return "";
    }
  }

  sub pushwait {
    push(@{ $_[0]->{waitfors} }, $_[0]->{tokens}{$_[1]}{waitfor});
    push(@{ $_[0]->{actions} }, 
	 defined $_[2] ? $_[2] : $_[0]->{defaultacts}[-1]);
    push(@{ $_[0]->{waitforactions} }, $_[3]);
  }

  sub popwait {
    if ($#{ $_[0]->{waitfors} } < 0) {
      warn "Got negative depth"; return;
    }
    pop(@{ $_[0]->{actions} });
    pop(@{ $_[0]->{waitforactions} });
    pop(@{ $_[0]->{waitfors} });
  }

  sub curwait {
    my $ref = $_[0]->{waitfors}; $$ref[-1];
  }

  sub curwaitforaction {
    my $ref = $_[0]->{waitforactions}; $$ref[-1];
  }

  # These are default bindings. You probably should override it.

  sub eatOptionalArgument {
    my $in = shift->paragraph;
    return undef unless defined $in;
    my $comment = ( $$in =~ s/^\s*($TeX::commentpattern)//o );
    if ($$in =~ s/^\s*$TeX::optionalArgument//o) {
      new TeX::Token $1, $comment;
    } else {
      warn "No optional argument found";
      if ($comment) {new TeX::Token undef, $comment}
      else {undef}
    } 
  }

  sub eatFixedString {
    my $in = shift->paragraph;
    return undef unless defined $in;
    my $str=shift;
    my ($comment) = ( $$in =~ s/^\s*($TeX::commentpattern)//o );
    if ($$in =~ s/^\s*$str//) {new TeX::Token $&, $comment}
    else {
      warn "String `$str' expected, not found";
      if ($comment) {new TeX::Token undef, $comment}
      else {undef}
    } 
  }

  sub eatBalanced {
    my $txt = shift;
    my ($in);
    warn "Did not get `{' when expected", return undef
      unless defined ($in=$txt->eatFixedString('{')) && defined ($in->[0]);
    $txt->eatBalancedRest;
  }

  sub eatBalancedRest {
    my $txt = shift;
    my ($count,$in,@in)=(1);
  EAT:
    {
      warn "Unfinished balanced next", last EAT 
	unless defined ($in=$txt->eatMultiToken) && defined $in->[0];
      push(@in,$in);
      $count++,next if $in->[0] eq '{';
      $count-- if $in->[0] eq '}';
      pop(@in), last EAT unless $count;
      redo EAT;
    }
    bless \@in, 'TeX::Group';
  }

  sub eatGroup {		# If arg2==1 will eat exactly one
                                # group, otherwise a group or a
                                # multitoken.
    my $txt = shift;
    local ($in,$r,@in);
    if (defined ($in[0]=$txt->eatMultiToken(shift)) and defined $in[0]->[0]) {
      $in[0]->refine($txt);
      if (ref $in[0] ne 'TeX::Begin::Group') {
	return $in[0];
      } else {
	while (defined ($r=ref($in=$txt->eatGroup)) # Eat many groups
	       && $r ne 'TeX::End::Group') {
	  push(@in,$in);
	}
	if (defined $r) {push(@in,$in)}
	else {warn "Uncompleted group"}
      }
    } else {
      warn "Got nothing when argument expected";
      return undef;
    }
    bless \@in, 'TeX::Group';
  }

  sub eatUntil {		# We suppose that the text to match
				# fits in a paragraph 
    my $txt = shift;
    my $m = shift;
    my ($in,@in);
    while ( (!defined $txt->{'paragraph'} || $ {$txt->{'paragraph'}} !~ /$m/)
	   && defined ($in=$txt->eatGroup(1))) {
      push(@in,@$in);
    }
    ($ {$txt->{'paragraph'}} =~ s/$m//) || warn "Delimiter `$m' not found";
    bless \@in, 'TeX::Group';
  }

  sub eatMultiToken {		# If arg2==1 will eat one token
    my $in = shift->paragraph;
    return undef unless defined $in;
    return new TeX::Paragraph unless $in;
    my $comment = undef;
    $comment = $2 if $$in =~ s/^(\s*)($TeX::commentpattern)/$1/o;
    my $multi = shift;
    # Cannot use if () block, because $& is local.
    $got = $$in =~ s/^\s*($TeX::tokenpattern)//o	if $multi;
    $got = $$in =~ s/^\s*($TeX::multitokenpattern)//o	unless $multi;
    if ($got and defined $2) {new TeX::Text $&, $comment}
    elsif ($got and defined $3) {new TeX::Token "\\$3", $comment} # Multiletter
    elsif ($got and defined $1) {new TeX::Token $1, $comment}
    elsif ($comment) {new TeX::Token undef, $comment}
    else {undef}
  }

  sub eat {
    my $txt = shift;
    return pop @{ $txt->{pending} } if @{ $txt->{pending} };
    my $in=$txt->eatMultiToken;
    return undef unless defined $in;
    $in->refine($txt);
    $in->digest($txt);
    my ($Token, $type, @arr);
    return $in 
      unless defined $in && defined $in->[0] && $in->[0] =~ /$TeX::active/o
	&& defined ( $Token = $txt->{tokens}->{$in->[0]} );
    $type = $Token->{Type} or return $in;
    if ($type eq 'action') {
      return &{$Token->{data}}(@$in);
    } elsif ($type eq 'argmask') {
      # eatWithMask;		# ????
    } elsif ($type eq 'args') {
      my $count = $Token->{data};
      while ($count--) {
	push(@arr,$txt->eatGroup(1));
      }
      $in->[3] = \@arr;
    } elsif ($type eq 'report_args') {
      my $curact = $txt->{actions}[-1];
      my $count = $Token->{data};
      my $ordinal = 0;
      my $argact = sub {	# Will be executed after each argument
	&$curact(@_);
	return if ref $_[0] eq 'TeX::ArgToken' or ref $_[0] eq 'TeX::BegArgsToken' or ref $_[0] eq 'TeX::Begin::Group';
	$ordinal++;
	if ($ordinal == $count) {
	  push @{ $txt->{pending} }, new TeX::EndArgsToken [$in, $count];
	} else {
	  push @{ $txt->{pending} }, new TeX::ArgToken [$in, $ordinal, $count];
	}
	pop @{ $txt->{actions} } if $ordinal == $count; # Remove this action on the last count
      };
      my $nextact = sub {	# Will be executed after this token.
	&$curact; 
	$txt->{actions}[-1] = $argact; 
      };
      push @{ $txt->{pending} }, 
        new TeX::BegArgsToken [$in, $count];
      push @{ $txt->{actions} }, $nextact;
    } elsif ($type eq 'begin') {
      my $what = $txt->eatGroup(1);
      $in->[3] = \@arr;
    } else {
      warn "Format of token data unknown for `", $in->[0], "'"; 
    }
    return $in;
  }
  
  sub report_arg {
    my $n = shift;
    my $max = shift;
    my $act = shift;
    my $lastact = shift;
    if ($n == $max) {
      &$lastact($n);
    } else {
      &$act($n,$max);
    }
  }

  sub eatDefine {
    my $txt = shift;
    my ($args, $body);
    warn "No `{' found after defin", return undef 
      unless $args=$txt->eatUntil('{');
    warn "Argument list @$args too complicated", return undef 
      unless @$args==1 && $$args[0]=~ /^(\ \#\d)*$/;
    warn "No `}' found after defin", return undef 
      unless $body=$txt->eatBalancedRest;
    #my @args=split(/(\#[\d\#])/,$$);       # lipa
  }
  
  sub process {
    my ($txt, $eaten, $act) = (shift);
    while (defined ($eaten = $txt->eat)) {
      if (defined ($act = $txt->{actions}[-1])) {
	&$act($eaten,$txt);
      }
    }
  }
}

# type => 'where to bless to', Type => how to process

%Tokens = (
  '{' => {'type' => 'TeX::Begin::Group', 'waitfor' => '}'},
  '}' => {'type' => 'TeX::End::Group'},
  "\$" => {'type' => 'TeX::SelfMatch', waitfor => "\$"},
  '$$' => {'type' => 'TeX::SelfMatch', waitfor => '$$'},
  '\begin' => {Type => 'args', data => 1},
  '\end' => {Type => 'args', data => 1},
  '\left' => {Type => 'args', data => 1},
  '\right' => {Type => 'args', data => 1},
  '\frac' => {Type => 'report_args', data => 2},
  '\sqrt' => {Type => 'report_args', data => 1},
  '^' => {Type => 'report_args', data => 1},
  '_' => {Type => 'report_args', data => 1},
);

{
  my $i = 0;
  for (
       ('') x 8,		# 1st row
       ('') x 8,
       ('') x 8,		# 2nd row
       ('') x 8,
       '', '', 'forall', '', 'exists', '', '', '???', # 3rd: symbols
       '', '', '', '', '', '', '', '',
       '', '', '', '', '', '', '', '', # 4th: numbers and symbols
       '', '', '', '', '', '', '', '',
       '???', qw(Alpha Beta Chi Delta Epsilon Phi Gamma 
		 Eta Iota vartheta Kappa Lambda Mu Nu Omicron 
		 Pi Theta Rho Sigma Tau Ypsilon varsigma Omega
		 Xi Psi Zeta), '', 'therefore', '', 'perp', '',
       '', qw(alpha beta chi delta varepsilon phi gamma
	      eta iota varphi kappa lambda mu nu omicron
	      pi theta rho sigma tau ypsilon varpi omega
	      xi psi zeta), '', '', '', '', '',
       ('') x 8,		# 9st row
       ('') x 8,
       ('') x 8,		# 10nd row
       ('') x 8,
       '', '', '', 'leq', '', 'infty', '', '', # 11th row
       '', '', '', '', 'from', '', 'to', '',
       'circ', 'pm', '', 'geq', 'times', '', 'partial', 'bullet', # 12th row
       '', 'neq', 'equiv', 'approx', 'dots', 'mid', 'hline', '',
       'Aleph', '', '', '', 'otimes', 'oplus', 'empty', 'cap', # 13th row
       'cup', '', '', '', '', '', 'in', 'notin',
       '', 'nabla', '', '', '', 'prod', '', 'cdot', # 14th row
       '', 'wedge', 'vee', '', '', '', '', '',
       '', '<', '', '', '', 'sum', '', '', # 15th row
       '', '', '', '', '', '', '', '',
       '', '>', 'int', '', '', '', '', '', # 16th row
       '', '', '', '', '', '', '', '',
      ) {
    $xfont{"\\$_"} = ['symbol', chr($i)] unless $_ eq '';
    $i++;
  }
}

1;
__END__

=head1 NAME

Text::TeX -- Perl module for parsing of C<TeX>.

=cut

