# /=====================================================================\ #
# |  LaTeXML::MathGrammar                                         | #
# | LaTeXML's Math Grammar for postprocessing                           | #
# |=====================================================================| #
# | Part of LaTeXML:                                                    | #
# |  Public domain software, produced as part of work done by the       | #
# |  United States Government & not subject to copyright in the US.     | #
# |---------------------------------------------------------------------| #
# | Bruce Miller <bruce.miller@nist.gov>                        #_#     | #
# | http://dlmf.nist.gov/LaTeXML/                              (o o)    | #
# \=========================================================ooo==U==ooo=/ #
# ================================================================================
# LaTeXML's MathGrammar.
# To compile :
#      perl -MParse::RecDescent - MathGrammar LaTeXML::MathGrammar
# ================================================================================
# Startup actions: import the constructors
# This block is a Parse::RecDescent start-up action.  It imports the
# `:constructors' export group of LaTeXML::MathParser; presumably that group
# supplies the helpers (Apply, Fence, NewList, ...) called unqualified in the
# rule actions below -- confirm against LaTeXML::MathParser.
# Uncomment the $::RD_TRACE line to turn on Parse::RecDescent's parse tracing.
{ BEGIN{ use LaTeXML::MathParser qw(:constructors); 
#### $::RD_TRACE=1;
}}
  
# Rules section
# ========================================
# Naming Conventions:
#   UPPERCASE   : is for terminals, ie. classes of TeX tokens.
#   Initial Cap : for non-terminal rules that can possibly be invoked externally.
#   Initial lowercase : internal rules.
# ========================================
# For internal rules
#   moreFoos[$foo] : Looks for more Foo's w/appropriate punctuation or operators, 
#     whatever is appropriate, and combines it with whatever was passed in
#     as pattern arg. Typically, the last clause would be simply
#       | { $arg[0]; }
#     to return $foo without having found any more foo's.
#     In such a case, it appears to be advantageous to have the first clause be
#       : /^\Z/ { $arg[0]; }
#     which will return immediately if there is no additional input.
#   addFoo[$bar]  : Check for a following Foo and add it, as appropriate to
#   the $bar.
# ========================================
# Note that Parse::RecDescent does NOT backtrack within a rule:
#  If a given production succeeds, the rule succeeds, and even if the ultimate
# parse then fails, the parser will NOT go back and try another production within
# that same rule!!!  Of course, if a production fails, it goes on to the next,
# and if that rule fails, etc...
#
# For example ||a|-|b|| won't work (in spite of various attempts to control it)
# After seeing the initial || and attempting to parse an Expression, it gets
#   a * abs( - abs(b))
# without anything to match the initial ||; and it will NOT backtrack to try
# a shorter Expression!
#
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Top Level expressions; Just about anything?
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Note in particular that many inline formulae contain only `half' a formula,
# with the lead-in text effectively being the LHS, e.g. "function $=foo$";
# similarly, you can end up with a missing RHS, as in $x=$.

# Start: entry point.  Succeeds only if Anything is followed by the end of the
# input (/^\Z/); the rule's value is whatever Anything returned.
Start   : Anything /^\Z/                        { $item[1]; }

#======================================================================
# Anything: parse any acceptable top-level content, requiring end of input.
# The rulevar localizes $MaxAbsDepth for the duration of this rule, starting it
# at $LaTeXML::MathParser::MAX_ABS_DEPTH; absExpression decrements it on each
# nested entry and refuses to match once it goes negative.
Anything : <rulevar: local $MaxAbsDepth = $LaTeXML::MathParser::MAX_ABS_DEPTH>
Anything : AnythingAny /^\Z/                    { $item[1]; }

#======================================================================
# AnythingAny: the alternatives accepted at top level.  Productions are tried
# in the order written and the first one that matches wins, so the order here
# is significant.  Where a left operand is missing, Absent() is passed in its place.
AnythingAny :
        # One or more formulae.
          Formulae 
        # Formulae enclosed in a pair of delimiters.
        | OPEN Formulae CLOSE             { Fence($item[1],$item[2],$item[3]); }
        # Formula(e) lacking a left-hand side, like "<0" (optionally delimited).
        | modifierFormulae
        | OPEN modifierFormula CLOSE      { Fence($item[1],$item[2],$item[3]); }
        # A lone modifier, or a modifier/meta-relation operator with only a right operand.
        | MODIFIER
        | MODIFIEROP Expression           { Apply($item[1],Absent(),$item[2]);}
        | METARELOP Formula               { Apply($item[1],Absent(),$item[2]); }
        # Two or more operators in sequence, optionally separated by punctuation.
        # PUNCT(?) yields an array ref; when it is empty, InvisibleComma() is used instead.
        | AnyOp (PUNCT(?) AnyOp {[$item[1]->[0]||InvisibleComma(), $item[2]]})(s)
                                          { NewList($item[1],map(@$_,@{$item[2]})); }
        # Scripts with no base: the scripts are attached to Absent().
        | FLOATSUPERSCRIPT POSTSUBSCRIPT  { NewScript(NewScript(Absent(),$item[1]),$item[2]); }
        | FLOATSUBSCRIPT POSTSUPERSCRIPT  { NewScript(NewScript(Absent(),$item[1]),$item[2]); }
        | FLOATSUPERSCRIPT                { NewScript(Absent(),$item[1]); }
        | FLOATSUBSCRIPT                  { NewScript(Absent(),$item[1]); }
        # An operator followed by an expression; the left operand is absent.
        | AnyOp Expression                { Apply($item[1],Absent(),$item[2]);}
                                          
# Top-level rules for sub- and superscripts, which can accept all sorts of junk.
# Subscript: one aSubscript followed by zero or more further aSubscripts, each
# optionally preceded by punctuation (InvisibleComma() stands in when the
# punctuation is missing).  Everything is flattened and handed to NewList.
# $MaxAbsDepth is reset here just as in Anything.
Subscript : <rulevar: local $MaxAbsDepth = $LaTeXML::MathParser::MAX_ABS_DEPTH>
Subscript :
          aSubscript   (PUNCT(?) aSubscript {[$item[1]->[0] || InvisibleComma(),$item[2]]; })(s?)
                 { NewList($item[1],map(@$_,@{$item[2]})); }

# Superscript: same shape as Subscript, but built from aSuperscript items.
# $MaxAbsDepth is reset for the duration of the rule.
Superscript : <rulevar: local $MaxAbsDepth = $LaTeXML::MathParser::MAX_ABS_DEPTH>
Superscript :
          aSuperscript (PUNCT(?) aSuperscript {[$item[1]->[0] || InvisibleComma(),$item[2]]; })(s?)
                 { NewList($item[1],map(@$_,@{$item[2]})); }

# aSubscript: a single item within a subscript: formulae, an operator applied
# to an expression (with Absent() as the missing left operand), or a bare operator.
aSubscript :
          Formulae
        | AnyOp Expression               { Apply($item[1],Absent(),$item[2]);}
        | AnyOp

# aSuperscript: a single item within a superscript.  Like aSubscript, but
# `supops' (a rule not defined in this part of the file) is tried first.
aSuperscript :
          supops
        | Formulae 
        | AnyOp Expression               { Apply($item[1],Absent(),$item[2]);}
        | AnyOp

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Formulae  (relations or grouping of expressions or relations)
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# This maze attempts to recognize the various meaningful(?) alternations of
# Expression(s) separated by punctuation, relational operators or metarelational
# operators [Think of     $a=b=c$ vs $a=b, c=d$  vs. $a=b,c,d$  .. ]
# and group them into Formulae (collections of relations), including relations
# which have punctuated collections of Expression(s) on either the LHS or RHS, 
# as well as `multirelation' like a = b = c, or simply punctuated collections of
# Expression(s)

# Formulae: a Formula followed by whatever moreFormulae accepts; with no
# explicit action, the rule's value is the value of moreFormulae.
Formulae : Formula moreFormulae[$item[1]]

# moreFormulae[$formula]; Got a Formula, what can follow?
#   - nothing (end of input, or nothing recognizable): return $formula unchanged;
#   - one or more (endPunct Formula) pairs: flattened and passed to NewFormulae;
#   - one or more metarelopFormula pairs: flattened and passed to NewFormula.
moreFormulae :
          /^\Z/ { $arg[0];}   # short circuit!
        | (endPunct Formula { [$item[1],$item[2]]; })(s)
                    { NewFormulae($arg[0],map(@$_,@{$item[1]})); }
        | metarelopFormula(s)     { NewFormula($arg[0],map(@$_,@{$item[1]})); }
        | { $arg[0]; }

# Punctuation that ends a formula: either PUNCT or PERIOD.
endPunct : PUNCT | PERIOD


# Formula: an Expression, possibly extended into a list or relation by
# extendFormula; the rule's value is the value of extendFormula.
Formula : Expression extendFormula[$item[1]]

# extendFormula[$expression] ; expression might be followed by punct Expression... 
#   or relop Expression... or arrow Expression or nothing.
#   - punctExpr(s): collect the punctuated expressions, then let maybeRHS decide
#     whether a relation follows the collection;
#   - relop Expression: start a relation and let moreRHS continue it;
#   - relop at end of input: a relation whose right side is Absent();
#   - otherwise return $expression unchanged.
extendFormula :
          /^\Z/ { $arg[0];}   # short circuit!
        | punctExpr(s) maybeRHS[$arg[0],map(@$_,@{$item[1]})]
        | relop Expression moreRHS[$arg[0],$item[1],$item[2]]
        | relop /^\Z/    { NewFormula($arg[0],$item[1], Absent()); }
        | { $arg[0]; }

# maybeRHS[$expr,(punct,$expr)*]; 
#    Could have RELOP Expression (which means the (collected LHS) relation RHS)
#    or done (just collection)
#    In the relation case, all of @arg is wrapped by NewList to form the LHS and
#    the flattened (relop, expr) pairs are appended as arguments to NewFormula.
maybeRHS :
          /^\Z/ { NewList(@arg); }
        | relopExpr(s) { NewFormula(NewList(@arg),map(@$_,@{$item[1]})); }
        | { NewList(@arg); }
# --- either line could be followed by (>0)
# For the latter, does a,b,c (<0) mean c<0 or all of them are <0 ????

# moreRHS[$expr,$relop,$expr]; Could have more (relop Expression)
# or (punct Expression)*
#   - PUNCT Expression: pass everything collected so far, plus the new pair, on
#     to maybeColRHS;
#   - relopExpr(s?): zero or more further (relop, expr) pairs are appended to the
#     relation (a multirelation such as a = b = c).  Because of the (s?), this
#     last production also covers the case where nothing more follows.
moreRHS :
          /^\Z/   { NewFormula($arg[0],$arg[1],$arg[2]); } # short circuit!
        | PUNCT Expression maybeColRHS[@arg,$item[1],$item[2]]
        | relopExpr(s?) { NewFormula($arg[0],$arg[1],$arg[2],
                                     map(@$_,@{$item[1]})); }
# --- 1st line could be preceded by (>0) IF it ends up end of formula
# --- 2nd line could be followed by (>0)

# maybeColRHS[$expr,$relop,$expr,(punct, $expr)*];
#    Could be done, get punct (collection) or rel Expression (another formula)
#    Argument layout: $arg[0] is the LHS, $arg[1] the relop, and $arg[2..$#arg]
#    the RHS expressions interleaved with their punctuation.
#    When another relop follows, the LAST expression ($arg[$#arg]) is taken to be
#    the LHS of a new formula: the first formula then keeps only
#    @arg[2..$#arg-2] as its RHS, and $arg[$#arg-1] is the punctuation separating
#    the two formulae.  E.g. for a=b,c=d the pieces are (a = b) , (c = d).
maybeColRHS :
          /^\Z/ { NewFormula($arg[0],$arg[1],NewList(@arg[2..$#arg])); }
        | relop Expression moreRHS[$arg[$#arg],$item[1],$item[2]]
            { NewFormulae(NewFormula($arg[0],$arg[1],
                          NewList(@arg[2..$#arg-2])),$arg[$#arg-1],$item[3]); }
        | PUNCT Expression maybeColRHS[@arg,$item[1],$item[2]]
        | { NewFormula($arg[0],$arg[1],NewList(@arg[2..$#arg])); }
# --- 1st line handles it through more RHS ???
# --- 2nd line could be preceded by (>0) if it ends formula
# --- 3rd line could be followed by (>0)


# punctExpr: punctuation followed by an Expression; returns the pair as an
# array ref [punct, expr] for the caller to flatten.
punctExpr : PUNCT Expression                    { [$item[1],$item[2]]; }

# relopExpr: a relational operator followed by an Expression, returned as the
# array ref [relop, expr].  If the relop is the last thing in the input,
# Absent() is used as the expression.
relopExpr : relop Expression                    { [$item[1],$item[2]]; }
          | relop /^\Z/                         { [$item[1], Absent()]; }

# metarelopFormula: a METARELOP followed by a Formula, returned as the array
# ref [metarelop, formula]; Absent() replaces the formula when the operator
# ends the input.
metarelopFormula :
            METARELOP Formula                   { [$item[1],$item[2]]; }
          | METARELOP /^\Z/                     { [$item[1], Absent()]; }
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# `Modifier' formula, things like $<0$, that might follow another formula or text.
# Absent() is a placeholder for the missing thing... (?)
# [and also when the LHS is moved away, due to alignment rearrangement]
# modifierFormulae: a modifierFormula optionally continued by moreFormulae
# (further punctuated or meta-related formulae).
modifierFormulae : modifierFormula moreFormulae[$item[1]]
# modifierFormula: a relation with no left-hand side; Absent() is passed to
# moreRHS in the LHS position.
modifierFormula : relop Expression moreRHS[Absent(),$item[1],$item[2]]

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Expressions; sums of terms
# Abstractly, things combined by operators binding tighter than relations
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

# Expressions: an Expression followed by zero or more (punct, Expression)
# pairs; all pieces are flattened and passed to NewList.
Expressions : Expression punctExpr(s?)
                                { NewList($item[1],map(@$_,@{$item[2]})); }

# Expression: a (possibly signed) term, any further addop/term pairs gathered by
# moreTerms (which starts with an empty accumulator []), and an optional
# trailing modifier attached by addExpressionModifier.
Expression  : SignedTerm moreTerms[[],$item[1]] addExpressionModifier[$item[2]]
            # # very tentatively allow an operator as a complete expression
            # # BUT, this should only succeed if at end, or followed by punctuation!!!!!!!
            # # (or CLOSE, or... ?!?!?!?)
            # The `...' makes anyOpIsolator a lookahead: it must match but is not consumed.
            | AnyOp ...anyOpIsolator { $item[1]; }

# anyOpIsolator: what may follow an operator standing alone as an Expression:
# end of input, punctuation, or a closing delimiter.  Used only as a lookahead.
anyOpIsolator : /^\Z/ | PUNCT | CLOSE

# moreTerms[ [($term,$addop)*], $term];  Check for more addop & term's
#   $arg[0] is an array ref accumulating the earlier (term, addop) pairs and
#   $arg[1] is the latest term.  When no further AddOp follows, the whole
#   sequence is handed to LeftRec.
moreTerms :
          /^\Z/ { LeftRec(@{$arg[0]},$arg[1]); }   # short circuit!
        | AddOp moreTerms2[$arg[0],$arg[1],$item[1]]
        | { LeftRec(@{$arg[0]},$arg[1]); }

# moreTerms2[ [($term,$addop)*], $term, $addop]; Check if addop is followed
#  by another term, or if not, it presumably represents a limiting form
#  like "a+" (ie a from above)
#  With a following Term, the (term, addop) pair is appended to the accumulator
#  and parsing continues in moreTerms.  Otherwise the last term and the dangling
#  addop are combined under a 'limit-from' operator before calling LeftRec.
moreTerms2   : Term moreTerms[ [@{$arg[0]},$arg[1],$arg[2]],$item[1] ]
            | { LeftRec(@{$arg[0]},Apply(New('limit-from'),$arg[1],$arg[2])); }

# addExpressionModifier[$expr]
#   Attach a trailing modifier to $expr if one follows, else return $expr.
#   In the first two productions $item[1] is the optional PUNCT (an array ref),
#   $item[2] the OPEN, and balancedClose is asked to match that OPEN.
addExpressionModifier :
          /^\Z/ { $arg[0];}   # short circuit!
        # Parenthesized relation without LHS, e.g. "x (>0)": recorded as an
        # 'annotated' application with the fenced relation as annotation.
        | PUNCT(?) OPEN relop Expression balancedClose[$item[2]]
            { Apply(New('annotated'),$arg[0],
                    Fence($item[2], Apply($item[3],Absent(),$item[4]),$item[5])); }
        # An alternative form would have OPEN Expression relop...
        # but that seems less like a "modifier" and more like a relation as argument!
###        | PUNCT(?) OPEN Expression relop Expression
###                 moreRHS[$item[3],$item[4],$item[5]] balancedClose[$item[2]]
###            { Apply(New('annotated'),$arg[0],Fence($item[2],$item[6],$item[7])); }
        # Parenthesized MODIFIEROP expression; the delimiters (and any
        # punctuation) are not passed to Apply.
        | PUNCT(?) OPEN MODIFIEROP Expression balancedClose[$item[2]]
            { Apply($item[3],$arg[0],$item[4]); } # Is the punctuation Lost here?
        # Unparenthesized modifier forms.
        | MODIFIER
            { Apply(New('annotated'),$arg[0],$item[1]); }
        | MODIFIEROP Expression
            { Apply($item[1],$arg[0],$item[2]); }
        | { $arg[0]; }


#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Terms: products of factors
# Abstractly, things combined by operators binding tighter than addition
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

# SignedTerm: a Term optionally preceded by an AddOp; with the prefix, the
# operator is applied to the single term (unary use).
SignedTerm : AddOp Term                         { Apply($item[1],$item[2]); }
        | Term

# Term: a Factor followed by whatever moreFactors accepts; the rule's value
# is the value of moreFactors.
Term    : Factor moreFactors[$item[1]]

# moreFactors[$factor]
#   Extend the product accumulated in $arg[0]:
#   - explicit MulOp and another Factor (combined with ApplyNary);
#   - a composition operator (handled by makeComposition);
#   - an "evaluated at" bar with scripts, unless $forbidEvalAt is set
#     (the guard action returns undef, failing the production, in that case;
#     because the guard is $item[1], the evalAtOp is $item[2]);
#   - an adjacent Factor, treated as multiplication by InvisibleTimes();
#   - nothing more: return the accumulated value.
moreFactors :
          /^\Z/ { $arg[0];}   # short circuit!
        | MulOp Factor moreFactors[ApplyNary($item[1],$arg[0],$item[2])]
        # Given an explicit COMPOSEOP, we'll assume the preceding is
        # an implicit lambda of some sort(?)
        | COMPOSEOP makeComposition[$arg[0],$item[1]]
        | { ($forbidEvalAt ? undef : 1); }
          evalAtOp maybeEvalAt[$arg[0],$item[2]]
        | Factor moreFactors[ApplyNary(InvisibleTimes(),$arg[0],$item[1])]
        | { $arg[0]; }


#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Factors: function applications, postfix on atoms, etc.
# Abstractly, things combined by operators binding tighter than multiplication
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

# Factor: the tightest-binding unit: numbers, identifiers, function
# applications, delimited groups, absolute values/norms, bras & kets, and
# operators with their arguments.  Productions are tried in order.
# Several productions begin with a guard action; a guard returning undef makes
# the production fail.  Note that the guard occupies $item[1], so the tokens
# that follow it are numbered from $item[2].
Factor  : 
        # These 2nd two are Iffy; hopefully the 1st rule will protect from backtrack?
          OPEN ARRAY CLOSE  addScripts[Fence($item[1],$item[2],$item[3])]
          # perhaps only when OPEN or CLOSED is { or } ??
        # should be explicitly {, and moreover the array should be only 1 or 2 columns!
        | LBRACE ARRAY { InterpretDelimited(New('cases'),$item[1],$item[2],Absent()); }
        | ARRAY RBRACE { InterpretDelimited(New('cases'),Absent(),$item[1],$item[2]); }
        # Functions of the various classes, each with its own argument rule.
        | preScripted['FUNCTION'] addArgs[$item[1]]
        | preScripted['OPFUNCTION'] addOpFunArgs[$item[1]]
        | preScripted['TRIGFUNCTION'] addTrigFunArgs[$item[1]]
        | preScripted['ATOM_OR_ID'] maybeArgs[$item[1]]
        | preScripted['UNKNOWN'] doubtArgs[$item[1]]
        | NUMBER   addScripts[$item[1]]
        # Things that start with an opening delimiter.
        | SCRIPTOPEN scriptFactorOpen[$item[1]]
        | OPEN factorOpen[$item[1]]
        # handle INTOP separately, since it recognizes d as diff
        | preScripted['INTOP'] addIntOpArgs[$item[1]]
        | preScripted['bigop'] addOpArgs[$item[1]]
        # Vertical-bar constructs are skipped while $forbidVertBar is set
        # (ketExpression sets it).
        | { ($forbidVertBar ? undef : 1); }
          SINGLEVERTBAR SINGLEVERTBAR absExpression SINGLEVERTBAR SINGLEVERTBAR # || exp || ==> norm
              addScripts[Fence(CatSymbols($item[2],$item[3],undef,'||',role=>'OPEN'),
                $item[4],
                CatSymbols($item[5],$item[6],undef,'||',role=>'CLOSE'))]
        | { ($forbidVertBar ? undef : 1); }
          VERTBAR absExpression VERTBAR                     # | exp | => absolute-value
              addScripts[Fence($item[2],$item[3],$item[4])]
        # Quantum-mechanics notations are attempted only when
        # IsNotationAllowed('QM') returns a defined value; SawNotation('QM') is
        # called once such a form has been matched.
        | { ($forbidVertBar ? undef : IsNotationAllowed('QM')); }
          MIDBAR ketExpression RANGLE { SawNotation('QM'); } # | exp > ==> ket
              addScripts[InterpretDelimited(New('ket'),
                    Annotate($item[2],role=>'OPEN'),$item[3],Annotate($item[4],role=>'CLOSE'))] # ket
        | { IsNotationAllowed('QM'); }
          LANGLE ketExpression MIDBAR maybeBra[$item[2],$item[3],$item[4]]
        | { IsNotationAllowed('QM'); }
          LANGLE absExpression RANGLE
               addScripts[Fence(Annotate($item[2],role=>'OPEN'),
                          $item[3],
                          Annotate($item[4],role=>'CLOSE'))]
        # An OPERATOR with scripts, possibly nested with further operators,
        # then given arguments as for an OPFUNCTION.
        | OPERATOR addScripts[$item[1]] nestOperators[$item[2]]
                    addOpFunArgs[$item[3]]

# ATOM_OR_ID: any one of ATOM, ID or ARRAY; usable as a rule name argument
# to preScripted.
ATOM_OR_ID : ATOM | ID | ARRAY


# A restricted sort of Factor for the unparenthesized argument to a function.
# Note f g h => f*g*h, but f g h x => f(g(h(x)))  Seems like what people mean...
# Should there be a special case for trigs?
# barearg: one aBarearg followed by whatever moreBareargs accepts.
barearg : aBarearg moreBareargs[$item[1]]
# aBarearg: a single piece of a bare (unparenthesized) argument; a subset of
# the Factor productions.
# NOTE(review): unlike Factor, the |...| production here uses Expression rather
# than absExpression and has no $forbidVertBar guard -- confirm this is intended.
aBarearg : 
          preScripted['FUNCTION'] addArgs[$item[1]]
        | preScripted['OPFUNCTION'] addOpFunArgs[$item[1]]
        | preScripted['TRIGFUNCTION'] addTrigFunArgs[$item[1]]
        | preScripted['ATOM_OR_ID'] maybeArgs[$item[1]]
        | preScripted['UNKNOWN'] doubtArgs[$item[1]]
        | NUMBER   addScripts[$item[1]]
        | VERTBAR Expression VERTBAR addScripts[Fence($item[1],$item[2],$item[3])]

# moreBareargs[$argpart]
#   Extend a bare argument with further aBareargs, joined either by an explicit
#   MulOp or by InvisibleTimes(); returns $argpart when nothing more follows.
moreBareargs :
          /^\Z/ { $arg[0];}   # short circuit!
        | MulOp aBarearg moreBareargs[ApplyNary($item[1],$arg[0],$item[2])]
        | aBarearg moreBareargs[ApplyNary(InvisibleTimes(),$arg[0],$item[1])]
        | { $arg[0]; }

# A variation of barearg that does not allow a bare trig function:
# one aTrigBarearg followed by whatever moreTrigBareargs accepts.
trigBarearg : aTrigBarearg moreTrigBareargs[$item[1]]
# aTrigBarearg: same alternatives as aBarearg, minus the TRIGFUNCTION one.
aTrigBarearg : 
          preScripted['FUNCTION'] addArgs[$item[1]]
        | preScripted['OPFUNCTION'] addOpFunArgs[$item[1]]
        | preScripted['ATOM_OR_ID'] maybeArgs[$item[1]]
        | preScripted['UNKNOWN'] doubtArgs[$item[1]]
        | NUMBER   addScripts[$item[1]]
        | VERTBAR Expression VERTBAR addScripts[Fence($item[1],$item[2],$item[3])]

# moreTrigBareargs[$argpart]
#   Counterpart of moreBareargs built from aTrigBarearg: further pieces are
#   joined by an explicit MulOp or by InvisibleTimes().
moreTrigBareargs :
          /^\Z/ { $arg[0];}   # short circuit!
        | MulOp aTrigBarearg
                 moreTrigBareargs[ApplyNary($item[1],$arg[0],$item[2])]
        | aTrigBarearg 
          moreTrigBareargs[ApplyNary(InvisibleTimes(),$arg[0],$item[1])]
        | { $arg[0]; }

# maybeEvalAt[$thing,$vertbar]
#   After an evalAt bar, at least a subscript is required.  The scripts may
#   come in either order; NewEvalAt always receives them as
#   (thing, bar, subscript, superscript).  There is no fallback production, so
#   the rule fails if no subscript is present.
maybeEvalAt :
          POSTSUBSCRIPT moreEvalAt[$arg[0],$arg[1],$item[1]]
        | POSTSUPERSCRIPT POSTSUBSCRIPT moreFactors[NewEvalAt($arg[0],$arg[1],$item[2],$item[1])]

# moreEvalAt[$thing,$vertbar,$sub]
#   Pick up an optional superscript after the subscript (undef is passed when
#   there is none), then continue with moreFactors on the NewEvalAt result.
moreEvalAt :
          POSTSUPERSCRIPT moreFactors[NewEvalAt($arg[0],$arg[1],$arg[2],$item[1])]
         | moreFactors[NewEvalAt($arg[0],$arg[1],$arg[2],undef)]

#======================================================================
# After < a | we might be done, or get <a|b> or <a|H|b>

# <$expr |   maybeBra[$langle,$expr,$bar]
#   If another ketExpression follows, continue in maybeBraket; otherwise what
#   has been seen is a complete bra: note the QM notation and build it,
#   annotating the delimiters as OPEN and CLOSE.
maybeBra :
          ketExpression maybeBraket[$arg[0],$arg[1],$arg[2],$item[1]]
        | { SawNotation('QM'); }
          addScripts[InterpretDelimited(New('bra'),
                     Annotate($arg[0],role=>'OPEN'),$arg[1],Annotate($arg[2],role=>'CLOSE'))]

# <$expr1|$expr2   maybeBraket[$langle,$expr1,$bar,$expr2]
#   - RANGLE closes it as <expr1|expr2>, built as an 'inner-product' with the
#     bar annotated as MIDDLE;
#   - MIDBAR ketExpression RANGLE gives <expr1|expr2|expr3>, built as a
#     'quantum-operator-product': the first bar is annotated as the CLOSE of the
#     bra and the second bar as the OPEN of the ket.
#   There is no fallback production, so the rule fails if neither follows.
maybeBraket :
          RANGLE { SawNotation('QM'); }
              addScripts[InterpretDelimited(New('inner-product', undef,role=>'MIDDLE'),
                                   Annotate($arg[0],role=>'OPEN'),$arg[1],
                                   Annotate($arg[2],role=>'MIDDLE'),
                                   $arg[3],Annotate($item[1],role=>'CLOSE'))]
        | MIDBAR ketExpression RANGLE { SawNotation('QM'); }
              addScripts[InterpretDelimited(New('quantum-operator-product',undef), # Is this a good representation?
                            Annotate($arg[0],role=>'OPEN'),$arg[1],
                                     Annotate($arg[2],role=>'CLOSE'),
                            $arg[3],
                            Annotate($item[1],role=>'OPEN'),$item[2],
                                    Annotate($item[3],role=>'CLOSE'))]

# bra's and ket's (ie <foo| & |foo>) can contain a rather wide variety of things
# from simple symbols to full (but typically short) formula, and so we
# want to use the Formulae production.  However, for that to work,
# we need to keep |, < and > (which delimit the bra & ket) from being
# interpreted as usual, otherwise the parse will walk off the end, or
# fail at a level that precludes backtracking.
# Both flags are localized, so they apply only while this rule is active.
# $forbidVertBar is tested by the guards in Factor; $forbidLRAngle is not
# referenced in this part of the file (presumably by the relop terminals -- confirm).
# A lone operator is also accepted as the content.
ketExpression : <rulevar: local $forbidVertBar = 1>
ketExpression : <rulevar: local $forbidLRAngle = 1>
ketExpression : Formulae
              | METARELOP | ARROW | AddOp | MulOp | MODIFIEROP

#======================================================================
# absExpression; need to be careful about misinterpreting the next |
# since we can't backtrack across productions.
# Disable evalAt notation ( |_{x=0} ) and explicitly control abs nesting.
# Each entry lowers the localized $MaxAbsDepth by one.  Once it has gone
# negative the guard calls SawNotation('AbsFail') and yields undef (whatever
# SawNotation returns), so the production fails; otherwise the rule's value is
# that of the Expression.
absExpression : <rulevar: local $forbidEvalAt = 1>
absExpression : <rulevar: local $MaxAbsDepth = $MaxAbsDepth-1>
absExpression : { ($MaxAbsDepth >= 0 ? 1 : (SawNotation('AbsFail')&& undef)); } Expression

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Adding pre|post sub|super scripts to various things.
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

# addScripts[$base] ; adds any following sub/super scripts to $base.
#   Recurses for as long as POSTSUPERSCRIPT, POSTSUBSCRIPT or POSTFIX tokens
#   follow; a POSTFIX operator is applied to what has been built so far.
#   Returns $base unchanged when none follow.
addScripts :
          /^\Z/ { $arg[0];}   # short circuit!
        | POSTSUPERSCRIPT  addScripts[NewScript($arg[0],$item[1])]
        | POSTSUBSCRIPT    addScripts[NewScript($arg[0],$item[1])]
        | POSTFIX          addScripts[Apply($item[1],$arg[0])]
        | { $arg[0]; }

# ================================================================================
# preScripted['RULE']; match a RULE possibly preceded by sub/super prescripts,
#  possibly followed by sub/superscripts.  The initial prescript can only be FLOAT
#  but the following ones can be either POST (which combine) or FLOAT (which don't)
#  $arg[0] is the NAME of the rule to match; <matchrule:$arg[0]> invokes the rule
#  so named.  Prescripts are attached via NewScript(base, script, 'pre'), after
#  the rest (inpreScripted) has been parsed.
preScripted :
          FLOATSUPERSCRIPT inpreScripted[$arg[0]] { NewScript($item[2],$item[1], 'pre');}
        | FLOATSUBSCRIPT   inpreScripted[$arg[0]] { NewScript($item[2],$item[1], 'pre');}
        | <matchrule:$arg[0]> addScripts[$item[1]]
# inpreScripted['RULE']; continuation of preScripted once a first prescript has
#  been seen.  The argument is the rule NAME (passed straight through from
#  preScripted), not a prescript.  Accepts further prescripts of either the
#  POST or FLOAT kind, and finally the named rule with its trailing scripts.
inpreScripted :
          POSTSUPERSCRIPT inpreScripted[$arg[0]] { NewScript($item[2],$item[1], 'pre');}
        | POSTSUBSCRIPT   inpreScripted[$arg[0]] { NewScript($item[2],$item[1], 'pre');}
        | FLOATSUPERSCRIPT inpreScripted[$arg[0]] { NewScript($item[2],$item[1], 'pre');}
        | FLOATSUBSCRIPT   inpreScripted[$arg[0]] { NewScript($item[2],$item[1], 'pre');}
        | <matchrule:$arg[0]> addScripts[$item[1]]

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Parenthetical: Things wrapped in OPEN .. CLOSE
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

# ================================================================================
# Factors that begin with OPEN; grouped expressions and objects like sets,
# intervals, etc.
# factorOpen[$open] : Dealing with various things that start with an open.
#   The OPEN itself has already been consumed by the caller and arrives as
#   $arg[0]; balancedClose[$arg[0]] is asked for the close that matches it.
factorOpen :
          AddOp balancedClose[$arg[0]] addScripts[Fence($arg[0],$item[1],$item[2])] # For (-)
        # Parenthesized Operator possibly w/scripts
        # The fenced operator is then applied to the Factor that follows.
        | preScripted['bigop'] balancedClose[$arg[0]] 
                 addScripts[Fence($arg[0],$item[1],$item[2])] Factor
            { Apply($item[3],$item[4]); }
        # Parenthesized Operator including a pre-factor
        | Factor preScripted['bigop'] balancedClose[$arg[0]] 
             addScripts[Fence($arg[0],
                        Apply(InvisibleTimes(),$item[1],$item[2]),$item[3])] Factor
          { Apply($item[4],$item[5]); }
        # read expression too? match subcases.
        | Expression factorOpenExpr[$arg[0],$item[1]]
        # Empty OPEN CLOSE ?
        | balancedClose[$arg[0]] addScripts[Fence($arg[0],$item[1])]
        # Sequence starting  with an operator ? 
        | AnyOp factorOpenExpr[$arg[0],$item[1]]
# factorOpenExpr[$open,$expr];  Try to recognize various things that start
#   this way. Need some extra productions for sets (w/possible middle '|' )
#   and vectors; all n-ary.
#   Note that the first production accepts ANY CLOSE (so mismatched pairs such
#   as "[a,b)" are allowed), whereas the others require a balanced close.
#   There is no fallback: the rule fails if none of these forms follows.
factorOpenExpr :
        # 2nd expression; some kind of pair, interval, set, whatever [Any CLOSE, NOT balancedClose]
         (PUNCT Expression { [$item[1],$item[2]]; })(s)  CLOSE
                  addScripts[Fence($arg[0],$arg[1],map(@$_,@{$item[1]}),$item[2])]
        # only 2 things and 2nd one is an op?; some kind of group???
        | PUNCT AnyOp balancedClose[$arg[0]]
                   addScripts[InterpretDelimited(New('group'),
                                            $arg[0],$arg[1],$item[1],$item[2],$item[3])]
        # parenthesized expression.
        | balancedClose[$arg[0]] addScripts[Fence($arg[0],$arg[1],$item[1])]

# ================================================================================
# Sets special cases
# A conditionalized set
# scriptFactorOpen[$open]
#   First try the form  OPEN formula such-that formulae CLOSE, which is
#   interpreted as a 'conditional-set'; if that does not match, the input is
#   handled exactly as by factorOpen.
scriptFactorOpen :
          Formula suchThatOp Formulae balancedClose[$arg[0]]
          addScripts[InterpretDelimited(New('conditional-set'),
                                    $arg[0], $item[1],$item[2], $item[3],$item[4])]
        # Else fall through to normal factorOpen
        | factorOpen[$arg[0]]

# The "such that" that can appear in a sets like {a "such that" predicate(a)}
# accept vertical bars, and colon
# The colon is recognized by matching the token text directly with a regexp
# (METARELOP:colon:<digits>); the matched text is then passed to Lookup.
suchThatOp : MIDDLE | VERTBAR 
         | /METARELOP:colon:\d+/        { Lookup($item[1]); }
# ================================================================================
# Function args, etc.

# maybeArgs[$function] ; Add arguments to an identifier, but only if made explicit.
#   "Explicit" means an APPLYOP follows, in which case requireArgs must then
#   succeed; otherwise $function is returned as is.
maybeArgs : 
          /^\Z/ { $arg[0];}   # short circuit!
        | APPLYOP requireArgs[$arg[0]]
        | { $arg[0]; }

# doubtArgs[$unknown]; Check for apparent arguments following an
#   Unknown (unclassified) item. If an explicit APPLYOP follows,
#   it seemingly asserts that the preceding _is_ a function,
#   otherwise Warn if there seems to be an arglist.
#   The third production exists only for its side effect: forbidArgs never
#   succeeds (its actions return undef), so after the possible MaybeFunction
#   call, control always falls through to the final production, which returns
#   $unknown.  It is attempted only when IsNotationAllowed('MaybeFunctions')
#   returns a defined value.
doubtArgs :
          /^\Z/ { $arg[0];}   # short circuit!
        | APPLYOP requireArgs[$arg[0]]
        | { IsNotationAllowed('MaybeFunctions'); } OPEN forbidArgs[$arg[0],$item[2]]
        | { $arg[0]; }

# forbidArgs[$unknown,$open]; Got a suspicious pattern: an unknown and open. 
#    If the following seems to be an argument list, warn.
#    Both actions call MaybeFunction($unknown) and then return undef, so this
#    rule always fails; it never contributes to the parse result.
forbidArgs :
          Argument (argPunct Argument)(s) balancedClose[$arg[1]]
                                                { MaybeFunction($arg[0]); undef; }
        # Term really could be Argument, but that gives a "possible function" warning
        # even for a(b+c) which has a good reason for the parentheses; These patterns FAIL anyway!!
        | Term balancedClose[$arg[1]]           { MaybeFunction($arg[0]); undef; }

# requireArgs[$function]; Add arguments following a known function, failing if it
#   isn't there! Typically this follows an explicit applyop
#   Accepts either a delimited, punctuated argument list (the delimiters and
#   punctuation are passed along to ApplyDelimited) or a bare argument.
requireArgs :
          OPEN Argument (argPunct Argument {[$item[1],$item[2]];})(s?)
               balancedClose[$item[1]]
                  { ApplyDelimited($arg[0],$item[1],$item[2],
                                   map(@$_,@{$item[3]}),$item[4]); }
        # Hmm, should only be applicable to _some_ functions ???                  
        | barearg                               { Apply($arg[0],$item[1]); }

# addArgs[$function]; We've got a function; Add following arguments to a
#   function, if present.  Also recognizes composition type ops (something
#   combining two functions into a function)
#   The unambiguous cases are delegated to addEasyArgs.
addArgs :
          /^\Z/ { $arg[0];}   # short circuit!
        | addEasyArgs[$arg[0]]
        # Accept bare arg (w/o parens) ONLY if an explicit APPLYOP
        | APPLYOP barearg                               { Apply($arg[0],$item[2]);}
        | { $arg[0]; }   # Just return the function itself,then.

# addOpFunArgs[$function]; Same as above but for functions classified as
#   OPFUNCTION. Ie operator-like functions such as \sin, that don't
#   absolutely require parens around args.
#   Here the APPLYOP before a bare argument is optional; $item[1] is the
#   (possibly empty) APPLYOP match and $item[2] the bare argument.
addOpFunArgs :
          /^\Z/ { $arg[0];}   # short circuit!
        | addEasyArgs[$arg[0]]
        # Accept bare arg (w/o parens) for this class of functions.
        | APPLYOP(?) barearg                            { Apply($arg[0],$item[2]);}
        | { $arg[0]; }   # Just return the function itself,then.

# addTrigFunArgs[$function]; Yet another variation;
#   It differs in the barearg is restricted to non-trig
#   (it uses trigBarearg, which has no TRIGFUNCTION alternative).
addTrigFunArgs :
          /^\Z/ { $arg[0];}   # short circuit!
        | addEasyArgs[$arg[0]]
        # Accept bare arg (w/o parens) for this class of functions.
        | APPLYOP(?) trigBarearg                        { Apply($arg[0],$item[2]);}
        | { $arg[0]; }   # Just return the function itself,then.

# addEasyArgs[$function]; gets unambiguous compositions or parenthesized arguments
#  These are the "easy" cases for addArgs and addOpFunArgs.
#  In the second production $item[1] is the optional APPLYOP, $item[2] the OPEN,
#  $item[3] the first Argument, $item[4] the list of [punct, arg] pairs and
#  $item[5] the balanced close.  The rule fails if neither form follows.
addEasyArgs :
          COMPOSEOP makeComposition[$arg[0],$item[1]]
        |  APPLYOP(?) OPEN Argument
                      (argPunct Argument {[$item[1],$item[2]];})(s?)
                   balancedClose[$item[2]]
                  { ApplyDelimited($arg[0],$item[2],$item[3],
                                   map(@$_,@{$item[4]}),$item[5]); }

# A function (or other) argument would normally be a simple expression,
# but often relations (esp. Statistics) or arrows appear, so allow those as well.
# Argument: an Expression, possibly extended by extendArgument.
Argument : Expression extendArgument[$item[1]]

# extendArgument[$argpart] : recognize some longer form "arguments";
#   things that may look like relations.
#   Repeatedly absorbs either a run of (relop, expr) pairs (built with
#   NewFormula) or a METARELOP and Formula (built with Apply), recursing on the
#   result until neither follows.
extendArgument :
          /^\Z/ { $arg[0]; } # short circuit
        | relopExpr(s) extendArgument[NewFormula($arg[0],map(@$_,@{$item[1]}))]
        | METARELOP Formula extendArgument[Apply($item[1],$arg[0],$item[2])]
        | { $arg[0]; }

# makeComposition[$thing,$comp]; Given something that presumably is a function,
#   and a composition operator, read another function and possibly args
#   The composition Apply($comp,$thing,<next function>) is formed first and then
#   treated as the function to which any following arguments are added, using
#   the argument rule matching the class of the second function.
makeComposition :
          preScripted['FUNCTION'] addArgs[Apply($arg[1],$arg[0],$item[1])]
                   { $item[2]; }
        | preScripted['OPFUNCTION'] addOpFunArgs[Apply($arg[1],$arg[0],$item[1])]
                   { $item[2]; }
        | preScripted['TRIGFUNCTION']
          addTrigFunArgs[Apply($arg[1],$arg[0],$item[1])]       { $item[2]; }
        # Given an explicit composition operator, the next thing may safely(?)
        # be assumed to be a function, so treat it as such.
        | Factor addArgs[Apply($arg[1],$arg[0],$item[1])]               { $item[2]; }

# addOpArgs[$bigop]; Add following Term to a bigop, if present.
#   The operand is a Factor extended by moreOpArgFactors ($item[3]); the
#   optional APPLYOP ($item[1]) is not included in the result.
addOpArgs :
          /^\Z/ { $arg[0];}   # short circuit!
        # Is the APPLYOP getting "lost" here?
        | APPLYOP(?) Factor moreOpArgFactors[$item[2]] { Apply($arg[0],$item[3]);}
        | { $arg[0]; }

# moreOpArgFactors[$factor1] : Similar to moreFactors, 
#   but w/o evalAtOp since that most likely belongs to the operator, not
#   the factors.
#   (It also has no COMPOSEOP production.)
moreOpArgFactors :
          /^\Z/ { $arg[0];}   # short circuit!
        | MulOp Factor moreOpArgFactors[ApplyNary($item[1],$arg[0],$item[2])]
        | Factor moreOpArgFactors[ApplyNary(InvisibleTimes(),$arg[0],$item[1])]
        | { $arg[0]; }

# addIntOpArgs[$bigop]; Add following Term to a INTOP as integrand, if present.
#   The main point here is to recognize a "d" as a diff operator.
#   This is insufficient, in general, because the "d" may be contained within
#   a subexpression, particularly a fraction; the top-level parsing needs to be able
#   to parse subexpressions within a context, and yet, needs to parse the subexpressions
#   beforehand to (potentially) determine the role of the subexpression!
#   Structurally identical to addOpArgs, but built from IntFactor.
addIntOpArgs :
          /^\Z/ { $arg[0];}   # short circuit!
        # Is the APPLYOP getting "lost" here?
        | APPLYOP(?) IntFactor moreIntOpArgFactors[$item[2]] { Apply($arg[0],$item[3]);}
        | { $arg[0]; }

# moreIntOpArgFactors[$factor1] : Similar to moreOpArgFactors, 
#   but recognizing d as diff
#   (by using IntFactor in place of Factor).
moreIntOpArgFactors :
          /^\Z/ { $arg[0];}   # short circuit!
        | MulOp IntFactor moreIntOpArgFactors[ApplyNary($item[1],$arg[0],$item[2])]
        | IntFactor moreIntOpArgFactors[ApplyNary(InvisibleTimes(),$arg[0],$item[1])]
        | { $arg[0]; }

# IntFactor: a Factor as it may appear within an integrand.  A "d" immediately
# followed by an atom, identifier or unknown (with any scripts) is taken to be
# a differential: the d is annotated with role DIFFOP and meaning
# 'differential-d' and applied to the scripted variable.  Anything else is an
# ordinary Factor.  The lines starting with ## are an earlier version without
# script support.
IntFactor :
##          diffd ATOM_OR_ID { Apply(Annotate($item[1],role=>'DIFFOP',meaning=>'differential-d'),$item[2]); }
##        | diffd UNKNOWN    { Apply(Annotate($item[1],role=>'DIFFOP',meaning=>'differential-d'),$item[2]); }
          diffd ATOM_OR_ID addScripts[$item[2]]
           { Apply(Annotate($item[1],role=>'DIFFOP',meaning=>'differential-d'),$item[3]); }
        | diffd UNKNOWN addScripts[$item[2]]
           { Apply(Annotate($item[1],role=>'DIFFOP',meaning=>'differential-d'),$item[3]); }
        | Factor           { $item[1]; }

# diffd: the letter d, whether classified as UNKNOWN or as ID.  It is
# recognized by matching the token text directly with a regexp
# (<ROLE>:d:<digits>); the matched text is then passed to Lookup.
diffd :
          /UNKNOWN:d:\d+/           { Lookup($item[1]); }
        | /ID:d:\d+/                { Lookup($item[1]); }

# Punctuation separating function arguments; things marked MIDDLE could
# also separate arguments
# With great trepidation, I'm adding VERTBAR here
argPunct : PUNCT | MIDDLE | VERTBAR

# ================================================================================
# Operator args, etc.

# nestOperators[$operator*] : Nest a possible sequence of operators.
#   The operators gathered so far arrive in @arg.  Each further OPERATOR (after
#   addScripts) is appended to that list and the rule recurses.  The sequence ends
#   at a FUNCTION, OPFUNCTION or TRIGFUNCTION (appended as the last element), at a
#   delimited Expression (which ApplyDelimited combines with the last operator
#   gathered), or when nothing more matches.  recApply then combines the list.
nestOperators :
          /^\Z/
               { recApply(@arg); }
        | OPERATOR addScripts[$item[1]] nestOperators[@arg,$item[2]]
        | FUNCTION addScripts[$item[1]]
               { recApply(@arg, $item[2]); }
        | OPFUNCTION addScripts[$item[1]]
               { recApply(@arg, $item[2]); }
        | TRIGFUNCTION addScripts[$item[1]]
               { recApply(@arg, $item[2]); }
        | OPEN Expression balancedClose[$item[1]]
               { my @outer     = @arg;          # all but the last operator ...
                 my $innermost = pop(@outer);   # ... and the last one itself
                 recApply(@outer,
                          ApplyDelimited($innermost, $item[1], $item[2], $item[3])); }
        |
               { recApply(@arg); }

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# (slightly) structured operators
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

# AnyOp : Any single operator: a relop, METARELOP, ARROW, AddOp, MulOp or MODIFIEROP;
#   or whatever preScripted['bigop'] accepts (that rule is defined elsewhere;
#   presumably a bigop carrying pre-scripts); or an OPERATOR passed through addScripts.
# Same as anyop, at the moment.
AnyOp   : relop | METARELOP | ARROW | AddOp | MulOp | MODIFIEROP
        | preScripted['bigop']
        | OPERATOR addScripts[$item[1]]

# AddOp : A BINOP or ADDOP token, passed through addOpDecoration.
# Sub or superscripts on operators;
# we recognize the structure, not necessarily the meaning.
# Note that BINOP is accepted here as well as in MulOp.
AddOp : BINOP addOpDecoration[$item[1]]
      | ADDOP addOpDecoration[$item[1]]

# MulOp : A BINOP or MULOP token, passed through addOpDecoration.
MulOp : BINOP addOpDecoration[$item[1]]
      | MULOP addOpDecoration[$item[1]]
# (BINOP can never really be satisfactory; it comes from something marked
#  as \mathbin; we don't know any more about it.  That is why it is accepted
#  both here and in AddOp.)

# addOpDecoration[$op] : Decorations for an operator;
#   Same thing as addScripts, but not allowing POSTFIX.
#   $arg[0] is the operator so far.  Each POSTSUPERSCRIPT or POSTSUBSCRIPT that follows
#   is combined with it by DecorateOperator, and the rule recurses on the result
#   until no more scripts follow.
addOpDecoration :
          /^\Z/ { $arg[0];}   # short circuit!
        | POSTSUPERSCRIPT  addOpDecoration[DecorateOperator($arg[0],$item[1])]
        | POSTSUBSCRIPT    addOpDecoration[DecorateOperator($arg[0],$item[1])]
        # No (more) scripts: return the operator as decorated so far.
        | { $arg[0]; }

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Pseudo-Terminals. 
#  Useful combinations or subsets of terminals.
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# relop : A generalized relational operator or arrow (a RELOP or ARROW token,
#   passed through addOpDecoration).
# Note we disallow < or > if we're parsing the contents of a bra or ket!
relop   : 
        # Guard production; it never succeeds.  Its action yields undef (so the production
        # is abandoned and the next one is tried) unless $forbidLRAngle is true.
        # If it is true and the next token is a less-than or greater-than RELOP,
        # <commit> followed by <reject> fails the production while committed, so the
        # remaining productions are not tried and relop as a whole fails.
          { ($forbidLRAngle ? 1 : undef); } /RELOP:(less|greater)-than:\d+/ <commit> <reject>
        | RELOP addOpDecoration[$item[1]]
        | ARROW addOpDecoration[$item[1]]

# bigop : Any one of the BIGOP, SUMOP, INTOP, LIMITOP or DIFFOP terminals.
# operator : Simply the OPERATOR terminal.
# Check out whether diffop should be treated as bigop or operator.
# It depends on the binding.
bigop   : BIGOP | SUMOP | INTOP | LIMITOP | DIFFOP
operator: OPERATOR

# supops : One or more consecutive SUPOP tokens, merged into a single new token.
#   SUPOP is really only \prime(s) (?)
#   The new token is built by New from the concatenation of each token's textContent,
#   and is named 'prime' followed by the number of tokens matched.
supops   : SUPOP(s)
             { my @primes = @{$item[1]};
               New(undef,
                   join('', map { $_->textContent } @primes),
                   name => 'prime' . scalar(@primes)); }

# ================================================================================
# And some special cases...

# balancedClose[$open] : Match a CLOSE that `corresponds' to the OPEN.
#   $arg[0] is the OPEN token.  The rule yields the CLOSE token when isMatchingClose
#   accepts the pair; otherwise the action yields undef and the rule fails.
balancedClose :
          CLOSE
               { isMatchingClose($arg[0], $item[1]) ? $item[1] : undef; }

# evalAtOp : The "evaluated at" operator, typically a vertical bar followed by a subscript
# equation. But it is often used in \left. \right| pairs!
# Hence a CLOSE token named | is accepted as well as a VERTBAR.
evalAtOp : VERTBAR
         | /CLOSE:\|:\d+/       { Lookup($item[1]); }

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Terminals / Lexer
#   These correspond to the TeX tokens.
# The Lexer strings are of the form TYPE:NAME:NUMBER where
#    TYPE is the grammatical role, or part of speech,
#    NAME is the specific name (semantic or presentation) of the token 
#    NUMBER is the position of the specific token in the current token sequence.
#
# NOTE: RecDescent doesn't clearly distinguish lexing from parsing
# and so it allows us to interpret the same item as several distinct
# terminals; Presumably other parsers would not allow this.
# In a couple of cases, we have symbols that can be used in a few
# different ways:
#   | as vertical bar, open or close, also as a close used for eval-at!
#   : as meta-relation, as such-that
#   <, >  can be relop or part of brackets (eg. qm, etc)
# Perhaps these symbols should get a special role reflecting their specialness
# and then have pseudo-terminals that combine (eg. relop == RELOP | langle)
# This nibbles at the edge of the Ambiguity issue; if it turns out that
# a multi-meaning symbol gets used in a particular way, we'd want to assure
# that its role, meaning, etc, gets changed to reflect the specific usage!
#
# Upon reflection, this implies that OPEN|CLOSE are rather awkward as roles.
# \left< can be an OPEN _or_ RELOP
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

# Each terminal below matches a single lexer string whose TYPE (and, for the
# specialized ones, whose NAME) is as given, and returns Lookup of the matched text.
# Several terminals deliberately overlap, so that one token can play more than one part.
ATOM            : /ATOM:\S*:\d+/                { Lookup($item[1]); }
UNKNOWN         : /UNKNOWN:\S*:\d+/             { Lookup($item[1]); }
ID              : /ID:\S*:\d+/                  { Lookup($item[1]); }
ARRAY           : /ARRAY:\S*:\d+/               { Lookup($item[1]); }
NUMBER          : /NUMBER:\S*:\d+/              { Lookup($item[1]); }
PUNCT           : /PUNCT:\S*:\d+/               { Lookup($item[1]); }
PERIOD          : /PERIOD:\S*:\d+/              { Lookup($item[1]); }
RELOP           : /RELOP:\S*:\d+/               { Lookup($item[1]); }
# Angle brackets: accepted whether lexed as a RELOP (less-than, greater-than)
# or as a delimiter (OPEN langle, CLOSE rangle).
LANGLE          : /RELOP:less-than:\d+/         { Lookup($item[1]); }
                | /OPEN:langle:\d+/             { Lookup($item[1]); }
RANGLE          : /RELOP:greater-than:\d+/      { Lookup($item[1]); }
                | /CLOSE:rangle:\d+/            { Lookup($item[1]); }
# A bar in middle position: any VERTBAR, or a MIDDLE named | or parallel-to.
MIDBAR          : /VERTBAR:\S*:\d+/             { Lookup($item[1]); }
                | /MIDDLE:\|:\d+/               { Lookup($item[1]); }
                | /MIDDLE:parallel-to:\d+/      { Lookup($item[1]); }
# Braces: an OPEN named { and a CLOSE named }.
LBRACE          : /OPEN:\{:\d+/                 { Lookup($item[1]); }
RBRACE          : /CLOSE:\}:\d+/                { Lookup($item[1]); }
METARELOP       : /METARELOP:\S*:\d+/           { Lookup($item[1]); }
MODIFIEROP      : /MODIFIEROP:\S*:\d+/          { Lookup($item[1]); }
MODIFIER        : /MODIFIER:\S*:\d+/            { Lookup($item[1]); }
ARROW           : /ARROW:\S*:\d+/               { Lookup($item[1]); }
ADDOP           : /ADDOP:\S*:\d+/               { Lookup($item[1]); }
MULOP           : /MULOP:\S*:\d+/               { Lookup($item[1]); }
BINOP           : /BINOP:\S*:\d+/               { Lookup($item[1]); }
POSTFIX         : /POSTFIX:\S*:\d+/             { Lookup($item[1]); }
FUNCTION        : /FUNCTION:\S*:\d+/            { Lookup($item[1]); }
OPFUNCTION      : /OPFUNCTION:\S*:\d+/          { Lookup($item[1]); }
TRIGFUNCTION    : /TRIGFUNCTION:\S*:\d+/        { Lookup($item[1]); }
APPLYOP         : /APPLYOP:\S*:\d+/             { Lookup($item[1]); }
COMPOSEOP       : /COMPOSEOP:\S*:\d+/           { Lookup($item[1]); }
SUPOP           : /SUPOP:\S*:\d+/               { Lookup($item[1]); }
OPEN            : /OPEN:\S*:\d+/                { Lookup($item[1]); }
# NOTE: SCRIPTOPEN uses exactly the same pattern as LBRACE.
SCRIPTOPEN      : /OPEN:\{:\d+/                 { Lookup($item[1]); }
CLOSE           : /CLOSE:\S*:\d+/               { Lookup($item[1]); }
MIDDLE          : /MIDDLE:\S*:\d+/              { Lookup($item[1]); }
VERTBAR         : /VERTBAR:\S*:\d+/             { Lookup($item[1]); }
# A VERTBAR whose name is exactly |.
SINGLEVERTBAR   : /VERTBAR:\|:\d+/              { Lookup($item[1]); }
BIGOP           : /BIGOP:\S*:\d+/               { Lookup($item[1]); }
SUMOP           : /SUMOP:\S*:\d+/               { Lookup($item[1]); }
INTOP           : /INTOP:\S*:\d+/               { Lookup($item[1]); }
LIMITOP         : /LIMITOP:\S*:\d+/             { Lookup($item[1]); }
DIFFOP          : /DIFFOP:\S*:\d+/              { Lookup($item[1]); }
OPERATOR        : /OPERATOR:\S*:\d+/            { Lookup($item[1]); }
##DIFF          : /DIFF:\S*:\d+/                { Lookup($item[1]); }
# Script tokens: POST* are the ones consumed by addOpDecoration.
POSTSUBSCRIPT   : /POSTSUBSCRIPT:\S*:\d+/       { Lookup($item[1]); }
POSTSUPERSCRIPT : /POSTSUPERSCRIPT:\S*:\d+/     { Lookup($item[1]); }
FLOATSUPERSCRIPT : /FLOATSUPERSCRIPT:\S*:\d+/   { Lookup($item[1]); }
FLOATSUBSCRIPT  : /FLOATSUBSCRIPT:\S*:\d+/      { Lookup($item[1]); }

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
