/* parse.h: -*- C -*-  Parse tree representation for METAHTML text. */

/*  Copyright (c) 1996 Universal Access Inc.
    Author: E. B. Gamble Jr. (ebg@ai.mit.edu) Thu Nov  7 15:13:38 1996.  */

#if !defined (_BC_PARSE_H_)
#define _BC_PARSE_H_ 1

#include "object.h"

/* Forward Declarations

   bc_parse_t is a pointer to a struct bc_parse; the structure itself is
   laid out further down in this header. */
typedef struct bc_parse *bc_parse_t;


/************************************************************************
 *
 * PARSE Examples
 *
 * =====
 *
 *  "foo bar <baz x <bing y> z> bong <bang a>"
 *
 *  {TEXT {FMT "foo bar " 0 " bong " 1}
 *        {TAG {FMT "<baz x " 2 " z>"}
 *             {SYM baz}
 *             {SYM x}
 *             {TAG "<bing y>"
 *                  {SYM bing}
 *                  {SYM y}}
 *             {SYM z}}
 *        {TAG "<bang a>"
 *              {SYM bang}
 *              {SYM a}}}
 *  
 * ===== 
 *
 *  "abc <defun foo x y> This is : <get-var "x">, <get-var y>. </defun> xyz"
 *
 *  {TEXT {FMT "abc " 0 " This is : " 1 ", " 2 ". " 3 " xyz"}
 *        {TAG {FMT "<defun foo x y>"}
 *             {SYM defun}
 *             {SYM foo}
 *             {SYM x}
 *             {SYM y}}
 *        {TAG {FMT "<get-var \"x\">"}
 *             {SYM get-var}
 *             {STR "x"}}
 *        {TAG {FMT "<get-var y>"}
 *             {SYM get-var}
 *             {SYM y}}
 *        {TAG {FMT "</defun>"}
 *             {SYM /defun}}}
 *
 * =====
 *
 * <if "abc" <get-var xyz>> 
 *
 * {TAG {FMT "<if " 1 " " 2 ">"}
 *      {SYM if}
 *      {STR "abc"}
 *      {TAG {FMT "<get-var xyz>"}
 *           {SYM get-var}
 *           {SYM xyz}}}
 *
 *
 * =====
 *
 * <prog "abc" <not xyz>> 
 *
 *  {TAG {FMT "<prog " 1 " " 2 ">"}
 *       {SYM prog}
 *       {STR "abc"}
 *       {TAG {FMT "<not xyz>"}
 *	      {SYM not}
 *	      {SYM xyz}}}
 *
 * =====
 *
 *  <while <get-var xyz>>abc <set-var xyz "false"></while> 
 *
 *  {TEXT {FMT 0 "abc " 1 2}
 *        {TAG {FMT "<while " 1 ">"}
 *             {SYM while}
 *	       {TAG {FMT "<get-var xyz>"}
 *	            {SYM get-var}
 *		    {SYM xyz}}}
 *        {TAG {FMT "<set-var xyz \"false\">"}
 *             {SYM set-var}
 *             {SYM xyz}
 *             {STR "false"}}
 *        {TAG {FMT "</while>"}
 *             {SYM /while}}}
 *
 * =====
 *
 *  <add 1.2 "3.8">
 *
 *  {TAG {FMT "<add 1.2 \"3.8\">"}
 *       {SYM add}
 *       {NUM 1.2}
 *	 {STR "3.8"}}          ????
 *
 * =====
 *
 *  "abc <random 10> def"
 *
 *  {TEXT {FMT "abc " 0 " def"}
 *        {TAG {FMT "<random 10>"}
 *             {SYM random}
 *             {NUM 10}}}
 *
 * =====
 *
 * <set-var foo=<get-var bar> baz =  \"abc\">
 *
 *  {TAG {FMT "<set-var " 1 " " 2 ">"}
 *       {SYM "set-var"}
 *       {KEY {FMT 0 "=" 1}
 *            {SYM foo}
 *            {TAG {FMT "<get-var bar>"}
 *                 {SYM get-var}
 *                 {SYM bar}}}
 *       {KEY {FMT 0 " =  " 1}
 *            {SYM baz}
 *            {STR "abc"}}}
 *
 * =====
 *
 * Example of the output from the expander's first pass in which complex
 * tags have been associated with their body.
 *
 * "abc <defun foo x y> This is : <get-var x>, <get-var y>. </defun> xyz"
 *
 *       {TEXT {FMT "abc " 0 " xyz"}
 *             {BLK {TAG {FMT "<defun foo x y>"}
 *                       {SYM "defun"}
 *                       {SYM "foo"}
 *                       {SYM "x"}
 *                       {SYM "y"}}
 *                  {TEXT {FMT " This is : " 0 ", " 1 ". "}
 *                        {TAG {FMT "<get-var x>"}
 *                             {SYM "get-var"}
 *                             {SYM "x"}}
 *                        {TAG {FMT "<get-var y>"}
 *                             {SYM "get-var"}
 *                             {SYM "y"}}}}}
 *
 */
/* The kinds of BC_PARSE object.  Values are consecutive from zero, so
   BC_NUMBER_OF_PARSE_OPS, which comes last, equals the number of kinds. */
typedef enum
{
  BC_PARSE_OP_SYMBOL = 0,	/* symbol identifier */
  BC_PARSE_OP_STRING = 1,	/* string data */
  BC_PARSE_OP_NUMBER = 2,	/* number data */
  BC_PARSE_OP_TEXT   = 3,	/* metahtml text */
  BC_PARSE_OP_TAG    = 4,	/* function calls / special-forms / macros */
  BC_PARSE_OP_KEY    = 5,	/* key word + value */
  BC_PARSE_OP_ARRAY  = 6,	/* array reference */
  BC_PARSE_OP_BLK    = 7,	/* blocks (unused by parser) */

  BC_NUMBER_OF_PARSE_OPS	/* count of the kinds above; keep last */
} bc_parse_op_t;

/*
 * An Insane Doubly Recursive Structure
 *  Don't even ask
 *
 */
struct bc_parse
{
  /* The kind of this BC_PARSE object: one of the BC_PARSE_OP_* values. */
  bc_parse_op_t op;

  /* BC_PARSE objects are chained into a list through NEXT.  A parse
     whose NEXT is NULL is termed 'single'; a parse whose NEXT is not
     NULL is termed 'multiple'. */
  bc_parse_t next;

  /* Per-kind data.  Each member below is labelled with the kind of
     parse it belongs to.

     METAHTML is a 'string language' in which the primary (only?)
     expressed values are strings, so one might think strings alone
     would be sufficient in the parser output.  However, it is not the
     job of the compiler (the part outside the parser) to do the
     lexical analysis needed to produce numbers, strings and symbols;
     the parser does that, using BC_PARSE_OP_SYMBOL and
     BC_PARSE_OP_NUMBER.  Because the original string is retained in
     those members, the compiler may use either the string or the
     number / symbol, whichever suits the function/special-form being
     compiled.

     One could imagine PASS ONE of a parser filling in only
     BC_PARSE_STRING and BC_PARSE_SYMBOL_STRING, with PASS TWO
     destructively modifying the PARSE to provide BC_PARSE_NUMBER and
     BC_PARSE_SYMBOL_SYMBOL. */
  union
  {
    /* BC_PARSE_OP_SYMBOL */
    struct
    {
      bc_string_t string;
      bc_symbol_t symbol;
    } symbol;

    /* BC_PARSE_OP_STRING */
    bc_string_t string;

    /* BC_PARSE_OP_NUMBER */
    struct
    {
      bc_string_t string;
      bc_number_t number;
    } number;

    /* BC_PARSE_OP_TEXT

       The parse for arbitrary text, such as anything between double
       quotes that is not a simple string, or the body of a complex
       tag. */
    struct
    {
      bc_format_t format;
      bc_parse_t tags;		/* Multiple (TAG) */
    } text;

    /* BC_PARSE_OP_TAG

       The accessor macros treat the first parse in TAGS as the
       operator and the parses linked after it as the operands. */
    struct
    {
      bc_format_t format;
      bc_parse_t tags;		/* Multiple */
    } tag;

    /* BC_PARSE_OP_KEY

       The parse for keywords in METAHTML tags.  A tag of
       "<set-var foo=<get-var bar>>" is parsed as a TAG for 'set-var'
       with one KEY containing parses for 'foo' and '<get-var bar>'. */
    struct
    {
      bc_format_t format;
      bc_parse_t name;
      bc_parse_t value;
    } key;

    /* BC_PARSE_OP_ARRAY

       Derived from 'name[index]'. */
    struct
    {
      bc_format_t format;
      bc_parse_t name;
      bc_parse_t index;
    } array;

    /* BC_PARSE_OP_BLK

       Never present in the parser's output.  The expander uses it to
       group a complex tag with that tag's body, once the tag has been
       identified as complex and its matching 'closer' has been found.
       The parser cannot do this itself because it knows nothing about
       the values of tags (complex macro, simple macro, special-form,
       function, etc). */
    struct
    {
      bc_parse_t tag;
      bc_parse_t body;
    } blk;
  } u;
};

/* Field accessors for a bc_parse_t PARSE.  All of them except
   BC_PARSE_TAG_OPERANDS expand to a plain member reference. */

/* Fields common to every parse. */
#define BC_PARSE_OP(parse)              ((parse)->op)
#define BC_PARSE_NEXT(parse)            ((parse)->next)

/* BC_PARSE_OP_SYMBOL */
#define BC_PARSE_SYMBOL_STRING(parse)   ((parse)->u.symbol.string)
#define BC_PARSE_SYMBOL_SYMBOL(parse)   ((parse)->u.symbol.symbol)

/* BC_PARSE_OP_STRING */
#define BC_PARSE_STRING(parse)          ((parse)->u.string)

/* BC_PARSE_OP_NUMBER */
#define BC_PARSE_NUMBER_STRING(parse)   ((parse)->u.number.string)
#define BC_PARSE_NUMBER_NUMBER(parse)   ((parse)->u.number.number)

/* BC_PARSE_OP_TEXT */
#define BC_PARSE_TEXT_FORMAT(parse)     ((parse)->u.text.format)
#define BC_PARSE_TEXT_TAGS(parse)       ((parse)->u.text.tags)

/* BC_PARSE_OP_TAG.  BC_PARSE_TAG_OPERATOR names the same field as
   BC_PARSE_TAG_TAGS.  BC_PARSE_TAG_OPERANDS yields the parse linked
   after the operator, or BC_PARSE_NULL when there is no operator; it
   mentions PARSE more than once, so PARSE should be free of side
   effects. */
#define BC_PARSE_TAG_FORMAT(parse)      ((parse)->u.tag.format)
#define BC_PARSE_TAG_TAGS(parse)        ((parse)->u.tag.tags)
#define BC_PARSE_TAG_OPERATOR(parse)    ((parse)->u.tag.tags)
#define BC_PARSE_TAG_OPERANDS(parse)                    \
  (BC_PARSE_TAG_OPERATOR (parse)                        \
   ? BC_PARSE_NEXT (BC_PARSE_TAG_OPERATOR (parse))      \
   : BC_PARSE_NULL)

/* BC_PARSE_OP_KEY */
#define BC_PARSE_KEY_FORMAT(parse)      ((parse)->u.key.format)
#define BC_PARSE_KEY_NAME(parse)        ((parse)->u.key.name)
#define BC_PARSE_KEY_VALUE(parse)       ((parse)->u.key.value)

/* BC_PARSE_OP_ARRAY */
#define BC_PARSE_ARRAY_FORMAT(parse)    ((parse)->u.array.format)
#define BC_PARSE_ARRAY_NAME(parse)      ((parse)->u.array.name)
#define BC_PARSE_ARRAY_INDEX(parse)     ((parse)->u.array.index)

/* BC_PARSE_OP_BLK */
#define BC_PARSE_BLK_TAG(parse)         ((parse)->u.blk.tag)
#define BC_PARSE_BLK_BODY(parse)        ((parse)->u.blk.body)

/* NULL cast to bc_parse_t.  BC_PARSE_TAG_OPERANDS evaluates to this when
   the tag has no operator.
   NOTE(review): NULL is not defined by this header itself; presumably it
   arrives through "object.h" -- confirm. */
#define BC_PARSE_NULL           ((bc_parse_t) NULL)

/* Return the number of parses linked (through NEXT) from PARSE.
   NOTE(review): whether PARSE itself is included in the count, and the
   result for BC_PARSE_NULL, are not visible from this header -- confirm
   against the definition. */
extern unsigned int
bc_parse_count (bc_parse_t parse);

#if defined (TEST)

/* One entry describing a parser test; only declared when TEST is
   defined. */
struct bc_parse_test
{
  /* Function taking no arguments and returning a parse. */
  bc_parse_t (*parser) (void);
  /* NOTE(review): presumably a label identifying the test -- confirm. */
  bc_string_t name;
  /* NOTE(review): presumably the source text the test is about --
     confirm. */
  bc_string_t string;
};

/* Array of test entries, defined elsewhere; its length is not part of
   this declaration.
   NOTE(review): how the end of the array is marked is not visible from
   this header -- confirm against the definition. */
extern struct bc_parse_test
bc_parse_test_array [];

#endif /* defined (TEST) */

#endif /* ! _BC_PARSE_H_ */

/*
 * "foo bar <baz x <bing y> z> bong <bang a>"
 *
 *  A=> {TEXT "foo bar @0 bong @1"
 *        {TAG "<baz x @2 z>"
 *             {SYM "baz"}
 *             {SYM "x"}
 *             {TAG "<bing y>"
 *                  {SYM "bing"}
 *                  {SYM "y"}}
 *             {SYM "z"}}
 *        {TAG "<bang a>"
 *              {SYM "bang"}
 *              {SYM "a"}}}
 *
 * "foo bar <baz x <bing y> z> bong <bang a>"
 *
 *  B=> {TEXT
 *         "foo bar "
 *         {TAG {TEXT "baz x y " {TAG, {TEXT "bing z"} ...}},
 *              {SYM "baz"}
 *              {SYM "x"}
 *              {SYM "y"}
 *              {TAG {TEXT "bing z"}
 *                   {SYM "bing"}
 *                   {SYM "z"}}}
 *         " bong "
 *         {TAG {TEXT "bang a"}
 *              {SYM "bang"}
 *              {SYM "a"}}}
 *
 */
