////////////////////////////////////////////////////////////////////////////////
// c++file.c
//
// This .c file, written in C++, is intended to be included in etags++.c and hier++.c.
// It is a quick-and-dirty "fuzzy" parser for C++ files that identifies enough tokens for
// etags++ and hier++ to do a good job.  See those files for information on the resultant
// functionality.  This file simply provides the common parsing code.
//
// Author:  Brian M. Kennedy
// (C) Copyright 1993, Intellection Inc.
// Permission is granted to use, copy, or modify this code as long as this author and
// copyright notice is maintained in all copies.
//
// Note:
//   This is quick, hack code that was not written to be modifiable or maintainable -- beware!!
//   I would not allow code such as this into our product!  But it is okay for a quick tool hack.
//   If you are a user, I hope you enjoy it.  If you are a modifier, my apologies ;-(

#include <stdlib.h>
#include <iostream.h>
#include <fstream.h>
#include <ctype.h>
#include <string.h>


// Truth type used throughout this file: a plain char that holds FALSE or TRUE.
typedef char Boolean;
// The two Boolean values.
#define FALSE 0
#define TRUE  1


// Exit Status
// Not referenced in this file; presumably handed to exit() by the programs
// that include it -- confirm against etags++.c / hier++.c.
#define GOOD 0
#define BAD  1


////////////////////////////////////////////////////////////////////////////////

// Tells whether c may appear inside an identifier: a letter, a digit or an
// underscore.  The result is nonzero (TRUE) when it may, FALSE otherwise.
//
// The character is converted to unsigned char before it reaches isalnum():
// the <ctype.h> classifiers are only defined for EOF and for values that fit
// an unsigned char, so a plain char holding an 8-bit character (negative where
// char is signed) must not be passed directly.
inline Boolean
isident (char c)
{ return c == '_' || isalnum((unsigned char) c); }


// Counts the decimal digits of num.
// Note that 0 yields 0 (no digits), not 1.
unsigned
size (unsigned num)
{ unsigned digits = 0;
  for (unsigned rest = num; rest != 0; rest /= 10)
    ++digits;
  return digits;
}


////////////////////////////////////////////////////////////////////////////////

// A text held completely in memory for the scanner.
//
// Layout of the buffer (size+2 bytes):
//   chars[0]        always 0 -- a sentinel in front of the text, so a backward
//                   scan (see File_Pos::find_prev_newline) stops there
//   chars[1 ...]    the NUL-terminated text; positions are therefore 1-based
//   chars[size+1]   always 0 -- a sentinel at the very end of the buffer
//
// No destructor is declared, so the buffer is not released by this type.
struct File
{
  int   size;    // must stay declared before chars: the constructors size the buffer from it
  char* chars;

  File (int max_size);
  File (int max_size, const char* initial_string);

  void read (const char* filename);
};


// Creates an empty text with room for max_size characters to be filled in by
// read().  size is max_size+1 because it is the count handed to istream::get,
// which includes the terminating NUL.
// The whole buffer is zeroed so that the text is a valid empty string (and
// both sentinels are in place) even if read() is never called or fails.
File::
File (int max_size)
:size(max_size+1), chars(new char [size+2])
{
  memset(chars, 0, size+2);
}


// Creates a text holding a copy of initial_string.
//   max_size        capacity in characters, not counting the terminator
//   initial_string  text to copy; a null pointer is treated as an empty string
// At most max_size characters are copied: a longer string is truncated rather
// than written past the end of the buffer.
File::
File (int max_size, const char* initial_string)
:size(max_size), chars(new char [size+2])
{
  memset(chars, 0, size+2);               // sentinels, terminator and padding in one go
  if (initial_string && size > 0)
    strncpy(chars+1, initial_string, size);   // chars[size+1] stays 0, so the copy is always terminated
}


// Loads the contents of filename into the buffer, up to the first NUL byte in
// the file or size-1 characters, whichever comes first.
// If the file cannot be opened the text is left empty.
void File::
read (const char* filename)
{ chars[1] = 0;                   // an unreadable or empty file yields an empty text
  ifstream is (filename);
  if (!is)
    return;
  is.get(chars+1, size, '\0');    // note: chars is indexed-based at 1, not 0
}


////////////////////////////////////////////////////////////////////////////////

// A cursor into a File: which character it is on and which line that is.
// Only a pointer to the File is stored.  Positions start at character 1,
// line 1.
struct File_Pos
{
  const File* file;
  unsigned    char_no;   // index into file->chars of the current character
  unsigned    line_no;   // incremented by inc() each time a '\n' is stepped over

  File_Pos (const File& file_arg)
  :file(&file_arg), char_no(1), line_no(1)
  {}

  File_Pos (const File_Pos& p)
  :file(p.file), char_no(p.char_no), line_no(p.line_no)
  {}

  // Pointer to the current character / the character n places after it.
  const char* chars () const
  { return file->chars + char_no; }

  char chars (unsigned n) const
  { return *(chars() + n); }

  // Step forward by one character, or by n characters.
  File_Pos& inc   ();
  File_Pos& inc   (unsigned n);

  // Try to match at the current position; the position moves only on success
  // (for the predicate form: over the run of accepted characters).
  inline  Boolean match (const char* string, unsigned size);
  Boolean match (const char* string)
  { return match(string, strlen(string)); }
  Boolean match (Boolean (*fn)(char c));

  // Search forward for a match, stopping at the end of the text.
  void find_match (const char* string, unsigned size);
  void find_match (const char* string)
  { find_match(string, strlen(string)); }
  void find_match (Boolean (*fn)(char c));
  void find_match (char match, char escape);

  // Move char_no back towards the start of the current line.
  void find_prev_newline ();
};


// Steps over the current character and returns *this.
// At the terminating NUL the position stays where it is; stepping over a
// newline also bumps line_no.
inline File_Pos& File_Pos::
inc ()
{ const char current = chars(0);
  if (!current)
    return *this;
  if (current == '\n')
    ++line_no;
  ++char_no;
  return *this;
}


// Applies inc() n times (so it also stops at the end of the text and keeps
// line_no up to date); returns *this.
File_Pos& File_Pos::
inc (unsigned n)
{ unsigned remaining = n;
  while (remaining != 0)
  { inc();
    --remaining;
  }
  return *this;
}


// Compares the first `size` characters of `string` with the text at the
// current position.  On a match the position is advanced past them and TRUE
// is returned; otherwise the position is left alone and FALSE is returned.
inline Boolean File_Pos::
match (const char* string, unsigned size)
{ if (strncmp(string, chars(), size) == 0)
  { inc(size);
    return TRUE;
  }
  return FALSE;
}


// Skips the run of characters accepted by fn that starts at the current
// position.
// Returns fn's verdict on the character at the starting position; a false
// result means nothing was skipped.
//
// The scan also stops at the terminating NUL.  inc() never steps past the
// terminator, so without this check a predicate that accepts '\0' would keep
// the loop spinning forever.
Boolean File_Pos::
match (Boolean (*fn)(char c))
{ Boolean ret = fn(chars(0));
  while (chars(0) && fn(chars(0)))
    inc();
  return ret;
}


// Advances until the first `size` characters of `string` match; the position
// ends up just after the match, or at the end of the text if there is none.
void File_Pos::
find_match (const char* string, unsigned size)
{
  for (;;)
  { if (match(string, size))
      return;
    if (!chars(0))
      return;
    inc();
  }
}

// Advances until a character accepted by fn is found, then past the whole run
// of accepted characters (that is what match(fn) does); stops at the end of
// the text if there is none.
void File_Pos::
find_match (Boolean (*fn)(char c))
{
  while (!match(fn))
  { if (chars(0) == 0)
      break;
    inc();
  }
}


// Advances to just past the next occurrence of `match`, or to the end of the
// text if there is none.  An `escape` character takes the character after it
// along, so an escaped `match` does not end the search.
void File_Pos::
find_match (char match, char escape)
{
  for (char here = chars(0); here && here != match; here = chars(0))
  { if (here == escape)
      inc();
    inc();
  }
  inc();   // step over the match itself (does nothing at the end of the text)
}


// Moves char_no backwards until it rests on a newline or on a NUL, then one
// step forward again.  line_no is not adjusted.
// If the current character is already a newline or a NUL, no backward step is
// taken and char_no simply ends up one higher.
void File_Pos::
find_prev_newline ()
{
  for (;;)
  { const char here = chars(0);
    if (here == 0 || here == '\n')
      break;
    --char_no;
  }
  ++char_no;
}


////////////////////////////////////////////////////////////////////////////////

// Token classes produced by the scanner.
// The ORDER IS SIGNIFICANT: next_token() skips everything up to and including
// TOKEN, and next_code() everything up to and including CODE_TOKEN.  TOKEN and
// CODE_TOKEN themselves are never produced by identify_token() in this file;
// they only mark those two thresholds.
enum C_Token
{
  // skipped by next_token() and next_code()
  CHARACTER,
  ESCAPED,
  TOKEN,

  // additionally skipped by next_code()
  COMMENT,
  DIRECTIVE,
  CODE_TOKEN,

  // keywords
  CLASS_KW,
  STRUCT_KW,
  UNION_KW,
  ENUM_KW,
  TYPEDEF_KW,
  TEMPLATE_KW,
  TEMPLATE_ARGS,

  // names, literals and #define
  IDENTIFIER,
  STRING_CONSTANT,
  CHAR_CONSTANT,
  DEFINE,

  // punctuation
  OPEN_PARE,
  CLOSE_PARE,
  OPEN_BRACE,
  CLOSE_BRACE,
  COLONS,
  COMMA,
  SEMI_COLON,
  EQUAL,

  // specially recognized upper-case words (NOTE, EXPORT..., DECLARE_..., DEFINE_...)
  NOTE,
  EXPORT,
  DECLARE_MACRO,
  DEFINE_MACRO,
  DEFINE_GET,
  DEFINE_SET,
  DEFINE_GETSET,
  DEFINE_INC,
  DEFINE_DEC,

  END_OF_FILE
};


// A File_Pos that also knows which token starts at its position.
struct C_File_Pos
:public File_Pos
{
  C_Token  token;    // class of the token at the current position
  unsigned length;   // how many characters next_char() steps over to leave it

  C_File_Pos (const File& file_arg);

  // Classify the token at the current position (sets token and length).
  void identify_token  ();

  // Move on to the next token / the next one that passes a filter.
  void next_char       ();
  void next_token      ();
  void next_code       ();
  void next_identifier ();

  // Skip ahead to the end of a construct.
  void close_brace   ();
  void close_pare    ();
  void close_func    ();
  void close_define  ();

};


// Starts at the beginning of file_arg and classifies the first token at once,
// so token and length are valid as soon as the object exists.
C_File_Pos::
C_File_Pos (const File& file_arg)
:File_Pos(file_arg),
 token(CHARACTER),
 length(1)
{
  identify_token();
}


// Helper for identify_token(): treats whatever starts at the token position
// as an identifier.  It lets the look-ahead cursor `pos` run over the
// remaining identifier characters, then records IDENTIFIER and the distance
// covered.  Expects `pos`, `token`, `length` and `char_no` to be in scope.
//
// The body is wrapped in do { } while (0) so that "TOKEN_IDENTIFIER;" is
// exactly one statement and is safe in any if/else position.
#define TOKEN_IDENTIFIER             \
do                                   \
{ pos.match(isident);                \
  token  = IDENTIFIER;               \
  length = pos.char_no - char_no;    \
} while (0)


// Classifies the token that starts at the current position and stores the
// result in `token` and `length`.  The position itself is not moved: all
// look-ahead is done on a scratch File_Pos copy (`pos`).
//
// Recognized, in the order of the switch below:
//   - end of text
//   - the keywords class, struct, union, enum, typedef and template (only when
//     not followed by a further identifier character)
//   - "#define" (DEFINE) and every other preprocessor line (DIRECTIVE, through
//     the end of the line, honoring backslash continuation)
//   - parentheses, braces, "::", ',', ';' and '='
//   - '<' ... '>' on one line as TEMPLATE_ARGS
//   - comments, string and character constants, backslash escapes
//   - the words RWExport (treated as a comment), NOTE, EXPORT..., DECLARE_...
//     and DEFINE_...
//   - identifiers; anything else is a single CHARACTER
//
// Every character handed to a <ctype.h> classifier is converted to unsigned
// char first, because those functions are not defined for negative values.
void C_File_Pos::
identify_token ()
{
  File_Pos pos (*this);
  switch(chars(0))
  {
  case '\0':
    token = END_OF_FILE;
    length = 1;
    break;
  case 'c':
    if (pos.match("class", 5) && !isident(pos.chars(0)))
    { token = CLASS_KW;
      length = pos.char_no - char_no;
    }
    else
      TOKEN_IDENTIFIER;
    break;
  case 's':
    if (pos.match("struct", 6) && !isident(pos.chars(0)))
    { token = STRUCT_KW;
      length = pos.char_no - char_no;
    }
    else
      TOKEN_IDENTIFIER;
    break;
  case 'u':
    if (pos.match("union", 5) && !isident(pos.chars(0)))
    { token = UNION_KW;
      length = pos.char_no - char_no;
    }
    else
      TOKEN_IDENTIFIER;
    break;
  case 'e':
    if (pos.match("enum", 4) && !isident(pos.chars(0)))
    { token = ENUM_KW;
      length = pos.char_no - char_no;
    }
    else
      TOKEN_IDENTIFIER;
    break;
  case 't':
    if (pos.match("typedef", 7))
    { if (!isident(pos.chars(0)))
      { token = TYPEDEF_KW;
        length = pos.char_no - char_no;
      }
      else
        TOKEN_IDENTIFIER;
    }
    else if (pos.match("template", 8) && !isident(pos.chars(0)))
    { token = TEMPLATE_KW;
      length = pos.char_no - char_no;
    }
    else
      TOKEN_IDENTIFIER;
    break;
  case '#':
    // Skip the '#' and any blanks after it, but stay on this line.
    do
    { pos.inc();
    } while (pos.chars(0) != '\n' && isspace((unsigned char) pos.chars(0)));
    if (pos.match("define", 6) && !isident(pos.chars(0)))
    { token = DEFINE;
      length = pos.char_no - char_no;
    }
    else
    { pos.find_match('\n', '\\');
      token  = DIRECTIVE;
      length = pos.char_no - char_no;
    }
    break;
  case '(':
    token = OPEN_PARE;
    length = 1;
    break;
  case ')':
    token = CLOSE_PARE;
    length = 1;
    break;
  case '{':
    token = OPEN_BRACE;
    length = 1;
    break;
  case '}':
    token = CLOSE_BRACE;
    length = 1;
    break;
  case '<': {
    // Look for the LAST '>' before the end of the line, a brace, a quote or a
    // ';'.  strchr() also finds the terminating NUL of its first argument, so
    // the loop stops at the end of the text as well.
    unsigned last_more = pos.char_no;
    pos.inc();
    while (!strchr("{}\"\';\n", pos.chars(0)))
    { if (pos.chars(0) == '>')
        last_more = pos.char_no;
      pos.inc();
    }
    if (last_more != char_no)
    { token = TEMPLATE_ARGS;
      length = last_more - char_no + 1;
    }
    else
    { token = CHARACTER;
      length = 1;
    }
    break;
  }
  case ':':
    if (pos.match("::", 2))
    { token = COLONS;
      length = 2;
    }
    else
    { token = CHARACTER;
      length = 1;
    }
    break;
  case ',':
    token = COMMA;
    length = 1;
    break;
  case ';':
    token = SEMI_COLON;
    length = 1;
    break;
  case '=':
    token = EQUAL;
    length = 1;
    break;
  case '/':
    if (pos.match("//", 2))
    { pos.find_match("\n", 1);
      token = COMMENT;
      length = pos.char_no - char_no;
    }
    else if (pos.match("/*", 2))  // */
    { pos.find_match("*/", 2);
      token = COMMENT;
      length = pos.char_no - char_no;
    }
    else
    { token = CHARACTER;
      length = 1;
    }
    break;
  case '\"':
    pos.inc();
    pos.find_match('\"', '\\');
    token = STRING_CONSTANT;
    length = pos.char_no - char_no;
    break;
  case '\'':
    pos.inc();
    pos.find_match('\'', '\\');
    token = CHAR_CONSTANT;
    length = pos.char_no - char_no;
    break;
  case '\\':
    token = ESCAPED;
    length = 2;
    break;
  case 'R':
    if (pos.match("RWExport") && !isident(pos.chars(0)))
    { token = COMMENT;
      length = pos.char_no - char_no;
    }
    else
      TOKEN_IDENTIFIER;
    break;
  case 'D':
    if (pos.match("DEFINE_"))
    { // pos now rests on the first character after "DEFINE_".
      switch(pos.chars(0))
      {
      case 'G':
        // GET versus GETSET: decided by the 4th character after "DEFINE_".
        // It has to be read relative to pos; relative to the token start,
        // index 3 is always the 'I' of "DEFINE_", which made DEFINE_GETSET
        // unreachable.  The two tests in front keep the look-ahead from
        // reading beyond the end of the text.
        token = (pos.chars(1) && pos.chars(2) && pos.chars(3) == 'S')
                ? DEFINE_GETSET : DEFINE_GET;
        break;
      case 'S':
        token = DEFINE_SET;
        break;
      case 'I':
        token = DEFINE_INC;
        break;
      case 'D':
        token = DEFINE_DEC;
        break;
      default:
        token = DEFINE_MACRO;
      }
      pos.match(isident);
      length = pos.char_no - char_no;
    }
    else if (pos.match("DECLARE_"))
    { pos.match(isident);
      token = DECLARE_MACRO;
      length = pos.char_no - char_no;
    }
    else
      TOKEN_IDENTIFIER;
    break;
  case 'N':
    if (pos.match("NOTE") && !isident(pos.chars(0)))
    { token = NOTE;
      length = pos.char_no - char_no;
    }
    else
      TOKEN_IDENTIFIER;
    break;
  case 'E':
    if (   pos.match("EXPORT_ACCESSOR")
       || pos.match("EXPORT_PARM")
       || (pos.match("EXPORT") && !isident(pos.chars(0))))
    { token = EXPORT;
      length = pos.char_no - char_no;
    }
    else
      TOKEN_IDENTIFIER;
    break;
  default:
    // An identifier starts with a letter or an underscore; everything else is
    // passed on one character at a time.
    if (chars(0) != '_' && !isalpha((unsigned char) chars(0)))
    { token = CHARACTER;
      length = 1;
    }
    else
      TOKEN_IDENTIFIER;
  }
}


// Leaves the current token behind and classifies the one that follows.
inline void C_File_Pos::
next_char ()
{
  inc(length);         // step over the token classified last time
  identify_token();    // find out what starts here
}


// Advances at least once, then on past every token whose class is TOKEN or
// lower in C_Token (CHARACTER, ESCAPED, TOKEN).
void C_File_Pos::
next_token ()
{
  next_char();
  while (token <= TOKEN)
    next_char();
}


// Advances at least once, then on past every token whose class is CODE_TOKEN
// or lower in C_Token -- i.e. also past comments and directives.
void C_File_Pos::
next_code ()
{
  for (next_char(); token <= CODE_TOKEN; next_char())
  { /* keep skipping */ }
}


// Advances at least once and stops on the next IDENTIFIER, or at the end of
// the text if there is none.
void C_File_Pos::
next_identifier ()
{
  for (;;)
  { next_char();
    if (token == IDENTIFIER || token == END_OF_FILE)
      return;
  }
}


// To be called with one '{' already open: advances to the '}' that closes it,
// counting nested braces on the way.  Gives up at the end of the text.
void C_File_Pos::
close_brace ()
{ int depth = 1;
  do
  { next_code();
    switch (token)
    {
    case OPEN_BRACE:
      ++depth;
      break;
    case CLOSE_BRACE:
      --depth;
      break;
    case END_OF_FILE:
      depth = 0;
      break;
    default:
      break;
    }
  } while (depth);
}


// To be called with one '(' already open: advances to the ')' that closes it,
// counting nested parentheses on the way.  Gives up at the end of the text.
void C_File_Pos::
close_pare ()
{ int depth = 1;
  do
  { next_code();
    if (token == END_OF_FILE)
      depth = 0;
    else if (token == CLOSE_PARE)
      --depth;
    else if (token == OPEN_PARE)
      ++depth;
  } while (depth != 0);
}


// Advances to whichever comes first: a ';', the end of the text, or a '{' --
// in which case the whole brace block is skipped as well.
void C_File_Pos::
close_func ()
{
  for (;;)
  { if (token == OPEN_BRACE)
    { close_brace();
      return;
    }
    if (token == SEMI_COLON || token == END_OF_FILE)
      return;
    next_code();
  }
}


// Advances to the newline that ends the current line (or to the end of the
// text).  A backslash takes the character after it along, so a
// backslash-newline does not end the line.
// Afterwards the current token is a one-character CHARACTER (the newline), or
// END_OF_FILE when the text ran out.
void C_File_Pos::
close_define ()
{
  for (char here = chars(0); here && here != '\n'; here = chars(0))
  { if (here == '\\')
      inc();
    inc();
  }
  token  = chars(0) ? CHARACTER : END_OF_FILE;
  length = 1;
}
