/*  $Header: charclas.c,v 3.1 88/06/09 15:28:12 sylvia Exp $ */
/*
 *  This file is part of the Amsterdam SGML Parser.
 *
 *  Copyright: Faculteit Wiskunde en Informatica
 *             Department of Mathematics and Computer Science
 *             Vrije Universiteit Amsterdam
 *             The Netherlands
 *
 *  Authors:   Sylvia van Egmond
 *             Jos Warmer
 */
#include "types.h"
#include "charclas.h"
#include "lexical.h"
#include "set.h"
#ifdef DOC_PARSER
#include "charclas.i"
#endif
/*
 *  Basic character classes.
 *
 *  Most of these classes will eventually become macro definitions,
 *  for efficiency.
 *  ASCII ordering of the character set is assumed.
 */
/*
 *  Set of character codes queried by is_NONSGML().  It is 0 until
 *  init_charclas() replaces it with a set created by new_set(256).
 */
static  P_Set  nonsgml_set = 0;

/*
 *  SPACE class: non-zero only when ch is the blank character ' '.
 */
Bool    is_SPACE(ch)
int ch;
{
    int is_blank;

    is_blank = ( ch == ' ' );
    return is_blank;
}
/*
 *  RE class (SGML "record end"): non-zero only when ch is the
 *  carriage return character '\r'.
 */
Bool    is_RE(ch)
int ch;
{
    int record_end = '\r';

    return ( record_end == ch );
}

/*
 *  RS class (SGML "record start"): non-zero only when ch is the
 *  newline character '\n'.
 */
Bool    is_RS(ch)
int ch;
{
    int record_start = '\n';

    return ( record_start == ch );
}

/*
 *  SEPCHAR class (separator characters): the horizontal tab is the
 *  only member.
 */
Bool    is_SEPCHAR(ch)
int ch;
{
    int is_tab;

    is_tab = ( ch == '\t' );
    return is_tab;
}

/*
 *  DATACHAR class: every character code is accepted, so the argument
 *  is never examined.
 */
Bool    is_DATACHAR(ch)
int ch;
{
    (void) ch;		/* deliberately unused */

    return TRUE;
}

/*
 *  DELMCHAR class: empty here, so the answer is FALSE for every ch.
 */
Bool    is_DELMCHAR(ch)
int ch;
{
    (void) ch;		/* deliberately unused */

    return FALSE;
}

/*
 *  Special class: non-zero when ch is one of
 *      ' ( ) + , . - / : = ?
 *  and zero for every other code.
 */
Bool is_SPECIAL(ch)
int ch;
{
    int special = 0;

    switch( ch ){
    case '\'': case '(': case ')': case '+': case ',':
    case '.':  case '-': case '/': case ':': case '=':
    case '?':
	special = 1;
	break;
    default:
	break;
    }
    return( special );
}

/*
 *  Digit class: '0' through '9'.  A code is a digit exactly when it
 *  lies neither below '0' nor above '9' (ASCII ordering assumed).
 */
Bool    is_DIGIT(ch)
int ch;
{
    return( !( (ch < '0') or (ch > '9') ) );
}
/*
 *  The user is not allowed to add anything to the NMCHAR/NMSTRT sets
 */
/*
 *  LCNMCHAR class: the additional lower-case name characters,
 *  fixed here to '-' and '.'.
 */
Bool    is_LCNMCHAR(ch)
int ch;
{
    int member = 0;

    switch( ch ){
    case '-':
    case '.':
	member = 1;
	break;
    default:
	break;
    }
    return ( member );
}

/*
 *  UCNMCHAR class: the additional upper-case name characters,
 *  fixed here to the same pair as LCNMCHAR: '-' and '.'.
 */
Bool    is_UCNMCHAR(ch)
int ch;
{
    int is_hyphen = ( ch == '-' );
    int is_period = ( ch == '.' );

    return ( is_hyphen or is_period );
}

/*
 *  LCNMSTRT class: no additional lower-case name start characters
 *  exist, so the answer is FALSE for every ch.
 */
Bool    is_LCNMSTRT(ch)
int ch;
{
    (void) ch;		/* deliberately unused */

    return FALSE;
}

/*
 *  UCNMSTRT class: no additional upper-case name start characters
 *  exist, so the answer is FALSE for every ch.
 */
Bool    is_UCNMSTRT(ch)
int ch;
{
    (void) ch;		/* deliberately unused */

    return FALSE;
}

/*
 *  LC Letter class: 'a' through 'z' inclusive (ASCII ordering assumed).
 */
Bool    is_LC_LETTER(ch)
int ch;
{
    int first = 'a';
    int last  = 'z';

    return( (first <= ch) and (ch <= last) );
}

/*
 *  UC Letter class: 'A' through 'Z' inclusive.  A code is a member
 *  exactly when it lies neither below 'A' nor above 'Z'
 *  (ASCII ordering assumed).
 */
Bool    is_UC_LETTER(ch)
int ch;
{
    return( !( (ch < 'A') or (ch > 'Z') ) );
}

/*
 *  MSOCHAR class: empty here, so the answer is FALSE for every ch.
 */
Bool    is_MSOCHAR(ch)
int ch;
{
    (void) ch;		/* deliberately unused */

    return FALSE;
}

/*
 *  MSICHAR class: empty here, so the answer is FALSE for every ch.
 */
Bool    is_MSICHAR(ch)
int ch;
{
    (void) ch;		/* deliberately unused */

    return FALSE;
}

/*
 *  MSSCHAR class: empty here, so the answer is FALSE for every ch.
 */
Bool    is_MSSCHAR(ch)
int ch;
{
    (void) ch;		/* deliberately unused */

    return FALSE;
}

/*
 *  FUNCHAR class: empty here, so the answer is FALSE for every ch.
 */
Bool    is_FUNCHAR(ch)
int ch;
{
    (void) ch;		/* deliberately unused */

    return FALSE;
}

/*
 *  Composite character classes corresponding to rules in the grammar.
 */
/*
 *  s: a separator, i.e. a member of SPACE, RE, RS or SEPCHAR.
 *  The classes are tried in that order and the search stops at the
 *  first one that accepts ch.
 */
Bool    is_s(ch)
int ch;
{
    int found = 0;

    if( is_SPACE(ch) ){
	found = 1;
    } else if( is_RE(ch) ){
	found = 1;
    } else if( is_RS(ch) ){
	found = 1;
    } else if( is_SEPCHAR(ch) ){
	found = 1;
    }
    return( found );
}

/*
 *  data character: defined as exactly the SGML characters, so the
 *  question is forwarded to is_SGML_character().
 */
Bool    is_data_character(ch)
int ch;
{
    int verdict;

    verdict = is_SGML_character(ch);
    return( verdict );
}

/*
 *  SGML character: a markup character or a DATACHAR.
 *  is_DATACHAR() is only consulted when ch is not a markup character.
 */
Bool    is_SGML_character(ch)
int ch;
{
    if( is_markup_character(ch) ){
	return( 1 );
    }
    return( is_DATACHAR(ch) != 0 );
}

/*
 *  markup character: a name character, a SPACE, a function character
 *  or a DELMCHAR.  The classes are tried in that order and the search
 *  stops at the first one that accepts ch.
 */
Bool    is_markup_character(ch)
int ch;
{
    int found = 0;

    if( is_name_character(ch) ){
	found = 1;
    } else if( is_SPACE(ch) ){
	found = 1;
    } else if( is_function_character(ch) ){
	found = 1;
    } else if( is_DELMCHAR(ch) ){
	found = 1;
    }
    return( found );
}

/*
 *  name character: a name start character, a digit, or a member of
 *  LCNMCHAR or UCNMCHAR.  Checked in that order; the first class that
 *  accepts ch settles the answer.
 */
Bool    is_name_character(ch)
int ch;
{
    if( is_name_start_character(ch) ){
	return( 1 );
    }
    if( is_DIGIT(ch) ){
	return( 1 );
    }
    if( is_LCNMCHAR(ch) ){
	return( 1 );
    }
    if( is_UCNMCHAR(ch) ){
	return( 1 );
    }
    return( 0 );
}

/*
 *  name start character: a lower-case or upper-case letter, or a
 *  member of LCNMSTRT or UCNMSTRT.  Checked in that order; the first
 *  class that accepts ch settles the answer.
 */
Bool    is_name_start_character(ch)
int ch;
{
    int found = 0;

    if( is_LC_LETTER(ch) ){
	found = 1;
    } else if( is_UC_LETTER(ch) ){
	found = 1;
    } else if( is_LCNMSTRT(ch) ){
	found = 1;
    } else if( is_UCNMSTRT(ch) ){
	found = 1;
    }
    return( found );
}

/*
 *  function character: a member of RE, RS, SEPCHAR, MSOCHAR, MSICHAR,
 *  MSSCHAR or FUNCHAR.  Checked in that order; the first class that
 *  accepts ch settles the answer.
 */
Bool    is_function_character(ch)
int ch;
{
    if( is_RE(ch) ){
	return( 1 );
    }
    if( is_RS(ch) ){
	return( 1 );
    }
    if( is_SEPCHAR(ch) ){
	return( 1 );
    }
    if( is_MSOCHAR(ch) ){
	return( 1 );
    }
    if( is_MSICHAR(ch) ){
	return( 1 );
    }
    if( is_MSSCHAR(ch) ){
	return( 1 );
    }
    if( is_FUNCHAR(ch) ){
	return( 1 );
    }
    return( 0 );
}

/*
 *  alpha: a letter of either case.  ch is a letter unless it is
 *  rejected by both is_LC_LETTER() and is_UC_LETTER().
 */
Bool    is_alpha(ch)
int    ch;
{
    return( !( !is_LC_LETTER(ch) and !is_UC_LETTER(ch) ) );
}

/*
 *  NONSGML: reports whether ch is a member of nonsgml_set, as
 *  answered by set_member().
 */
Bool is_NONSGML(ch)
int ch;
{
    int listed;

    listed = set_member(nonsgml_set, ch);
    return listed;
}

#ifdef GENERATOR
/*
 *  Adds ch to nonsgml_set, so that is_NONSGML(ch) will find it there.
 *  Only compiled when GENERATOR is defined.
 */
void unused_char(ch)
int ch;
{
    set_add(nonsgml_set, ch);
}

/*
 *  Writes the contents of nonsgml_set to `file' as C source text:
 *
 *      int unused_chars[] = {
 *      <code>,
 *      ...
 *      -1 };
 *
 *  Every code from 0 through 255 that is a member of the set gets a
 *  line of its own, in ascending order; -1 terminates the table.
 */
void generate_unused(file)
FILE  *file;
{
    int code = 0;

    fprintf(file,"int unused_chars[] = {\n");
    while( code <= 255 ){
	if( set_member(nonsgml_set, code) ){
	    fprintf(file, "%d,\n", code);
	}
	code++;
    }
    fprintf(file,"-1 };\n");
}

/*
 *  Adds the standard collection of unused codes to nonsgml_set:
 *  0-8, 11-12, 14-31, 127, 128-159 and 255.  Below 32 only the codes
 *  9, 10 and 13 are left out.
 */
void unused_standard()
{
    /* inclusive [first, last] ranges, in the order they are added */
    static int ranges[][2] = {
	{   0,   8 },
	{  11,  12 },
	{  14,  31 },
	{ 127, 127 },
	{ 128, 159 },
	{ 255, 255 }
    };
    int row;
    int code;

    for(row = 0; row < (int)(sizeof(ranges) / sizeof(ranges[0])); row++){
	for(code = ranges[row][0]; code <= ranges[row][1]; code++){
	    set_add(nonsgml_set, code);
	}
    }
}
#endif

/*
 *  Initialises this module: nonsgml_set becomes a fresh set obtained
 *  from new_set(256).  When DOC_PARSER is defined, every entry of the
 *  -1 terminated table unused_chars (presumably supplied by
 *  charclas.i) is then added to that set.
 */
void init_charclas()
{
    nonsgml_set = new_set(256);

#ifdef DOC_PARSER
    {
	int idx = 0;

	while( unused_chars[idx] != -1 ){
	    set_add(nonsgml_set, unused_chars[idx]);
	    idx++;
	}
    }
#endif
}
