// zeal - A portable Glk-based Z-code interpreter
// Copyright (C) 2000 Jeremy Condit <jcondit@eecs.harvard.edu>
// 
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
// 
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
// 
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

// =======================================================================
//  dict.cc:
//
//  the dictionary class represents a dictionary in the z-machine's
//  memory, and is used for lookup, lexical analysis, and encoding.
// =======================================================================
 
#include <ctype.h>

#include "zeal.h"
#include "error.h"
#include "dict.h"
#include "machine.h"
#include "alpha.h"

extern machine* m;

// =======================================================================
//  dictionary
// =======================================================================

// constructor/destructor
//
// creates an object representing a dictionary somewhere in the game's
// memory.

dictionary::dictionary(address addr)
  : dict(addr), entry_len(0), num_entries(0), sorted(true),
    seps(NULL), num_seps(0), alpha()
{
    // an address of 0 selects the dictionary named in the story header.
    if (dict == 0) {
        dict = m->read_word(HEADER_DICTIONARY_BASE);
    }

    // the table begins with a one-byte separator count followed by the
    // separator characters themselves; keep a private copy of them.
    num_seps = m->read_byte(dict++);
    seps = new ubyte [num_seps];
    MEMCHECK(seps);
    for (ubyte i = 0; i < num_seps; i++) {
        seps[i] = m->read_byte(dict++);
    }

    // next is the size in bytes of one entry.  an entry must at least
    // hold the encoded text (2 words in versions <= 3, 3 words after
    // that); any data bytes following the text are optional, so an entry
    // of exactly the minimum size is legal.
    entry_len = m->read_byte(dict++);
    ASSERT((m->version() <= 3 && entry_len >= 4) ||
           (m->version() >= 4 && entry_len >= 6));

    // finally the entry count.  a negative count marks the dictionary as
    // unsorted; we store the magnitude either way.
    num_entries = m->read_word(dict);
    if (num_entries < 0) {
        num_entries *= -1;
        sorted = false;
    }

    // leave dict pointing at the first entry.
    dict += 2;
}

dictionary::~dictionary()
{
    // release the separator table allocated with new[] in the constructor.
    delete [] seps;
}

// z_encode
//
// encode the string between addr and end as a set of z-chars in result.
// note that result should be 3 words wide.

ubyte
dictionary::z_encode(uword* result, address* addr, address end)
{
    ASSERT(*addr < end);

    // start from an all-zero encoding so z-chars can simply be or'ed in.
    for (int w = 0; w < 3; w++) {
        result[w] = 0;
    }

    ubyte slot = 0;     // next free z-char position in result
    ubyte count = 0;    // number of input characters consumed

    // the caller must hand us the first character of a word.
    ubyte ch = m->read_byte(*addr);
    ASSERT(ch != 0 && ch != ' ');

    // a separator forms a one-character word of its own; anything else
    // runs until a space, a separator, or the end of the text.
    const bool lone_separator = is_separator(ch);

    for (;;) {
        add_zscii(result, &slot, tolower(ch));
        count++;
        (*addr)++;

        if (lone_separator || !(*addr < end)) {
            break;
        }

        ch = m->read_byte(*addr);
        ASSERT(ch != 0);
        if (ch == ' ' || is_separator(ch)) {
            break;
        }
    }

    // pad every unused slot with z-char 5.
    bool room;
    do {
        room = add_zchar(result, &slot, 5);
    } while (room);

    // set the top bit of the last word in use (the second word for
    // versions <= 3, the third otherwise) to mark the end of the word.
    const int last = (m->version() <= 3) ? 1 : 2;
    result[last] |= 0x8000;

    // *addr has been advanced past the consumed characters.
    return count;
}

// lookup
//
// look up a z-encoded word in the dictionary.  we use a pretty dumb
// algorithm now--just do a brute force linear search through the
// dictionary.

address
dictionary::lookup(uword target[3])
{
    address entry = dict;

    // walk the entries in order, stepping entry_len bytes each time, and
    // return the address of the first one whose encoded text matches.
    for (int n = 0; n < num_entries; n++, entry += entry_len) {
        if (m->read_word(entry) != target[0]) {
            continue;
        }
        if (m->read_word(entry + 2) != target[1]) {
            continue;
        }
        // only versions after 3 compare a third word of encoded text.
        if (m->version() > 3 && m->read_word(entry + 4) != target[2]) {
            continue;
        }
        return entry;
    }

    // 0 means "not in the dictionary".
    return 0;
}

// lex
//
// perform lexical analysis on a text buffer, placing the result in a
// parse buffer.  the preserve flag indicates whether unrecognized entries
// are to be preserved or overwritten.

void
dictionary::lex(address text, address parse, bool preserve)
{
    // byte 0 of the parse buffer holds the number of words it has room for.
    const ubyte max_words = m->read_byte(parse);
    if (max_words == 0) {
        FATAL("Parse buffer too small!");
    }

    // work out where the input characters start and stop.  versions <= 4
    // store zero-terminated text from byte 1; later versions keep a
    // length in byte 1 and the text from byte 2.
    address cursor;
    address limit;
    if (m->version() > 4) {
        cursor = text + 2;
        limit = cursor + m->read_byte(text + 1);
    } else {
        cursor = text + 1;
        limit = cursor;
        while (m->read_byte(limit) != 0) {
            limit++;
        }
    }

    ubyte found = 0;            // words processed so far
    address slot = parse + 2;   // 4-byte parse entries begin at byte 2

    // tokenise until the text is exhausted or the parse buffer is full.
    while (cursor < limit && found < max_words) {
        if (!next_word(&cursor, limit)) {
            break;
        }

        // note where the word begins relative to the text buffer, then
        // encode it and search the dictionary.
        const ubyte offset = cursor - text;
        uword encoded[3];
        const ubyte length = z_encode(encoded, &cursor, limit);
        const uword entry = lookup(encoded);

        // when preserve is set, slots for unrecognized words are left
        // exactly as they were.
        if (!preserve || entry != 0) {
            m->write_word(slot, entry);
            m->write_byte(slot + 2, length);
            m->write_byte(slot + 3, offset);
        }

        slot += 4;
        found++;
    }

    // byte 1 of the parse buffer records how many words were processed.
    m->write_byte(parse + 1, found);
}

// add_zchar
//
// add a z-char to the next available 5-bit slot in the result word, as
// indicated by cur.  returns whether or not room was available.  result
// should have at least 2 words for versions <= 3, 3 words otherwise.

bool
dictionary::add_zchar(uword* result, ubyte* cur, ubyte zchar)
{
    // a z-char is a 5-bit quantity.
    ASSERT((zchar & 0xe0) == 0);

    // three z-chars fit in a word: 6 slots for versions <= 3, 9 otherwise.
    const ubyte capacity = (m->version() <= 3) ? 6 : 9;
    if (*cur >= capacity) {
        return false;
    }

    // slot k lives in word k / 3; the first slot of a word occupies the
    // highest of the three 5-bit fields.
    const ubyte word = *cur / 3;
    const ubyte shift = (2 - *cur % 3) * 5;
    result[word] |= (zchar << shift);

    ++(*cur);
    return true;
}

// add_zscii
//
// add a zscii character to a result word, first by trying to encode it
// using the alphabet table, and then by actually encoding the zscii
// character itself.

bool
dictionary::add_zscii(uword* result, ubyte* cur, ubyte zscii)
{
    int alphabet;
    ubyte zchar;

    if (!alpha.zscii_to_zchar(zscii, &alphabet, &zchar)) {
        // no alphabet entry: emit z-chars 5 and 6 followed by the
        // character code split into its upper bits and its low five bits.
        add_zchar(result, cur, 5);
        add_zchar(result, cur, 6);
        add_zchar(result, cur, zscii >> 5);
        return add_zchar(result, cur, zscii & 0x1f);
    }

    // a non-zero alphabet number needs a shift z-char in front of the
    // character; the shift codes are numbered differently before
    // version 3.
    if (alphabet > 0) {
        const int shift_base = (m->version() < 3) ? 1 : 3;
        add_zchar(result, cur, alphabet + shift_base);
    }

    // the return value reports whether the final z-char still fit.
    return add_zchar(result, cur, zchar);
}

// next_word
//
// helper to skip past whitespace.

bool
dictionary::next_word(address* addr, address end)
{
    // advance *addr over spaces; stop at the first other character.
    for (; *addr < end; (*addr)++) {
        if (m->read_byte(*addr) != ' ') {
            return true;
        }
    }

    // reached end without finding the start of a word.
    return false;
}

// is_separator
//
// predicate to determine whether a given character is a separator
// according to the current dictionary.

bool
dictionary::is_separator(ubyte c)
{
    // scan the separator table copied in by the constructor.
    const ubyte* const stop = seps + num_seps;
    for (const ubyte* p = seps; p != stop; ++p) {
        if (*p == c) {
            return true;
        }
    }

    return false;
}
