#include <stdlib.h>
#include "oslib/font.h"

#include "ztypes.h"
#include "unicutils.h"
#include "text.h"


/*
 * strlen_u returns the number of zword_t elements in s that come
 * before the terminating zero element.
 */
size_t strlen_u(const zword_t *s)
{
    const zword_t *end = s;

    /* Walk forward to the terminator, then measure the distance covered */
    while (*end != 0)
        end++;

    return (size_t)(end - s);
}

/*
 * strstr_u finds the first occurrence of the zero-terminated string b
 * inside the zero-terminated string a. It returns a pointer to the
 * start of the match within a, or a null pointer if there is none.
 * An empty b matches at the very start of a.
 */
zword_t *strstr_u(const zword_t *a, const zword_t *b)
{
    const zword_t *start;

    for (start = a; ; start++)
    {
        const zword_t *hay = start;
        const zword_t *needle = b;

        /* Advance while the needle has characters left and they agree */
        while (*needle != 0 && *hay == *needle)
        {
            hay++;
            needle++;
        }

        /* Ran off the end of the needle: everything matched */
        if (*needle == 0)
            return (zword_t *)start;

        /* No match here and nothing left to search */
        if (*start == 0)
            return 0;
    }
}

/*
 * strcpy_u copies the zero-terminated string b, terminator included,
 * into a and returns a. The destination must be large enough.
 */
zword_t *strcpy_u(zword_t *a, const zword_t *b)
{
    size_t i = 0;

    for (;;)
    {
        zword_t ch = b[i];

        a[i] = ch;
        if (ch == 0)
            break;          /* terminator has just been copied */
        i++;
    }

    return a;
}

/*
 * strncpy_u behaves like strcpy_u but writes exactly n elements to a:
 * characters from b up to its terminator, then zeros for any remainder.
 * If b has n or more characters before its terminator, the result is
 * NOT zero-terminated. Returns a.
 */
zword_t *strncpy_u(zword_t *a, const zword_t *b, size_t n)
{
    size_t i = 0;

    /* Phase 1: copy characters until the source ends or space runs out */
    while (i < n && b[i] != 0)
    {
        a[i] = b[i];
        i++;
    }

    /* Phase 2: pad with zeros, as the ANSI strncpy does */
    while (i < n)
        a[i++] = 0;

    return a;
}

/*
 * strcmp_u compares two zero-terminated zword_t strings element by
 * element. It returns zero if they are identical; otherwise it returns
 * the difference (element of a minus element of b) at the first
 * position where they differ.
 */
int strcmp_u(const zword_t *a, const zword_t *b)
{
    while (*a == *b)
    {
        /* Equal and both at the terminator: the strings match */
        if (*a == 0)
            return 0;
        a++;
        b++;
    }

    return *a - *b;
}

/*
 * font_paint_u converts a zero-terminated zword_t string to native
 * characters (using '?' as the substitute passed to unicode_to_native)
 * and hands the result to font_paint. All other arguments, including
 * length, are forwarded unchanged.
 *
 * The conversion uses a fixed 512-byte buffer, so at most 511
 * characters are painted; longer strings are truncated rather than
 * being allowed to overrun the buffer.
 */
void font_paint_u(unsigned char font, const zword_t *string, unsigned int flags,
                  int xpos, int ypos, struct font_paint_block *block,
                  struct os_trfm *trfm,
                  int length)
{
    char buffer[512];
    char *p = buffer;
    char *const last = buffer + sizeof buffer - 1;  /* reserved for the terminator */

    /* Stop at the end of the input or when only the terminator slot is left */
    while (p < last && (*p = unicode_to_native(*string++, '?')) != 0)
        p++;
    *p = '\0';

    font_paint(font, buffer, flags, xpos, ypos, block, trfm, length);
}

/*
 * tolower_u maps a Unicode code point to its lower-case equivalent.
 *
 * Ranges covered by the tables below:
 *   U+0000..U+00FF  Basic Latin and Latin-1
 *   U+0100..U+017F  Latin Extended-A
 *   U+0380..U+03CF  Greek
 *   U+0400..U+045F  Cyrillic
 * Any code point outside these ranges, or with no lower-case form in
 * the tables, is returned unchanged.
 *
 * Each table stores only the low byte of the result; the block base is
 * added back after the lookup. U+0130 and U+0178 fold into a different
 * block, so they are handled before the Latin Extended-A lookup and
 * their table slots are unused placeholders.
 */
unsigned tolower_u(unsigned u)
{
    static const unsigned char tolower_basic_latin[0x100] = {
        0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
        0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
        0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
        0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
        0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
        0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,
        0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
        0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
        0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
        0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
        0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
        0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
        0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
        0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xD7,0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xDF,
        0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
        0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
    };
    /* Slots 0x30 and 0x78 are placeholders: U+0130/U+0178 are special-cased */
    static const unsigned char tolower_latin_extended_a[0x80] = {
        0x01,0x01,0x03,0x03,0x05,0x05,0x07,0x07,0x09,0x09,0x0B,0x0B,0x0D,0x0D,0x0F,0x0F,
        0x11,0x11,0x13,0x13,0x15,0x15,0x17,0x17,0x19,0x19,0x1B,0x1B,0x1D,0x1D,0x1F,0x1F,
        0x21,0x21,0x23,0x23,0x25,0x25,0x27,0x27,0x29,0x29,0x2B,0x2B,0x2D,0x2D,0x2F,0x2F,
        0x00,0x31,0x33,0x33,0x35,0x35,0x37,0x37,0x38,0x3A,0x3A,0x3C,0x3C,0x3E,0x3E,0x40,
        0x40,0x42,0x42,0x44,0x44,0x46,0x46,0x48,0x48,0x49,0x4B,0x4B,0x4D,0x4D,0x4F,0x4F,
        0x51,0x51,0x53,0x53,0x55,0x55,0x57,0x57,0x59,0x59,0x5B,0x5B,0x5D,0x5D,0x5F,0x5F,
        0x61,0x61,0x63,0x63,0x65,0x65,0x67,0x67,0x69,0x69,0x6B,0x6B,0x6D,0x6D,0x6F,0x6F,
        0x71,0x71,0x73,0x73,0x75,0x75,0x77,0x77,0x00,0x7A,0x7A,0x7C,0x7C,0x7E,0x7E,0x7F
    };
    /* Covers U+0380..U+03CF; values are offsets from 0x300 */
    static const unsigned char tolower_greek[0x50] = {
        0x80,0x81,0x82,0x83,0x84,0x85,0xAC,0x87,0xAD,0xAE,0xAF,0x8B,0xCC,0x8D,0xCD,0xCE,
        0x90,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
        0xC0,0xC1,0xA2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xCB,0xAC,0xAD,0xAE,0xAF,
        0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
        0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF
    };
    /*
     * Covers U+0400..U+045F; values are offsets from 0x400.
     * The first entry maps U+0400 (capital IE with grave) to U+0450,
     * in line with U+040D -> U+045D in the same row.
     */
    static const unsigned char tolower_cyrillic[0x60] = {
        0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
        0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
        0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
        0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
        0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
        0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F
    };

    if (u < 0x0100)
        u = tolower_basic_latin[u];
    else if (u == 0x0130)
        u = 0x0069;         /* Capital I with dot -> lower case i */
    else if (u == 0x0178)
        u = 0x00FF;         /* Capital Y diaeresis -> lower case y diaeresis */
    else if (u < 0x0180)
        u = tolower_latin_extended_a[u-0x100] + 0x100;
    else if (u >= 0x380 && u < 0x3D0)
        u = tolower_greek[u-0x380] + 0x300;
    else if (u >= 0x400 && u < 0x460)
        u = tolower_cyrillic[u-0x400] + 0x400;

    return u;
}

/*
 * UCS_to_UTF8 encodes a single character code as UTF-8 and stores it
 * in the array pointed to by out. The return value points to the byte
 * after the last one written. Nothing is appended after the encoded
 * bytes (no terminator).
 *
 *   code < 0x80     1 byte   0xxxxxxx
 *   code < 0x800    2 bytes  110xxxxx 10xxxxxx
 *   code < 0x10000  3 bytes  1110xxxx 10xxxxxx 10xxxxxx
 *
 * At most three bytes are written. For a code of 0x10000 or above no
 * lead byte is emitted, only the two trailing bytes.
 * NOTE(review): presumably unreachable because zword_t is 16 bits
 * wide - confirm against ztypes.h.
 */
char *UCS_to_UTF8(char *out, zword_t code)
{
    if (code < 0x80u)
    {
        /* Plain ASCII is stored as-is */
        *out++ = code;
    }
    else if (code < 0x800u)
    {
        /* Lead byte carries the top 5 bits, trail byte the low 6 */
        *out++ = 0xC0u | (code >> 6);
        *out++ = 0x80u | (code & 0x3Fu);
    }
    else
    {
        /* Lead byte carries the top 4 bits of a 16-bit code */
        if (code < 0x10000u)
            *out++ = 0xE0u | (code >> 12);

        /* Two continuation bytes of 6 bits each */
        *out++ = 0x80u | ((code >> 6) & 0x3Fu);
        *out++ = 0x80u | (code & 0x3Fu);
    }

    return out;
}

/*
 * UTF8_to_UCS decodes one UTF-8 sequence starting at c and stores the
 * resulting code in *r, returning the number of bytes consumed.
 *
 * One-, two- and three-byte sequences are accepted. Anything else (a
 * stray continuation byte, a lead byte not followed by the required
 * continuation bytes, an over-long encoding, or a lead byte of 0xF0
 * or above) stores U+FFFD in *r and consumes a single byte.
 *
 * A later byte is only examined once all earlier bytes of the sequence
 * have been validated, so a zero-terminated string is never read
 * beyond its terminator.
 */
int UTF8_to_UCS(const char *c, zword_t *r)
{
    /*
     * Read the input as unsigned bytes. Plain char may be signed, in
     * which case bytes >= 0x80 would sign-extend and defeat every
     * range test and shift below.
     */
    const unsigned char *p = (const unsigned char *)c;
    unsigned c0 = p[0];
    unsigned ucs, cn;

    /* Easy case - is code 0xxxxxxx? If so, just return it. */
    if (c0 < 0x80u)
    {
        *r = c0;
        return 1;
    }

    /* If we have 10xxxxxx, we're broken */
    if (c0 < 0xC0u)
        goto badutf;

    /* Check second byte is 10xxxxxx (the XOR leaves only the 6 payload bits) */
    cn = p[1] ^ 0x80u;

    if (cn & 0xC0u)
        goto badutf;

    /* Add it in */
    ucs = (c0 << 6) | cn;

    /* Check for two byte codes - 110xxxxx 10xxxxxx */
    if (c0 < 0xE0u)
    {
        /* ucs holds 00110xxx xxxxxxxx - strip the lead byte's marker bits */
        ucs &=~ 0x3000u;

        /* Check it's not a small value that shouldn't be encoded this long */
        if (ucs < 0x80u)
            goto badutf;

        *r = ucs;
        return 2;
    }

    /* Check third byte is 10xxxxxx */
    cn = p[2] ^ 0x80u;

    if (cn & 0xC0u)
        goto badutf;

    /* Add it in */
    ucs = (ucs << 6) | cn;

    /* Check for 3 byte codes - 1110xxxx 10xxxxxx 10xxxxxx */
    if (c0 < 0xF0u)
    {
        /* ucs holds 1110 xxxxxxxx xxxxxxxx - strip the lead byte's marker bits */
        ucs &=~ 0xE0000u;

        /* Check it's not a small value that shouldn't be encoded this long */
        if (ucs < 0x800u)
            goto badutf;

        *r = ucs;
        return 3;
    }

    /* If we get this far, c0 = 1111xxxx which is bogus, so fall through to... */

  badutf:
    *r = 0xFFFDu;   /* U+FFFD REPLACEMENT CHARACTER */
    return 1;
}

