/*    picu_utf8.h
 *
 *    $Id: picu_utf8.h,v 1.2 2002/01/20 21:43:02 bstell Exp $
 *
 *    Copyright (c) 2000 Brian Stell
 *
 *    This package is free software and is provided ``as is'' without
 *    express or implied warranty. It may be used, redistributed and/or
 *    modified under the terms of the Perl Artistic License
 *    (see http://www.perl.com/perl/misc/Artistic.html)
 *
 */

extern "C" {
#include "EXTERN.h"
#include "perl.h"
#ifdef FORMAT
  /* perly.h defined FORMAT to 267 which breaks unicode.h */
# undef FORMAT
#endif
}

#include <stdio.h>
#include "unicode/unicode.h"
#include "unicode/unistr.h"

/*
 * NOTE(review): judging by the name and signature only, this converts an
 * ICU UnicodeString into a Perl U8 (byte) string.  The uint32_t * is
 * presumably an out-parameter for the resulting length, and ownership of
 * the returned buffer is not visible from this header -- confirm both
 * against the implementation.
 */
U8 *UnicodeStringToU8String(UnicodeString &, uint32_t *);

/*
 * Out-of-line helper used by the UTF8_APPEND_UCHAR macro in this header
 * for values above 0x7F.  The macro passes its current write pointer and
 * the character, and stores the return value back into that pointer, so
 * the return value is used as the new write position.
 * NOTE(review): presumably that is the address just past the bytes
 * written -- confirm against the implementation.
 */
char *utf8_append_uchar(char *, UChar);

// for debug
// Only the prototypes are declared here; the implementations live outside
// this header.
// NOTE(review): from the names, each helper presumably prints its string
// argument either as text or as hex values -- output destination and
// format are not visible here; confirm against the implementation.
void dump_CharString(const char *);
void dump_CharStringAsHex(const char *);
void dump_U8String(const U8 *);
void dump_U8StringAsHex(const U8 *);
// NOTE(review): the int32_t is presumably the number of UChar units to
// dump -- confirm.
void dump_UCharString(const UChar *, int32_t);

/*
 * UTF8_CHAR_LEN(c): number of bytes needed to store code point c as UTF-8.
 *
 * Handle points up to 0x10FFFF
 * 0x000000 - 0x00007F => 1 byte  (7 = 7 bits)
 * 0x000080 - 0x0007FF => 2 bytes (5 + 6 = 11 bits)
 * 0x000800 - 0x00FFFF => 3 bytes (4 + 6 + 6 = 16 bits)
 * 0x010000 - 0x10FFFF => 4 bytes (3 + 6 + 6 + 6 = 21 bits)
 * > 0x10FFFF => error point 0xFFFF => 3 bytes
 *
 * The last test relies on unsigned wrap-around: after subtracting 0x10000,
 * values below 0x10000 wrap to a huge number and values above 0x10FFFF
 * exceed 0xFFFFF, so both fall into the 3 byte case.  The comparison must
 * be a strict '>' so that 0x10FFFF itself (0xFFFFF after the subtraction)
 * is counted as 4 bytes.
 *
 * The argument is converted to uint32_t before any arithmetic so that a
 * signed argument cannot overflow.  Note that c is evaluated up to three
 * times: do not pass an expression with side effects.
 */
#define UTF8_CHAR_LEN(c) \
            ((uint32_t)(c) <= 0x7F ? 1 : \
                ((uint32_t)(c) <= 0x7FF ? 2 : \
                    ((uint32_t)(c) - 0x10000u > 0xFFFFFu ? 3 : 4) \
                ) \
            )

/*
 * UTF8_APPEND_UCHAR(s, c): write c as UTF-8 at *s and advance s.
 *
 * This macro is very similar to the UTF8_APPEND_CHAR_UNSAFE
 * in utf8.h but for faster speed directly manipulates the pointer
 * instead of using pointer and index addressing
 *
 * Usage notes:
 *  - s must be a modifiable pointer lvalue: it is post-incremented or
 *    assigned to by the macro.
 *  - The macro takes no buffer limit and performs no bounds check; the
 *    caller has to guarantee there is enough room at s.
 *  - Both s and c are expanded more than once, so do not pass
 *    expressions with side effects.
 *  - The expansion is a bare brace block, not do { } while (0).  A
 *    trailing ';' after it is a separate empty statement, so wrap the
 *    call in braces when it is the body of an if that has an else.
 *
 * Active variant (#if 1): values <= 0x7F are stored inline as a single
 * byte; every other value is handed to utf8_append_uchar() and its
 * return value becomes the new s.
 */
#if 1
#   define UTF8_APPEND_UCHAR(s, c) { \
            if (((uint32_t)(c)) <= 0x7F) { \
                (*(s)++) = ((uint8_t)(c)); \
            } \
            else { \
                (s) = utf8_append_uchar((s), (c)); \
            } \
    }
#else
/*
 * Disabled variant: fully inline encoder.  Emits 1 byte up to 0x7F,
 * 2 bytes up to 0x7FF, 3 bytes up to 0xFFFF, 4 bytes up to 0x10FFFF,
 * and the 3 byte sequence EF BF BF (the encoding of 0xFFFF) for any
 * larger value.
 */
#   define UTF8_APPEND_UCHAR(s, c) { \
        if (((uint32_t)(c)) <= 0x7F) { \
            (*(s)++) = ((uint8_t)(c)); \
        } \
        else if (((uint32_t)(c)) <= 0x7FF) { \
            (*(s)++) = (0xC0 | (uint8_t)(((uint32_t)(c))>>6)); \
            (*(s)++) = (0x80 | (uint8_t)(((uint32_t)(c))&0x3F)); \
        } \
        else if (((uint32_t)(c)) <= 0xFFFF) { \
            (*(s)++) = (0xE0 | (uint8_t)(((uint32_t)(c))>>12)); \
            (*(s)++) = (0x80 | (uint8_t)((((uint32_t)(c))>>6)&0x3F)); \
            (*(s)++) = (0x80 | (uint8_t)(((uint32_t)(c))&0x3F)); \
        } \
        else if (((uint32_t)(c)) <= 0x10FFFF) { \
            (*(s)++) = (0xF0 | (uint8_t)(((uint32_t)(c))>>18)); \
            (*(s)++) = (0x80 | (uint8_t)((((uint32_t)(c))>>12)&0x3F)); \
            (*(s)++) = (0x80 | (uint8_t)((((uint32_t)(c))>>6)&0x3F)); \
            (*(s)++) = (0x80 | (uint8_t)(((uint32_t)(c))&0x3F)); \
        } \
        else { \
            (*(s)++) = 0xEF; \
            (*(s)++) = 0xBF; \
            (*(s)++) = 0xBF; \
        } \
    }
#endif

/*
 * This conversion could be done with the ICU converter but
 * since it will be done frequently this UCS2 -> UTF8 converter
 * is directly implemented so we can avoid malloc'ing a converter
 *
 * NOTE(review): the parameters are presumably (source, source length,
 * destination buffer, destination capacity), and the returned
 * const UChar * presumably points into the source (e.g. at the first
 * unit that was not converted) -- confirm against the implementation.
 */
const UChar *
UCharStringToUTF8String(const UChar*, int32_t, char*, int32_t);

/*
 * NOTE(review): from the name, presumably returns the number of UTF-8
 * bytes needed to encode the given UChar string of the given length,
 * without writing anything -- confirm against the implementation.
 */
int32_t
UCharStringToUTF8strlen(const UChar*, int32_t);


