/******************************************************************************
 *                                                                            *
 * File:   utf     .c         Version  1.01            Date: 1994-05-05       *
 *                                                                            *
 * Copyright (C) 1993-1997 by kostis@acm.org (Kosta Kostis)                   *
 *                                                                            *
 ******************************************************************************
 *                                                                            *
 * History:                                                                   *
 *     1994-05-05: KK V1.01                                                   *
 *        - some compilers still don't understand "//" comments               *
 *     1993-07-09: KK V1.00                                                   *
 *        - initial coding                                                    *
 *                                                                            *
 *****************************************************************************/

#include "utf.h"

/******************************************************************************

	Function:
		encode a single Unicode character as a UTF byte string

	Parameters:
		ushort	ch		Unicode character to encode

	Returns:
		uchar	*		pointer to a static buffer holding the
					one..three UTF bytes, padded with '\0';
					the buffer is overwritten by every
					call (ch == 0 yields an empty string)

	Note:
		up to three bytes are stored, so MAX_UTF_LENGTH is
		presumably at least 3 -- confirm against utf.h

 *****************************************************************************/

uchar	*Unicode2UTF
(
	ushort	ch
)
{
	static	uchar	utf	[MAX_UTF_LENGTH + 1] ;
	uchar	*out	= utf ;
	int	pos ;

	/**********************************************************************

		wipe the whole result buffer so that whatever follows the
		encoded bytes is '\0'

	 *********************************************************************/

	for (pos = MAX_UTF_LENGTH ; pos >= 0 ; --pos)
		utf [pos] = '\0' ;

	/**********************************************************************

		bit layout of the three supported forms

		0000 0000 : 0bbb bbbb -> 0bbb bbbb
		0000 0bbb : bbaa aaaa -> 110b bbbb : 10aa aaaa
		cccc bbbb : bbaa aaaa -> 1110 cccc : 10bb bbbb : 10aa aaaa

	 *********************************************************************/

	if (ch < 0x0080)
	{
				/*	one byte: plain US ASCII             */
		*out = (uchar) ch ;
	}
	else if (ch < 0x0800)
	{
				/*	two bytes: 5 + 6 payload bits        */
		*out++ = 0xC0 | ((ch >> 6) & 0x1F) ;
		*out   = 0x80 |  (ch & 0x3F) ;
	}
	else
	{
				/*	three bytes: 4 + 6 + 6 payload bits  */
		*out++ = 0xE0 | ((ch >> 12) & 0x0F) ;
		*out++ = 0x80 | ((ch >> 6) & 0x3F) ;
		*out   = 0x80 |  (ch & 0x3F) ;
	}

	return (utf) ;
}

/******************************************************************************

	Function:
		convert UTF representation of a character to Unicode

	Parameters:
		uchar	*utf		UTF encoded character; only the bytes
					belonging to the leading one..three
					byte sequence are examined

	Returns:
		ISO10646_BAD		utf does not start with a well-formed
					one..three byte sequence
		ushort			decoded Unicode value otherwise

	Note:
		overlong forms (e.g. a two byte sequence for a value
		below 0x0080) are not rejected

 *****************************************************************************/

ushort	UTF2Unicode
(
	uchar	*utf
)
{
	ushort	value	= 0 ;

	/**********************************************************************

		0bbb bbbb -> 0000 0000 : 0bbb bbbb

	 *********************************************************************/

	if (utf [0] < 0x80)
		return ((ushort) utf [0]) ;

	/**********************************************************************

		10xx xxxx is a continuation byte and cannot start a sequence;
		1111 xxxx starts a sequence longer than the three byte forms
		decoded here

	 *********************************************************************/

	if ((utf [0] < 0xC0) || (utf [0] >= 0xF0))
		return (ISO10646_BAD) ;

	/**********************************************************************

		both remaining forms need a continuation byte (10xx xxxx)
		in second position

	 *********************************************************************/

	if ((utf [1] & 0xC0) != 0x80)
		return (ISO10646_BAD) ;

	/**********************************************************************

		110b bbbb : 10aa aaaa -> 0000 0bbb : bbaa aaaa

		the lead byte payload sits above the six payload bits of the
		continuation byte, hence the shift by 6

	 *********************************************************************/

	if (utf [0] < 0xE0)
	{
		value  = (ushort) ((utf [0] & 0x1F) << 6) ;
		value |= (ushort)  (utf [1] & 0x3F) ;

		return (value) ;
	}

	/**********************************************************************

		1110 cccc : 10bb bbbb : 10aa aaaa -> cccc bbbb : bbaa aaaa

	 *********************************************************************/

	if ((utf [2] & 0xC0) != 0x80)
		return (ISO10646_BAD) ;

				/*	cast before shifting so the shift    */
				/*	never overflows a 16 bit signed int  */
	value  = (ushort) (((ushort) (utf [0] & 0x0F)) << 12) ;
	value |= (ushort) ((utf [1] & 0x3F) << 6) ;
	value |= (ushort)  (utf [2] & 0x3F) ;

	return (value) ;
}
