/*
 * string.c
 * Manage the various forms of strings, Utf8 and Unicode.
 *
 * Copyright (c) 1996 Cygnus Support
 * Copyright (c) 1996,97 T. J. Wilkinson & Associates, London, UK.
 *
 * See the file "license.terms" for information on usage and redistribution
 * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
 *
 * Written by Per Bothner <bothner@cygnus.com>
 * Written by Tim Wilkinson <tim@tjwassoc.co.uk>
 **/

/*** CHANGELOG ***
 *
 * 27.1.1998   Teemu Ikonen                 Some clean-up
 *
 */

#include <u.h>
#include <libc.h>
#include "plan9interface.h"
#include "config.h"
#include "config-std.h"
#include "config-mem.h"
#include "jtypes.h"
#include "constants.h"
#include "object.h"
#include "itypes.h"
#include "locks.h"
#include "classMethod.h"
#include "baseClasses.h"



/*
 * hashChars() returns a 32-bit value; truncate it to 16 bits so that it is
 * compatible with the 16-bit hash values stored in a Utf8Const.
 **/
#define HASH_CHARS(STR, LEN) (uint16) hashChars(STR, LEN)



/* Finalizer routine for interned strings; defined further down. **/
static void finalizeString(void*);


/*
 * Open-addressed hash table of interned strings.  Each slot holds NULL
 * (never used), DELETED_STRING (entry removed) or a pointer to a string.
 **/
static Hjava_lang_String** strhash = NULL;
static int strhash_count = 0;  /* Number of strings currently stored in
				* strhash: incremented when a string is
				* interned, decremented when it is removed. **/
static int strhash_size = 0;  /* Number of slots available in strhash.
			       * Assumed to be a power of 2, because slot
			       * indices are formed by masking with
			       * (strhash_size - 1). **/

/* GC function pair attached to interned strings (see SET_STRING_IS_INTERNED). **/
static gcFuncs gcFinalizeString = { walkConservative, finalizeString };

/* Marker stored in a strhash slot by uninternJavaString(). **/
#define DELETED_STRING ((Hjava_lang_String*)(~0))
/* Attach gcFinalizeString to STR through gc_set_finalizer(). **/
#define SET_STRING_IS_INTERNED(STR) \
		gc_set_finalizer((STR), &gcFinalizeString)

/* Prototypes for functions that are used before their definition below. **/
int strLengthUtf8 (char* str, int len);
Hjava_lang_String** findInternSlot(jchar*, int, int);
void rehashStrings(void);


/*
 * Count the number of Unicode chars encoded in the LEN bytes of Utf8 data
 * starting at STR.
 *
 * Returns the character count, or -1 as soon as UTF8_GET yields a
 * negative value for some position in the input.
 **/
int
strLengthUtf8(char* str, int len)
{
	unsigned char* cursor = (unsigned char*) str;
	unsigned char* end = cursor + len;
	int nchars = 0;

	/* UTF8_GET advances the cursor past whatever it consumed. **/
	while (cursor < end) {
		if (UTF8_GET (cursor, end) < 0) {
			return (-1);
		}
		nchars++;
	}
	return (nchars);
}

/*
 * Calculate a hash value for the LEN bytes of Utf8 data at STR.
 * According to the original note this is the value specified for
 * java.lang.String.hashCode.
 *
 * The decoded characters are sampled in the same pattern hashChars() uses
 * for jchars: the first character is always taken; strings of at most 15
 * characters then use every character with multiplier 37, longer strings
 * use every (count / 8)-th character with multiplier 39.
 **/
int32
hashUtf8String (char* str, int len)
{
	int nchars = strLengthUtf8 (str, len);
	unsigned char* cursor = (unsigned char*) str;
	unsigned char* end = cursor + len;
	int32 multiplier;
	int stride;
	int countdown = 1;	/* 1, so the very first character is sampled **/
	int32 result = 0;

	if (nchars > 15) {
		multiplier = 39;
		stride = nchars / 8;
	}
	else {
		multiplier = 37;
		stride = 1;
	}

	while (cursor < end) {
		int ch = UTF8_GET (cursor, end);

		countdown--;
		if (countdown == 0) {
			result = (multiplier * result) + ch;
			countdown = stride;
		}
	}
	return (result);
}



/*
 * Allocate a Utf8Const holding a copy of the LEN bytes at S.
 *
 * A negative LEN means the length is taken from strlen(S).  The copied
 * data is NUL-terminated, and the hash field receives hashUtf8String()
 * truncated to 16 bits.
 **/
Utf8Const*
makeUtf8Const (char* s, int len)
{
	Utf8Const* utf;
	int nbytes;

	nbytes = len;
	if (nbytes < 0) {
		nbytes = strlen (s);
	}

	/* One extra byte for the terminating NUL. **/
	utf = (Utf8Const*)gc_malloc(sizeof(Utf8Const) + nbytes + 1, &gcUtf8Const);

	memcpy (utf->data, s, nbytes);
	utf->data[nbytes] = 0;
	utf->length = nbytes;
	utf->hash = (uint16) hashUtf8String (s, nbytes);

	return (utf);
}

/*
 * Build a new Java String from the LEN bytes of Utf8 data at PTR,
 * replacing every decoded character equal to FROM_CH by TO_CH.
 * The string is not entered into the intern table.
 *
 * The backing char array is sized by the number of decoded characters
 * (as Utf8Const2JavaString does) instead of by the byte count, which is
 * only an upper bound and over-allocates for multi-byte input.  If
 * strLengthUtf8() reports malformed data (-1), the byte count is still
 * used so the decode loop below keeps enough room, exactly as before.
 **/
Hjava_lang_String*
makeReplaceJavaStringFromUtf8(unsigned char* ptr, int len, int from_ch, int to_ch)
{
	Hjava_lang_String* obj;
	register unsigned char* limit;
	jchar* chrs;
	int count;
	int ch;

	limit = ptr + len;
	count = strLengthUtf8 ((char *)ptr, len);

	obj = (Hjava_lang_String*)newObject(&StringClass);
	obj->data[0].count = count;
	obj->data[0].value = (HArrayOfChar*)newPrimArray(TYPE_CLASS(TYPE_Char),
		(count >= 0) ? count : len);
	obj->data[0].offset = 0;
	chrs = STRING_DATA(obj);

	/* The loop stores one jchar per UTF8_GET call. **/
	for (; ptr < limit;) {
		ch = UTF8_GET (ptr, limit);
		if (ch == from_ch) {
			ch = to_ch;
		}
		*chrs++ = ch;
	}

	return (obj);
}

/*
 * Return the interned Java String whose characters are those encoded in
 * the Utf8Const STR, creating and interning a new String if the table
 * does not yet hold an equal one.
 *
 * NOTE(review): strLengthUtf8() can return -1; that result is not handled
 * specially here - confirm callers only pass well-formed Utf8Consts.
 **/
Hjava_lang_String*
Utf8Const2JavaString(Utf8Const* str)
{
	Hjava_lang_String* obj;
	jchar *chrs;
	jchar buffer[100];
	Hjava_lang_Object* array;
	int hash;
	register unsigned char* data;
	register unsigned char* limit;
	int length;
	Hjava_lang_String** ptr;

	data = (unsigned char *)str->data;
	limit = data + str->length;
	length = strLengthUtf8((char *)data, str->length);

	/* Short strings are decoded into the stack buffer so that no array
	 * has to be allocated when an equal string is already interned;
	 * longer ones are decoded straight into a fresh char array. **/
	if (length <= (sizeof(buffer) / sizeof(jchar))) {
		chrs = buffer;
		array = NULL;
	}
	else {
		array = newPrimArray(&charClass, length);
		chrs = (jchar*)ARRAY_DATA(array);
	}

	/* Decode the Utf8 bytes, one jchar per UTF8_GET call. **/
	while (data < limit) {
		*chrs++ = UTF8_GET(data, limit);
        }
	/* Rewind to the first decoded character. **/
	chrs -= length;

	/* Create the table on first use, or grow it once it is 3/4 full. **/
	if (4 * strhash_count >= 3 * strhash_size) {
		rehashStrings();
	}

	/* Use the 16-bit hash stored in the Utf8Const instead of rehashing. **/
	hash = str->hash;

	ptr = findInternSlot (chrs, length, hash);
	if (*ptr != NULL && *ptr != DELETED_STRING) {
		/* An equal string is already interned. **/
		return *ptr;
	}
	/* NOTE(review): ptr stays in use across the allocations below;
	 * confirm that newPrimArray()/newObject() cannot cause strhash to
	 * be rehashed in the meantime. **/
	strhash_count++;
	if (array == NULL) {
		/* The characters are still in the stack buffer: move them
		 * into a real char array. **/
		array = newPrimArray(&charClass, length);
		chrs = (jchar*) ARRAY_DATA(array);
		memcpy (chrs, buffer, sizeof(jchar)*length);
	}
	obj = (Hjava_lang_String*)newObject(&StringClass);
	obj->data[0].count = length;
	obj->data[0].value = (HArrayOfChar*)array;
	obj->data[0].offset = 0;
	/* Publish the string in its slot and attach the string finalizer. **/
	*ptr = obj;
	SET_STRING_IS_INTERNED(obj);
	return (obj);
}

/*
 * Return true iff a Utf8Const string is equal to a Java String.
 *
 * A's length field is a count of Utf8 bytes while B's size is a count of
 * jchars, so the two cannot be compared directly: a string containing any
 * multi-byte character has more bytes than characters.  Instead the Utf8
 * data is decoded character by character, and the strings are equal only
 * if every character matches and both run out at the same time.
 **/
int
equalUtf8JavaStrings(Utf8Const* a, Hjava_lang_String* b)
{
	jchar* data = STRING_DATA(b);
	register unsigned char* ptr = (unsigned char *)a->data;
	register unsigned char* limit = ptr + a->length;
	int remaining = STRING_SIZE(b);

	while (remaining > 0 && ptr < limit) {
		/* A negative UTF8_GET result never equals a jchar, so
		 * undecodable data compares as "different". **/
		if (*data++ != UTF8_GET(ptr, limit)) {
			return 0;
		}
		remaining--;
	}

	/* Equal only if neither side has anything left over. **/
	return (remaining == 0 && ptr >= limit);
}

/*
 * Find the slot in the strhash table of interned strings where the string
 * with elements DATA, length LEN and hash HASH lives or should go.
 *
 * Returns the slot that already holds an equal string if there is one.
 * Otherwise returns a slot the string may be stored in: the most recently
 * seen DELETED_STRING slot on the probe path if any, else the NULL slot
 * that ended the probe.
 *
 * Removing strings leaves DELETED_STRING markers and never restores NULL,
 * so the table can run out of NULL slots.  When the probe has visited every
 * slot without a match, a remembered deleted slot is returned; ABORT() is
 * reached only if the table has no free slot at all.
 **/
Hjava_lang_String**
findInternSlot (jchar* data, int len, int hash)
{
	int start_index = hash & (strhash_size - 1);
	int deleted_index = -1;

	register int index = start_index;
	/* step must be non-zero, and relatively prime with strhash_size;
	 * an odd step satisfies both for a power-of-2 table size. **/
	int step = 8 * hash + 7;
	for (;;) {
		register Hjava_lang_String** ptr = &strhash[index];
		if (*ptr == NULL) {
			/* End of the probe chain: the string is not present. **/
			if (deleted_index >= 0) {
				return (&strhash[deleted_index]);
			}
			else {
				return (ptr);
			}
		}
		else if (*ptr == DELETED_STRING) {
			/* Remember it for reuse, but keep looking for a match. **/
			deleted_index = index;
		}
		else if (STRING_SIZE(*ptr) == len
			 && memcmp(STRING_DATA(*ptr), data, sizeof(jchar)*len) == 0) {
			return (ptr);
		}
		index = (index + step) & (strhash_size - 1);
		if (index == start_index) {
			/* Whole table probed: no match and no NULL slot. **/
			if (deleted_index >= 0) {
				return (&strhash[deleted_index]);
			}
			ABORT();
		}
	}
	return 0;
}

/*
 * Calculate a hash code for the LENGTH jchars starting at PTR.
 * According to the original note this uses the formula specified for
 * java.lang.String.hash.
 *
 * Strings of at most 15 characters hash every character with multiplier
 * 37; longer strings hash the first character and then every
 * (LENGTH / 8)-th one with multiplier 39.
 *
 * An empty string hashes to 0 without touching PTR, which matches what
 * hashUtf8String() returns for empty input.  Elements are addressed by
 * index so that no pointer beyond the end of the data is ever formed.
 **/
int32
hashChars (jchar* ptr, int length)
{
	int32 hash;
	int i;

	if (length <= 0) {
		return (0);
	}

	hash = ptr[0];
	if (length <= 15) {
		for (i = 1; i < length; i++) {
			hash = (37 * hash) + ptr[i];
		}
	}
	else {
		int skip = length / 8;

		for (i = skip; i < length; i += skip) {
			hash = (39 * hash) + ptr[i];
		}
	}
	return (hash);
}

/*
 * Locate the strhash slot for the Java String STR: hash its characters
 * with HASH_CHARS and hand data, size and hash to findInternSlot().
 **/
Hjava_lang_String**
findInternSlotFromString (Hjava_lang_String* str)
{
	int nchars = STRING_SIZE(str);
	jchar* chars = STRING_DATA(str);
	int hash = HASH_CHARS (chars, nchars);

	return (findInternSlot(chars, nchars, hash));
}

void
rehashStrings(void)
{
	if (strhash == NULL) {
		strhash_size = 1024;
		/* Note we do *not* want the GC scanning strhash,
		   so we use gc_calloc_fixed instead of gc_malloc. **/
		strhash = gc_calloc_fixed(strhash_size, sizeof(Hjava_lang_String*));
	}
	else {
		register int i;
		register Hjava_lang_String** ptr;

		i = strhash_size;
		ptr = strhash + i;
		strhash_size *= 2;
		strhash = gc_calloc_fixed(strhash_size, sizeof (Hjava_lang_String*));

		while (--i >= 0) {
			int hash;
			int index;
			int step;

			--ptr;
			if (*ptr == NULL || *ptr == DELETED_STRING) {
				continue;
			}
			/* This is faster equivalent of
			 * *findInternSlotFromString (*ptr) = *ptr; **/
			hash = HASH_CHARS(STRING_DATA(*ptr),STRING_SIZE(*ptr));
			index = hash & (strhash_size - 1);
			step = 8 * hash + 7;
			for (;;) {
				if (strhash[index] == NULL) {
					strhash[index] = *ptr;
					break;
				}
				index = (index + step) & (strhash_size - 1);
			}
		}
		gc_free_fixed(ptr); /* Old value of strhash. **/
	}
}

/*
 * Intern the Java String STR.
 *
 * Returns the string already in the table whose contents equal STR, if
 * any; otherwise stores STR itself in the table, attaches the string
 * finalizer to it and returns STR.
 **/
Hjava_lang_String*
internJavaString(Hjava_lang_String* str)
{
	Hjava_lang_String** slot;
	Hjava_lang_String* existing;

	/* Create the table on first use, or grow it once it is 3/4 full. **/
	if (4 * strhash_count >= 3 * strhash_size) {
		rehashStrings();
	}

	slot = findInternSlotFromString (str);
	existing = *slot;
	if (existing == NULL || existing == DELETED_STRING) {
		/* Free slot: STR becomes the interned instance. **/
		SET_STRING_IS_INTERNED(str);
		strhash_count++;
		*slot = str;
		return (str);
	}
	return existing;
}

/*
 * Remove the table entry matching STR from the intern table.
 * Called by the String fake finalizer.
 *
 * The slot is overwritten with DELETED_STRING rather than NULL; nothing
 * happens if the lookup ends on an empty or already deleted slot.
 **/
void
uninternJavaString(Hjava_lang_String* str)
{
	Hjava_lang_String** slot = findInternSlotFromString(str);

	if (*slot != NULL && *slot != DELETED_STRING) {
		*slot = DELETED_STRING;
		strhash_count--;
	}
}

/*
 * Convert a Java string to a C string.
 *
 * Copies at most LEN - 1 characters of JS into the buffer CS, narrowing
 * each jchar to char with a plain cast, and NUL-terminates the result.
 *
 * Returns NULL if LEN <= 0 (CS is not touched); otherwise returns CS,
 * i.e. the start of the converted string.  A NULL JS yields an empty
 * string.  Earlier the copy loop advanced CS itself, so the value
 * returned pointed at the terminating NUL instead of the string.
 **/
char*
javaString2CString(struct Hjava_lang_String* js, char* cs, int len)
{
	jchar* chrs;
	int i;

	if (len <= 0) {
		return (NULL);
	}
	if (js == NULL) {
		cs[0] = 0;
		return (cs);
	}

	chrs = STRING_DATA(js);
	/* Reserve one byte of the buffer for the terminator. **/
	len--;
	if (len > STRING_SIZE(js)) {
		len = STRING_SIZE(js);
	}
	for (i = 0; i < len; i++) {
		cs[i] = (char)chrs[i];
	}
	cs[len] = 0;
	return (cs);
}

/*
 * Convert a Java string into a freshly allocated C string buffer.
 *
 * The buffer comes from gc_malloc_fixed() and has room for every
 * character of JS plus the terminating NUL.  If the allocation yields a
 * null pointer, that is returned and nothing is converted.
 **/
char*
makeCString(Hjava_lang_String* js)
{
	char* buf = gc_malloc_fixed(STRING_SIZE(js) + 1);

	if (buf == 0) {
		return (buf);
	}
	javaString2CString(js, buf, STRING_SIZE(js) + 1);
	return (buf);
}

/*
 * Convert the LEN bytes at CS into a new Java String.
 *
 * Each byte is read as an unsigned char and stored as one jchar.
 * The string is not entered into the intern table.
 **/
Hjava_lang_String*
makeJavaString(char* cs, int len)
{
	Hjava_lang_String* str;
	unsigned char* src;
	jchar* dst;
	int i;

	str = (Hjava_lang_String*)newObject(&StringClass);
	/* FIXME - should intern string literals **/
	str->data[0].count = len;
	str->data[0].value = (HArrayOfChar*)newPrimArray(TYPE_CLASS(TYPE_Char), len);
	str->data[0].offset = 0;

	src = (unsigned char*)cs;
	dst = STRING_DATA(str);
	for (i = 0; i < len; i++) {
		dst[i] = src[i];
	}
	return (str);
}


/*
 * Convert the LEN bytes at CS into a new Java char array of LEN elements.
 *
 * Each byte is read as an unsigned char and stored as one jchar.  When CS
 * is NULL the array is returned as newPrimArray() delivered it, without
 * any element being written here.
 **/
Hjava_lang_Object*
makeJavaCharArray(char* cs, int len)
{
	Hjava_lang_Object* array;
	unsigned char* src;
	jchar* dst;

	array = newPrimArray(TYPE_CLASS(TYPE_Char), len);
	if (cs == NULL) {
		return (array);
	}

	src = (unsigned char*)cs;
	dst = (jchar*) ARRAY_DATA(array);
	while (len-- > 0) {
		*dst++ = *src++;
	}
	return (array);
}

/*
 * Finalize an interned string: MEM is the String object being finalized;
 * drop its entry from the intern table.
 **/
static
void
finalizeString(void* mem)
{
	Hjava_lang_String* dying = (Hjava_lang_String*)mem;

	/* Only String objects are expected to carry this finalizer. **/
	assert(OBJECT_CLASS(&dying->base) == &StringClass);
	uninternJavaString(dying);
}


