include "token-table.h";
! Byte codes for the built-in tokens written by tokenise_stream and read
! back by detokenise_stream. Any byte value not listed here is handed to
! token_decode by detokenise_stream.
!
! Most tokens are a single byte. The ones that carry a payload are noted
! on their own lines below.

constant TOKEN__EOL 0;		! End of the tokenised line.
constant TOKEN__NUMBER 1;	! Followed by the value as a two-byte word.
constant TOKEN__STRING 2;	! Followed by a length byte, then that many characters.
constant TOKEN__VAR 3;		! Followed by the name's characters, then a zero byte.
constant TOKEN__SPACE 4;	! A run of spaces and/or tabs; detokenises as one space.
constant TOKEN__PLUS 5;		! +
constant TOKEN__MINUS 6;	! -
constant TOKEN__STAR 7;		! *
constant TOKEN__SLASH 8;	! /
constant TOKEN__COLON 9;	! :
constant TOKEN__EQUALS 10;	! =
constant TOKEN__COMMA 11;	! ,
constant TOKEN__LPAREN 12;	! (
constant TOKEN__RPAREN 13;	! )
constant TOKEN__LARROW 14;	! <
constant TOKEN__RARROW 15;	! >
constant TOKEN__SEMICOLON 16;	! ;
constant TOKEN__GEQUAL 17;	! >=
constant TOKEN__LEQUAL 18;	! <=
constant TOKEN__NEQUAL 19;	! <>

! Returns true when c lies outside the lower-case range 'a'..'z',
! and false when it is a lower-case letter.

[ token_invalidchar c;
	if (c < 'a') rtrue;
	if (c > 'z') rtrue;
	rfalse;
];

! Returns true when c may appear in a variable name: a letter of either
! case, a decimal digit, or one of the characters _ % $.

[ token_validvarnamechar c;
	if ((c >= 'a') && (c <= 'z')) rtrue;
	if ((c >= 'A') && (c <= 'Z')) rtrue;
	if ((c >= '0') && (c <= '9')) rtrue;

	switch (c)
	{
		'_', '%', '$':
			rtrue;
	}

	rfalse;
];

! Returns true when c is a decimal digit character, '0' to '9'.

[ token_isnumber c;
	if ((c < '0') || (c > '9')) rfalse;
	rtrue;
];

! Returns true when c is a space (32) or a tab (9).

[ token_isspace c;
	if (c == 32 or 9) rtrue;
	rfalse;
];

! Tokenise an input stream.
!
! Reads the zero-terminated byte string at `in` and writes its tokenised
! form to `out`. The input and output pointers must point to different
! regions of memory.
!
! Output layout: byte 0 receives the total number of bytes written
! (itself and the trailing TOKEN__EOL included); tokens start at byte 1.
! NOTE(review): that count is stored in a single byte, so a tokenised
! line longer than 255 bytes will not be recorded correctly.
!
! Returns -1 on success. On failure, returns the offset into `in` of the
! character that could not be tokenised (for a string literal longer
! than 255 characters, the offset of its opening quote). In that case
! `out` is incomplete and out->0 is left as 0.

[ tokenise_stream in out  incount outcount i j k;
	out->0 = 0;
	outcount = 1;
	incount = 0;

	while (in->incount)
	{
		! Give token_encode first refusal. Its result is treated as
		! the number of input characters consumed (0 meaning "no
		! match"), and it is assumed to have written exactly one
		! byte at out+outcount.

		i = token_encode(in+incount, out+outcount);
		if (i ~= 0)
		{
			incount = incount + i;
			outcount = outcount + 1;
		}
		else
		{
			! Not a recognised token. We test against all the
			! other things we recognise. Note the order! This
			! is important: digits are valid variable name
			! characters, so numbers must be tried first.
			
			i = in->incount;

			! Is it white space? A whole run collapses into a
			! single TOKEN__SPACE.
				
			if (token_isspace(i))
			{
				while (token_isspace(in->(incount)))
					incount++;
				out->(outcount++) = TOKEN__SPACE;
				continue;
			}

			! Is it a number? Accumulate the decimal value and
			! store it as a two-byte word after the opcode.

			if (token_isnumber(i))
			{
				out->(outcount++) = TOKEN__NUMBER;
				i = 0;
				do {
					i = i*10 + (in->incount - '0');
					incount++;
				} until (token_isnumber(in->incount) == 0);
				(out+outcount)-->0 = i;
				outcount = outcount + 2;
				continue;
			}

			! Is it a string?

			if (i == '"')
			{
				! Work out the size of the string. i marks
				! the first character after the open quote.

				incount++;
				i = incount;
				do {
					k = in->(incount++);
				} until ((k == '"') || (k == 0));
				j = incount-i-1;

				! The scan has consumed the character that
				! stopped it. For a close quote that is what
				! we want. For an unterminated string it has
				! stepped past the terminating zero, so step
				! back; otherwise the outer loop would carry
				! on reading beyond the end of the input.

				if (k == 0)
					incount--;

				! The length is stored in one byte. Refuse
				! anything longer rather than emit a stream
				! whose length byte disagrees with the data.

				if (j > 255)
					return i-1;

				! Emit the opcode.

				out->(outcount++) = TOKEN__STRING;
				out->(outcount++) = j;

				! And now emit the string itself.

				memcpy(out+outcount, in+i, j);
				outcount = outcount + j;
				continue;
			}
			
			! Is it an operator?

			switch (i)
			{
				'+':	out->(outcount++) = TOKEN__PLUS;
					incount++;
					continue;

				'-':	out->(outcount++) = TOKEN__MINUS;
					incount++;
					continue;

				'*':	out->(outcount++) = TOKEN__STAR;
					incount++;
					continue;

				'/':	out->(outcount++) = TOKEN__SLASH;
					incount++;
					continue;

				':':	out->(outcount++) = TOKEN__COLON;
					incount++;
					continue;

				'=':	out->(outcount++) = TOKEN__EQUALS;
					incount++;
					continue;

				',':	out->(outcount++) = TOKEN__COMMA;
					incount++;
					continue;

				'(':	out->(outcount++) = TOKEN__LPAREN;
					incount++;
					continue;

				')':	out->(outcount++) = TOKEN__RPAREN;
					incount++;
					continue;

				! '<' and '>' may start a two-character
				! operator, so look at the next character.
				! The ++incount consumes the first character;
				! the second is consumed only when it matches.
					
				'<':	switch (in->(++incount))
					{
						'>':	out->(outcount++) = TOKEN__NEQUAL;
							incount++;
							break;

						'=':	out->(outcount++) = TOKEN__LEQUAL;
							incount++;
							break;

						default: out->(outcount++) = TOKEN__LARROW;
							 break;
					}
					continue;

				'>':	switch (in->(++incount))
					{
						'=':	out->(outcount++) = TOKEN__GEQUAL;
							incount++;
							break;

						default: out->(outcount++) = TOKEN__RARROW;
							break;
					}
					continue;
					
				';':	out->(outcount++) = TOKEN__SEMICOLON;
					incount++;
					continue;
			}

			! Is it a variable name? Copy the name verbatim and
			! terminate it with a zero byte.

			if (token_validvarnamechar(i))
			{
				out->(outcount++) = TOKEN__VAR;
				do {
					out->(outcount++) = in->(incount++);
				} until (token_validvarnamechar(in->incount) == 0);
				out->(outcount++) = 0;
				continue;
			}

			! Nothing matched: report where we got stuck.

			return incount;
		}
	}

	! Terminate the line and patch up the line length.

	out->outcount = TOKEN__EOL;
	out->0 = outcount + 1;

	return -1;
];

! Detokenise a stream.
!
! Walks the token bytes starting at `in` and prints the text each one
! stands for. Stops, after printing a newline, at the first TOKEN__EOL.
! A byte that is not one of the TOKEN__ codes handled below is passed to
! token_decode and the result is printed as a string.

[ detokenise_stream in  b;
	do
	{
		! Fetch the next opcode and step past it.
		b = in->0;
		in++;

		! Inform switch cases do not fall through, so no case
		! needs an explicit break.
		switch (b)
		{
			TOKEN__EOL:
				print "^";
				return;

			! Payload: the name's characters, then a zero byte.
			TOKEN__VAR:
				for (b = (in++)->0 : b ~= 0 : b = (in++)->0)
					print (char) b;

			! Payload: the value as a two-byte word.
			TOKEN__NUMBER:
				print in-->0;
				in = in + 2;

			TOKEN__SPACE:
				print " ";

			! Payload: a length byte, then that many characters.
			TOKEN__STRING:
				b = in->0;
				in++;
				print "~";
				while (b ~= 0)
				{
					print (char) in->0;
					in++;
					b--;
				}
				print "~";

			TOKEN__PLUS:		print "+";
			TOKEN__MINUS:		print "-";
			TOKEN__STAR:		print "*";
			TOKEN__SLASH:		print "/";
			TOKEN__COLON:		print ":";
			TOKEN__EQUALS:		print "=";
			TOKEN__COMMA:		print ",";
			TOKEN__LPAREN:		print "(";
			TOKEN__RPAREN:		print ")";
			TOKEN__LARROW:		print "<";
			TOKEN__RARROW:		print ">";
			TOKEN__SEMICOLON:	print ";";
			TOKEN__GEQUAL:		print ">=";
			TOKEN__LEQUAL:		print "<=";
			TOKEN__NEQUAL:		print "<>";

			default:
				print (string) token_decode(b);
		}
	} until (false);
];

