//
//  XTOutputTextParser.m
//  TadsTerp
//
//  Created by Rune Berg on 08/04/14.
//  Copyright (c) 2014 Rune Berg. All rights reserved.
//

#import "XTOutputTextParserBase_private.h"
#import "XTOutputTextParserHtml.h"
#import "XTPrefs.h"
#import "XTLogger.h"
#import "XTHtmlCharEntityParser.h"
#import "XTHtmlTag.h"
#import "XTHtmlTagBr.h"
#import "XTHtmlTagP.h"
#import "XTHtmlTagTab.h"
#import "XTHtmlTagTitle.h"
#import "XTHtmlTagEm.h"
#import "XTHtmlTagQ.h"
#import "XTHtmlTagH1.h"
#import "XTHtmlTagH2.h"
#import "XTHtmlTagH3.h"
#import "XTHtmlTagH4.h"
#import "XTHtmlTagH5.h"
#import "XTHtmlTagH6.h"
#import "XTHtmlTagI.h"
#import "XTHtmlTagA.h"
#import "XTHtmlTagB.h"
#import "XTHtmlTagU.h"
#import "XTHtmlTagQuestionMarkT2.h"
#import "XTHtmlTagQuestionMarkT3.h"
#import "XTHtmlTagStrong.h"
#import "XTHtmlTagCenter.h"
#import "XTHtmlTagOl.h"
#import "XTHtmlTagLi.h"
#import "XTHtmlTagHr.h"
#import "XTHtmlTagNoop.h"
#import "XTHtmlTagAboutBox.h"
#import "XTHtmlTagBanner.h"
#import "XTHtmlTagTt.h"
#import "XTHtmlTagDiv.h"
#import "XTHtmlTagUl.h"
#import "XTHtmlTagTable.h"
#import "XTHtmlTagTr.h"
#import "XTHtmlTagTh.h"
#import "XTHtmlTagTd.h"
#import "XTHtmlTagBlockQuote.h"
#import "XTHtmlTagCite.h"
#import "XTHtmlTagFont.h"
#import "XTHtmlTagCode.h"
#import "XTHtmlTagPre.h"
#import "XTHtmlTagImg.h"
#import "XTHtmlTagBody.h"
#import "XTHtmlTagSmall.h"
#import "XTHtmlTagBig.h"
#import "XTHtmlTagStrike.h"
#import "XTHtmlTagS.h"
#import "XTHtmlTagSup.h"
#import "XTHtmlTagSub.h"
#import "XTHtmlTagContainer.h"
#import "XTHtmlTagOutermost.h"
#import "XTHtmlTagText.h"
#import "XTHtmlTagWhitespace.h"
#import "XTHtmlTagQuotedSpace.h"
#import "XTHtmlTagNonbreakingSpace.h"
#import "XTHtmlTagSpecialSpace.h"
#import "XTHtmlTagSecondOutermost.h"
#import "XTHtmlTagKbd.h"
#import "XTHtmlTagSamp.h"
#import "XTHtmlTagCredit.h"
#import "XTHtmlTagListing.h"
#import "XTHtmlTagAddress.h"
#import "XTHtmlTagDl.h"
#import "XTHtmlTagDt.h"
#import "XTHtmlTagDd.h"
#import "XTHtmlTagDfn.h"
#import "XTHtmlTagVar.h"
#import "XTHtmlTagXmp.h"
#import "XTHtmlTagLh.h"
#import "XTHtmlTagBasefont.h"
#import "XTHtmlTagCaption.h"
#import "XTAllocDeallocCounter.h"
#import "XTObjectUtils.h"
#import "XTPair.h"
#import "XTStringUtils.h"


/*
 *   Private state of the HTML output parser: lookup tables built once at
 *   init time, character classes used by the tokenizer, and the scratch
 *   buffers that accumulate the token currently being scanned.
 */
@interface XTOutputTextParserHtml ()

// Expands character entities in regular text and in attribute values.
@property XTHtmlCharEntityParser *charEntityParser;

// Lower-cased tag name (and each lower-cased synonym) -> tag class; built by -initTagDefs.
@property NSDictionary<NSString*,Class> *tagClassByName;
// Tag names that are recognised but turned into XTHtmlTagNoop by -makeTag.
@property NSArray *ignoredTagNames;

// Alphanumerics plus '_' and '?'.
@property NSCharacterSet *tagNameCharSet;
// Initialised as a copy of tagNameCharSet.
@property NSCharacterSet *attributeNameCharSet;
// Whitespace and newlines; used while scanning inside a tag.
@property NSCharacterSet *whitespaceCharSet;
// ' ', '\t' and '\n'; used while scanning text outside of tags.
@property NSCharacterSet *editableWhitespaceCharSet;
// Typographical spaces U+2002 .. U+200A (ensp, emsp, ...).
@property NSCharacterSet *specialSpaceCharSet;
// Characters that are not legal in an unquoted attribute value: " ' ` = < >
@property NSCharacterSet *illegalInUnuotedAttributeValueCharSet;

// Pending run of regular (non-whitespace) text.
@property NSMutableString *regularTextBuffer;
// Pending run of whitespace outside of tags.
@property NSMutableString *whitespaceBuffer;
// Entire text of the tag being scanned: brackets, name, attributes.
@property NSMutableString *tagTextBuffer;
// Name of the tag being scanned.
@property NSMutableString *tagNameBuffer;
// YES once a '/' or '\' has been seen directly after '<' (or after "<.").
@property BOOL closing;
// The quote character (" or ') that opened the attribute value being scanned.
@property unichar attributeQuoteChar;
// Name of the attribute being scanned.
@property NSMutableString *attributeNameBuffer;
// Raw (unexpanded) value of the attribute being scanned.
@property NSMutableString *attributeValueBuffer;
// XTPair (lower-cased name, entity-expanded value) for each attribute of the current tag.
@property NSMutableArray *attributes;

// Non-nil while inside a verbatim-mode tag; see -enterVerbatimMode: / -exitVerbatimMode.
@property XTHtmlTagVerbatim *verbatimModeTag;

@end


@implementation XTOutputTextParserHtml

/* Class-wide logger, shared by all instances. */
static XTLogger* logger;

/*
 *   Creates the class logger.
 *
 *   The runtime also sends +initialize to this implementation on behalf of
 *   any subclass that does not override it, so guard on the receiver to make
 *   sure the logger is only set up once, for this class.
 */
+ (void)initialize
{
	if (self == [XTOutputTextParserHtml class]) {
		logger = [XTLogger loggerForClass:[XTOutputTextParserHtml class]];
	}
}

// NOTE(review): these macros are not defined in this file; presumably they come from
// XTAllocDeallocCounter.h and override alloc/dealloc for instance counting - confirm.
OVERRIDE_ALLOC_FOR_COUNTER
OVERRIDE_DEALLOC_FOR_COUNTER

/*
 *   Sets up the tag tables, the character classes used by the tokenizer,
 *   and empty scratch buffers; the parser starts out in XT_HTML_INITIAL
 *   with no verbatim-mode tag active.
 */
- (id)init
{
	self = [super init];
	if (self == nil) {
		return nil;
	}

	[self initTagDefs];

	// Character classes
	NSMutableCharacterSet *nameChars = [NSMutableCharacterSet alphanumericCharacterSet];
	[nameChars formUnionWithCharacterSet:[NSCharacterSet decimalDigitCharacterSet]];
	[nameChars addCharactersInString:@"_?"];
	_tagNameCharSet = nameChars;
	_attributeNameCharSet = [_tagNameCharSet mutableCopy];
	_whitespaceCharSet = [NSCharacterSet whitespaceAndNewlineCharacterSet];
	_editableWhitespaceCharSet = [NSCharacterSet characterSetWithCharactersInString:@" \t\n"];
	//TODO unit test 2004...
	_specialSpaceCharSet = [NSCharacterSet characterSetWithCharactersInString:@"\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a"];
	_illegalInUnuotedAttributeValueCharSet = [NSCharacterSet characterSetWithCharactersInString:@"\"'`=<>"];

	// Collaborators
	_charEntityParser = [XTHtmlCharEntityParser new];

	// Tokenizer state and scratch buffers
	_state = XT_HTML_INITIAL;
	_closing = NO;
	_verbatimModeTag = nil;
	_regularTextBuffer = [NSMutableString stringWithCapacity:500];
	_whitespaceBuffer = [NSMutableString stringWithCapacity:500];
	_tagTextBuffer = [NSMutableString stringWithCapacity:100];
	_tagNameBuffer = [NSMutableString stringWithCapacity:100];
	_attributeNameBuffer = [NSMutableString stringWithCapacity:100];
	_attributeValueBuffer = [NSMutableString stringWithCapacity:100];
	_attributes = [NSMutableArray array];

	return self;
}

/* Intentionally empty: this class currently has nothing to tear down. */
- (void)teardown
{
	// nothing to do
}

/* Appends one UTF-16 code unit to the pending run of regular text. */
- (void)bufferRegularText:(unichar)ch
{
	NSString *oneChar = [NSString stringWithCharacters:&ch length:1];
	[self.regularTextBuffer appendString:oneChar];
}
	
/* Empties the pending run of regular text. */
- (void)clearRegularText
{
	NSMutableString *buffer = self.regularTextBuffer;
	[buffer deleteCharactersInRange:NSMakeRange(0, buffer.length)];
}

/* Appends one UTF-16 code unit to the pending run of whitespace. */
- (void)bufferWhitespace:(unichar)ch
{
	NSString *oneChar = [NSString stringWithCharacters:&ch length:1];
	[self.whitespaceBuffer appendString:oneChar];
}

/* Empties the pending run of whitespace. */
- (void)clearWhitespace
{
	NSMutableString *buffer = self.whitespaceBuffer;
	[buffer deleteCharactersInRange:NSMakeRange(0, buffer.length)];
}

/* Empties the buffer holding the full text of the tag being scanned. */
- (void)clearTagText
{
	NSMutableString *buffer = self.tagTextBuffer;
	[buffer deleteCharactersInRange:NSMakeRange(0, buffer.length)];
}

/* Appends one UTF-16 code unit to the full text of the tag being scanned. */
- (void)bufferTagText:(unichar)ch
{
	NSString *oneChar = [NSString stringWithCharacters:&ch length:1];
	[self.tagTextBuffer appendString:oneChar];
}

/* Empties the buffer holding the name of the tag being scanned. */
- (void)clearTagName
{
	NSMutableString *buffer = self.tagNameBuffer;
	[buffer deleteCharactersInRange:NSMakeRange(0, buffer.length)];
}

/* Appends one UTF-16 code unit to the name of the tag being scanned. */
- (void)bufferTagName:(unichar)ch
{
	NSString *oneChar = [NSString stringWithCharacters:&ch length:1];
	[self.tagNameBuffer appendString:oneChar];
}

/* Empties the buffer holding the name of the attribute being scanned. */
- (void)clearAttributeName
{
	NSMutableString *buffer = self.attributeNameBuffer;
	[buffer deleteCharactersInRange:NSMakeRange(0, buffer.length)];
}

/* Appends one UTF-16 code unit to the name of the attribute being scanned. */
- (void)bufferAttributeName:(unichar)ch
{
	NSString *oneChar = [NSString stringWithCharacters:&ch length:1];
	[self.attributeNameBuffer appendString:oneChar];
}

/* Empties the buffer holding the value of the attribute being scanned. */
- (void)clearAttributeValue
{
	NSMutableString *buffer = self.attributeValueBuffer;
	[buffer deleteCharactersInRange:NSMakeRange(0, buffer.length)];
}

/* Appends one UTF-16 code unit to the value of the attribute being scanned. */
- (void)bufferAttributeValue:(unichar)ch
{
	NSString *oneChar = [NSString stringWithCharacters:&ch length:1];
	[self.attributeValueBuffer appendString:oneChar];
}

/* Drops all attributes collected so far for the tag being scanned. */
- (void)clearAttributes
{
	NSMutableArray *collected = self.attributes;
	[collected removeAllObjects];
}

/* Emits the pending run of regular text (if any) and then empties its buffer. */
- (void)addTextElement
{
	NSMutableString *pendingText = self.regularTextBuffer;
	[self addRegularTextElement:pendingText];
	[self clearRegularText];
}

/* YES when a verbatim-mode tag is active and it asks for whitespace to be kept as-is. */
- (BOOL)isHonoringVerbatimWhitespace
{
	XTHtmlTagVerbatim *verbatim = self.verbatimModeTag;
	if (verbatim == nil) {
		return NO;
	}
	return [verbatim honorVerbatimWhitespace] ? YES : NO;
}

/* YES when a verbatim-mode tag is active and it asks for newlines to be kept as-is. */
- (BOOL)isHonoringVerbatimNewlines
{
	XTHtmlTagVerbatim *verbatim = self.verbatimModeTag;
	if (verbatim == nil) {
		return NO;
	}
	return [verbatim honorVerbatimNewlines] ? YES : NO;
}

/*
 *   YES when tags are to be interpreted as tags: either no verbatim-mode
 *   tag is active, or the active one still honors the tags it contains.
 */
- (BOOL)isHonoringTags
{
	XTHtmlTagVerbatim *verbatim = self.verbatimModeTag;
	if (verbatim == nil) {
		return YES;
	}
	return [verbatim honorContainedTags] ? YES : NO;
}

/*
 *   Emits the pending run of whitespace and empties its buffer.
 *
 *   Outside verbatim-whitespace mode the run becomes an XTHtmlTagWhitespace.
 *   In verbatim-whitespace mode it is emitted as regular text, with newlines
 *   stripped first unless verbatim newlines are honored too.
 */
- (void)addWhiteSpaceElement
{
	// Snapshot the buffer, since it is cleared below
	NSString *pending = [NSString stringWithString:self.whitespaceBuffer];

	if ([self isHonoringVerbatimWhitespace]) {
		NSString *literal = pending;
		if (! [self isHonoringVerbatimNewlines]) {
			literal = [XTStringUtils withoutNewlines:pending];
		}
		[self addRegularTextElement:literal];
	} else {
		XTHtmlTagWhitespace *wsTag = [XTHtmlTagWhitespace tagWithText:pending];
		[wsTag onParsing:self];
	}

	[self clearWhitespace];
}

/*
 *   Handles a completely scanned tag, then resets all per-tag scratch state.
 *
 *   When tags are honored, a recognised tag is dispatched to -onParsing: or
 *   (for a closing tag) -onEndTag:. An unrecognised one is logged and, if the
 *   printBrokenHtmlMarkup preference is on, echoed as regular text.
 *
 *   When tags are not honored (verbatim mode), only a closing tag of the same
 *   class as the active verbatim tag is acted upon; anything else is emitted
 *   as regular text.
 */
- (void)addTagElement
{
	XT_DEF_SELNAME;
	
	BOOL honoringTags = [self isHonoringTags];
	XTHtmlTag *tag = [self makeTag];

	if (honoringTags) {
		if (tag != nil) {
			if (tag.closing) {
				[tag onEndTag:self];
			} else {
				[tag onParsing:self];
			}
		} else if (self.tagTextBuffer.length >= 1) {
			/* TODO consider a new type of formatting element for this case, for easier styling/formatting */
			if ([XTPrefs prefs].printBrokenHtmlMarkup.value.boolValue) {
				[self addRegularTextElement:self.tagTextBuffer];
			}
			XT_WARN_1(@"bad tag text \"%@\"", self.tagTextBuffer);
		}
	} else {
		BOOL endsVerbatimMode = (tag != nil)
			&& tag.closing
			&& [XTObjectUtils object:tag isOfSameClassAs:self.verbatimModeTag];
		if (endsVerbatimMode) {
			[tag onEndTag:self];
		} else {
			if (tag == nil) {
				XT_WARN_1(@"bad tag text \"%@\"", self.tagTextBuffer);
			}
			[self addRegularTextElement:self.tagTextBuffer];
		}
	}

	// Reset per-tag scratch state
	self.closing = NO;
	[self clearTagText];
	[self clearTagName];
	[self clearAttributeName];
	[self clearAttributeValue];
	[self clearAttributes];
}

/*
 *   Logging helpers for -parse:. Each expands to a single XT_WARN_3 call and
 *   relies on `self`, `ch` and `index` being in scope at the point of use.
 *   They only log; the caller is responsible for any state change.
 */

// Logs that the rest of the tag, up to the next '>', will be handled as misformed.
#define HANDLE_START_OF_MISFORMED_TAG \
	XT_WARN_3(@"(state %d) unexpected character '%C' at index %lu - handling as misformed tag until next '>'", self.state, ch, index);

// Logs an unexpected character in a tag name; scanning of the tag continues.
#define HANDLE_UNEXPECTED_CHAR_IN_TAG_NAME \
	XT_WARN_3(@"(state %d) unexpected character '%C' at index %lu - continuing tag...", self.state, ch, index);

// Logs an unexpected character in a tag attribute; scanning of the tag continues.
#define HANDLE_UNEXPECTED_CHAR_IN_TAG_ATTR \
	XT_WARN_3(@"(state %d) unexpected character '%C' at index %lu - continuing tag...", self.state, ch, index);

/*
 *   Finishes handling of a misformed tag: optionally echoes its text, then
 *   resets all per-token scratch state and returns to XT_HTML_INITIAL.
 *
 *   The tag text is echoed as regular text when the printBrokenHtmlMarkup
 *   preference is on, or when tags are currently not honored (verbatim mode).
 */
- (void)handleEndOfMisformedTag
{
	XTPrefs *prefs = [XTPrefs prefs];
	if (prefs.printBrokenHtmlMarkup.value.boolValue || ! [self isHonoringTags]) {
		/* TODO consider a new type of formatting element for this case, for easier styling/formatting */
		[self addRegularTextElement:self.tagTextBuffer];
	}
	[self clearRegularText];
	[self clearWhitespace];
	[self clearTagText];
	[self clearTagName];
	[self clearAttributeName];
	[self clearAttributeValue];
	[self clearAttributes];
	// A misformed closing tag (e.g. "</b-x>") must not make the next,
	// well-formed tag look like a closing tag.
	self.closing = NO;
	self.state = XT_HTML_INITIAL;
}

/*
 *   Returns the tokenizer to its initial state: drops everything buffered,
 *   and resets the container stack (for the next command) and the
 *   character entity parser. The verbatim-mode tag is left untouched.
 */
- (void)resetForNextCommand
{
	// Tokenizer state
	self.state = XT_HTML_INITIAL;
	self.closing = NO;
	
	// Scratch buffers
	[self clearRegularText];
	[self clearWhitespace];
	[self clearTagText];
	[self clearTagName];
	[self clearAttributeName];
	[self clearAttributeValue];
	[self clearAttributes];

	// Collaborators
	[[self containerStack] resetForNextCommand];
	[[self charEntityParser] reset];
}

/*
 *   Like -resetForNextCommand, but additionally resets the container stack
 *   fully and leaves verbatim mode.
 */
- (void)resetForGameHasEndedMsg
{
	[self resetForNextCommand];
	[[self containerStack] reset];
	[self setVerbatimModeTag:nil];
}

/* Returns the container at the bottom of the container stack. */
- (XTHtmlTagContainer *)getOuterContainer
{
	XTHtmlTagContainer *outermost = [[self containerStack] bottom];
	return outermost;
}

/* Makes `tag` the active verbatim-mode tag. */
- (void)enterVerbatimMode:(XTHtmlTagVerbatim *)tag
{
	[self setVerbatimModeTag:tag];
}

/* Leaves verbatim mode by clearing the active verbatim-mode tag. */
- (void)exitVerbatimMode
{
	[self setVerbatimModeTag:nil];
}

/*
 *   Feeds `string` through the HTML tokenizer, one UTF-16 code unit at a time.
 *
 *   The tokenizer is a state machine driven by self.state. State and buffers
 *   persist between calls, so a token may span several calls; -flush emits
 *   whatever plain text / whitespace is still pending.
 *
 *   Outside of tags, runs of whitespace and runs of other text are collected
 *   and emitted as separate elements. Inside a tag, the raw tag text is
 *   always collected in tagTextBuffer (so that broken markup can be echoed),
 *   while name and attributes go to their own buffers. A completed tag is
 *   handed to -addTagElement; a tag that cannot be made sense of is skipped
 *   up to the next '>' and handed to -handleEndOfMisformedTag.
 *
 *   While tags are not honored (verbatim mode), a '<' seen inside a tag
 *   abandons the current tag as misformed and starts a new one.
 */
- (void)parse:(NSString *)string
{
	XT_DEF_SELNAME;
	XT_TRACE_1(@"\"%@\"", string);
	
	for (NSUInteger index = 0; index < string.length; index++) {
		const unichar ch = [string characterAtIndex:index];
		switch (self.state) {
			case XT_HTML_INITIAL:
				if ([self.editableWhitespaceCharSet characterIsMember:ch]) {
					[self bufferWhitespace:ch];
					self.state = XT_HTML_IN_WHITESPACE;
				} else if (ch == '<') {
					[self bufferTagText:ch];
					self.state = XT_HTML_AFTER_TAG_START;
				} else {
					[self bufferRegularText:ch];
					self.state = XT_HTML_IN_TEXT;
				}
				break;
			case XT_HTML_IN_WHITESPACE:
				if ([self.editableWhitespaceCharSet characterIsMember:ch]) {
					[self bufferWhitespace:ch];
					// stay in XT_HTML_IN_WHITESPACE
				} else if (ch == '<') {
					[self addWhiteSpaceElement];
					[self bufferTagText:ch];
					self.state = XT_HTML_AFTER_TAG_START;
				} else {
					[self addWhiteSpaceElement];
					[self bufferRegularText:ch];
					self.state = XT_HTML_IN_TEXT;
				}
				break;
			case XT_HTML_IN_TEXT:
				if ([self.editableWhitespaceCharSet characterIsMember:ch]) {
					[self addTextElement];
					[self bufferWhitespace:ch];
					self.state = XT_HTML_IN_WHITESPACE;
				} else if (ch == '<') {
					[self addTextElement];
					[self bufferTagText:ch];
					self.state = XT_HTML_AFTER_TAG_START;
				} else {
					[self bufferRegularText:ch];
					// stay in XT_HTML_IN_TEXT
				}
				break;
			case XT_HTML_AFTER_TAG_START:
				// ch always becomes part of the raw tag text in this state
				[self bufferTagText:ch];
				if ([self.tagNameCharSet characterIsMember:ch]) {
					[self bufferTagName:ch];
					self.state = XT_HTML_IN_TAG_NAME;
				} else if (ch == '/' || ch == '\\') {
					self.state = XT_HTML_IN_TAG_NAME;
					self.closing = YES;
				} else if (ch == '.') {
					[self bufferTagName:ch]; // yup, we consider '.' part of the tag name
					self.state = XT_HTML_AFTER_PSEUDOTAG_START;
				} else if (ch == '!') {
					// possible start of a comment; ch is not part of any tag name
					self.state = XT_HTML_IN_COMMENT_OPENING_AFTER_EXCL_MARK;
				} else {
					HANDLE_START_OF_MISFORMED_TAG;
					self.state = XT_HTML_IN_MISFORMED_TAG;
				}
				break;
			case XT_HTML_AFTER_PSEUDOTAG_START:
				if ([self.tagNameCharSet characterIsMember:ch]) {
					[self bufferTagText:ch];
					[self bufferTagName:ch];
					self.state = XT_HTML_IN_TAG_NAME;
				} else if (ch == '/' || ch == '\\') {
					[self bufferTagText:ch];
					self.state = XT_HTML_IN_TAG_NAME;
					self.closing = YES;
				} else if (ch == '<' && ! [self isHonoringTags]) {
					[self handleEndOfMisformedTag];
					[self bufferTagText:ch];
					self.state = XT_HTML_AFTER_TAG_START;
				} else {
					HANDLE_START_OF_MISFORMED_TAG;
					[self bufferTagText:ch];
					self.state = XT_HTML_IN_MISFORMED_TAG;
				}
				break;
			case XT_HTML_IN_TAG_NAME:
				if ([self.tagNameCharSet characterIsMember:ch]) {
					[self bufferTagText:ch];
					[self bufferTagName:ch];
					// stay in XT_HTML_IN_TAG_NAME
				} else if ([self.whitespaceCharSet characterIsMember:ch]) {
					[self bufferTagText:ch];
					self.state = XT_HTML_AFTER_TAG_NAME;
				} else if (ch == '>') {
					[self bufferTagText:ch];
					[self addTagElement];
					self.state = XT_HTML_INITIAL;
				} else if (ch == '/' || ch == '\\') {
					[self bufferTagText:ch];
					self.state = XT_HTML_AT_CLOSING_SLASH;
				} else if (ch == '=') {
					HANDLE_UNEXPECTED_CHAR_IN_TAG_NAME; // but don't buffer it in the tag name
					[self bufferTagText:ch];
				} else if (ch == '<' && ! [self isHonoringTags]) {
					[self handleEndOfMisformedTag];
					[self bufferTagText:ch];
					self.state = XT_HTML_AFTER_TAG_START;
				} else {
					HANDLE_START_OF_MISFORMED_TAG;
					[self bufferTagText:ch];
					self.state = XT_HTML_IN_MISFORMED_TAG;
				}
				break;
			case XT_HTML_AFTER_TAG_NAME:
				if ([self.whitespaceCharSet characterIsMember:ch]) {
					[self bufferTagText:ch];
					// stay in XT_HTML_AFTER_TAG_NAME
				} else if (ch == '/' || ch == '\\') {
					[self bufferTagText:ch];
					self.state = XT_HTML_AT_CLOSING_SLASH;
				} else if (ch == '>') {
					[self bufferTagText:ch];
					[self addTagElement];
					self.state = XT_HTML_INITIAL;
				} else if ([self.attributeNameCharSet characterIsMember:ch]) {
					[self bufferTagText:ch];
					[self clearAttributeName];
					[self bufferAttributeName:ch];
					self.state = XT_HTML_IN_ATTRIBUTE_NAME;
				} else if (ch == '<' && ! [self isHonoringTags]) {
					[self handleEndOfMisformedTag];
					[self bufferTagText:ch];
					self.state = XT_HTML_AFTER_TAG_START;
				} else {
					// tolerate the odd character: treat it as the start of an attribute name
					HANDLE_UNEXPECTED_CHAR_IN_TAG_ATTR;
					[self bufferTagText:ch];
					[self clearAttributeName];
					[self bufferAttributeName:ch];
					self.state = XT_HTML_IN_ATTRIBUTE_NAME;
				}
				break;
			case XT_HTML_IN_ATTRIBUTE_NAME:
				if ([self.attributeNameCharSet characterIsMember:ch]) {
					[self bufferTagText:ch];
					[self bufferAttributeName:ch];
					// stay in XT_HTML_IN_ATTRIBUTE_NAME
				} else if ([self.whitespaceCharSet characterIsMember:ch]) {
					[self bufferTagText:ch];
					[self addAttribute];
					self.state = XT_HTML_AFTER_TAG_NAME;
				} else if (ch == '/' || ch == '\\') {
					[self bufferTagText:ch];
					[self addAttribute];
					self.state = XT_HTML_AT_CLOSING_SLASH;
				} else if (ch == '>') {
					[self bufferTagText:ch];
					[self addAttribute];
					[self addTagElement];
					self.state = XT_HTML_INITIAL;
				} else if (ch == '=') {
					[self bufferTagText:ch];
					[self clearAttributeValue];
					self.state = XT_HTML_STARTING_ATTRIBUTE_VALUE;
				} else if (ch == '<' && ! [self isHonoringTags]) {
					[self handleEndOfMisformedTag];
					[self bufferTagText:ch];
					self.state = XT_HTML_AFTER_TAG_START;
				} else {
					HANDLE_UNEXPECTED_CHAR_IN_TAG_ATTR;
					[self bufferTagText:ch];
					[self bufferAttributeName:ch];
					self.state = XT_HTML_IN_ATTRIBUTE_NAME;
				}
				break;
			case XT_HTML_STARTING_ATTRIBUTE_VALUE:
				if (ch == '>') {
					[self bufferTagText:ch];
					[self addAttribute];
					[self addTagElement];
					self.state = XT_HTML_INITIAL;
				} else if (ch == '/' || ch == '\\') {
					[self bufferTagText:ch];
					[self addAttribute];
					self.state = XT_HTML_AT_CLOSING_SLASH;
				} else if (ch == '"') {
					[self bufferTagText:ch];
					self.state = XT_HTML_IN_QUOTED_ATTRIBUTE_VALUE;
					self.attributeQuoteChar = ch;
				} else if (ch == '\'') {
					[self bufferTagText:ch];
					self.state = XT_HTML_IN_QUOTED_ATTRIBUTE_VALUE;
					self.attributeQuoteChar = ch;
				} else if ([self.whitespaceCharSet characterIsMember:ch]) {
					[self bufferTagText:ch];
					[self addAttribute];
					self.state = XT_HTML_AFTER_TAG_NAME;
				} else if ([self isLegalUnquotedAttributeValueChar:ch]) {
					[self bufferTagText:ch];
					[self bufferAttributeValue:ch];
					self.state = XT_HTML_IN_UNQUOTED_ATTRIBUTE_VALUE;
				} else if (ch == '<' && ! [self isHonoringTags]) {
					[self handleEndOfMisformedTag];
					[self bufferTagText:ch];
					self.state = XT_HTML_AFTER_TAG_START;
				} else {
					HANDLE_UNEXPECTED_CHAR_IN_TAG_ATTR;
					[self bufferTagText:ch];
					[self bufferAttributeValue:ch];
					self.state = XT_HTML_IN_UNQUOTED_ATTRIBUTE_VALUE;
				}
				break;
			case XT_HTML_IN_UNQUOTED_ATTRIBUTE_VALUE:
				// Note: '/' and '\' are deliberately not treated as a closing slash here;
				// they're part of the unquoted value.
				if (ch == '>') {
					[self bufferTagText:ch];
					[self addAttribute];
					[self addTagElement];
					self.state = XT_HTML_INITIAL;
				} else if ([self.whitespaceCharSet characterIsMember:ch]) {
					[self bufferTagText:ch];
					[self addAttribute];
					self.state = XT_HTML_AFTER_TAG_NAME;
				} else if ([self isLegalUnquotedAttributeValueChar:ch]) {
					[self bufferTagText:ch];
					[self bufferAttributeValue:ch];
					// stay in XT_HTML_IN_UNQUOTED_ATTRIBUTE_VALUE
				} else if (ch == '<' && ! [self isHonoringTags]) {
					[self handleEndOfMisformedTag];
					[self bufferTagText:ch];
					self.state = XT_HTML_AFTER_TAG_START;
				} else {
					HANDLE_UNEXPECTED_CHAR_IN_TAG_ATTR;
					[self bufferTagText:ch];
					[self bufferAttributeValue:ch];
					// stay in XT_HTML_IN_UNQUOTED_ATTRIBUTE_VALUE
				}
				break;
			case XT_HTML_IN_QUOTED_ATTRIBUTE_VALUE:
				if (ch == '>') {
					[self bufferTagText:ch];
					if (self.closing) {
						[self addAttribute];
						[self addTagElement];
						self.state = XT_HTML_INITIAL;
					} else {
						// In an opening tag, '>' inside quotes is part of the value.
						// (ch was already added to the raw tag text above - don't add it twice.)
						//TODO exp: alternatively handle this as the start of a misformed tag
						[self bufferAttributeValue:ch];
					}
				} else if (ch == '"' && self.attributeQuoteChar == ch) {
					[self bufferTagText:ch];
					[self addAttribute];
					self.state = XT_HTML_AFTER_TAG_NAME;
				} else if (ch == '\'' && self.attributeQuoteChar == ch) {
					[self bufferTagText:ch];
					[self addAttribute];
					self.state = XT_HTML_AFTER_TAG_NAME;
				} else if ([self isLegalQuotedAttributeValueChar:ch]) {
					[self bufferTagText:ch];
					[self bufferAttributeValue:ch];
					// stay in XT_HTML_IN_QUOTED_ATTRIBUTE_VALUE
				} else if (ch == '<') {
					if (! [self isHonoringTags]) {
						[self handleEndOfMisformedTag];
						[self bufferTagText:ch];
						self.state = XT_HTML_AFTER_TAG_START;
					} else {
						// when honoring tags, '<' inside quotes is part of the value
						[self bufferTagText:ch];
						[self bufferAttributeValue:ch];
					}
				} else {
					HANDLE_START_OF_MISFORMED_TAG;
					[self bufferTagText:ch];
					self.state = XT_HTML_IN_MISFORMED_TAG;
				}
				break;
			case XT_HTML_AT_CLOSING_SLASH:
				if (ch == '>') {
					[self bufferTagText:ch];
					[self addTagElement];
					self.state = XT_HTML_INITIAL;
				} else if ([self.whitespaceCharSet characterIsMember:ch]) {
					[self bufferTagText:ch];
					// stay in XT_HTML_AT_CLOSING_SLASH
				} else if (ch == '<' && ! [self isHonoringTags]) {
					[self handleEndOfMisformedTag];
					[self bufferTagText:ch];
					self.state = XT_HTML_AFTER_TAG_START;
				} else {
					HANDLE_START_OF_MISFORMED_TAG;
					[self bufferTagText:ch];
					self.state = XT_HTML_IN_MISFORMED_TAG;
				}
				break;
			case XT_HTML_IN_MISFORMED_TAG:
				if (ch == '<' && ! [self isHonoringTags]) {
					[self handleEndOfMisformedTag];
					[self bufferTagText:ch];
					self.state = XT_HTML_AFTER_TAG_START;
				} else {
					[self bufferTagText:ch];
					if (ch == '>') {
						[self handleEndOfMisformedTag];
						self.state = XT_HTML_INITIAL;
					}
				}
				break;
			case XT_HTML_IN_COMMENT_OPENING_AFTER_EXCL_MARK:
				// The raw tag text holds "<!" at this point. Keep it complete while the
				// comment opening could still turn out to be misformed markup; it is
				// dropped once the comment ends.
				if (ch == '-') {
					[self bufferTagText:ch];
					self.state = XT_HTML_IN_COMMENT_OPENING_AFTER_DASH_1;
				} else if (ch == '>') {
					[self clearTagText];
					self.state = XT_HTML_INITIAL;
				} else {
					HANDLE_START_OF_MISFORMED_TAG;
					[self bufferTagText:ch];
					self.state = XT_HTML_IN_MISFORMED_TAG;
				}
				break;
			case XT_HTML_IN_COMMENT_OPENING_AFTER_DASH_1:
				if (ch == '-') {
					self.state = XT_HTML_IN_COMMENT_CONTENT;
				} else if (ch == '>') {
					[self clearTagText];
					self.state = XT_HTML_INITIAL;
				} else {
					HANDLE_START_OF_MISFORMED_TAG;
					[self bufferTagText:ch];
					self.state = XT_HTML_IN_MISFORMED_TAG;
				}
				break;
			case XT_HTML_IN_COMMENT_CONTENT:
				// comment content is discarded
				if (ch == '-') {
					self.state = XT_HTML_IN_COMMENT_CLOSING_AFTER_DASH_1;
				} else if (ch == '>') {
					// lenient: a bare '>' also ends the comment
					[self clearTagText];
					self.state = XT_HTML_INITIAL;
				}
				break;
			case XT_HTML_IN_COMMENT_CLOSING_AFTER_DASH_1:
				if (ch == '-') {
					self.state = XT_HTML_IN_COMMENT_CLOSING_AFTER_DASH_2;
				} else if (ch == '>') {
					[self clearTagText];
					self.state = XT_HTML_INITIAL;
				}
				break;
			case XT_HTML_IN_COMMENT_CLOSING_AFTER_DASH_2:
				if (ch == '>') {
					// comment done - don't let its opening leak into the next tag's text
					[self clearTagText];
					self.state = XT_HTML_INITIAL;
				} else {
					self.state = XT_HTML_IN_COMMENT_CONTENT;
				}
				break;
			default:
				// This shouldn't happen - reset everything
				XT_ERROR_1(@"*** BUG! Unexpected state %d - reset all parser states and buffers", self.state);
				[self clearRegularText];
				[self clearWhitespace];
				[self clearTagText];
				[self clearTagName];
				[self clearAttributeName];
				[self clearAttributeValue];
				[self clearAttributes];
				self.closing = NO;
				self.state = XT_HTML_INITIAL;
				[self.charEntityParser reset];
				break;
		}
	}
}

/*
 *   Emits pending output at the end of a chunk of input.
 *
 *   Pending whitespace is emitted (and the state returns to XT_HTML_INITIAL);
 *   pending regular text is emitted too, but not an incomplete character
 *   entity. A misformed tag in progress is only logged. In every other state
 *   - including a legal but incomplete tag - nothing is emitted
 *   (EXP: as per mjr's advice 2015-07-01).
 */
- (void)flush
{
	XT_DEF_SELNAME;
	
	if (self.state == XT_HTML_IN_WHITESPACE) {
		[self addWhiteSpaceElement];
		self.state = XT_HTML_INITIAL;
	} else if (self.state == XT_HTML_IN_TEXT) {
		// ... but DON'T add incomplete char entity
		[self addTextElement];
	} else if (self.state == XT_HTML_IN_MISFORMED_TAG) {
		// Log only; the misformed text itself is not emitted here
		XT_WARN_2(@"(state %d) flushing misformed HTML: \"%@\"", self.state, self.tagTextBuffer);
	}
}

/*
 *   Like -flush, but when the tokenizer then sits in plain text or in the
 *   initial state, also emits whatever the character entity parser is still
 *   holding on to.
 */
- (void)hardFlush
{
	[self flush];

	//TODO !!! rework:
	BOOL outsideOfTag = (self.state == XT_HTML_IN_TEXT || self.state == XT_HTML_INITIAL);
	if (! outsideOfTag) {
		return;
	}

	NSString *pendingEntityText = [self.charEntityParser hardFlush];
	if (pendingEntityText.length >= 1) {
		[self addRegularTextElement:pendingEntityText];
	}
}

/* YES unless `ch` is one of the characters that may not appear in an unquoted attribute value. */
- (BOOL)isLegalUnquotedAttributeValueChar:(unichar)ch
{
	return [self.illegalInUnuotedAttributeValueCharSet characterIsMember:ch] ? NO : YES;
}

/* YES unless `ch` is the quote character that opened the current value, or '<'. */
- (BOOL)isLegalQuotedAttributeValueChar:(unichar)ch
{
	return !(ch == self.attributeQuoteChar || ch == '<');
}

/*
 *   Creates a tag object for the tag name currently in tagNameBuffer.
 *
 *   A known tag gets the current closing flag and a copy of the collected
 *   attributes. A tag on the ignore list yields an XTHtmlTagNoop. For any
 *   other name a warning is logged and nil is returned.
 */
- (XTHtmlTag *)makeTag
{
	XT_DEF_SELNAME;
	
	NSString *name = self.tagNameBuffer;
	Class knownTagClass = [self findTagClassForName:name];

	if (knownTagClass != nil) {
		XTHtmlTag *knownTag = [[knownTagClass alloc] init];
		knownTag.closing = self.closing;
		[knownTag replaceAttributes:[self.attributes copy]];
		return knownTag;
	}

	if ([self shouldIgnoreTagNamed:name]) {
		XT_TRACE_1(@"ignoring known tag \"%@\"", name);
		return [XTHtmlTagNoop new];
	}

	XT_WARN_1(@"unknown tag \"%@\"", name);
	return nil;
}

/*
 *   Builds the tag lookup tables: tagClassByName maps each supported tag
 *   class' lower-cased name and lower-cased synonyms to the class, and
 *   ignoredTagNames lists tags that are recognised but not acted upon.
 */
- (void)initTagDefs
{
	// Order matters only if two classes claim the same name: the later one wins.
	NSArray<Class> *supportedTagClasses = @[
	  [XTHtmlTagP class],
	  [XTHtmlTagBr class],
	  [XTHtmlTagTab class],
	  [XTHtmlTagTitle class],
	  [XTHtmlTagDiv class],
	  [XTHtmlTagQ class],
	  [XTHtmlTagH1 class],
	  [XTHtmlTagH2 class],
	  [XTHtmlTagH3 class],
	  [XTHtmlTagH4 class],
	  [XTHtmlTagH5 class],
	  [XTHtmlTagH6 class],
	  [XTHtmlTagI class],
	  [XTHtmlTagEm class],
	  [XTHtmlTagA class],
	  [XTHtmlTagB class],
	  [XTHtmlTagU class],
	  [XTHtmlTagTt class],
	  [XTHtmlTagQuestionMarkT2 class],
	  [XTHtmlTagQuestionMarkT3 class],
	  [XTHtmlTagStrong class],
	  [XTHtmlTagCenter class],
	  [XTHtmlTagAboutBox class],
	  [XTHtmlTagOl class],
	  [XTHtmlTagUl class],
	  [XTHtmlTagLi class],
	  [XTHtmlTagLh class],
	  [XTHtmlTagHr class],
	  [XTHtmlTagBanner class],
	  [XTHtmlTagTable class],
	  [XTHtmlTagTr class],
	  [XTHtmlTagTh class],
	  [XTHtmlTagTd class],
	  [XTHtmlTagBlockQuote class],
	  [XTHtmlTagCite class],
	  [XTHtmlTagFont class],
	  [XTHtmlTagCode class],
	  [XTHtmlTagPre class],
	  [XTHtmlTagImg class],
	  [XTHtmlTagBody class],
	  [XTHtmlTagSmall class],
	  [XTHtmlTagBig class],
	  [XTHtmlTagStrike class],
	  [XTHtmlTagS class],
	  [XTHtmlTagSup class],
	  [XTHtmlTagSub class],
	  [XTHtmlTagKbd class],
	  [XTHtmlTagSamp class],
	  [XTHtmlTagCredit class],
	  [XTHtmlTagListing class],
	  [XTHtmlTagAddress class],
	  [XTHtmlTagDl class],
	  [XTHtmlTagDt class],
	  [XTHtmlTagDd class],
	  [XTHtmlTagDfn class],
	  [XTHtmlTagVar class],
	  [XTHtmlTagXmp class],
	  [XTHtmlTagBasefont class],
	  [XTHtmlTagCaption class]
	];
	
	NSMutableDictionary<NSString*,Class> *classByLowercaseName = [NSMutableDictionary dictionary];
	for (Class supportedClass in supportedTagClasses) {
		// primary name first, then any synonyms
		NSString *primaryName = [supportedClass name];
		[classByLowercaseName setObject:supportedClass forKey:[primaryName lowercaseString]];
		for (NSString *alias in [supportedClass nameSynonyms]) {
			[classByLowercaseName setObject:supportedClass forKey:[alias lowercaseString]];
		}
	}
	_tagClassByName = [classByLowercaseName copy];

	_ignoredTagNames = @[
	   @"sound",
	   @"tc",
	   @"nobr", //TODO handle
	   @"map",
	   @"area"
	   //TODO more?
	];
}

/* Case-insensitive lookup of the tag class for `tagName`; nil if there is none. */
- (Class)findTagClassForName:(NSString *)tagName
{
	return self.tagClassByName[[tagName lowercaseString]];
}

/* YES if `tagName` equals, ignoring case, one of the names in ignoredTagNames. */
- (BOOL)shouldIgnoreTagNamed:(NSString *)tagName
{
	for (NSString *candidate in self.ignoredTagNames) {
		if ([tagName caseInsensitiveCompare:candidate] == NSOrderedSame) {
			return YES;
		}
	}
	return NO;
}

/*
 *   Turns the buffered attribute name and value into an XTPair and appends
 *   it to the attributes of the current tag. The name is lower-cased, and
 *   the value has its character entities expanded. Both buffers are then
 *   emptied. The character entity parser is reset before and after, so no
 *   entity state crosses over between the value and surrounding text.
 */
- (void)addAttribute
{
	XTHtmlCharEntityParser *entityParser = self.charEntityParser;
	[entityParser reset];
	 
	NSString *name = [[NSString stringWithString:self.attributeNameBuffer] lowercaseString];
	NSString *rawValue = [NSString stringWithString:self.attributeValueBuffer];

	// Expand entities; hardFlush yields whatever the parser still held back
	NSString *expandedHead = [entityParser parse:rawValue];
	NSString *expandedTail = [entityParser hardFlush];
	NSString *value = [NSString stringWithFormat:@"%@%@", expandedHead, expandedTail];
	
	[self.attributes addObject:[XTPair pairWithFirstObject:name secondObject:value]];
	
	[self clearAttributeName];
	[self clearAttributeValue];
	[entityParser reset];
}

/*
 *   Emits `string` as text, after expanding character entities.
 *
 *   The expanded text is split at "special" space characters: 0x15 becomes
 *   an XTHtmlTagQuotedSpace, 0xA0 an XTHtmlTagNonbreakingSpace, and any
 *   member of specialSpaceCharSet an XTHtmlTagSpecialSpace. The stretches of
 *   text in between are emitted through -addTagText:. Does nothing for a nil
 *   or empty string.
 */
- (void)addRegularTextElement:(NSString *)string
{
	if (string.length == 0) {
		return;
	}

	NSString *expanded = [self.charEntityParser parse:string];
	NSString *heldBack = [self.charEntityParser hardFlush];
	if (heldBack.length >= 1) {
		expanded = [NSString stringWithFormat:@"%@%@", expanded, heldBack];
	}

	NSMutableString *currentRun = [NSMutableString string];
	NSUInteger expandedLength = expanded.length;

	for (NSUInteger pos = 0; pos < expandedLength; pos++) {
		unichar ch = [expanded characterAtIndex:pos];

		XTHtmlTag *spaceTag = nil;
		if (ch == 0x15) {
			spaceTag = [XTHtmlTagQuotedSpace new];
		} else if (ch == 0xA0) {
			spaceTag = [XTHtmlTagNonbreakingSpace new];
		} else if ([self.specialSpaceCharSet characterIsMember:ch]) {
			spaceTag = [XTHtmlTagSpecialSpace tagWithChar:ch];
		}

		if (spaceTag == nil) {
			[currentRun appendFormat:@"%C", ch];
			continue;
		}

		// Emit the text seen so far, then the space itself, and start a fresh run
		[self addTagText:currentRun];
		currentRun = [NSMutableString string];
		[spaceTag onParsing:self];
	}

	[self addTagText:currentRun];
}

/*
 *   Emits `text` as an XTHtmlTagText. The tag is created in its verbatim
 *   variant when tags are not honored, or when verbatim whitespace is
 *   honored. Does nothing for a nil or empty text.
 */
- (void)addTagText:(NSString *)text
{
	if (text.length == 0) {
		return;
	}

	BOOL verbatim = (! [self isHonoringTags]) || [self isHonoringVerbatimWhitespace];
	XTHtmlTagText *textTag = verbatim
		? [XTHtmlTagText tagWithTextVerbatim:text]
		: [XTHtmlTagText tagWithText:text];
	[textTag onParsing:self];
}

//TODO !!! guard against non-container tags
/*
 *   Handles an end tag: gives the matching open container a chance to do
 *   implicit closing (-preCloseTag:), then - provided the end tag can be
 *   matched up with an open container - closes the current container.
 */
- (void)endNormalTag:(XTHtmlTag *)tag
{
	/*
	 *   Pre-close first, so that close tags which implicitly close other
	 *   tags get to do so before we decide whether we're at the matching
	 *   level.
	 */
	[self preCloseTag:tag];
	
	/* No match with any open container: nothing to close. */
	if (! [self endTagMatches:tag log:YES find:YES]) {
		return;
	}

	if ([self getContainerDepth] != 1) { //TODO !!! we also have 2nd outermost
		/* close the current tag */
		[self closeCurrentTag];
		return;
	}

	/*
	 *   Only the special outer container is left, and that one can't be
	 *   popped - there must have been more end tags than start tags.
	 *   Log it and ignore the end tag.
	 */
	XT_DEF_SELNAME;
	XT_WARN_1(@"too many end tags - </%@> ignored", [[tag class] name]);
}

/* Returns the current depth of the container stack. */
- (NSUInteger)getContainerDepth
{
	NSUInteger depth = [[self containerStack] depth];
	return depth;
}

/*
 *   Walks outwards from the innermost container and sends -preClose: to the
 *   first container of the same class as `tag`. Does nothing if there is no
 *   such container.
 */
- (void)preCloseTag:(XTHtmlTag *)tag
{
	XTHtmlTagContainer *candidate = [self getInnerContainerTag];

	while (candidate != nil) {
		if ([tag isSameClassAs:candidate]) {
			/* let the matching container do its close recognition work */
			[candidate preClose:self];
			break;
		}
		candidate = [candidate getContainer];
	}
}

/*
 *   Tells whether `endTag` can be matched up with an open container.
 *
 *   Returns true right away if the innermost container is of the same class
 *   as the end tag. Otherwise the mismatch is logged if `log` is true.
 *
 *   If `find` is true as well, the container stack is then searched outwards
 *   for a container matching the end tag. When one is found, every container
 *   nested inside it is closed and true is returned. This makes us tolerant
 *   of ill-formed HTML that omits the end tag of a nested structure.
 *
 *   In all other cases, returns false.
 */
- (BOOL)endTagMatches:(XTHtmlTag *)endTag log:(BOOL)log find:(BOOL)find
{
	XT_DEF_SELNAME;
	
	XTHtmlTagContainer *innermost = [self getInnerContainerTag];

	/* the simple case: the end tag matches the most recent start tag */
	if ([endTag isSameClassAs:innermost]) {
		return TRUE;
	}

	if (log) {
		XT_WARN_2(@"end tag </%@> doesn't match start tag <%@>", endTag.name, innermost.name);
	}

	if (! find) {
		return FALSE;
	}

	/* scan the stack outwards for the open tag this end tag belongs to */
	XTHtmlTagContainer *matchingOpenTag = [self getInnerContainerTag];
	while (matchingOpenTag != nil && ! [matchingOpenTag isSameClassAs:endTag]) {
		matchingOpenTag = [matchingOpenTag getContainer];
	}

	if (matchingOpenTag == nil) {
		return FALSE;
	}

	/* found it - close everything nested within it */
	while ([self getInnerContainerTag] != nil && [self getInnerContainerTag] != matchingOpenTag) {
		[self closeCurrentTag];
	}

	// We found a matching opening tag unwinding
		//TODO !!! ...mail mjr
	return TRUE;
}

/*
 *   Closes the innermost container: sends it -onClose:, pops it off the
 *   container stack (if it can be popped), then sends it -postClose:.
 */
- (void)closeCurrentTag
{
	XTHtmlTagContainer *innermost = [self getInnerContainerTag];

	[innermost onClose:self];
	[self popInnerContainer];
	[innermost postClose:self];
}

/* Returns the container at the top of the container stack, i.e. the innermost one. */
- (XTHtmlTagContainer *)getInnerContainerTag
{
	XTHtmlTagContainer *innermost = [[self containerStack] top];
	return innermost;
}

/*
 *   Pops the innermost container off the container stack, unless the stack
 *   says it can't be popped - the special outermost container must never
 *   be popped.
 */
- (void)popInnerContainer
{
	if ([self.containerStack canPop]) {
		/* the popped container's own container becomes the new innermost one */
		[self.containerStack pop];
	}
}

@end



