/***************************************************************************
	pseudo_dtd.cpp
	copyright			: (C) 2001-2002 by Daniel Naber
	email				: daniel.naber@t-online.de
 ***************************************************************************/

/***************************************************************************
 This program is free software; you can redistribute it and/or
 modify it under the terms of the GNU General Public License
 as published by the Free Software Foundation; either version 2
 of the License, or (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 ***************************************************************************/

#include "pseudo_dtd.h"

#include <assert.h>

#include <qdom.h>
#include <qregexp.h>

#include <klocale.h>
#include <kmessagebox.h>

/** Construct a PseudoDTD with "SGML support" switched on.
  * "SGML support" only means case-insensitivity here, because HTML is
  * case-insensitive up to version 4.
  */
PseudoDTD::PseudoDTD()
	: m_sgml_support(true)	// TODO: make this a run-time option (maybe automatically set)
{
	//kdDebug() << "PseudoDTD::PseudoDTD()" << endl;
}

/** Destructor. The body is empty; only a (disabled) debug trace lives here.
  */
PseudoDTD::~PseudoDTD()
{
	//kdDebug() << "PseudoDTD::~PseudoDTD()" << endl;
}

/** Parse a meta DTD (a DTD described in XML, as produced by dtdparse) and fill
  * the lookup tables for entities, elements, attributes and attribute values.
  *
  * @param meta_dtd_url  name/location of the meta DTD; only used in error messages
  * @param meta_dtd      the XML text of the meta DTD
  *
  * An error box is shown and nothing is analyzed when the text cannot be parsed
  * as XML or when its doctype name is not "dtd". While working, a progress
  * dialog is shown; as soon as one of the passes reports that it was cancelled
  * the remaining passes are skipped.
  */
void PseudoDTD::analyzeDTD(QString &meta_dtd_url, QString &meta_dtd)
{
	QDomDocument doc("dtd_in_xml");

	const bool parsed = doc.setContent(meta_dtd);
	if ( !parsed ) {
		KMessageBox::error(0, i18n("The file '%1' could not be parsed. "
			"Please check that the file is well-formed XML.").arg(meta_dtd_url),
			i18n("XML Plugin Error"));
		return;
	}

	const bool is_meta_dtd = ( doc.doctype().name() == "dtd" );
	if ( !is_meta_dtd ) {
		KMessageBox::error(0, i18n("The file '%1' is not in the expected format. "
			"Please check that the file is of this type:\n"
			"-//Norman Walsh//DTD DTDParse V2.0//EN\n"
			"You can produce such files with dtdparse. "
			"See the Kate Plugin documentation for more information.").arg(meta_dtd_url),
			i18n("XML Plugin Error"));
		return;
	}

	// One progress step per node that the passes below will visit.
	const uint entity_steps = doc.elementsByTagName("entity").count();
	const uint element_steps = doc.elementsByTagName("element").count();
	// <attlist> nodes are counted twice, as they will be iterated twice
	// (once for the attributes, once for their values; TODO: optimize that?):
	const uint attlist_steps = doc.elementsByTagName("attlist").count() * 2;
	const uint total_steps = entity_steps + element_steps + attlist_steps;

	QProgressDialog progress( i18n("Analyzing meta DTD..."), i18n("Cancel"), total_steps,
								0, "progress", TRUE );
	progress.setMinimumDuration(400);
	progress.setProgress(0);

	// Get information from the meta DTD and put it in Qt data structures for fast
	// access. The passes run in this order and stop at the first one that fails:
	const bool finished = getEntities(&doc, &progress)
		&& getAllowedElements(&doc, &progress)
		&& getAllowedAttributes(&doc, &progress)
		&& getAllowedAttributeValues(&doc, &progress);

	if ( finished ) {
		progress.setProgress(total_steps);	// just to make sure the dialog disappears
	}
}

// ========================================================================
// DOM stuff:

/** Iterate through the XML to get a mapping which sub-elements are allowed for
  * all elements. The result is stored in m_elements_list (element name => list
  * of sub-element names).
  *
  * The content model is flattened into a plain set of names: every
  * <element-name> found below <content-model-expanded> is taken, then every
  * name listed below <exclusions> is removed again. If the <element> has a
  * <content-model-expanded> and an <empty> node somewhere below it, the
  * pseudo entry "__EMPTY" is added as well.
  *
  * @param doc       the parsed meta DTD
  * @param progress  progress dialog; advanced by one step per <element> node and
  *                  polled for cancellation
  * @return false if the progress dialog was cancelled, true otherwise
  */
bool PseudoDTD::getAllowedElements(QDomDocument *doc, QProgressDialog *progress)
{
	m_elements_list.clear();
	// We only display a list, i.e. we pretend that the content model is just
	// a set, so we use a map. This is necessary e.g. for xhtml 1.0's head element,
	// which would otherwise display some elements twice.
	QMap<QString,bool> subelement_list;	// the bool is not used

	QDomNodeList list = doc->elementsByTagName("element");
	uint list_length = list.count();	// speedup (really!)

	for( uint i = 0; i < list_length; i++ ) {
		if( progress->wasCancelled() ) {
			return false;
		}
		progress->setProgress(progress->progress()+1);
		// FIXME!:
		//qApp->processEvents();

		subelement_list.clear();
		QDomElement elem = list.item(i).toElement();
		if( elem.isNull() ) {
			continue;
		}

		// Enter the expanded content model, which may also include stuff not allowed.
		// We do not care if it's a <sequence-group> or whatever.
		// (item(0) of an empty node list is a null node, so a missing content
		// model simply yields a null element here.)
		QDomElement content_model_elem =
			elem.elementsByTagName("content-model-expanded").item(0).toElement();
		if( ! content_model_elem.isNull() ) {
			// collect all sub elements mentioned in the content model:
			QDomNodeList sub_list = content_model_elem.elementsByTagName("element-name");
			uint sub_list_length = sub_list.count();
			for( uint l = 0; l < sub_list_length; l++ ) {
				QDomElement sub_elem = sub_list.item(l).toElement();
				if( !sub_elem.isNull() ) {
					subelement_list[sub_elem.attribute("name")] = true;
				}
			}
			// anders: check if this is an EMPTY element, and put "__EMPTY" in the
			// sub list, so that we can insert tags in empty form if required.
			QDomNodeList empty_list = elem.elementsByTagName("empty");
			if ( empty_list.count() ) {
				subelement_list["__EMPTY"] = true;
			}
		}

		// Now remove the elements not allowed (e.g. <a> is explicitly not allowed in <a>
		// in the HTML 4.01 Strict DTD). Sometimes there are no exclusions (e.g. in XML
		// DTDs there are never exclusions); then the element below is null.
		QDomElement exclusions_elem = elem.elementsByTagName("exclusions").item(0).toElement();
		if( ! exclusions_elem.isNull() ) {
			QDomNodeList excluded_list = exclusions_elem.elementsByTagName("element-name");
			uint excluded_list_length = excluded_list.count();
			for( uint l = 0; l < excluded_list_length; l++ ) {
				QDomElement excluded_elem = excluded_list.item(l).toElement();
				if( !excluded_elem.isNull() ) {
					// removing a key that is not in the map is a no-op
					subelement_list.remove(excluded_elem.attribute("name"));
				}
			}
		}

		// turn the map into a list:
		QStringList subelement_list_tmp;
		QMap<QString,bool>::Iterator it;
		for( it = subelement_list.begin(); it != subelement_list.end(); ++it ) {
			subelement_list_tmp.append(it.key());
		}
		m_elements_list.insert(elem.attribute("name"), subelement_list_tmp);

	} // end iteration over all <element> nodes
	return true;
}

/** Look up which elements are allowed inside a parent element, using the table
  * built by getAllowedElements(). Only the names are returned; the order and
  * the number of allowed occurrences are not taken into account.
  *
  * With m_sgml_support set, the element name is matched ignoring case;
  * otherwise it has to match exactly.
  *
  * @param parent_element  name of the enclosing element
  * @return the allowed sub-elements, or an empty list if the element is unknown
  */
QStringList PseudoDTD::getAllowedElementsFast(QString parent_element)
{
	if( !m_sgml_support ) {
		// exact lookup
		if( !m_elements_list.contains(parent_element) ) {
			return QStringList();
		}
		return m_elements_list[parent_element];
	}

	// find the first entry whose name matches when case is ignored:
	const QString wanted = parent_element.lower();
	QMap<QString,QStringList>::Iterator entry = m_elements_list.begin();
	while( entry != m_elements_list.end() ) {
		if( entry.key().lower() == wanted ) {
			return entry.data();
		}
		++entry;
	}
	return QStringList();
}

/** Walk all <attlist> nodes of the meta DTD and remember, per attlist name,
  * the names of the <attribute> nodes found below it. The result is stored
  * in m_attributes_list.
  *
  * @param doc       the parsed meta DTD
  * @param progress  progress dialog; advanced by one step per <attlist> node and
  *                  polled for cancellation
  * @return false if the progress dialog was cancelled, true otherwise
  */
bool PseudoDTD::getAllowedAttributes(QDomDocument *doc, QProgressDialog *progress)
{
	m_attributes_list.clear();

	QDomNodeList attlists = doc->elementsByTagName("attlist");
	const uint attlist_count = attlists.count();

	for( uint a = 0; a < attlist_count; a++ ) {
		if( progress->wasCancelled() ) {
			return false;
		}
		progress->setProgress(progress->progress()+1);
		// FIXME!!
		//qApp->processEvents();

		QDomElement attlist = attlists.item(a).toElement();
		if( attlist.isNull() ) {
			continue;
		}

		// Collect the names of all <attribute> nodes of this attlist:
		QStringList names;
		QDomNodeList attributes = attlist.elementsByTagName("attribute");
		const uint attribute_count = attributes.count();
		for( uint b = 0; b < attribute_count; b++ ) {
			QDomElement attribute = attributes.item(b).toElement();
			if( attribute.isNull() ) {
				continue;
			}
			names.append(attribute.attribute("name"));
		}
		m_attributes_list.insert(attlist.attribute("name"), names);
	}
	return true;
}

/** Look up which attributes are allowed for an element, using the table built
  * by getAllowedAttributes().
  *
  * With m_sgml_support set, the element name is matched ignoring case;
  * otherwise it has to match exactly.
  *
  * @param element  name of the element
  * @return the allowed attribute names, or an empty list if the element is unknown
  */
QStringList PseudoDTD::getAllowedAttributesFast(QString element)
{
	if( !m_sgml_support ) {
		// exact lookup
		if( !m_attributes_list.contains(element) ) {
			return QStringList();
		}
		return m_attributes_list[element];
	}

	// find the first entry whose name matches when case is ignored:
	const QString wanted = element.lower();
	QMap<QString,QStringList>::Iterator entry = m_attributes_list.begin();
	while( entry != m_attributes_list.end() ) {
		if( entry.key().lower() == wanted ) {
			return entry.data();
		}
		++entry;
	}
	return QStringList();
}

/** Walk all <attlist> nodes of the meta DTD and remember, per attlist name and
  * per attribute name, the list of values given in the attribute's "value"
  * attribute (split at single spaces). The result is stored in
  * m_attributevalues_list.
  *
  * @param doc       the parsed meta DTD
  * @param progress  progress dialog; advanced by one step per <attlist> node and
  *                  polled for cancellation
  * @return false if the progress dialog was cancelled, true otherwise
  */
bool PseudoDTD::getAllowedAttributeValues(QDomDocument *doc, QProgressDialog *progress)
{
	m_attributevalues_list.clear();		// 1 element : n possible attributes

	QDomNodeList attlists = doc->elementsByTagName("attlist");
	const uint attlist_count = attlists.count();

	for( uint a = 0; a < attlist_count; a++ ) {
		if( progress->wasCancelled() ) {
			return false;
		}
		progress->setProgress(progress->progress()+1);
		// FIXME!
		//qApp->processEvents();

		QDomElement attlist = attlists.item(a).toElement();
		if( attlist.isNull() ) {
			continue;
		}

		// 1 attribute : n possible values
		QMap<QString,QStringList> values_by_attribute;
		QDomNodeList attributes = attlist.elementsByTagName("attribute");
		const uint attribute_count = attributes.count();
		for( uint b = 0; b < attribute_count; b++ ) {
			QDomElement attribute = attributes.item(b).toElement();
			if( attribute.isNull() ) {
				continue;
			}
			const QString raw_values = attribute.attribute("value");
			const QStringList values = QStringList::split(QRegExp(" "), raw_values);
			values_by_attribute.insert(attribute.attribute("name"), values);
		}
		m_attributevalues_list.insert(attlist.attribute("name"), values_by_attribute);
	}
	return true;
}

/** Look up which values are allowed for an attribute of an element, using the
  * table built by getAllowedAttributeValues(). The element is needed because
  * e.g. "href" inside <a> could be different to an "href" inside <link>.
  *
  * With m_sgml_support set, both the element and the attribute name are
  * matched ignoring case; otherwise both have to match exactly.
  *
  * @param element    name of the element
  * @param attribute  name of the attribute inside that element
  * @return the predefined values, or an empty list if none are available
  */
QStringList PseudoDTD::getAllowedAttributeValuesFast(QString element, QString attribute)
{
	if( !m_sgml_support ) {
		// exact lookup, first by element, then by attribute:
		if( m_attributevalues_list.contains(element) ) {
			QMap<QString,QStringList> values_by_attribute = m_attributevalues_list[element];
			if( values_by_attribute.contains(attribute) ) {
				return values_by_attribute[attribute];
			}
		}
		return QStringList();
	}

	// Direct access would be faster than iteration of course but not always correct,
	// because we need to be case-insensitive.
	const QString wanted_element = element.lower();
	const QString wanted_attribute = attribute.lower();

	QMap< QString,QMap<QString,QStringList> >::Iterator elem_it;
	for( elem_it = m_attributevalues_list.begin(); elem_it != m_attributevalues_list.end(); ++elem_it ) {
		if( elem_it.key().lower() != wanted_element ) {
			continue;
		}
		// element matches; now look for the attribute, again ignoring case.
		// If it is not found here, the search goes on with the remaining elements.
		QMap<QString,QStringList> values_by_attribute = elem_it.data();
		QMap<QString,QStringList>::Iterator attr_it;
		for( attr_it = values_by_attribute.begin(); attr_it != values_by_attribute.end(); ++attr_it ) {
			if( attr_it.key().lower() == wanted_attribute ) {
				return attr_it.data();
			}
		}
	}

	// no predefined values available:
	return QStringList();
}

/** Walk all <entity> nodes of the meta DTD and store a mapping from entity name
  * to expanded text (e.g. nbsp => &#160;) in m_entity_list. Entities whose
  * "type" attribute is "param" (parameter entities) are ignored.
  *
  * @param doc       the parsed meta DTD
  * @param progress  progress dialog; advanced by one step per <entity> node and
  *                  polled for cancellation
  * @return false if the progress dialog was cancelled, true otherwise
  */
bool PseudoDTD::getEntities(QDomDocument *doc, QProgressDialog *progress)
{
	m_entity_list.clear();

	QDomNodeList entity_nodes = doc->elementsByTagName("entity");
	const uint entity_count = entity_nodes.count();

	for( uint n = 0; n < entity_count; n++ ) {
		if( progress->wasCancelled() ) {
			return false;
		}
		progress->setProgress(progress->progress()+1);
		//FIXME!!
		//qApp->processEvents();

		QDomElement entity = entity_nodes.item(n).toElement();
		// TODO: what's cdata <-> gen ?
		if( entity.isNull() || entity.attribute("type") == "param" ) {
			continue;
		}

		// The expansion is the text of the first <text-expanded> node; an entity
		// without such a node is stored with a null string.
		QString expansion;
		QDomElement expanded = entity.elementsByTagName("text-expanded").item(0).toElement();
		if( ! expanded.isNull() ) {
			expansion = expanded.text();
			// TODO: support more than one &#...; in the expanded text
			// TODO: once the unicode font problem is solved, replace numeric
			// character references (hexadecimal like "&#x236;" or decimal like
			// "&#236;") by the character they stand for.
		}
		m_entity_list.insert(entity.attribute("name"), expansion);
	}
	return true;
}

/** Get a list of all (non-parameter) entities whose name starts with a certain
  * string, using the table built by getEntities().
  *
  * @param start  prefix the entity name is tested against with QString::startsWith()
  * @return the names of the matching entities, in the iteration order of m_entity_list
  */
QStringList PseudoDTD::getEntitiesFast(QString start)
{
	QStringList entities;
	QMap<QString,QString>::Iterator it;
	for( it = m_entity_list.begin(); it != m_entity_list.end(); ++it ) {
		// Test the entity *name* (the map key). Dereferencing the iterator would
		// give the mapped value, i.e. the expansion text such as "&#160;", so a
		// prefix like "nb" could never match "nbsp" that way.
		if( it.key().startsWith(start) ) {
			QString str = it.key();
			/* TODO: show entities as unicode character
			if( !it.data().isEmpty() ) {
				//str += " -- " + it.data();
				QRegExp re("&#(\\d+);");
				if( re.search(it.data()) != -1 ) {
					uint ch = re.cap(1).toUInt();
					str += " -- " + QChar(ch).decomposition();
				}
				//kdDebug() << "#" << it.data() << endl;
			}
			*/
			entities.append(str);
			// TODO: later use a table view
		}
	}
	return entities;
}
