/*  
  This file is part of KBabel
  Copyright (C) 2002 Stefan Asserhäll <stefan.asserhall@telia.com>
		2003 Stanislav Visnovsky <visnovsky@kde.org>

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  
*/

#include "poinfo.h"

#include "catalogitem.h"
#include "findoptions.h"
#include "msgfmt.h"
#include "resources.h"

#include <kapplication.h>
#include <kio/netaccess.h>
#include <kstandarddirs.h>

#include <qdatastream.h>
#include <qdatetime.h>
#include <qdict.h>
#include <qfile.h>
#include <qfileinfo.h>
#include <qregexp.h>
#include <qtextcodec.h>

// from libgettext
extern "C"
{
#include "libgettext/fstrcmp.h"
}

#include "libgettext/pofiles.h"
#include "libgettext/tokens.h"

#include <fstream>

using namespace KBabel;

// A PO-file cache item: the information gathered for one PO file together
// with the modification time the file had when that information was stored.
// cacheFind() only accepts an item whose time stamp still equals the file's
// current modification time.
struct poInfoCacheItem
{
    PoInfo info;             // statistics and header fields of the file
    QDateTime lastModified;  // modification time of the file when cached
};

// Cache of PO-file items, keyed by the URL string handed to cacheSave().
// Filled from disk by cacheRead() and written back by cacheWrite().
static QDict<poInfoCacheItem> _poInfoCache;

// File name of cache; assigned in cacheFind() the first time it is called
static QString _poInfoCacheName;

// flag to stop current reading; info() and findInFile() reset it to false
// on entry and check it while they walk through the file
bool PoInfo::stopStaticRead;

// set by fastRead(): true when the entry just read contained a plural msgid
bool PoInfo::_gettextPluralForm;

void PoInfo::cacheRead()
{
    QFile cacheFile( _poInfoCacheName );
    if( cacheFile.open( IO_ReadOnly ) ) {
	QDataStream s( &cacheFile );
	s.setPrintableData ( true );
	QString url;
	int version;

	s >> version;
	if( version == POINFOCACHE_VERSION ) // Only read if correct version
	    while( !s.atEnd() ) {
		poInfoCacheItem* item = new poInfoCacheItem;
		s >> url;
		s >> item->info.total;
		s >> item->info.fuzzy;
		s >> item->info.untranslated;
		s >> item->info.project;
		s >> item->info.creation;
		s >> item->info.revision;
		s >> item->info.lastTranslator;
		s >> item->info.languageTeam;
		s >> item->info.mimeVersion;
		s >> item->info.contentType;
		s >> item->info.encoding;
		s >> item->info.others;
		s >> item->info.headerComment;
		s >> item->lastModified;
		_poInfoCache.insert( url, item );
	    }
	cacheFile.close();
    }
}

void PoInfo::cacheWrite()
{
    QFile cacheFile( _poInfoCacheName );
    QDictIterator<poInfoCacheItem> it( _poInfoCache ); // iterator for dict

    if( cacheFile.open( IO_WriteOnly ) ) {
	QDataStream s( &cacheFile );
	// FIXME: remove when finally found the problem with corrupted cache
	s.setPrintableData ( true );
	int version = POINFOCACHE_VERSION;

	s << version;
	while( it.current() ) {
	    if( QFile::exists( it.currentKey() ) ) {
		poInfoCacheItem* item = it.current();
		s << it.currentKey();
		s << item->info.total;
		s << item->info.fuzzy;
		s << item->info.untranslated;
		s << item->info.project;
		s << item->info.creation;
		s << item->info.revision;
		s << item->info.lastTranslator;
		s << item->info.languageTeam;
		s << item->info.mimeVersion;
		s << item->info.contentType;
		s << item->info.encoding;
		s << item->info.others;
		s << item->info.headerComment;
		s << item->lastModified;
	    }
	    ++it;
	}
	cacheFile.close();
    }
}

bool PoInfo::cacheFind(const QString url, PoInfo& info)
{
    // Read cache if it has not been read, and set up post routine to write it
    static bool _cacheIsRead = false;
    if( !_cacheIsRead ) {
	_cacheIsRead = true;
        _poInfoCacheName = locateLocal("data", "kbabel/poinfocache");
	cacheRead();
    }

    poInfoCacheItem *item = _poInfoCache.find( url );
    if( item ) {
	QFileInfo fi( url );

	if( fi.lastModified() == item->lastModified ) {
	    info = item->info;
	    return true;
	}
    }
    return false;
}

void PoInfo::cacheSave(const QString url, PoInfo& info)
{
    poInfoCacheItem *item = new poInfoCacheItem;
    QFileInfo fi( url );

    item->info = info;
    item->lastModified = fi.lastModified();
    _poInfoCache.insert( url, item );
}

// Determines the text codec announced by the Content-Type line of a
// gettext header.
// Returns 0 when the header has no Content-Type/charset entry or no charset
// name can be extracted from it. When a charset name is found but Qt has no
// codec for it, the utf8 codec is returned instead.
QTextCodec* PoInfo::codecForFile(QString gettextHeader)
{
   // Step 1: isolate the Content-Type entry including its charset part.
   QRegExp contentTypeRx("Content-Type:\\s*\\w+/[-\\w]+;\\s*charset\\s*=\\s*[^\\\"\\n]+");
   const int start = contentTypeRx.search(gettextHeader);
   if(start < 0)
   {
      kdDebug(KBABEL) << "no charset entry found" << endl;
      return 0;
   }
   const QString contentType = gettextHeader.mid(start, contentTypeRx.matchedLength());

   // Step 2: pull the charset name out of that entry.
   QRegExp charsetRx("charset *= *([^\\\\\\\"]+)");
   if(charsetRx.search(contentType) < 0)
      return 0;

   const QString charset = charsetRx.cap(1);
   if(charset.isEmpty())
      return 0;

   // Step 3: map the name to a codec.
   QTextCodec* codec = 0;
   if(charset == "CHARSET")
   {
      // "CHARSET" is the default charset entry in a template (pot).
      // characters in a template should be either pure ascii or
      // at least utf8, so utf8-codec can be used for both.
      codec = QTextCodec::codecForName("utf8");
      kdDebug(KBABEL)
          << QString("file seems to be a template: using utf8 encoding.")
          << endl;
   }
   else
   {
      codec = QTextCodec::codecForName(charset.latin1());
   }

   if(!codec)
   {
      kdWarning() << "charset found, but no codec available, using UTF8 instead" << endl;
      codec = QTextCodec::codecForName("utf8");
   }

   return codec;
}

PoInfo PoInfo::headerInfo(const CatalogItem& headerItem)
{
   QStringList header=headerItem.msgstrAsList();

   QStringList::Iterator it;

   PoInfo info;

   // extract information from the header
   for(it=header.begin();it!=header.end();++it)
   {
      if((*it).contains(QRegExp("^\\s*Project-Id-Version\\s*:\\s*.+\\s*$")))
      {
         info.project=(*it).replace(QRegExp("^\\s*Project-Id-Version\\s*:\\s*"),"");

         if(info.project.right(2)=="\\n")
            info.project.remove(info.project.length()-2,2);
         
         info.project=info.project.simplifyWhiteSpace();
      }
      else if((*it).contains(QRegExp("^\\s*POT-Creation-Date\\s*:\\s*.+\\s*$")))
      {
         info.creation=(*it).replace(QRegExp("^\\s*POT-Creation-Date\\s*:\\s*"),"");

         if(info.creation.right(2)=="\\n")
            info.creation.remove(info.creation.length()-2,2);

         info.creation=info.creation.simplifyWhiteSpace();
      }
      else if((*it).contains(QRegExp("^\\s*PO-Revision-Date\\s*:\\s*.+\\s*$")))
      {
         info.revision=(*it).replace(QRegExp("^\\s*PO-Revision-Date\\s*:\\s*"),"");

         if(info.revision.right(2)=="\\n")
            info.revision.remove(info.revision.length()-2,2);

         info.revision=info.revision.simplifyWhiteSpace();
      }
      else if((*it).contains(QRegExp("^\\s*Last-Translator\\s*:\\s*.+\\s*$")))
      {
         info.lastTranslator=(*it).replace(QRegExp("^\\s*Last-Translator\\s*:\\s*"),"");

         if(info.lastTranslator.right(2)=="\\n")
            info.lastTranslator.remove(info.lastTranslator.length()-2,2);

         info.lastTranslator=info.lastTranslator.simplifyWhiteSpace();
      }
      else if((*it).contains(QRegExp("^\\s*Language-Team\\s*:\\s*.+\\s*")))
      {
         info.languageTeam=(*it).replace(QRegExp("^\\s*Language-Team\\s*:\\s*"),"");

         if(info.languageTeam.right(2)=="\\n")
            info.languageTeam.remove(info.languageTeam.length()-2,2);

         info.languageTeam=info.languageTeam.simplifyWhiteSpace();
      }
      else if((*it).contains(QRegExp("^\\s*MIME-Version\\s*:\\s*.+\\s*")))
      {
         info.mimeVersion=(*it).replace(QRegExp("^\\s*MIME-Version\\s*:\\s*"),"");

         if(info.mimeVersion.right(2)=="\\n")
            info.mimeVersion.remove(info.mimeVersion.length()-2,2);

         info.mimeVersion=info.mimeVersion.simplifyWhiteSpace();
      }
      else if((*it).contains(QRegExp("^\\s*Content-Type\\s*:\\s*.+\\s*")))
      {
         info.contentType=(*it).replace(QRegExp("^\\s*Content-Type\\s*:\\s*"),"");

         if(info.contentType.right(2)=="\\n")
            info.contentType.remove(info.contentType.length()-2,2);

         info.contentType=info.contentType.simplifyWhiteSpace();
      }
      else if((*it).contains(QRegExp("^\\s*Content-Transfer-Encoding\\s*:\\s*.+\\s*")))
      {
         info.encoding=(*it).replace(QRegExp("^\\s*Content-Transfer-Encoding\\s*:\\s*"),"");

         if(info.encoding.right(2)=="\\n")
            info.encoding.remove(info.encoding.length()-2,2);

         info.encoding=info.encoding.simplifyWhiteSpace();
      }
      else
      {
          QString line=(*it);

         if(line.right(2)=="\\n")
            line.remove(line.length()-2,2);

         line=line.simplifyWhiteSpace();
         if(!info.others.isEmpty())
             info.others+='\n';

         info.others+=line;
      }


   }

   info.headerComment=headerItem.comment();

   return info;
}


ConversionStatus PoInfo::info(const QString& url, PoInfo& info, QStringList &wordList, bool updateWordList, bool interactive)
{
   stopStaticRead = false;

   if( !updateWordList && PoInfo::cacheFind( url, info ) )
       return OK;

   QString target;
   if(KIO::NetAccess::download(KURL( url ), target))
   {
       QFile file(target);

	   // first check file with msgfmt to be sure, it is 
	   // syntactically correct
	   Msgfmt msgfmt;
	   QString output;
	   Msgfmt::Status stat = msgfmt.checkSyntax( target , output );
	   if(stat == Msgfmt::SyntaxError)
	   {
          KIO::NetAccess::removeTempFile(target);
		  return PARSE_ERROR;
	   }


       std::ifstream* stream = new std::ifstream( file.name().local8Bit());
       if( stream->is_open() )
       {
           CatalogItem temp;

           info.total=0;
           info.fuzzy=0;
           info.untranslated=0;

	   GettextFlexLexer* lexer = new GettextFlexLexer( stream  );

	   lexer->yylex();
	   
           // now parse the rest of the file
           ConversionStatus success=OK;
	   
           while( lexer->lastToken != T_EOF && success==OK)
           {
               if( interactive ) kapp->processEvents(10);
	       
	       if( stopStaticRead )
	       {
	    	    delete lexer;
		    delete stream;
		    return OK;
		}
	       
	       success=fastRead(temp,lexer,false);
	       
               if(success==OK || success==RECOVERED_PARSE_ERROR)
               {
		  success=OK;
		  
		  if( temp.comment().contains("\n#~") ) continue; // skip obsolete
		  
                  if( temp.msgid().first().isEmpty()) //header
		  {
		      if( temp.isFuzzy() )  temp.removeFuzzy();
		      
		      //find out the codec
		      QTextCodec* codec = codecForFile( temp.msgstr().first() );
		      if( !codec ) return PARSE_ERROR;
		      
		      // convert from UTF-8 using codec
		      temp.setComment( codec->toUnicode(temp.comment().utf8()) );
		      temp.setMsgstr( codec->toUnicode(temp.msgstr().first().utf8()) );
		      
		      PoInfo infoCounts = info;
		      info=PoInfo::headerInfo(temp);
		      info.total = infoCounts.total;
		      info.fuzzy = infoCounts.fuzzy;
		      info.untranslated = infoCounts.untranslated;
		      continue; // do not update counters and word list for header
		  }
		  		  
                  info.total++;

                  if(temp.isFuzzy())
                     info.fuzzy++;
                  else if(temp.isUntranslated())
                     info.untranslated++;
		     
		  if( updateWordList )
		  {
		    // FIXME: should care about plural forms in msgid
		    QString st = temp.msgid().first().simplifyWhiteSpace().lower();
		    QStringList sl = QStringList::split( ' ', st );
		    while(!sl.isEmpty())
		    {
			QString w = sl.first();
			sl.pop_front();
			if( !wordList.contains(w) ) wordList.append( w );
		    }
		    st = temp.msgstr().join(" " ).simplifyWhiteSpace().lower();
		    sl = QStringList::split( ' ', st );
		    while(!sl.isEmpty())
		    {
			QString w = sl.first();
			sl.pop_front();
			if( !wordList.contains(w) ) wordList.append( w );
		    }
		    st = temp.comment().simplifyWhiteSpace().lower();
		    sl = QStringList::split( ' ', st );
		    while(!sl.isEmpty())
		    {
			QString w = sl.first();
			sl.pop_front();
			if( !wordList.contains(w) ) wordList.append( w );
		    }
                }
	      }
           }

	   delete lexer;
	   delete stream;

           if(success==PARSE_ERROR)
           {
	       KIO::NetAccess::removeTempFile(target);
               return PARSE_ERROR;
           }
       }
       else
       {
          delete stream;
          KIO::NetAccess::removeTempFile(target);
          return NO_PERMISSIONS;
       }

	KIO::NetAccess::removeTempFile(target);
	if( target == url )
	    PoInfo::cacheSave( url, info );
        return OK;
   }
   else
   {
      return OS_ERROR;
   }

   return OK;
}

bool PoInfo::findInFile( const QString& url, FindOptions options )
{
   enum {Begin, Comment, Msgid, Msgstr} part = Begin;
   
   stopStaticRead = false;
   QString target;
   if(KIO::NetAccess::download(KURL( url ), target))
   {
       std::ifstream* stream = new std::ifstream( target.local8Bit()); 
       if(stream->is_open())
       {
           KIO::NetAccess::removeTempFile(target);
	   
	   GettextFlexLexer* lexer = new GettextFlexLexer( stream );

	   lexer->yylex();

           // prepare the search
	   
	   QString searchStr = options.findStr;
	   QRegExp regexp( searchStr );
	   
	   if( options.isRegExp ) 
		regexp.setCaseSensitive( options.caseSensitive );

           // first read header
	   CatalogItem temp;
	   
	   ConversionStatus status = fastRead( temp, lexer, true );
	   if( status != OK || !temp.msgid().first().isEmpty() ) 
	   {
		delete lexer;
		delete stream;
		return false; // header is not at the beginning, broken file
	   }

	   QTextCodec* codec = codecForFile( temp.msgstr().first() );
	   if( !codec ) 
	   {
		return false;
	   }
	   
	   // now parse the rest of the file
	   QString text;
	   int pos,len;
	   
           while(lexer->lastToken != T_EOF)
           {
	       switch( lexer->lastToken ) {
	           case T_COMMENT: {
			part = Comment;
			if( !options.inComment ) break;
			text = codec->toUnicode(lexer->YYText()); 
			if( options.isRegExp )
			    pos=regexp.search(text, 0 );
			else 
			    pos=text.find(searchStr,0,options.caseSensitive);
			if( pos >= 0)
			{
			    if( options.wholeWords) {
				len = searchStr.length();
				QString pre = text.mid(pos-1,1);
				QString post = text.mid(pos+len,1);
				if( !pre.contains( QRegExp("[a-zA-Z0-9]")) &&
				    !post.contains( QRegExp("[a-zA-Z0-9]") )
				) {
				    delete lexer;
				    delete stream;
				    return true;
				}
			    }
			    else {
				delete lexer;
				delete stream;
				return true;
			    };
			}
			break;
		   }
		   case T_STRING: {
			if( part == Msgid && !options.inMsgid ) break;
			if( part == Msgstr && !options.inMsgstr ) break;
			
			text = codec->toUnicode(lexer->YYText()); 
			
			if( options.ignoreContextInfo )
			{
			    pos = options.contextInfo.search(text);
			    len = options.contextInfo.matchedLength();
			    if( pos >= 0 )
		    		text.remove( pos, len );
			}
		    
			if( options.ignoreAccelMarker )
			{
			    pos = text.find( options.accelMarker );
			    if( pos >= 0 )
				text.remove( pos, 1 );
			}
			
			if( options.isRegExp )
			    pos=regexp.search(text, 0 );
			else 
			    pos=text.find(searchStr,0,options.caseSensitive);

			if( pos >= 0)
			{
			    if( options.wholeWords) {
				len = searchStr.length();
				QString pre = text.mid(pos-1,1);
				QString post = text.mid(pos+len,1);
				if( !pre.contains( QRegExp("[a-zA-Z0-9]")) &&
				    !post.contains( QRegExp("[a-zA-Z0-9]") )
				) {
				    delete lexer;
				    delete stream;
				    return true;
				}
			    }
			    else {
				delete lexer;
				delete stream;
				return true;
			    };
			}
			break;
		   }
		   case T_MSGSTR: {
			part = Msgstr;
			break;
		   }
		   case T_MSGID: 
		   case T_MSGIDPLURAL: {
	    		kapp->processEvents(10);
			
			// if stopped, return not found
			if( stopStaticRead ) 
			{
			    delete lexer;
			    delete stream;
			    return false;
			}
			part = Msgid;
			break;
		   }
	       }
	       lexer->yylex();
           }
	   delete lexer;
	   delete stream;
       }
    }
    return false;
}

// Reads one entry from lexer into item, starting at the lexer's current
// token (lexer->lastToken) and stopping at the first token that no longer
// belongs to the entry.
//
//   item      - cleared first, then filled with comment, msgid and msgstr
//   lexer     - token source; one token must already have been read
//   storeText - true: all string tokens of a msgid/msgstr are joined with
//               '\n' and stored. false: only the first string token is
//               kept; a text that continues over further string tokens is
//               replaced by the placeholder "SKIPPED" (the header's msgstr
//               is always stored completely).
//
// An obsolete entry is reported as OK with the comment "#~\n#~" and nothing
// else. _gettextPluralForm is set when the entry has a msgid_plural.
//
// this does not like any incorrect files: any unexpected token yields
// PARSE_ERROR, otherwise OK is returned.
ConversionStatus PoInfo::fastRead( CatalogItem& item, GettextFlexLexer *lexer, bool storeText)
{
    item.clear();
    _gettextPluralForm = false;

    // comment
    if( lexer->lastToken == T_COMMENT )
    {
        QString _comment = QString::fromUtf8(lexer->YYText());
        while( lexer->yylex() == T_COMMENT )
            _comment += "\n"+QString::fromUtf8(lexer->YYText());
        item.setComment( _comment );
//      kdDebug(KBABEL) << "Comment: " << _comment << endl;
    }

    //obsolete
    if( lexer->lastToken == T_OBSOLETE ) {
        lexer->yylex();
        item.setComment("#~\n#~");
        return OK;
    }

    // msgid
    if( lexer->lastToken != T_MSGID ) return PARSE_ERROR;

    if( lexer->yylex() != T_STRING ) return PARSE_ERROR;
    QStringList msgids = item.msgid();
    QStringList::Iterator it = msgids.begin();
    *it = QString::fromUtf8(lexer->YYText());
    if( storeText )
        while( lexer->yylex() == T_STRING )
            (*it) += ("\n"+ QString::fromUtf8(lexer->YYText()) );
    else {
        if( lexer->yylex() == T_STRING ) // this is not header
        {
            *it = "SKIPPED";
            while( lexer->yylex() == T_STRING );
        }
    }
    item.setMsgid( msgids );

//    kdDebug(KBABEL) << "Msgid: " << *it << endl;

    if( lexer->lastToken == T_MSGIDPLURAL )
    {
        _gettextPluralForm = true;
        if( lexer->yylex() != T_STRING ) return PARSE_ERROR;
        QStringList msgids = item.msgid();
        it = msgids.fromLast();
        *it = QString::fromUtf8(lexer->YYText());
        if( storeText )
            while( lexer->yylex() == T_STRING )
                (*it)+="\n"+ QString::fromUtf8(lexer->YYText());
        else while( lexer->yylex() == T_STRING );
        item.setMsgid( msgids );
//      kdDebug(KBABEL) << "Msgid_plural: " << *it << endl;
    }

    // msgstr
    if( lexer->lastToken != T_MSGSTR ) return PARSE_ERROR;

    if( !_gettextPluralForm )
    {
        if( lexer->yylex() != T_STRING ) return PARSE_ERROR;

        QStringList msgstrs = item.msgstr();
        it = msgstrs.begin();
        *it = QString::fromUtf8(lexer->YYText());
        if( storeText || item.msgid().first().isEmpty() ) // if we should store the text or it is a header
            while( lexer->yylex() == T_STRING )
                (*it)+= ("\n"+ QString::fromUtf8(lexer->YYText()));
        else
        if( lexer->yylex() == T_STRING ) // check next token, whether it is really translated
        {
            *it = "SKIPPED";
            while( lexer->yylex() == T_STRING );
        }
        item.setMsgstr( msgstrs );
//      kdDebug(KBABEL) << "Msgstr: " << *it << endl;
    }
    else
    {
        QStringList msgstrs = item.msgstr();
        QString s = QString::fromUtf8(lexer->YYText());
        // one iteration per "msgstr[n]" keyword
        while( lexer->lastToken == T_MSGSTR && s.contains( QRegExp("^msgstr\\[[0-9]+\\]" ) ) )
        {
            if( lexer->yylex() != T_STRING ) return PARSE_ERROR;
            it = msgstrs.fromLast();
            *it = QString::fromUtf8(lexer->YYText());

            // The first string token is already stored by the assignment
            // above, so only the following ones are appended. (A do-while
            // loop here used to append the first token a second time.)
            if( storeText )
                while( lexer->yylex() == T_STRING )
                    (*it)+="\n"+QString::fromUtf8(lexer->YYText());
            else while( lexer->yylex() == T_STRING );
//          kdDebug(KBABEL) << "Msgstr: " << *it << endl;
            s = QString::fromUtf8(lexer->YYText());
        }
        item.setMsgstr( msgstrs );
    }

    return OK;
}
