/* GNU moe - My Own Editor
   Copyright (C) 2005-2022 Antonio Diaz Diaz.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include <cctype>
#include <string>
#include <vector>

#include "buffer.h"
#include "iso_8859.h"
#include "regex.h"


namespace Regex {

// Set of characters described by a bracket expression like "[a-z]" or
// "[^0-9]", parsed from a regular expression by the constructor.
class Set
  {
  // One 0/1 flag per character code. A vector of unsigned char is used
  // because it is faster than a vector of bool.
  std::vector< unsigned char > data;
  int parsed_len_;	// chars of regex taken by the set; 0 if not valid
  bool in;		// false if the set is negated with '^'

public:
  // Parse the set beginning at 'regex[i0]', which must be a '['.
  explicit Set( const std::string & regex, const unsigned i0 = 0 );

  // Number of chars of the regex forming the set, brackets included.
  // It is 0 if the set could not be parsed.
  int parsed_len() const { return parsed_len_; }

  // True if 'ch' is matched by the set. Always false for a set that
  // could not be parsed.
  bool includes( const unsigned char ch ) const
    {
    if( parsed_len_ <= 0 ) return false;
    return in == data[ch];
    }
  };


/* Parse the set of characters beginning at 'regex[i0]'.
   The set has the form '[' ['^'] [']'] items ']', where every item is
   either a single char or a range 'first-last', and any char of an item
   may be written as a backslash escape sequence. A ']' placed first
   (after the optional '^') is a member of the set. A '-' just before
   the closing ']' is also a member of the set.
   If the set is valid, 'parsed_len_' is set to the number of chars from
   '[' to ']' inclusive, else it is left at 0.
*/
Set::Set( const std::string & regex, const unsigned i0 )
  : data( 256, false ), parsed_len_( 0 ), in( true )
  {
  const std::string::size_type size = regex.size();
  // the shortest valid set ("[x]") takes 3 chars
  if( i0 + 2 >= size || regex[i0] != '[' ) return;

  unsigned pos = i0 + 1;
  if( regex[pos] == '^' ) { in = false; ++pos; }	// negated set
  if( regex[pos] == ']' ) { data[']'] = true; ++pos; }	// literal ']'

  bool closed = false;			// true when the final ']' is found
  while( pos < size )
    {
    unsigned char first = regex[pos];
    if( first == ']' ) { ++pos; closed = true; break; }
    if( first == '\\' )
      {
      int len;
      const int code = ISO_8859::escape( regex, pos + 1, &len );
      if( code < 0 ) break;		// bad escape sequence
      first = code; pos += len;
      }

    const bool is_range =
      ( pos + 2 < size && regex[pos+1] == '-' && regex[pos+2] != ']' );
    if( !is_range ) data[first] = true;
    else
      {
      pos += 2;				// skip 'first' and the '-'
      unsigned char last = regex[pos];
      if( last == '\\' )
        {
        int len;
        const int code = ISO_8859::escape( regex, pos + 1, &len );
        if( code < 0 ) break;		// bad escape sequence
        last = code; pos += len;
        }
      // a reversed range (first > last) adds nothing to the set
      for( int c = first; c <= last; ++c ) data[c] = true;
      }
    ++pos;
    }
  if( closed ) parsed_len_ = pos - i0;
  }


// Defined below; declared here because case_plus calls it recursively.
int match_regex( const Basic_buffer & buffer, Point & p,
                 const std::string & regex, unsigned i,
                 std::vector< std::string > & pieces, const bool icase );

/* Process the repetition operator '\+' of 'regex'.
   On entry 'i' is the index of the '+'. Every additional '\+' following
   the first one increments the minimum number of repetitions required
   (initially 0). The operand following the operator(s) may be a set
   ('\[...]'), '\w', '\W', an escape sequence, or an ordinary char.
   Find the shortest sequence of chars matching the operand, of at least
   the minimum length, which is followed by a match of the rest of
   'regex'. The rest of 'regex' is matched here by calling match_regex.
   If match, set 'p' after the text matched, insert the text matching the
   operand in 'pieces' before the pieces added by the rest of 'regex',
   set 'i' to 'regex.size()' (nothing remains to be matched), and
   return 0.
   Return 1 if mismatch, 2 if regex syntax error. In these cases 'p' and
   'pieces' are not restored here; match_regex restores them when it
   receives a nonzero result.
*/
inline int case_plus( const Basic_buffer & buffer, Point & p,
                      const std::string & regex, unsigned & i,
                      std::vector< std::string > & pieces, const bool icase )
  {
  if( ++i >= regex.size() ) return 2;	// '\+' without operand
  const int ps = pieces.size();		// where to insert our piece
  std::string piece;		// Find shortest matching sequence
  int counter = 0;		// Minimum number of matches required

  while( i + 1 < regex.size() && regex[i] == '\\' && regex[i+1] == '+' )
    { ++counter; i += 2; }
  // The additional '\+' may have consumed the rest of the regex. Without
  // this check the terminating null char of 'regex' would be taken as
  // the operand.
  if( i >= regex.size() ) return 2;

  unsigned char ch1 = regex[i];
  if( ch1 == '\\' )
    {
    if( i + 1 >= regex.size() ) return 2;	// trailing backslash
    const unsigned char chr = regex[i+1];
    if( chr == '[' )				// operand is a set
      {
      Set set( regex, i + 1 );		// Set of characters to match
      if( !set.parsed_len() ) return 2;
      i += set.parsed_len();		// 'i' is now at the closing ']'
      for( int j = 0; ; ++j )
        {
        if( j >= counter )	// minimum reached; try the rest of regex
          {
          const int ret = match_regex( buffer, p, regex, i + 1, pieces, icase );
          if( ret == 0 ) break; else if( ret != 1 ) return 2;
          }
        const int ch2 = buffer.pgetc( p );	// lengthen the sequence
        if( ch2 < 0 || !set.includes( ch2 ) ) return 1;
        piece += ch2;
        }
      pieces.insert( pieces.begin() + ps, piece ); i = regex.size();
      return 0;
      }
    if( chr == 'w' || chr == 'W' )	// operand is '\w' or '\W'
      {
      ++i;				// 'i' is now at the 'w' or 'W'
      for( int j = 0; ; ++j )
        {
        if( j >= counter )	// minimum reached; try the rest of regex
          {
          const int ret = match_regex( buffer, p, regex, i + 1, pieces, icase );
          if( ret == 0 ) break; else if( ret != 1 ) return 2;
          }
        const int ch2 = buffer.pgetc( p );	// lengthen the sequence
        // '\w' requires isalnum_ to be true, '\W' requires it to be false
        if( ch2 < 0 || ( chr == 'w' ) != ISO_8859::isalnum_( ch2 ) ) return 1;
        piece += ch2;
        }
      pieces.insert( pieces.begin() + ps, piece ); i = regex.size();
      return 0;
      }
    // operand is an escape sequence producing a single char
    int len, cht = ISO_8859::escape( regex, i + 1, &len );
    if( cht < 0 ) return 2;
    ch1 = cht; i += len;
    }
  for( int j = 0; ; ++j )		// operand is the single char 'ch1'
    {
    if( j >= counter )		// minimum reached; try the rest of regex
      {
      const int ret = match_regex( buffer, p, regex, i + 1, pieces, icase );
      if( ret == 0 ) break; else if( ret != 1 ) return 2;
      }
    const int ch2 = buffer.pgetc( p );		// lengthen the sequence
    if( ch2 < 0 ||
        ( icase && ISO_8859::tolower( ch1 ) != ISO_8859::tolower( ch2 ) ) ||
        ( !icase && ch1 != ch2 ) ) return 1;
    piece += ch2;
    }
  pieces.insert( pieces.begin() + ps, piece ); i = regex.size();
  return 0;
  }


/* Try to match 'regex', starting at index 'i', against the text of
   'buffer' beginning at 'p'.
   Return 0 if the text matches (also if 'i' is at or past the end of
   'regex'), 1 if mismatch, 2 if regex syntax error.
   If match, set 'p' after the substring found and return in 'pieces' the
   text matching every '\*', '\?', etc, from the regular expression.
   If no match, 'p' is restored to its initial value and 'pieces' is
   truncated to its initial size.
   The operators matching a shortest sequence ('\*', '\c', '\s' and '\+')
   match the rest of 'regex' with a recursive call, and on success set
   'i' to 'regex.size()' to terminate the main loop.
*/
int match_regex( const Basic_buffer & buffer, Point & p,
                 const std::string & regex, unsigned i,
                 std::vector< std::string > & pieces, const bool icase )
  {
  if( i >= regex.size() ) return 0;
  const Point old_p = p;			// saved to undo a failed match
  const unsigned pieces_old_size = pieces.size();
  int retval = 0;

  for( ; i < regex.size() && retval == 0; ++i )
    {
    unsigned char ch1 = regex[i];
    if( ch1 != '\\' )			// normal (not escaped) char
      {
      const int ch2 = buffer.pgetc( p );
      if( ch2 < 0 || ( !icase && ch1 != ch2 ) ||
          ( icase && ISO_8859::tolower( ch1 ) != ISO_8859::tolower( ch2 ) ) )
        retval = 1;
      continue;
      }

    if( ++i >= regex.size() ) { retval = 2; break; }	// trailing backslash
    ch1 = regex[i];
    switch( ch1 )	// ch1 is first (maybe only) of an escape sequence
      {
      // Anchors; they test the position 'p' without consuming any char.
      // '\^' requires 'p' to be at the position returned by buffer.bol
      // and not at eof; '\$' requires it to be at the one returned by
      // buffer.eol.
      case '^': if( p != buffer.bol( p ) || p == buffer.eof() ) retval = 1; break;
      case '$': if( p != buffer.eol( p ) ) retval = 1; break;
      // NOTE(review): pisbow/piseow presumably test for beginning/end of
      // word; confirm in buffer.h
      case '<': if( !buffer.pisbow( p ) ) retval = 1; break;
      case '>': if( !buffer.piseow( p ) ) retval = 1; break;
      // '\?' matches any single char and stores it as a piece.
      case '?': {
                const int ch2 = buffer.pgetc( p );
                if( ch2 < 0 ) { retval = 1; break; }
                pieces.push_back( std::string( 1, ch2 ) );
                } break;
      // '\*' matches any sequence of chars. The rest of regex is tried
      // first at every position, so the first success gives the shortest
      // sequence.
      case '*': {			// Find shortest matching sequence
                const int ps = pieces.size();
                std::string piece;
                while( true )
                  {
                  const int ret = match_regex( buffer, p, regex, i + 1, pieces, icase );
                  if( ret != 1 ) { retval = ret; break; }	// match or error
                  const int ch2 = buffer.pgetc( p );
                  if( ch2 < 0 ) { retval = 1; break; }
                  piece += ch2;
                  }
                if( retval ) break;
                // insert before the pieces added by the recursive call
                pieces.insert( pieces.begin() + ps, piece );
                i = regex.size();
                } break;
      // '\c' is like '\*', but a closing delimiter ( ')', ']' or '}' ) in
      // the text is a mismatch, and an opening delimiter is skipped
      // together with everything up to its matching delimiter, as found
      // by buffer.set_to_matching_delimiter.
      case 'c': {
                const Point begin = p;
                Point end = p;		// end of the text matched so far
                const int ps = pieces.size();
                while( true )
                  {
                  const int ret = match_regex( buffer, p, regex, i + 1, pieces, icase );
                  if( ret != 1 ) { retval = ret; break; }	// match or error
                  const int ch2 = buffer.pgetc( p );
                  if( ch2 < 0 || ch2 == ')' || ch2 == ']' || ch2 == '}' )
                    { retval = 1; break; }
                  if( ch2 == '(' || ch2 == '[' || ch2 == '{' )
                    {
                    p = end;		// go back to the opening delimiter
                    if( buffer.set_to_matching_delimiter( p, true, true ) <= 0 ||
                        !buffer.pnext( p ) ) { retval = 1; break; }
                    }
                  end = p;
                  }
                if( retval ) break;
                std::string piece;
                buffer.to_string( begin, end, piece );
                // insert before the pieces added by the recursive call
                pieces.insert( pieces.begin() + ps, piece );
                i = regex.size();
                } break;
      // '\[...]' matches one char included in the set and stores it as
      // a piece.
      case '[': {
                Set set( regex, i );	// Set of characters to match
                if( !set.parsed_len() ) { retval = 2; break; }
                const int ch2 = buffer.pgetc( p );
                if( ch2 < 0 || !set.includes( ch2 ) ) { retval = 1; break; }
                i += set.parsed_len() - 1;	// leave 'i' at the closing ']'
                pieces.push_back( std::string( 1, ch2 ) );
                } break;
      // '\+' is the repetition operator; case_plus also matches the rest
      // of regex and updates 'i'.
      case '+': retval = case_plus( buffer, p, regex, i, pieces, icase );
                break;
      // '\S' matches one or more chars for which ISO_8859::isspace is
      // true. 'p' is advanced only past the chars accepted.
      case 'S': {			// Find longest matching sequence
                std::string piece;
                while( true )
                  {
                  Point pn = p;
                  const int ch2 = buffer.pgetc( pn );
                  if( ch2 < 0 || !ISO_8859::isspace( ch2 ) ) break;
                  p = pn; piece += ch2;
                  }
                if( piece.size() ) pieces.push_back( piece );
                else retval = 1;
                } break;
      // '\s' matches zero or more chars for which ISO_8859::isspace is
      // true, followed by a match of the rest of regex.
      case 's': {			// Find shortest matching sequence
                const int ps = pieces.size();
                std::string piece;
                while( true )
                  {
                  const int ret = match_regex( buffer, p, regex, i + 1, pieces, icase );
                  if( ret != 1 ) { retval = ret; break; }	// match or error
                  const int ch2 = buffer.pgetc( p );
                  if( ch2 < 0 || !ISO_8859::isspace( ch2 ) ) { retval = 1; break; }
                  piece += ch2;
                  }
                if( retval == 0 )
                  { pieces.insert( pieces.begin() + ps, piece ); i = regex.size(); }
                } break;
      // '\T' matches one or more spaces, tabs or chars 0xA0, but only if
      // the char following them is a newline or buffer[p] is negative.
      // NOTE(review): a negative buffer[p] presumably means 'p' is at the
      // end of the buffer; confirm in buffer.h
      case 'T': {			// Find longest matching sequence
                std::string piece;
                while( true )
                  {
                  const int ch2 = buffer[p];
                  if( ch2 == ' ' || ch2 == '\t' || ch2 == 0xA0 )
                    { piece += ch2; buffer.pnext( p ); continue; }
                  if( ( ch2 < 0 || ch2 == '\n' ) && piece.size() )
                    pieces.push_back( piece );
                  else retval = 1;
                  break;
                  }
                } break;
      // '\w' matches one char for which ISO_8859::isalnum_ is true, '\W'
      // one char for which it is false. The char is stored as a piece.
      case 'W':
      case 'w': {
                const int ch2 = buffer.pgetc( p );
                if( ch2 >= 0 && ( ch1 == 'w' ) == ISO_8859::isalnum_( ch2 ) )
                  pieces.push_back( std::string( 1, ch2 ) );
                else retval = 1;
                } break;
      // Any other escape sequence is decoded by ISO_8859::escape into a
      // single char, which is then compared like a normal char.
      default: {
               int len, cht = ISO_8859::escape( regex, i, &len );
               if( cht < 0 ) { retval = 2; break; }
               ch1 = cht;
               const int ch2 = buffer.pgetc( p );
               if( ch2 < 0 ||
                   ( icase && ISO_8859::tolower( ch1 ) != ISO_8859::tolower( ch2 ) ) ||
                   ( !icase && ch1 != ch2 ) ) retval = 1;
               else i += len - 1;	// leave 'i' at the last char of the sequence
               }
      }
    }
  if( retval )		// mismatch or error; undo any partial match
    { p = old_p;
    if( pieces_old_size != pieces.size() ) pieces.resize( pieces_old_size ); }
  return ( retval );
  }

} // end namespace Regex


/* Search 'buffer' from 'p1' ('p1-1' if backward) for 'regex'.
   If found, set 'p1' at the first char of, and 'p2' after the end of, the
   substring found, and return in 'pieces' the text matching every '\*',
   '\?', etc, from the regular expression.
   Return 0 if match, 1 if no match, 2 if trouble.
   An empty 'regex' returns 0 without modifying any of the arguments.
*/
int Regex::find( const Basic_buffer & buffer, Point & p1, Point & p2,
                 const std::string & regex, std::vector< std::string > & pieces,
                 const bool icase, const bool backward )
  {
  if( regex.empty() ) return 0;
  pieces.clear();

  // If 'regex' begins with ordinary text (everything before the first
  // backslash), locate the candidates with a plain text search and run
  // the matcher only at the positions where that text is found.
  const unsigned first_escape = regex.find('\\');
  if( first_escape != 0 )
    {
    const std::string literal( regex, 0, first_escape );
    const bool all_literal = ( literal.size() == regex.size() );

    if( backward )
      {
      for( ;; )
        {
        if( !buffer.rfind_text( p1, literal, icase ) ) return 1;
        p2 = p1;
        if( all_literal )		// plain text; no matcher needed
          { buffer.pseek( p2, literal.size() ); return 0; }
        const int result = match_regex( buffer, p2, regex, 0, pieces, icase );
        if( result != 1 ) return result;
        if( !buffer.pprev( p1 ) ) return 1;
        }
      }
    for( ;; )
      {
      if( !buffer.find_text( p1, literal, icase ) ) return 1;
      // move 'p1' back to the first char of the text just found
      p2 = p1; buffer.pseek( p1, -literal.size() );
      if( all_literal ) return 0;	// plain text; no matcher needed
      p2 = p1;
      const int result = match_regex( buffer, p2, regex, 0, pieces, icase );
      if( result != 1 ) return result;
      if( !buffer.pnext( p1 ) || p1 == buffer.eof() ) return 1;
      }
    }

  // 'regex' begins with a backslash; try the matcher at every position.
  if( backward )
    {
    for( ;; )
      {
      if( !buffer.pprev( p1 ) ) return 1;
      p2 = p1;
      const int result = match_regex( buffer, p2, regex, 0, pieces, icase );
      if( result != 1 ) return result;
      }
    }
  for( ;; )
    {
    p2 = p1;
    const int result = match_regex( buffer, p2, regex, 0, pieces, icase );
    if( result != 1 ) return result;
    if( !buffer.pnext( p1 ) || p1 == buffer.eof() ) return 1;
    }
  }


/* Delete block at [p1,p2) and replace it with the text produced from
   'regex' and 'pieces'.
   In 'regex', '\0' to '\9' stand for the corresponding element of
   'pieces'. '\&' stands for the text being replaced; '\c', '\l' and '\u'
   stand for that same text after applying to it capitalize,
   to_lowercase or to_uppercase respectively. Any other backslash
   sequence is decoded by ISO_8859::escape. The rest of chars are copied
   verbatim.
   If there is no error, return true and set 'p2' after the newly
   inserted text.
   Return false, leaving 'buffer' unmodified, if 'regex' ends with a
   backslash, refers to a piece not present in 'pieces', or contains an
   invalid escape sequence. Also return false if buffer.replace fails.
*/
bool Regex::replace( Buffer & buffer, const Point & p1, Point & p2,
                     const std::string & regex,
                     const std::vector< std::string > & pieces )
  {
  const Basic_buffer matched( buffer, p1, p2 );	// text being replaced
  Basic_buffer tmp;				// replacement being built
  Point p = tmp.bof();

  for( unsigned i = 0; i < regex.size(); ++i )
    {
    unsigned char ch = regex[i];
    if( ch != '\\' ) { tmp.pputc( p, ch ); continue; }	// normal char

    if( ++i >= regex.size() ) return false;	// trailing backslash
    ch = regex[i];		// first (maybe only) char after the backslash
    if( std::isdigit( ch ) )			// reference to a piece
      {
      const unsigned n = ch - '0';
      if( n >= pieces.size() ) return false;
      const std::string & piece = pieces[n];
      for( unsigned j = 0; j < piece.size(); ++j ) tmp.pputc( p, piece[j] );
      }
    else if( ch == '&' || ch == 'c' || ch == 'l' || ch == 'u' )
      {
      if( matched.empty() ) continue;		// nothing to insert
      const Point start = p;
      tmp.pputb( p, matched, matched.bof(), matched.eof() );
      switch( ch )
        {
        case 'c': tmp.capitalize( start, p ); break;
        case 'l': tmp.to_lowercase( start, p ); break;
        case 'u': tmp.to_uppercase( start, p ); break;
        default: break;			// '&' inserts the text unchanged
        }
      }
    else					// escape sequence
      {
      int len;
      const int code = ISO_8859::escape( regex, i, &len );
      if( code < 0 ) return false;
      tmp.pputc( p, code ); i += len - 1;
      }
    }
  if( !buffer.replace( p1, p2, tmp, tmp.bof(), tmp.eof() ) ) return false;
  return true;
  }


/* Return true if 'regex' from 'i1' matches 'name' from 'i2'.
   'regex' is a file name pattern where:
     '*'      matches any sequence of characters, including none
     '?'      matches any single character
     '[...]'  matches any single character included in the set
              (not included if the set begins with '^')
     '\x'     matches the character produced by the escape sequence
   and any other character matches itself.
   The whole rest of 'name' must be matched by the rest of 'regex'.
   A malformed set or escape sequence makes the match fail.
*/
bool Regex::match_filename( const std::string & regex, const std::string & name,
                            unsigned i1, unsigned i2 )
  {
  for( ; i1 < regex.size(); ++i1 )
    {
    unsigned char ch1 = regex[i1];
    if( ch1 == '*' )
      {
      if( i1 + 1 >= regex.size() ) return true;		// tail match
      // Try to match the rest of the pattern at every remaining position
      // of name. The end of name (i2 == name.size()) is tried too, so
      // that '*' can match nothing when only asterisks follow it, making
      // "a**" match "a" just as "a*" does.
      for( ; i2 <= name.size(); ++i2 )
        if( match_filename( regex, name, i1 + 1, i2 ) ) return true;
      return false;
      }
    // every other pattern element consumes exactly one char of name
    if( i2 >= name.size() ) return false;
    switch( ch1 )
      {
      case '?': ++i2; break;
      case '[': {
                Set set( regex, i1 );		// Set of characters to match
                // includes() is false for every char if the set is malformed
                if( !set.includes( name[i2++] ) ) return false;
                i1 += set.parsed_len() - 1;	// leave i1 at the closing ']'
                } break;
      default: if( ch1 == '\\' )
                 {
                 int len, cht = ISO_8859::escape( regex, i1 + 1, &len );
                 if( cht < 0 ) return false;
                 ch1 = cht; i1 += len;
                 }
               if( ch1 != (unsigned char)name[i2++] ) return false;
               break;
      }
    }
  return ( i2 >= name.size() );		// no unmatched chars may remain
  }
