#!/usr/local/bin/perl
##
## Program: thaiidx.pl
## Date: 2001-12-17 mo
## Author: chakka
## To: Encoding and decoding Thai words for makeindex program. 
## Use: together with LaTeX and makeindex as below
##  latex thaitest
##  rm thaitest.id
##  mv thaitest.idx thaitest.id
##  perl thaiidx.pl -e < thaitest.id > thaitest.idx
##  makeindex thaitest
##  rm thaitest.in
##  mv thaitest.ind thaitest.in
##  perl thaiidx.pl -d < thaitest.in > thaitest.ind
##  latex thaitest
##

# Separator byte (DEL, 0x7f) placed between the three fields of an
# encoded Thai word and after the last one.
$escch = "\x7f";

# Character-state codes used while scanning a line:
# $ENG for non-Thai characters, $THA for Thai ones.
($ENG, $THA) = (0, 1);

# Set to a true value to make printchstate emit {E}/{T} state markers.
$DEBUGCHSTATE = 0;

# Debug aid: when $DEBUGCHSTATE is true, print a marker for the current
# scanner state held in the global $chstate -- '{E}' when it equals $ENG,
# '{T}' for anything else.  Prints nothing when debugging is off.
# Takes no arguments; callers use it in void context.
sub printchstate {
  return unless $DEBUGCHSTATE;
  print(($chstate == $ENG) ? '{E}' : '{T}');
}

#
# Main program: filter STDIN to STDOUT, line by line.
#
#   -e   encode every run of Thai bytes so that makeindex can sort it
#   -d   decode such runs back to the original bytes
#
# A byte counts as Thai when it lies in 0x80..0xef.  Inside a Thai run,
# bytes 0xe0..0xe4 are handled as leading vowels and bytes 0xe7..0xee as
# marks that are taken out of the sort text.
# NOTE(review): these ranges look like TIS-620 -- confirm the input encoding.
#
# Encoded form of one Thai run (ESC is $escch):
#
#   <sort text> ESC <aux list> ESC <swap list> ESC
#
#   sort text : the run without its marks, each leading vowel moved
#               behind the Thai byte that follows it
#   aux list  : "PP:W," per removed mark; PP is the index (in the sort
#               text) of the character the mark follows, W is the mark's
#               byte value minus 0xe7
#   swap list : "PP," per moved vowel; PP is the index (in the sort
#               text) where the swapped pair starts
#
# Example (bytes in hex, ESC shown as '!'):
#
#   e0 a1 e8 d2    -->    a1 e0 d2 ! 01:1, ! 00, !
#

# Strictures apply from here to the end of the file only, so the legacy
# globals declared earlier in the file keep compiling unchanged.
use strict;
use warnings;

# Package globals owned by the rest of the file: the separator byte, the
# two state codes, and the state variable read by printchstate.
our ($escch, $ENG, $THA, $chstate);

# Return 1 when $ch is a single byte in the Thai range 0x80..0xef, else 0.
# The empty string and undef are not Thai.
sub _is_thai_byte {
  my ($ch) = @_;
  return 0 unless defined $ch && length $ch;
  my $code = ord $ch;
  return ($code >= 0x80 && $code <= 0xef) ? 1 : 0;
}

# Join the three fields of an encoded Thai run, each followed by $escch.
sub _pack_thai_word {
  my ($text, $aux, $swap) = @_;
  return join $escch, $text, $aux, $swap, '';
}

# Encode one line (without its newline) and return the encoded line.
# Non-Thai characters are copied unchanged; every Thai run is replaced by
# its packed form.
sub _encode_thai_line {
  my ($line) = @_;
  my $len     = length $line;
  my $encoded = '';
  my $in_thai = 0;
  my ($text, $aux, $swap) = ('', '', '');
  my $i = 0;

  while ($i < $len) {
    my $ch = substr $line, $i, 1;

    if (!_is_thai_byte($ch)) {
      # A non-Thai character closes the current run, if any.
      if ($in_thai) {
        $encoded .= _pack_thai_word($text, $aux, $swap);
        $in_thai = 0;
      }
      $encoded .= $ch;
      $i++;
      next;
    }

    if (!$in_thai) {
      ($text, $aux, $swap) = ('', '', '');
      $in_thai = 1;
    }

    my $code = ord $ch;
    if ($code >= 0xe0 && $code <= 0xe4) {
      # Leading vowel: swap it with the next byte, but only when that
      # byte exists and is Thai.  Swapping with nothing (end of line)
      # made the swap list point past the text; swapping with an ASCII
      # byte such as '}' pulled that byte in front of the vowel.
      my $next = $i + 1 < $len ? substr($line, $i + 1, 1) : '';
      if (_is_thai_byte($next)) {
        $swap .= sprintf '%02d,', length $text;
        $text .= $next . $ch;
        $i += 2;
      } else {
        $text .= $ch;
        $i++;
      }
    } elsif ($code >= 0xe7 && $code <= 0xee) {
      # Mark: keep it out of the sort text and remember which character
      # it follows (-1 when the run starts with a mark).
      $aux .= sprintf '%02d:%d,', length($text) - 1, $code - 0xe7;
      $i++;
    } else {
      $text .= $ch;
      $i++;
    }
  }

  $encoded .= _pack_thai_word($text, $aux, $swap) if $in_thai;
  return $encoded;
}

# Rebuild the original bytes of one Thai run from its three fields.
#   $text      sort text
#   $auxfield  aux list  ("PP:W,PP:W,...")
#   $swapfield swap list ("PP,PP,...")
# List items that do not have the expected shape are ignored.
sub _restore_thai_word {
  my ($text, $auxfield, $swapfield) = @_;
  my $len = length $text;

  # Step 1: move every swapped leading vowel back in front.
  my %swap_at;
  for my $item (split /,/, $swapfield) {
    $swap_at{ $item + 0 } = 1 if $item =~ /\A[0-9]+\z/;
  }

  my $unswapped = '';
  my $i = 0;
  while ($i < $len) {
    # A swap needs a partner inside the text; without this bound the
    # separator byte after the text was swapped in.
    if ($swap_at{$i} && $i + 1 < $len) {
      $unswapped .= substr($text, $i + 1, 1) . substr($text, $i, 1);
      $i += 2;
    } else {
      $unswapped .= substr($text, $i, 1);
      $i++;
    }
  }

  # Step 2: put the marks back behind the characters they followed.
  my @marks;    # [ position, mark byte ] in the order they were recorded
  for my $item (split /,/, $auxfield) {
    next unless $item =~ /\A(-?[0-9]+):([0-9])\z/;
    push @marks, [ $1 + 0, chr(0xe7 + $2) ];
  }

  my $word      = '';
  my $next_mark = 0;
  # Run one index past the end so marks on the last character are emitted.
  for my $idx (0 .. $len) {
    # 'while', not 'if': several marks may follow the same character.
    while ($next_mark < @marks && $marks[$next_mark][0] < $idx) {
      $word .= $marks[$next_mark][1];
      $next_mark++;
    }
    $word .= substr($unswapped, $idx, 1) if $idx < $len;
  }

  return $word;
}

# Decode one line (without its newline).
# Returns the list ($decoded, $complete).  $complete is 1 when every Thai
# run on the line had all three separators, and 0 when the line ended
# inside a run; the unfinished run is then copied through unchanged
# instead of being dropped.
sub _decode_thai_line {
  my ($line) = @_;
  my $len     = length $line;
  my $decoded = '';
  my $pos     = 0;

  while ($pos < $len) {
    my $ch = substr $line, $pos, 1;

    if (!_is_thai_byte($ch)) {
      $decoded .= $ch;
      $pos++;
      next;
    }

    # A Thai byte starts an encoded run: locate its three separators.
    my @esc_at;
    my $from = $pos;
    while (@esc_at < 3) {
      my $hit = index $line, $escch, $from;
      last if $hit < 0;
      push @esc_at, $hit;
      $from = $hit + 1;
    }

    if (@esc_at < 3) {
      return ($decoded . substr($line, $pos), 0);
    }

    my ($end_text, $end_aux, $end_swap) = @esc_at;
    $decoded .= _restore_thai_word(
      substr($line, $pos,          $end_text - $pos),
      substr($line, $end_text + 1, $end_aux - $end_text - 1),
      substr($line, $end_aux + 1,  $end_swap - $end_aux - 1)
    );
    $pos = $end_swap + 1;
  }

  return ($decoded, 1);
}

my $mode = defined $ARGV[0] ? $ARGV[0] : '';

if ($mode eq '-e') { # if progoptions
  #
  # Encoding part with option -e
  #
  while (my $line = <STDIN>) { # while encode-file
    chomp $line;
    my $encoded = _encode_thai_line($line);
    print "$encoded\n";
  } # endwhile encode-file

} elsif ($mode eq '-d') { # elsif progoptions
  #
  # Decoding part with option -d
  #
  while (my $line = <STDIN>) { # while decode-file
    chomp $line;

    # Debug trace (silent unless $DEBUGCHSTATE is set): the state left by
    # the previous line, then the state implied by this line's first byte.
    printchstate();
    $chstate = _is_thai_byte(substr($line, 0, 1)) ? $THA : $ENG;
    printchstate();

    my ($decoded, $complete) = _decode_thai_line($line);

    # A line that ended inside a Thai run leaves the scanner in $THA.
    $chstate = $complete ? $ENG : $THA;

    print $decoded;
    printchstate();
    print "\n";
  } # endwhile decode-file

} else { # else progoptions
  # No usable option: say so instead of silently producing no output.
  # The exit status is deliberately left as it was.
  warn "usage: $0 -e|-d < input > output\n";
} # endif progoptions

##
## Notes:
##

##
## File: thaiidx.pl
##
