#!/bin/bash

# Description : A small tool to search the CTAN file list and download packages
# Author      : Anh K. Huynh <xkyanh@gmail.com>
# Version     : $Id: ctan 82 2009-09-30 01:51:59Z pi $
# License     : LPPL (any version)
# Homepage    : CTAN:/support/ctan.tools
# Document    : self documented, try `$0 about`
# Requirements: bash, lynx, wget, tar, bzip2,
#               sed, sort, awk, (e)grep, cut
# TODO        : fancy get options (code/interface)

# Also look for tools in the user's private bin directory.
export PATH="$PATH:$HOME/bin"

# Generic entry point; ctan_mirror_select follows its redirects.
CTAN_MIRROR=http://mirror.ctan.org/
# Defaults for the three URLs below; ctan_mirror_select overwrites them
# once a mirror has been detected.
CTAN_ARCHIVE=http://ctan.org/tex-archive
CTAN_CONTRIB="$CTAN_ARCHIVE/macros/latex/contrib/"
CTAN_FILES="$CTAN_ARCHIVE/FILES.byname"
# Where the pre-built cache archives (cache.tbz, cache.tlzma) are fetched from.
VIETTUG_CACHEDIR=http://viettug.org/ctan.tools/
# Directory holding the local database (files.txt, packages.txt).
DESTDIR="$HOME/.ctan/"
# Mandatory external programs verified by ctan_check_all. `cut` is included
# because ctan_mirror_select depends on it and the header lists it as required.
TOOLS="lynx wget tar bzip2 sed sort awk grep cut"
# Set to 1 by ctan_check_all when the optional lzma program is available.
HAVE_LZMA=0

# functions

# Print a message on stdout, prefixed with ":: ".
# Arguments: any number of words; they are joined into one line.
# Backslash escapes inside the text (such as \t) are expanded.
msg()
{
  printf '%b\n' ":: $*"
}

# Check that an executable tool is available.
# Arguments: $1 - name of the tool, looked up with the `type` builtin
# Globals:   ERROR is set to 1 when the tool cannot be found
# Outputs:   "not found '<tool>'" (via msg) when the tool is missing
# Returns:   0 if the tool was found, 1 otherwise

ctan_check()
{
  # The name is quoted so that an empty argument is really looked up (and
  # fails) instead of degenerating into a bare `type`, which succeeds.
  if ! type "$1" >/dev/null 2>&1; then
    ERROR=1
    msg "not found '$1'"
    return 1
  fi
  return 0
}

# Check for all tools.
# Globals:   TOOLS (read)  - space separated list of mandatory programs
#            ERROR (written) - reset to 0, then set by ctan_check on a miss
#            HAVE_LZMA (written) - set to 1 when lzma is available
# Exits the script with status 1 if any mandatory program is missing;
# otherwise returns 0.

ctan_check_all()
{
  local name

  msg "checking dependencies..."
  ERROR=0
  for name in $TOOLS; do
    ctan_check "$name"
  done

  if [[ "$ERROR" == 1 ]]; then
    msg "error occured. exit 1"
    exit 1
  fi
  msg "tools found: $TOOLS"

  # lzma is optional: its absence only leaves HAVE_LZMA untouched.
  if ctan_check lzma; then
    HAVE_LZMA=1
  fi

  return 0
}

# Mirror detection: follow HTTP redirections by hand, starting at $1, and
# use the last URL reached as the CTAN archive.
# See also http://viettug.org/blogs/show/310
# Arguments: $1 - URL to start from
# Globals:   CTAN_ARCHIVE, CTAN_CONTRIB and CTAN_FILES are overwritten
# Outputs:   progress messages via msg

ctan_mirror_select()
{
  local src="$1"
  local location
  local hops=0
  local max_hops=20   # same ceiling wget itself applies by default

  msg "Hello, this is ctan_mirror_select"
  msg "found $src"

  # wget is told not to follow the redirection (--max-redirect=0); the
  # "Location:" line is picked out of what it writes on stderr, while the
  # document itself (stdout) is thrown away. The loop ends as soon as no
  # such line shows up.
  while location="$(wget --max-redirect=0 "$src" -O - 2>&1 1>/dev/null \
                      | grep 'Location:' 2>/dev/null)"; do
    # Guard against redirection cycles, which would otherwise spin forever.
    if [ "$hops" -ge "$max_hops" ]; then
      msg "too many redirections, keeping $src"
      break
    fi
    hops=$((hops + 1))
    # Unquoted on purpose: squeezes whitespace so the URL is the 2nd field.
    src="$(echo $location | cut -d' ' -f2)"
    msg "found $src"
  done

  CTAN_ARCHIVE="$src"
  CTAN_CONTRIB="$CTAN_ARCHIVE/macros/latex/contrib/"
  CTAN_FILES="$CTAN_ARCHIVE/FILES.byname"
  msg "mirror detected: $CTAN_ARCHIVE"
}

# Show information after update: the number of lines in packages.txt and
# files.txt of the local database.
# Globals: DESTDIR (read)
# Outputs: two lines via msg

ctan_info()
{
  # Paths are quoted so that a DESTDIR containing whitespace still works.
  msg "* $(wc -l "$DESTDIR/packages.txt" | awk '{print $1}') package(s) found"
  msg "* $(wc -l "$DESTDIR/files.txt" | awk '{print $1}') file(s) found"
}

# Update the local database kept in $DESTDIR.
#
# Arguments (all optional):
#   --first-time       must come first; prints a notice that ~/.ctan is
#                      going to be created
#   (nothing) or --use-cache
#                      download the pre-built cache archive from
#                      $VIETTUG_CACHEDIR and unpack it into $DESTDIR
#   anything else      (for instance --direct) rebuild the database from
#                      FILES.byname on a CTAN mirror, after confirmation
#   --keep-only-cache  like a direct update, but the *.txt files are removed
#                      afterwards so that only the archives remain
# Globals: DESTDIR, VIETTUG_CACHEDIR, HAVE_LZMA, CTAN_MIRROR, CTAN_FILES
# Returns: 0 on success or when the user cancels, 1 when the cache cannot be
#          downloaded or $DESTDIR cannot be entered; exits the script with
#          status 1 when the download from CTAN fails.

ctan_update()
{
  local cache_name cache_url cache_file reply pkg_filter

  if [ "x$1" = "x--first-time" ]; then
    shift
    msg "This maybe the first time you use the tool."
    msg "The directory ~/.ctan will be created"
  fi

  mkdir -pv "$DESTDIR"

  # using the cache file found in $VIETTUG_CACHEDIR

  if [[ ( "x$1" == "x" ) || ( "x$1" == "x--use-cache" ) ]]; then
    msg "update by using cache file..."

    if [ "$HAVE_LZMA" = 1 ]; then
      cache_name=cache.tlzma
    else
      cache_name=cache.tbz
    fi
    # The progress messages are built from the very same values that are
    # handed to wget, so they always show what is really downloaded.
    cache_url="$VIETTUG_CACHEDIR/$cache_name"
    cache_file="$DESTDIR/$cache_name"

    msg "getting $cache_url..."
    if ! wget "$cache_url" -O "$cache_file"; then
      msg "failed to download $cache_url"
      rm -fv "$cache_file"
      return 1
    fi

    msg "uncompressing $cache_file..."
    if [ "$HAVE_LZMA" = 1 ]; then
      lzma --decompress < "$cache_file" | tar xvf - -C "$DESTDIR"
    else
      tar xfjv "$cache_file" -C "$DESTDIR"
    fi

    msg "now remove some big files"
    rm -fv "$DESTDIR"/{dump_files.txt,cache.tbz,cache.tlzma}

    ctan_info

    return 0
  fi

  # updating by downloading database from CTAN
  # previous version uses a search result returned by CTAN
  # since 1.5.0, Jim Hefferon suggest to use FILES.byname
  # see also (ctan_joiner)

  msg "update will get about 8MB from CTAN and may take very long time."
  read -r -n1 -p":: Are you sure you wanna do this (y/N)? " reply
  echo ""
  if [ "x$reply" != "xy" ]; then
    msg "Update was cancelled."
    return 0
  fi

  ctan_mirror_select "$CTAN_MIRROR"

  msg "searching all files. please wait as this is a heavy task"
  if ! lynx -dump "$CTAN_FILES" > "$DESTDIR/tmp0.txt"; then
    msg "failed to download from CTAN. exit 1"
    rm -fv "$DESTDIR/tmp0.txt"
    exit 1
  fi

  msg "getting list of LaTeX packages..."
  # files.txt: the joined list with every space removed.
  ctan_joiner "$DESTDIR/tmp0.txt" \
    | sed -e 's# ##g' \
    > "$DESTDIR/files.txt"
  # packages.txt: entries below macros/latex/contrib/ (but not info/) that
  # are .zip/.sty/.dtx files, reduced to their directory and made unique on
  # the third '|'-separated field.
  pkg_filter='|macros/latex/contrib/'
  grep "$pkg_filter" "$DESTDIR/files.txt" \
    | grep -v 'info/' \
    | grep -E '\.(zip|sty|dtx)' \
    | sed -e 's#/[^/]*$##g' \
    | sed -e "s#$pkg_filter#|#g" \
    | sort -t'|' -uk3 \
    > "$DESTDIR/packages.txt"
  msg "removing temporary file..."
  rm -fv "$DESTDIR/tmp0.txt"

  msg "compressing the results..."
  cd "$DESTDIR" || return 1

  # Archives are written under a dot-name first and renamed when complete.
  msg "creating tbz archive..."
  tar -cvj -f .cache.tbz files.txt packages.txt
  mv .cache.tbz cache.tbz

  if [ "$HAVE_LZMA" = 1 ]; then
    msg "creating lzma archive..."
    tar -cv --use-compress-program lzma -f .cache.tlzma files.txt packages.txt
    mv .cache.tlzma cache.tlzma
  fi

  ctan_info

  if [[ "x$1" == "x--keep-only-cache" ]]; then
    msg "removing all temporary files. Only cache is kept"
    rm -fv "$DESTDIR"/*.txt
  fi
}

# Search the local database.
# Arguments: [-file] <grep arguments...>
#            With a leading -file the search runs on files.txt, otherwise on
#            packages.txt. Everything else is handed to grep unchanged, so
#            grep options may precede the pattern.
# Globals:   DESTDIR (read); the database is fetched first if DESTDIR is
#            missing
# Outputs:   matching entries, reformatted as "f1 | f2<TAB>| f3"; entries
#            containing "obsolete" are dropped
# Returns:   0 when at least one entry was printed, 1 otherwise

ctan_search()
{
  local db

  [ -d "$DESTDIR/" ] || ctan_update --first-time --use-cache
  if [ "x$1" = "x-file" ]; then
    shift
    db="$DESTDIR/files.txt"
  else # search package name
    db="$DESTDIR/packages.txt"
  fi

  # The pipeline's status is that of its last command, so awk itself has to
  # report whether anything came through: it exits 1 when it saw no line.
  if ! grep "$@" "$db" \
      | grep -v obsolete \
      | awk -F '|' '{printf("%s | %s\t| %s\n",$1, $2, $3)}
                    END { exit (NR == 0) }'; then
    msg "no package/file found"
    return 1
  fi
}

# Download files.
# Arguments: [-file] <grep arguments...>  (same as ctan_search)
#            Without -file every match is a package and "<match>.zip" is
#            fetched from $CTAN_CONTRIB; with -file every match is a path
#            fetched from $CTAN_ARCHIVE.
# Globals:   DESTDIR, CTAN_MIRROR (read); RETFOUND, RETDOWN (written).
#            The two work files $DESTDIR/tmp2.txt and $DESTDIR/tmp.txt are
#            left behind, as before.
# Outputs:   the matches, a confirmation prompt, and progress messages
# Returns:   0 when nothing matched or the user declined; otherwise the
#            status of the last download.

ctan_get()
{
  local kind download pkg

  RETFOUND="$DESTDIR/tmp2.txt"
  RETDOWN="$DESTDIR/tmp.txt"

  # The redirection below is opened before ctan_search gets a chance to
  # fetch the database, so the directory has to exist beforehand.
  [ -d "$DESTDIR/" ] || ctan_update --first-time --use-cache

  # An empty result file counts as "nothing found" as well, whatever the
  # exit status of the search.
  if ! ctan_search "$@" > "$RETFOUND" || [ ! -s "$RETFOUND" ]; then
    msg "no package matches '$*'"
    return 0
  fi

  if [ "x$1" = "x-file" ]; then
    shift
    kind='file'
  else
    kind='pkg'
  fi

  cat "$RETFOUND"
  # Third column of the search output: the path or package name.
  awk -F'|' '{print $3}' "$RETFOUND" > "$RETDOWN"

  if [ "$kind" = "file" ]; then
    read -r -n 1 -p ":: Are you sure you want to download (y/N)? " download
  else
    read -r -n 1 -p "Are you sure you want to download (y/N)? " download
  fi
  echo ''
  [[ "x$download" == "xy" ]] || return 0

  ctan_mirror_select "$CTAN_MIRROR"

  # The list is read on descriptor 3 so that stdin stays untouched for the
  # commands inside the loop; read also trims the blanks around each name.
  while read -r pkg <&3; do
    [ -n "$pkg" ] || continue
    if [ "$kind" = "file" ]; then
      msg "downloading $pkg..."
      wget -nv -c "$CTAN_ARCHIVE/$pkg"
    else
      msg "downloading ${pkg}.zip..."
      wget -nv -c "$CTAN_CONTRIB/${pkg}.zip"
    fi
  done 3< "$RETDOWN"
}

#
# awk script to join lines in FILES.byname
# Some servers wrap long lines, hence we need this script: a line that
# starts with a digit opens a new record, and every other line is glued
# (without separator) to the end of the current record.
# Arguments: files to read; standard input when none is given
# Outputs:   one line per record
#
ctan_joiner()
{
  awk '
    # A leading digit starts a new record: flush the one collected so far.
    /^[0-9]+/ {
      if (rec != "") print rec
      rec = $0
      next
    }
    # Anything else continues the current record.
    {
      rec = (rec $0)
    }
    # The last record is complete at this point; it only has to be printed.
    # (Appending the last input line again here would duplicate it.)
    END {
      if (rec != "") print rec
    }' \
    "$@"
}

# Print the complete description of the tool: purpose, requirements,
# version history, usage (through ctan_usage), to-do list, credits,
# license and bug contact. Every line goes through msg.
# Globals: CTAN_CONTRIB, CTAN_ARCHIVE, TOOLS (read)
ctan_about()
{
  local line

  # Text shown before the usage section.
  local -a intro=(
    "ABOUT"
    "\tThis jifty tool helps you to seach/download LaTeX packages listed at"
    "\t\t$CTAN_CONTRIB"
    "\tor search/download any files in $CTAN_ARCHIVE (or its mirror)."
    ""
    "\tThe tool requires following programs"
    "\t\t$TOOLS"
    ""
    "\tThis tool was written by Anh K. Huynh <xkyanh@gmail.com>."
    "VERSION"
    "\t1.0.0 2008/05/11: first version"
    "\t1.1.0 2008/05/15: search files (use cache file)"
    "\t1.2.0 2008/05/16: smaller cache. Thanks to Karl Berry <karl@freefriends.org>"
    "\t1.3.0 2008/05/23: everything is cached. Faster. New options. Thanks to texer."
    "\t1.4.0 2009/09/12: auto. select ctan mirror (suggested by Jim Hefferon)"
    "\t1.5.0 2009/09/13: load database from http://mirror.ctan.org/FILES.byname"
  )
  # Text shown after the usage section.
  local -a outro=(
    "TODO"
    "\t* package information supported"
    "\t* search by package description,..."
    "\t* package build script"
    "THANKS to"
    "\t* Karl Berry <karl@freefriends.org>"
    "\t* Nguyen Van Hanh <hanhnguyenvan@gmail.com>"
    "\t* Jim Hefferon <ftpmaint@alan.smcvt.edu>"
    "LICENSE"
    "\tThis tool is published under LPPL."
    "BUGS"
    "\tPlease report to kyanh <xkyanh@gmail.com>."
  )

  for line in "${intro[@]}"; do
    msg "$line"
  done
  ctan_usage
  for line in "${outro[@]}"; do
    msg "$line"
  done
}

# Print the command summary and a few examples, one msg call per line.
# Globals: VIETTUG_CACHEDIR (read)
ctan_usage()
{
  local line
  local -a help_text=(
    "USAGE"
    "\tctan about           : show all information about this tool"
    "\tctan usage           : show usage"
    "\tctan version         : show script version"
    ""
    "\tctan update          : update using $VIETTUG_CACHEDIR/cache.tbz (~1MB)"
    "\tctan update --direct : update directly from CTAN. You are going to download 8MB"
    "\tctan grep <string>   : search packages match <string>. grep ability is supported"
    "\tctan get  <string>   : download packages match <string> to working directory"
    ""
    "\tIf you want to search files:"
    ""
    "\tctan fgrep <string>  : search files."
    "\tctan fget  <string>  : download files match <string> to working directory"
    "\tctan grep -file <string>"
    "\tctan get  -file <string>"
    "EXAMPLES"
    "\tctan grep theorem    # search packages match 'theorem'"
    "\tctan grep ^n         # search packages srated by 'n'"
    "\tctan get  ^n         # download packages started by 'n'"
    ""
    "\tctan get -file contrib/ntheorem.zip"
    "\tctan fget      contrib/ntheorem.zip"
    "\tctan fgrep     ntheorem | grep zip"
  )

  for line in "${help_text[@]}"; do
    msg "$line"
  done
}

# Make sure a search string was given.
# Arguments: [-file] <string> - a leading -file is skipped before checking
# Returns 0 when the (remaining) first argument is non-empty; otherwise
# prints "missing parameter" via msg and exits the script with status 1.
ctan_arg()
{
  case "$1" in
    -file) shift ;;
  esac

  [ -n "$1" ] && return 0

  msg "missing parameter"
  exit 1
}

# Check for requirements, except for the commands that only print
# information (and for "check", which runs the verification itself in the
# main program below). The command is compared as a whole word, so that an
# abbreviation or a regular expression character cannot skip the check.

case "x$1" in
  x|xusage|xhelp|xabout|xdoc|xversion|xcheck)
    ;;
  *)
    ctan_check_all
    ;;
esac

# main program
#
# Dispatch on the first command line argument. The remaining arguments are
# forwarded as "$@" so that each one reaches its handler exactly as typed:
# no second round of word splitting, and no expansion of pattern characters
# such as * against the files of the working directory.

case "x$1" in
"xupdate")
  shift
  ctan_update "$@"
  ;;
"xfgrep")
  shift
  ctan_arg "$@"
  ctan_search -file "$@"
  ;;
"xgrep")
  shift
  ctan_arg "$@"
  ctan_search "$@"
  ;;
"xget")
  shift
  ctan_arg "$@"
  ctan_get "$@"
  ;;
"xinfo")
  shift
  ctan_info
  ;;
"xfget")
  shift
  ctan_arg "$@"
  ctan_get -file "$@"
  ;;
"x")  ctan_usage;;
"xusage") ctan_usage;;
"xhelp") ctan_usage;;
"xdoc") ctan_about;;
"xabout") ctan_about;;
"xversion") msg '$Id: ctan 82 2009-09-30 01:51:59Z pi $';;
"xcheck") ctan_check_all;;
*)
  msg "wrong parameter. please try 'ctan usage'"
  ;;
esac
