#!/bin/sh
#
# German tagging pipeline built around the tree-tagger binary:
#   tokenizer -> external lexicon lookup -> tagger -> tag filter
#
# All command-line arguments are handed to the tokenizer unchanged, one
# argument per word given on the command line (so names containing spaces
# or glob characters survive intact).
#
# Environment:
#   UPLUGSHARE  base directory; the tools are expected below
#               $UPLUGSHARE/ext/tree-tagger/{bin,cmd,lib}.

# Set these paths appropriately
ROOT="${UPLUGSHARE}/ext/tree-tagger"
BIN="${ROOT}/bin"
CMD="${ROOT}/cmd"
LIB="${ROOT}/lib"

# Flags for the tagger. Kept as one space-separated string because POSIX sh
# has no arrays; it is expanded unquoted below so that it splits into
# separate options.
OPTIONS="-token -lemma -sgml -pt-with-lemma"

TOKENIZER="${CMD}/tokenize.pl"
TAGGER="${BIN}/tree-tagger"
ABBR_LIST="${LIB}/german-abbreviations"
PARFILE="${LIB}/german.par"
LEXFILE="${LIB}/german-lexicon.txt"
FILTER="${CMD}/filter-german-tags"

# "$@" (not $*) keeps every argument as a single word; all paths are quoted
# so an installation directory containing whitespace still works.
"$TOKENIZER" -a "$ABBR_LIST" "$@" |
# external lexicon lookup
perl "${CMD}/lookup.perl" "$LEXFILE" |
# tagging
# shellcheck disable=SC2086 -- OPTIONS must be word-split into single flags
"$TAGGER" $OPTIONS "$PARFILE" |
# error correction
"$FILTER"

# --- Older variant of this pipeline, disabled and kept for reference only ---
# TOKENIZER=${BIN}/separate-punctuation
# TAGGER=${BIN}/tree-tagger
# ABBR_LIST=${LIB}/german-abbreviations
# PARFILE=${LIB}/german.par
# FILTER=${CMD}/filter-german-tags

# # concatenate all input files into a single stream
# cat $* |
# # do tokenization
# $TOKENIZER +1 +s +l $ABBR_LIST |
# # remove empty lines
# grep -v '^$' |
# # tagging
# $TAGGER $PARFILE -token -lemma -sgml | 
# # error correction
# $FILTER

