#!/usr/bin/perl -w

#    kea-tidy-key-file.pl
#    Version 1.1

#    Kea -- Automatic Keyphrase Extraction
#    Copyright 1998-1999 by Gordon Paynter and Eibe Frank
#    Contact gwp@cs.waikato.ac.nz or eibe@cs.waikato.ac.nz
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program; if not, write to the Free Software
#    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

# Version history
#
# 1.0   Witten et.al.
# 1.1   First Distribution.  GPL added.

# kea-tidy-key-file.pl reads a list of keyphrases, one per line, from
# the files named on the command line (or from STDIN if none are given),
# tidies them up, and writes them to STDOUT.
# Intended for use with .key and .kea files.

use strict;
use warnings;

# tidy_keyphrase($line)
#
# Normalises one line of keyphrase text and returns the result:
#   - apostrophes are deleted outright, so "user's" becomes "users";
#   - every other character that is not an ASCII letter, a digit, a
#     period or a space (including tabs and line endings) becomes a space;
#   - leading and trailing whitespace is trimmed and internal runs of
#     whitespace are collapsed to a single space;
#   - the result is lower-cased.
#
# Returns the empty string when nothing is left after tidying.
sub tidy_keyphrase {
    my ($phrase) = @_;

    # Delete apostrophes first so they do not turn into word breaks below.
    $phrase =~ s/'//g;

    # Replace everything except letters, digits, periods and spaces.
    # This also turns the trailing newline into a space, which the
    # trimming step then removes.
    $phrase =~ s/[^A-Za-z0-9. ]/ /g;

    # Squash and trim whitespace.
    $phrase =~ s/^\s+//;
    $phrase =~ s/\s+$//;
    $phrase =~ s/\s+/ /g;

    return lc $phrase;
}

# Filter loop: tidy each input line and print it, skipping lines that
# are empty once tidied.
while (my $line = <>) {
    my $phrase = tidy_keyphrase($line);
    print $phrase, "\n" if $phrase ne "";
}
