#!/usr/bin/perl -w

# convert an html file to a text file

# Version 
# 1   1999 Aug 24 First version. Compensates for files Lynx cannot parse.
# 1.1 1999 Aug 24 Instead of special cases, put a time limit on lynx.

use strict;
use warnings;

# Usage: <this script> FILE
#
# Renders the HTML file named by the first command-line argument as plain
# text with lynx and prints the result on standard output.  The markers
# [INLINE], [IMAGE] and [LINK] in the rendered text are each replaced by
# ". ".  Lynx's output is staged in a temporary file named "FILE.<pid>"
# next to the input, which is deleted before the script exits.
#
# Dies (non-zero exit status) when no argument is given, when FILE cannot
# be accessed, or when the staged lynx output cannot be read.

my $filename = $ARGV[0];
die "usage: $0 file.html\n" unless defined $filename;
die "$0: cannot access '$filename': $!\n" unless -e $filename;

# The checks below were earlier special cases and are kept, disabled, for
# reference; the version notes say the lynx time limit replaced them.

# Lynx can't handle framesets.  Sorry.
#$frameset = `grep "<FRAMESET" $filename`;
#exit if ($frameset =~ /./);

# Lynx can't handle files with no body.  Sorry.
#$size = `wc $filename`;
#($lines, $words, $chars) = $size =~ /^\s+(\d+)\s+(\d+)\s+(\d+)/;
#exit unless ($lines && $words && $chars);
#exit if ($lines < 10);
#exit if ($words < 10);
#exit if ($chars < 10);

# Convert the html file to text with lynx, limited to 300 seconds of CPU
# time by ulimit.  A shell is still needed for ulimit and the redirection,
# but the file names are handed to it as positional parameters ($1, $2)
# instead of being pasted into the command text, so spaces and shell
# metacharacters in the name can no longer be interpreted by the shell.
# NOTE(review): a name that begins with '-' is still passed to lynx as-is
# and may be taken for an option; callers can pass "./-name" to avoid that.
my $tmpfile = "$filename.$$";
my $status  = system(
    '/bin/sh', '-c',
    'ulimit -t 300; lynx -force_html -nolist -dump "$1" > "$2"',
    'sh', $filename, $tmpfile,
);

# A failed or timed-out lynx run is reported but not fatal: whatever text
# it managed to write before stopping is still printed below.
if ($status == -1) {
    warn "$0: could not run /bin/sh: $!\n";
}
elsif ($status != 0) {
    warn "$0: lynx did not finish cleanly (wait status $status); "
       . "output may be incomplete\n";
}

my $in;
unless (open($in, '<', $tmpfile)) {
    my $open_error = $!;
    unlink($tmpfile);    # best effort; the file may not exist at all
    die "$0: cannot read '$tmpfile': $open_error\n";
}

while (my $line = <$in>) {

    # Replace the [IMAGE], [LINK], and [INLINE] markers with ". ".
    # The markers cannot overlap and the replacement text cannot form a
    # new marker, so one pass is equivalent to three separate ones.
    $line =~ s/\[(?:INLINE|IMAGE|LINK)\]/. /g;

    print $line;
}

close($in);

unlink($tmpfile) or warn "$0: cannot remove '$tmpfile': $!\n";
