# setext -> HTML converter
#
# $Id: setext.pl,v 2.5 1993/07/08 00:11:43 sanders Exp $
#
# Tony Sanders <sanders@bsdi.com>, June 1993
#
# Status of typotags:
#     header-tt		passed untouched (XXX: use Subject: in next release)
#     title-tt		<H1>...</H1> (and <TITLE> if needed)
#     subhead-tt	<H2>...</H2> (and <TITLE> if needed)
#     indent-tt		reflows paragraphs
#
#     bold-tt		<B>...</B>
#     italic-tt		<I>...</I>
#     underline-tt	<I>...</I>
#     hot-tt		<A HREF="...">...</A>		(see also href-tt)
#
#     quote-tt		<BLOCKQUOTE>...</BLOCKQUOTE>
# NIY bullet-tt		<UL>...</UL>
#
#     twobuck-tt	ignored
#     suppress-tt	suppressed in output
#     twodot-tt		ignored
#
# Additional typotags supported for HTML:
#     href-tt		.. _text HREF
#     isindex-tt        .. <isindex>
#
# setext'html -- converts setext (.etx files) to HTML
# setext'title -- utility routine to convert setext titles and subheads to HTML
#

# TODO:XXX
# I need to figure out how to allow HTML markup in the text while at the
# same time suppressing "unintentional" markup.  For now < & > are HTML'ized.

# Define the translations supported:
# $trans{'text/setext'}            = "text/html:setext'html";

# to use this as a filter run:
#   perl -e 'require "setext.pl"; package setext; &html;' < file.etx

package setext;

# Parser states, held in $state while the document body is generated:
#   $FMT    free-flow text (normal HTML mode)
#   $PRE    preformatted text, inside <PRE>...</PRE>
#   $QUOTE  quoted text, inside <BLOCKQUOTE>...</BLOCKQUOTE> (implies $FMT)
($FMT, $PRE, $QUOTE) = (0, 1, 2);

# html -- read a setext document from STDIN and write it to STDOUT as HTML.
#
# Takes no arguments; the return value is not meaningful.  Works in two
# passes over the lines held in @data:
#   1. emit <HEAD>...</HEAD>: <ISINDEX>, the <TITLE> (printed by &title),
#      and record the href-tt link targets in %href;
#   2. emit <BODY>...</BODY>: track whether we are in free-flow text, <PRE>
#      or <BLOCKQUOTE> and convert the inline typotags line by line.
#
# The working variables are `local' (dynamically scoped) on purpose:
# &title and the &to_* helpers read and update $title and $state.
sub html {
    local($oldfd) = select(STDOUT);
    local($title, $state, $fold, $a, $h, $i);
    %href = ();		# start clean: no link targets from an earlier document
    # chomp, not chop: a final line without a newline keeps its last character
    @data = <STDIN>; chomp @data;

    # first pass, process <HEAD> items and hypertext link information
    print "<HEAD>\n";
    for ($i = 0; $i <= $#data; $i++) {
	$_ = $data[$i];			# $_ is default for m//

	# <ISINDEX> must be inside <HEAD>...</HEAD>
	/^\.\.\s+<isindex>/i &&
	    do { $data[$i] = ".."; print "<ISINDEX>\n"; next; };

	# locate HREF's:  .. _href URL
	# (the URL is stored without trailing whitespace)
	/^\.\.\s+_(\S*)\s+(.*?)\s*$/ && do { $href{$1} = $2; next; };

	# first title-tt or subhead-tt gets <TITLE>...</TITLE>
	# &title also adds the <H#>...</H#> to the appropriate line
	/^===/ && do { &title("H1", $i); next; };
	/^---/ && do { &title("H2", $i); next; };
    }
    print "</HEAD>\n";

    # second pass, handle remaining typotags
    $state = $FMT;
    print "<BODY>\n";
    foreach $_ (@data) {
	# headings were rewritten to ".. <H#>text</H#>" during the first pass
	/^\.\.\s+(<H.>)(.*)(<\/H.>)/i && do {
	    # XXX: what about blockquote? oh well, maybe later
	    print "</PRE>\n" if $state == $PRE;
	    print $1, &htmlize($2), $3, "\n";
	    print "<PRE>\n" if $state == $PRE;
	    next;
	};
	next if /^\.\./;		# every other ".." line is suppressed

	# line break unless in <PRE>; $fold collapses runs of blank lines
	$state != $PRE && /^\s*$/ &&
	    do { print "<P>\n" unless $fold++; next; };
	$fold = 0;

	# state transitions: "> " lines are quotes, lines indented by exactly
	# two spaces are free-flow text, everything else is preformatted
	$state != $QUOTE && /^>\s/ && &to_quote;
	$state == $QUOTE && !/^>\s/ && &to_fmt;
	$state != $FMT && /^  [^ ]/ && &to_fmt;
	$state == $FMT && !/^  [^ ]/ && &to_pre;

	s/^>\s*//;						# fix quote-tt
	s/^  ([^ ])/$1/;					# fix indent-tt

	# Inline typotags.  Generated tags are written as \376...\377 so that
	# &htmlize can tell them apart from literal < and > in the text.
	# NOTE: none of these use /g, so only the first match on a line is
	# converted.
	s#\*\*([^\*]*)\*\*#\376B\377$1\376/B\377#;		# bold-tt
	s#~([^~]*)~#\376I\377$1\376/I\377#;			# italic-tt
	# underline-tt: inner underscores become spaces
	s{_(\S*)_}{
	    ($a = $1) =~ s,_, ,g;
	    "\376I\377$a\376/I\377";
	}e;
	# hot-tt: a link when an href-tt defined a target, italics otherwise
	s{\b(\S*)_\b}{
	    $h = $href{$1}; ($a = $1) =~ s,_, ,g;
	    $h ? qq'\376A HREF="$h"\377$a\376/A\377' : "\376I\377$a\376/I\377";
	}e;
	print &htmlize($_), "\n";
    }
    # close whichever of <PRE> / <BLOCKQUOTE> is still open, and nothing else
    &to_fmt;
    print "</BODY>\n";
    select($oldfd);
}

# to_fmt -- return to free-flow text, closing whatever element the current
# parser state ($state, dynamically scoped by &html) has left open.
sub to_fmt {
    if ($state == $PRE) {
	print "</PRE>\n";
    } elsif ($state == $QUOTE) {
	print "</PRE></BLOCKQUOTE>\n";				#XXX
    }
    $state = $FMT;
}
# to_pre -- enter preformatted text.  From free-flow text this opens <PRE>;
# from a quote it first closes the quote's <PRE> and <BLOCKQUOTE>.
sub to_pre {
    my $tags = $state == $FMT   ? "<PRE>\n"
	     : $state == $QUOTE ? "</PRE></BLOCKQUOTE><PRE>\n"	#XXX
	     :                    "";
    print $tags if $tags ne "";
    $state = $PRE;
}
# to_quote -- enter a quote.  The quoted text is wrapped in
# <BLOCKQUOTE><PRE>; an already open <PRE> is closed first.
sub to_quote {
    if ($state == $PRE) {
	print "</PRE><BLOCKQUOTE><PRE>\n";			#XXX
    } elsif ($state == $FMT) {
	print "<BLOCKQUOTE><PRE>\n";				#XXX
    }
    $state = $QUOTE;
}
# htmlize -- return a copy of the first argument prepared for HTML output.
# Literal &, < and > become numeric character references, while the
# placeholder characters \376 and \377 (used for generated markup) are
# turned back into real < and >.
sub htmlize {
    my ($text) = @_;
    my %out_for = (
	'&'    => '&#38;',
	'<'    => '&#60;',
	'>'    => '&#62;',
	"\376" => '<',
	"\377" => '>',
    );
    # one pass is enough: no replacement contains a character that is
    # itself subject to replacement by a later step
    $text =~ s/([&<>\376\377])/$out_for{$1}/g;
    $text;
}
# title -- turn the line above a title-tt/subhead-tt underline into a heading.
#
#   $head  heading element name ("H1" or "H2")
#   $i     index in @data of the "===" / "---" underline
#
# The underline is replaced by ".." so that it is suppressed later, and the
# line above it is rewritten to ".. <$head>text</$head>", which &html
# prints as a heading in its second pass.  The first heading seen is also
# printed as <TITLE>; $title is the dynamically scoped counter set up by
# &html.
sub title {
    local($head, $i) = @_;
    $data[$i--] = "..";
    # An underline on the very first line has no text above it.  Without
    # this guard $data[-1], the *last* line of the document, would be used.
    return if $i < 0;
    $data[$i] =~ s/^\s*//;
    # escape &, < and > exactly as is done for the heading itself
    print "<TITLE>", &htmlize($data[$i]), "</TITLE>\n" unless $title++;
    $data[$i] = ".. <$head>" . $data[$i] . "</$head>";
}

# A file loaded with `require "setext.pl"` must end with a true value.
1;
