#!/usr/local/bin/perl -p

# $Id: rehtml 776 2006-01-30 00:42:36Z marknodine $

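# Converts an HTML file into "regular-expression HTML": each input line is
# rewritten as a regular expression that matches that line while tolerating
# details that vary between runs (ids, dates, versions, line numbers, ...).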

# Protect regular expression characters (do this FIRST)
# Puts a backslash in front of every occurrence of
#   ( ) [ ] { } . + * ? | \ ^ $ @ %
# on the current line.  All later substitutions in this script therefore
# see the escaped form of the text: a literal "." in the input is "\." from
# here on, "(" is "\(", "+" is "\+", and so on.
# NOTE(review): "@" and "%" are not regex metacharacters; presumably they are
# escaped because the output is later interpolated by Perl -- confirm.
s/([\(\)\[\]\{\}\.\+\*\?\|\\\^\$\@\%])/\\$1/g;

# Don't require match on id numbers
# Every "id" followed by digits is replaced by the pattern text id\d+ .
s/id\d+/id\\d+/g;
# Handle line numbers in error messages
# Matches a whitespace-free path ending in an (already escaped) "\.pm" or
# "\.wrt" file name, an optional comma, " line " and a number.  The part of
# the path before the captured "/file\.pm line " text is replaced by ".*",
# and the line number N becomes the alternation (N|\d+).
s!\S+(/\S+\\\.(?:pm|wrt),? line )(\d+)!.*$1($2|\\d+)!g;
# Handle the "Generated on" date
# Everything after "Generated on:" (and any following whitespace) up to the
# end of the line is replaced by ".*".  Only the first occurrence is handled.
s/(Generated on:\s*).*/$1.*/;
# Handle the "Generated by" description
# Applies only when "Docutils" appears somewhere after "Generated by "; the
# rest of the line after "Generated by " is then replaced by ".*".
s/(Generated by ).*Docutils.*/$1.*/;
# Accommodate different date formats
# A date of the form YYYY-MM-DD or YYYY/MM/DD, optionally followed by a
# time, a numeric UTC offset and a parenthesized zone name, is replaced by a
# generic pattern that accepts any such date/time.
#
# Two variants of the time pattern are needed:
#   $time   - the pattern text written into the OUTPUT; it will be matched
#             against raw (unescaped) HTML, so it uses plain "+", "(", ")".
#   $timere - the pattern used HERE to find the time in the current line.
#             The line has already been through the metacharacter-escaping
#             step, so "(" appears as "\(", ")" as "\)" and "+" as "\+".
# They are built in a BEGIN block so they exist before the first line is
# processed (the substitution below uses /o and compiles its pattern once).
BEGIN {
    $time = '(\s+\d+:\d+:\d+(\s+[-+]\d+(\s+\(.+?\))?)?)?';
    # Turn each "\(" / "\)" into a pattern for a literal backslash followed
    # by the parenthesis, as found in the escaped line.
    ($timere = $time) =~ s/\\([()])/\\\\\\$1/g;
    # The sign of the UTC offset: "-" is left alone by the escaping step but
    # "+" has become "\+", so "[-+]" could never match a positive offset.
    # Accept "-" or an escaped "\+" instead (non-capturing, so the group
    # numbering of the pattern is unchanged).
    $timere =~ s/\[-\+\]/(?:-|\\\\\\+)/;
}
# \2 forces the same separator ("-" or "/") between year/month and month/day
# in the input; the emitted pattern accepts either separator in each place.
s!(\d{4})([-/])(\d{2})\2(\d{2})$timere!\\d{4}[-/\]\\d{2}\[-/\]\\d{2}$time!go;
# Handle the docutils string
# Any double-quoted string that starts with "Docutils" becomes ".*" (quotes
# kept), so the exact generator description is not required to match.
s/"Docutils[^\"]+"/".*"/g;
# Handle the prest string
# Same treatment for double-quoted strings starting with "prest", optionally
# followed by the escaped ".prl" suffix.
# NOTE(review): "\\." here means "a backslash followed by any one
# character"; that covers the "\." produced by the escaping step but is
# looser than a literal "\." -- confirm this is acceptable.
s/"prest(\\.prl)?[^\"]+"/".*"/g;
# Handle the prest version
# "prest release X" / "prest\.prl release X" is rewritten so that the ".prl"
# suffix is optional and the release token is \S+.  When the suffix is absent
# in the input, the emitted optional group is empty: "prest()? release \S+".
s/(prest)(\\.prl)?( release )\S+/${1}(${2})?${3}\\S+/g;
# Accommodate different stylesheets
# The href value of the first stylesheet <link> on the line becomes ".*".
s|(<link rel="stylesheet" href=")[^\"]*(" type="text/css" />)|$1.*$2|;
# Get rid of junk characters in generated section numbers
# Replaces the first run of three such characters on the line with ".*".
# NOTE(review): the pattern shows as three plain spaces here; it may
# originally have been three non-breaking spaces -- check the file's bytes.
s/   /.*/; 
# Deal with revisions that may change
# $AFTER_REVISION carries state from one input line to the next: it is true
# while processing the line that follows a "Revision:" docinfo header cell.
# On that line the first dotted number (digits joined by escaped dots, e.g.
# 1\.23) is replaced by a pattern accepting any dotted number.
s/\d+(\\\.\d+)+/\\d+(\\.\\d+)+/ if $AFTER_REVISION;
# Recompute the flag for the next line: true only if the current line
# contains the "Revision:" header cell.
$AFTER_REVISION = m|<th class="docinfo-name">Revision:</th>|;

# Now, handle things I wasn't able to reproduce perfectly

# Be tolerant of lists that may or may not be simple
# Each rule handles the first matching <ol>/<ul> start tag on the line and
# makes the "simple" class optional in the emitted pattern.
#
# No class, or exactly class="simple": the whole attribute becomes optional.
s/(<[ou]l)( class="simple")?(>)/$1( class="simple")?$3/;
# Class list ending in " simple" whose first class starts with a-s:
# the trailing " simple" becomes optional.
s/(<[ou]l) class="(?!simple)([a-s][^\"]+) simple"(>)/$1 class="$2( simple)?"$3/;
# Class list ending in " simple" whose first class starts with t-z:
# "simple " is emitted as an optional class in FRONT of the other classes.
s/(<[ou]l) class="(?!simple)([t-z][^\"]+) simple"(>)/$1 class="(simple )?$2"$3/;
# Any class list not starting with "simple": allow an optional trailing
# " simple".
# NOTE(review): this rule also matches the output of the two rules above
# (their result still satisfies [^\"]+), and the two rules below can match
# this rule's output again, so the emitted pattern can contain several
# stacked "( simple)?" groups.  That is harmless when matching but looks
# unintentional -- confirm before removing or reordering any rule.
s/(<[ou]l) class="(?!simple)([^\"]+)"(>)/$1 class="$2( simple)?"$3/;
# Class list whose first class starts with a-s: optional trailing " simple".
s/(<[ou]l) class="(?!simple)([a-s][^\"]+)"(>)/$1 class="$2( simple)?"$3/;
# Class list whose first class starts with t-z: optional leading "simple ".
s/(<[ou]l) class="(?!simple)([t-z][^\"]+)"(>)/$1 class="(simple )?$2"$3/;

# Deal with perl eval numbers that may change in error messages
# Matches the escaped text "\(eval N\)", optionally followed by an escaped
# bracketed part "\[...\]".  The number is replaced by \d+ and the optional
# bracketed part is dropped; ".*" is always appended after the closing
# parenthesis.  Only the first occurrence on the line is handled.
s/\\\((eval )(\d+)\\\)(\\\[.+\\\])?/\\($1\\d+\\).*/;
