 	weblint++                                                                              ¸˜›¸˜œ   mBIN                ‚Eu  #!/usr/bin/perl -w
# $Id: weblint++,v 1.2 2002/02/19 21:28:28 comdog Exp $
use strict;

=head1 NAME

	weblint++
	
=head1 SYNOPSIS

	weblint++ [-v [level] ] [-m [md5 digest] ] [-d file]
		[-u username -p password] url
	
=head1 DESCRIPTION

The C<weblint++> program fetches a web resource and runs the
response through an HTML lint filter as well as other tests.

You can use this program interactively if you specify the
C<-v> switch, or use it in batch mode by observing the 
exit status.

=head1 OPTIONS

=over4

=item -d file

The C<-d> switch performs a diff between the HTTP response
message body and the specified file.  The program exits
if they differ.

=item -m [md5 digest]

The C<-m> switch by itself reports the MD5 digest (in hex) of
the message body of the request from URL.  The program exits
if the digests do not match.

=item -p password

The C<-p> switch specifies the Basic authentication password.

=item -s file

The C<-s> switch specifies the file to save the HTTP message
body to.

=item -u username

The C<-u> switch specifies the Basic authentication user name.

=item -v [level]

The C<-v> switch turns on verbose reporting.  The greater the
value of C<level>, the more verbose the reporting.  If you do
not specify C<-v>, you will see no output, although you can 
observe the results from the exit status.

=back

=head1 ORDER OF TESTS

The program performs the tests, and possibly exits based on
errors, in this order:

	HTTP fetch
	MD5 digest comparison ( C<-m> switch )
	File content comparison ( C<-d> switch )
	HTML Lint warnings
	
=head1 EXIT STATUSES

=over 4

=item -1

The MD5 digest of the HTTP response message body did not match the digest
specified with C<-m>, if you specified one.

=item -2

The file specified with the C<-d> switch does not exist.

=item -3

The HTTP response message body differed from the content of the file
specified with <-d>.

=item < 0

The program encountered HTTP error.  The exit code is the HTTP response
code negated.  If the HTTP response was 404 (Not Found), the exit status
is -404.

=item > 0

C<HTML::Lint> found HTML errors.  The exit status is the number of HTML
errors.

=item 0

Success.  No HTTP errors, no MD5 digest mismatches, no HTML
warnings.

=back

=head1 TO DO

=head1 AUTHOR

brian d foy <bdfoy@cpan.org>

=head1 COPYRIGHT

Copyright 2000, brian d foy.  All rights reserved.

=head1 LICENSE

This program may be redistributed under the same turns as Perl
itself.

=head1 SCRIPT CATEGORIES

Web

=head1 SEE ALSO

L<HTML::Lint>

=cut

use vars qw( %opts $VERBOSE );

require 5.6.0;

use HTML::Lint;
require LWP::UserAgent;
require HTTP::Request;
require URI;

my $url = URI->new( pop @ARGV );

die "[$url] is not a valid URI\n" unless ref $url;

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
# command line argument processing
while( my $arg = shift @ARGV )
	{
	unless( $arg =~ m/^-(.)/ )
		{
		shift @ARGV;
		next;
		}
	
	my $letter = $1;
	
	if( $ARGV[0] =~ m/^-/ or not @ARGV )
		{
		$opts{$letter} = 1;
		next;
		}
	
	$opts{$letter} = shift @ARGV;
	}

$VERBOSE = $opts{v};

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
# HTTP objects
my $user_agent = LWP::UserAgent->new;
my $request = HTTP::Request->new( GET => $url );
$request->authorization_basic( $opts{u}, $opts{p} )
	if( exists $opts{u} and exists $opts{p} );
	
print $request->as_string if $VERBOSE > 1;

my $response = $user_agent->request( $request );

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
# the tests
if( $response->is_success )
	{
	print $response->as_string if $VERBOSE > 1;
	my $data = $response->content;
	
	# # # save the data
	if( exists $opts{'s'} and $opts{'s'} ne '1' )
		{
			if( open(FILE, "> $opts{'s'}") )
				{
				print FILE $data;
				close FILE;
				}
			else
				{
				print STDERR "Could not open $opts{s} for writing: $!\n";
				}
		}
		
	# # # MD5 differences
	if( exists $opts{'m'} and $opts{'m'} )
		{
		require Digest::MD5;
		
		my $digest = Digest::MD5::md5_hex( $data );
		print "MD5 digest (hex) $digest\n" 
			if( exists $opts{'m'} and $VERBOSE );

		if( $opts{'m'} ne 1 and $opts{'m'} ne $digest )
			{
			print "MD5 digests do not match!\n",
				"Expected [$opts{m}] got [$digest]\n" if $VERBOSE;
			exit -1;
			}
		}

	# # # File differences
	if( exists $opts{'d'} and $opts{'d'} and -e $opts{'d'} )
		{
		require Text::Diff;
		
		my $diff = Text::Diff::diff( $opts{'d'}, \$data );
				
		unless( $diff eq '0' )
			{
			print "Files are different\n$diff\n" if $VERBOSE;
			exit -3;
			}

		print "Response is same as $opts{d}\n" if $VERBOSE;
		}
	elsif( exists $opts{'d'} and $opts{'d'} and not -e $opts{'d'} )
		{
		print STDERR "File $opts{'d'} does not exist\n" if $VERBOSE;
		
		exit -2;
		}
		
	my $lint = HTML::Lint->new();
	
	$lint->parse( $data );

	my $errors = $lint->errors();
	
	exit 0 unless $errors;
	
	if( $VERBOSE )
		{
		foreach my $error ( $lint->errors() ) 
			{
			print $error->as_string(), "\n";
			}
		}
		
	exit $errors;	
	}
else
	{
	print STDERR "Could not fetch resource [",
		$response->code, "]" if $VERBOSE;
		
	exit -( $response->code );
	}


                                                                                                    