#!/usr/bin/perl -w

=head1 NAME

mech-dump - Dumps information about a web page

=cut

use warnings;
use strict;
use WWW::Mechanize;
use Getopt::Long;
use Pod::Usage;

my @actions;
my $absolute;

my $agent;
my $agent_alias;

GetOptions(
    forms           => sub { push( @actions, \&dump_forms ); },
    links           => sub { push( @actions, \&dump_links ); },
    images          => sub { push( @actions, \&dump_images ); },
    all             => sub { push( @actions, \&dump_forms, \&dump_links, \&dump_images ); },
    absolute        => \$absolute,
    'agent=s'       => \$agent,
    'agent-alias=s' => \$agent_alias,
    help            => sub { pod2usage(1); },
) or pod2usage(2);

=head1 SYNOPSIS

mech-dump [options] [file|url]

Options:

    --forms         Dump table of forms (default action)
    --links         Dump table of links
    --images        Dump table of images
    --all           Dump all three of the above, in that order

    --agent=agent   Specify the UserAgent to pass
    --agent-alias=alias
                    Specify the alias for the UserAgent to pass.
                    Pick one of:
                        * Windows IE 6
                        * Windows Mozilla
                        * Mac Safari
                        * Mac Mozilla
                        * Linux Mozilla
                        * Linux Konqueror

    --absolute      Show URLs as absolute, even if relative in the page
    --help          Show this message

The order of the options specified is relevant.  Repeated options
get repeated dumps.

=cut

my $uri = shift or die "Must specify a URL or file to check.  See --help for details.\n";
if ( -e $uri ) {
    require URI::file;
    $uri = URI::file->new_abs( $uri )->as_string;
}

@actions = (\&dump_forms) unless @actions;

my $mech = WWW::Mechanize->new( cookie_jar => undef );
if ( defined $agent ) {
    $mech->agent( $agent );
}
elsif ( defined $agent_alias ) {
    $mech->agent_alias( $agent_alias );
}

my $response = $mech->get( $uri );
$response->is_success or die "Can't fetch $uri\n", $response->status_line, "\n";
$mech->is_html or die qq{$uri returns type "}, $mech->ct, qq{", not "text/html"\n};

for my $action ( @actions ) {
    $action->( $mech );
}

sub dump_links {
    my $mech = shift;
    for my $link ( $mech->links ) {
        my $url = $absolute ? $link->url_abs : $link->url;
        print "$url\n";
    }
    return;
}

sub dump_images {
    my $mech = shift;

    for my $image ( $mech->images ) {
        my $url = $absolute ? $image->url_abs : $image->url;
        print "$url\n";
    }
    return;
}

sub dump_forms {
    my $mech = shift;

    for my $form ( $mech->forms() ) {
        print $form->dump;
        print "\n";
    }
    return;
}

=head1 TODO

=over 4

=item * Options for C<--user>, C<--pass> and C<--proxy>.

=back

=cut
