package Statocles::Plugin::LinkCheck;
# ABSTRACT: Check links and images for validity during build
$Statocles::Plugin::LinkCheck::VERSION = '0.060';
use Statocles::Base 'Class';
use Mojo::DOM;
use Mojo::Util qw( url_escape url_unescape );


# URL patterns that check_pages must never report as broken. Each entry is
# interpolated into a regular expression anchored at the start of the URL.
# Read-only; defaults to an empty list, so nothing is ignored.
has ignore => (
    isa     => ArrayRef[Str],
    is      => 'ro',
    default => sub { return [] },
);


# Check the links and images of every built page and warn about broken ones.
#
# Intended as a handler for the site's build event.
#
# Arguments:
#   $event - event object providing "pages" (array ref of page objects) and
#            "emitter" (the object used to render the pages and to log).
#
# Every page path is recorded as a valid link target. Pages that do
# Statocles::Page::Document are rendered, and the "src" and "href" attributes
# of their HTML are collected. URLs with a scheme ("http:", "mailto:", ...)
# and protocol-relative URLs ("//host/...") are skipped. Fragments are
# stripped, and relative URLs are resolved against the directory of the page
# that contains them.
#
# A link is considered found when its target, or "<target>/index.html", is
# the path of one of the pages. Links whose target matches one of the
# "ignore" patterns are skipped. For each remaining link, one warning is
# logged per page that contains it.
#
# Returns nothing.
sub check_pages {
    my ( $self, $event ) = @_;

    my %page_paths = ();    # page path => 1, for every page in the event
    my %links = ();         # link URL => { path of page containing it => count }
    for my $page ( @{ $event->pages } ) {
        $page_paths{ $page->path } = 1;
        next unless $page->DOES( 'Statocles::Page::Document' );

        my $dom = Mojo::DOM->new( $page->render( site => $event->emitter ) );

        for my $attr ( qw( src href ) ) {
            for my $el ( $dom->find( "[$attr]" )->each ) {
                my $url = $el->attr( $attr );
                # An attribute written without a value (e.g. "<a href>")
                # has no URL to check and would warn in the substitution.
                next unless defined $url;
                $url =~ s{#.*$}{};
                next unless $url; # Nothing left: link to a fragment of this page
                next if $url =~ m{^(?:[a-z][a-z0-9+.-]*):}i; # Has a scheme
                next if $url =~ m{^//}; # Protocol-relative link to another host
                if ( $url !~ m{^/} ) {
                    # Relative link: resolve against the page's directory
                    $url = $page->path->parent->child( $url );
                }
                $links{ url_unescape( $url ) }{ $page->path }++;
            }
        }
    }

    # Sorted so that warnings come out in a reproducible order
    for my $link_url ( sort keys %links ) {
        # Work on a copy: %links must still be looked up with the URL exactly
        # as it was collected, otherwise directory links ("/dir/") lose the
        # list of pages they appear on and are never reported.
        my $target = $link_url;
        $target .= 'index.html' if $target =~ m{/$};
        next if $page_paths{ $target } || $page_paths{ "$target/index.html" };
        next if grep { $target =~ /^$_/ } @{ $self->ignore };
        for my $page_url ( sort keys %{ $links{ $link_url } } ) {
            $event->emitter->log->warn( "URL broken on $page_url: '$target' not found" );
        }
    }

    return;
}

1;

__END__

=pod

=head1 NAME

Statocles::Plugin::LinkCheck - Check links and images for validity during build

=head1 VERSION

version 0.060

=head1 SYNOPSIS

    # site.yml
    site:
        class: Statocles::Site
        on:
            - build:
                $class: Statocles::Plugin::LinkCheck
                $method: check_pages

=head1 DESCRIPTION

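This plugin checks the links and images (C<href> and C<src> attributes) in the
site's document pages to ensure the pages they point to exist in the site.
Links to other sites (URLs with a scheme such as C<http:>, or starting with
C<//>) are not checked. If something is missing, this plugin will write a
warning to the site's log.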

=head1 ATTRIBUTES

=head2 ignore

An array of URL patterns to ignore. These are interpreted as regular expressions,
and are anchored to the beginning of the URL.

For example:

    /broken     will match "/broken.html" "/broken/page.html" but not "/page/broken"
    .*/broken   will match "/broken.html" "/broken/page.html" and "/page/broken"

=head1 METHODS

=head2 check_pages

    $plugin->check_pages( $event );

Check the pages inside the given
L<Statocles::Event::Pages|Statocles::Event::Pages> event.

=head1 AUTHOR

Doug Bell <preaction@cpan.org>

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2015 by Doug Bell.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut
