#! /usr/bin/perl -w
# A non-threaded perl link checker
# 
# Copyright (c) 2003-2006 imacat.
# 
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# 
# First written 2003-04-12

use 5.005;
use strict;
use Config qw();
use Cwd qw(getcwd);
use File::Spec::Functions qw(splitdir catdir rel2abs);
use Getopt::Long qw(GetOptions);
use HTML::LinkExtor qw();
use HTTP::Request qw();
use HTTP::Cookies qw();
use LWP::RobotUA qw();
use Net::HTTP qw();
use Socket qw(inet_aton);
use Sys::Hostname qw(hostname);
use URI qw();
use URI::Escape qw(uri_escape uri_unescape);
# Prototype declaration
#   The subroutines are declared up front so that they can be called as
#   list operators before their definitions are compiled.
sub main();
sub parse_args();
sub check_links($);
sub fetch_redir($);
sub fetch_noredir($);
sub get_links($$);
sub proc_link_val($$);
sub is_excluded($);
sub get_ua($);
sub get_target($);
sub noreq_key($);
sub rep_uri($);

# The name of this script (the part of $0 after the last slash) and its version
use vars qw($THIS_FILE $VERSION);
($THIS_FILE) = $0 =~ /([^\/]*)$/;
$VERSION = "3.00";

# Run-time state shared by the subroutines
#   %UA          user agents, keyed by request target
#   %TARGETS     cached request targets, keyed by URI text
#   %URIS        URI objects, keyed by their canonical text
#   %REFS        referer chains, keyed by the URI text that was referred to
#   %NOREQ_HOSTS servers that answered 503, keyed by noreq_key()
#   @LOCALS/%LFOUND, @REMOTES/%RFOUND   pending queues and found-sets
#   $TOTAL/$SUCCESS                     counters of the current start URI
use vars qw(%UA %TARGETS %URIS %REFS $COOKIE_JAR);
use vars qw($LOCAL_TARGET @LOCALS %LFOUND $STARTDIR @REMOTES %RFOUND);
use vars qw($TOTAL $SUCCESS);
use vars qw(%NOREQ_HOSTS);
%UA = qw();
%TARGETS = qw();
%URIS = qw();
%REFS = qw();
%NOREQ_HOSTS = qw();
$COOKIE_JAR = HTTP::Cookies->new;

# Option defaults; parse_args() overrides them from the command line
use vars qw($RECURSIVE $PARENT $NOREMOTE @EXCLUDES $VERBOSE);
$RECURSIVE = 1;
$PARENT = 0;
$NOREMOTE = 0;
@EXCLUDES = qw();
$VERBOSE = 1;

# Messages
#   $VERSTR is built from $THIS_FILE rather than from $&, so that it does
#   not depend on which pattern happened to match last.
use vars qw($UASTR $VERSTR $SHORTHELP $HELPMSG);
$UASTR = "$THIS_FILE-robot/$VERSION (using " . LWP::RobotUA->_agent . ")";
$SHORTHELP = "Try `$THIS_FILE --help' for more information.";
$VERSTR = "$THIS_FILE v$VERSION by imacat <imacat\@mail.imacat.idv.tw>";
$HELPMSG = << "EOT";
Usage: $THIS_FILE [options] url1 [url2 [url3 ...]]
Check links on the specific website.

  -1,--onelevel      Check the links on this page and stops.
  -r,--recursive     Recursively check through this site. (default)
  -b,--below         Only check the links below this directory. (default)
  -p,--parent        Trace back to the parent directories.
  -l,--local         Only check the links on this same host.
  -s,--span          Check the links to other hosts (without recursion).
                     (default)
  -e,--exclude path  Exclude this path.  Check for their existence but not
                     check the links on them, just like they are on a foreign
                     site.  Multiple --exclude are OK.
  -i,--include path  Include this path.  An opposite of --exclude that cancels
                     its effect.  The latter specified has a higher priority.
  -d,--debug         Display debug messages.  Multiple --debug to debug more.
  -q,--quiet         Disable debug messages.  An opposite that cancels the
                     effect of --debug.
  -h,--help          Display this help.
  -v,--version       Display version number.
  url1, url2, url3   The URLs of the websites to check against.

EOT

# The URI schemes that are checked; links with any other scheme are skipped
use vars qw(%ALLOWED_SCHEMES);
%ALLOWED_SCHEMES = map { $_ => 1 } qw(http https ftp file gopher);

main();
exit 0;


# main: Program entry point
#   Parses the command line, checks the links starting from every start
#   URI in turn, and reports the elapsed time on STDERR unless quiet.
sub main() {
    foreach my $start (parse_args()) {
        check_links($start);
    }
    
    if ($VERBOSE > 0) {
        # $^T is the time at which this script started running
        my $elapsed = time - $^T;
        print STDERR "Done.  " . $elapsed . " seconds elapsed.\n";
    }
    return;
}

# parse_args: Parse the arguments
#   Sets the option globals ($RECURSIVE, $PARENT, $NOREMOTE, @EXCLUDES and
#   $VERBOSE) from the command line switches, then turns every remaining
#   argument into a canonical URI, records it in %URIS and collects it as
#   a start URI.  Arguments that cannot be used are skipped with a warning.
#   Returns the list of canonical start URI strings.
#   Dies when the options are invalid or when no usable start URI is left.
sub parse_args() {
    local $_;
    my @starts;
    
    # Get the arguments
    eval {
        # Turn the warnings of Getopt::Long into fatal errors
        local $SIG{__WARN__} = sub { die $_[0]; };
        Getopt::Long::Configure(qw(no_auto_abbrev bundling));
        GetOptions( "onelevel|1"=>sub { $RECURSIVE = 0; },
                    "recursive|r"=>sub { $RECURSIVE = 1; },
                    "below|b"=>sub { $PARENT = 0; },
                    "parent|p"=>sub { $PARENT = 1; },
                    "local|l"=>sub { $NOREMOTE = 1; },
                    "span|s"=>sub { $NOREMOTE = 0; },
                    # The rules are prepended, so that is_excluded() meets
                    # the latter specified rule first
                    "exclude|e=s"=>sub { unshift @EXCLUDES, "-" . $_[1]; },
                    "include|i=s"=>sub { unshift @EXCLUDES, "+" . $_[1]; },
                    "debug|d"=>sub { $VERBOSE++; },
                    "quiet|q"=>sub { $VERBOSE-- if $VERBOSE > 0; },
                    "help|h"=>sub { print $HELPMSG; exit 0; },
                    "version|v"=>sub { print "$VERSTR\n"; exit 0; });
    };
    die "$THIS_FILE: ERROR: $@" if $@ ne "";
    
    # Show progress
    $| = 1 if $VERBOSE > 2;
    
    @starts = qw();
    foreach my $uritext (@ARGV) {
        my $uri;
        $uri = URI->new($uritext);
        # Single letter scheme is assumed to be a volume label
        # This is for MSWin32
        $uri->scheme(undef) if defined $uri->scheme && $uri->scheme =~ /^[a-z]$/i;
        # URI without a scheme is assumed to be a local file
        if (!defined $uri->scheme) {
            # Systems with symbolic links -- rel2abs() is not trustworthy, use URI->abs().
            if ($Config::Config{"d_symlink"}) {
                my $base;
                $base = getcwd;
                $base =~ s/\/$//;
                $base =~ s/^\///;
                $base = URI->new("file://localhost/$base/");
                $uri = URI->new(join("/", splitdir($uritext)));
                $uri = $uri->abs($base);
            # Systems without symbolic links -- we assume rel2abs() is trustworthy.
            # Like MSWin32
            } else {
                $uri = URI->new("file://localhost/" . join("/", splitdir(rel2abs($uritext))));
                $uri->scheme("file");
                $uri->host("localhost");
            }
        }
        if (!exists $ALLOWED_SCHEMES{$uri->scheme}) {
            warn "$THIS_FILE: WARNING: $uritext: Skipping URI with unsupported scheme " . $uri->scheme . "\n"
                if $VERBOSE > 0;
            next;
        }
        # file scheme is OK without a host -- it is localhost
        if ($uri->scheme eq "file") {
            $uri->host("localhost") if !defined $uri->host || $uri->host eq "";
            if ($uri->host ne "localhost") {
                # Obey --quiet, like every other warning in this loop
                warn "$THIS_FILE: WARNING: $uritext: Skipping file URI that is not on localhost\n"
                    if $VERBOSE > 0;
                next;
            }
        # schemes other than file require a host
        } else {
            if (!defined $uri->host || $uri->host eq "") {
                warn "$THIS_FILE: WARNING: $uritext: Skipping URI without a host\n"
                    if $VERBOSE > 0;
                next;
            }
        }
        if ($uri->path eq "") {
            warn "$THIS_FILE: WARNING: $uritext: URI not canonical\n"
                if $VERBOSE > 0;
            $uri->path("/");
        }
        $uri->fragment(undef);
        # Use its canonical form
        $uri = $uri->canonical;
        if (exists $URIS{$uri->as_string}) {
            warn "$THIS_FILE: WARNING: Skipping duplicated URI $uritext\n"
                if $VERBOSE > 0;
            next;
        }
        $URIS{$uri->as_string} = $uri;
        push @starts, $uri->as_string;
    }
    
    die "$THIS_FILE: ERROR: Please specify the URL to check\n"
        if @starts == 0;
    
    return @starts;
}

# check_links: Check links from an URL
#   Takes the canonical text of a start URI (a key of %URIS).  Resets the
#   queues and the counters, fetches the start page and collects its links,
#   then keeps fetching the pending URIs -- the local ones first, the
#   remote ones after them -- until both queues are empty.  Only local
#   pages are parsed for further links, and only when running recursively
#   and when the page is not excluded.  Prints a summary line at the end
#   unless quiet.  Returns nothing.
sub check_links($) {
    local ($_, %_);
    my ($start, $page, $doc);
    $start = $_[0];
    
    # The queue starts with the start URI itself.  fetch_noredir() removes
    # the head of the queue when it fetches it.
    @LOCALS = ($start);
    %LFOUND = ($start => 1);
    $LOCAL_TARGET = get_target($start);
    @REMOTES = qw();
    # NOTE(review): %RFOUND is not reset here, so a remote URI that was
    # found while checking an earlier start URI is not queued again --
    # confirm that this is intended.
    ($TOTAL, $SUCCESS) = (0, 0);
    # The directory of the start URI: its path without the last segment
    $STARTDIR = $URIS{$start}->path;
    $STARTDIR =~ s/[^\/]+$//;
    
    $page = $start;
    ($page, $doc) = fetch_redir($page);
    return if !defined $doc;
    get_links($doc, $page);
    while (@LOCALS > 0 || @REMOTES > 0) {
        # Always process locals first
        if (@LOCALS > 0) {
            @LOCALS = sort @LOCALS;
            $page = $LOCALS[0];
            ($page, $doc) = fetch_redir($page);
            # Skip the failed request
            next if !defined $doc;
            # Skip if non-recursive
            next if !$RECURSIVE;
            # Skip if it is excluded
            next if is_excluded($page);
            get_links($doc, $page);
        
        # Then the remote
        } else {
            @REMOTES = sort @REMOTES;
            $page = $REMOTES[0];
            fetch_redir($page);
            # We are not parsing remote documents
        }
    }
    
    print rep_uri($start) . ": $TOTAL checked, $SUCCESS success\n"
        if $VERBOSE > 0;
    return;
}

# fetch_redir: Fetch a document and deal with redirection
#   Takes the text of the URI to fetch.  Follows the redirections one by
#   one until a final response is met, or until the redirection leads to
#   an URI that was found before.
#   Returns a list of two elements: the text of the URI that was fetched
#   last, and the document body, or undef when there is nothing to parse.
sub fetch_redir($) {
    local ($_, %_);
    my ($orig, $redir, $doc, $referer);
    $orig = $_[0];
    
    while (1) {
        ($doc, $redir) = fetch_noredir($orig);
        # No more redirection
        return ($orig, $doc) if !defined $redir;
        # Redirection target was scheduled
        return ($orig, undef) if exists $LFOUND{$redir} || exists $RFOUND{$redir};
        # Record the referer
        $REFS{$redir} = [] if !exists $REFS{$redir};
        $referer = [rep_uri($orig)];
        push @$referer, @{${$REFS{$orig}}[0]} if exists $REFS{$orig};
        push @{$REFS{$redir}}, $referer;
        # Put the redirection target at the head of its queue and mark it
        # as found.  fetch_noredir() always removes the head of the queue
        # that matches the URI it fetches; without this an unrelated
        # pending URI would be dropped unchecked on every redirection.
        # Marking it also stops a redirection loop among new URIs.
        if (get_target($redir) eq $LOCAL_TARGET) {
            unshift @LOCALS, $redir;
            $LFOUND{$redir} = 1;
        } else {
            unshift @REMOTES, $redir;
            $RFOUND{$redir} = 1;
        }
        # Process the next
        $orig = $redir;
    }
}

# fetch_noredir: Fetch a document without redirection
#   Takes the text of the URI to fetch (a key of %URIS).  Removes the head
#   of the matching pending queue, counts the request in $TOTAL (and in
#   $SUCCESS when it is fine), and reports the problems on STDOUT.
#   Returns a list of two elements:
#     - the document body, only for a 200 response that is HTML or XHTML,
#       or undef otherwise;
#     - the canonical text of the redirection target for a 301, 302, 303
#       or 307 response, or undef otherwise.
sub fetch_noredir($) {
    my ($uritext, $uri, $ua, $r, $urirep);
    $uritext = $_[0];
    
    # Remove it from the list
    if (get_target($uritext) eq $LOCAL_TARGET) {
        shift @LOCALS;
    } else {
        shift @REMOTES;
    }
    # Set the target URI to report
    $urirep = rep_uri($uritext);
    $urirep .= " (Referer: " . join(", ", map join(" -> ", reverse @$_), @{$REFS{$uritext}}) . ")"
        if exists $REFS{$uritext};
    
    # Add the counter
    $TOTAL++;
    
    # Skip if this host has returned 503 Service Unavailable previously
    if (exists $NOREQ_HOSTS{noreq_key($uritext)}) {
        my $line;
        $line = $urirep;
        $line .= " skipped for service unavailable in previous request to "
                . $NOREQ_HOSTS{noreq_key($uritext)}
             if $VERBOSE > 0;
        print "$line\n";
        return (undef, undef);
    }
    
    $uri = $URIS{$uritext};
    
    print STDERR "Requesting $urirep... " if $VERBOSE > 2;
    $ua = get_ua($uritext);
    # HTTP or HTTPS -- use HTTP::Request for more sophisticated configuration
    if ($uri->scheme =~ /^https?$/) {
        my ($request, $w);
        $request = HTTP::Request->new(GET => $uri->as_string);
        # Set the referer
        $request->header("Referer", ${${$REFS{$uritext}}[0]}[0])
            if exists $REFS{$uritext};
        # Gracefully deal with robots.txt warnings
        #   Catch the warning during the request only; the previous
        #   handler is back in effect when the block is left.
        undef $w;
        {
            local $SIG{"__WARN__"} = sub { $w = $_[0]; };
            $r = $ua->simple_request($request);
        }
        if (defined $w) {
            # Malformed robots.txt warnings
            if ($w =~ /^RobotRules <[^\n]+>: Unexpected line: /) {
                # Skip malformed robots.txt warnings on remote sites
                # We are assumed to have no control on the remote sites
                if (get_target($uritext) ne $LOCAL_TARGET) {
                
                # Skip warnings about Crawl-delay:
                # As of writing this, WWW::RobotRules 1.33 in libwww-perl 5.805
                #   does not recognize the Crawl-delay: rule yet.
                } elsif ($w =~ /^RobotRules <[^\n]+>: Unexpected line: Crawl-delay:/i) {
                
                # Warn on malformed lines in our local robots.txt
                } else {
                    warn $w;
                }
            
            # Other warnings
            } else {
                warn $w;
            }
        }
    
    } else {
        $r = $ua->get($uri->as_string);
    }
    print STDERR "OK\n" if $VERBOSE > 2;
    
    # 204 No Content
    if ($r->code == 204) {
        $SUCCESS++;
        return (undef, undef);
    
    # 200 OK
    } elsif ($r->code == 200) {
        my $ctype;
        $SUCCESS++;
        # Only return the body if text/html
        # Fix the content type of XHTML in file URI
        if ($uri->scheme eq "file" && $uri->as_string =~ /\.xhtml$/) {
            $ctype = "application/xhtml+xml";
        # Parse the content type -- keep the last one when multiple
        } else {
            my ($header, @types);
            $header = $r->header("Content-Type");
            # A response without a content type is not an HTML document
            $header = "" if !defined $header;
            @types = split /,\s*/, $header;
            $ctype = @types > 0? $types[$#types]: "";
        }
        return ($r->content, undef)
            if $ctype =~ /^text\/html\b/ || $ctype =~ /^application\/xhtml\+xml\b/;
        return (undef, undef);
    
    # 3XX Redirect
    } elsif (grep($r->code == $_, (301, 302, 303, 307)) > 0) {
        my ($is_full_uri, $ruri, $rscheme);
        # Redirect without location is an error
        if (!defined $r->header("Location")) {
            print STDERR "$urirep " . $r->status_line . ": Redirect without location\n";
            return (undef, undef);
        }
        
        $ruri = URI->new($r->header("Location"));
        
        # Deal with non-canonical location
        $is_full_uri = 1;
        # Non-absolute location should be warned
        #   Resolve it against the URI that was requested, the same way
        #   proc_link_val() resolves the links that are found in a page.
        if (!defined $ruri->scheme) {
            $is_full_uri = 0;
            $ruri = $ruri->abs($uri);
        }
        # Skip schemes that are not supported
        #   It is the scheme of the redirection target that matters here;
        #   the scheme of the request itself is always a supported one.
        $rscheme = $ruri->scheme;
        return (undef, undef)
            if !defined $rscheme || !exists $ALLOWED_SCHEMES{$rscheme};
        if (!defined $ruri->host || $ruri->host eq "") {
            $is_full_uri = 0;
            $ruri->host($uri->host);
        }
        if ($ruri->path eq "") {
            $is_full_uri = 0;
            $ruri->path("/");
        }
        warn "$THIS_FILE: WARNING: $uritext: " . $r->status_line . ": "
                . "Location not canonical: " . $r->header("Location") . "\n"
            if !$is_full_uri && $VERBOSE > 1;
        $ruri->fragment(undef);
        # Use its canonical form
        $ruri = $ruri->canonical;
        
        # Save it
        $URIS{$ruri->as_string} = $ruri if !exists $URIS{$ruri->as_string};
        
        # 301 Moved Permanently should be treated as an error, because we need
        #   to change the reference from now on.
        if ($r->code == 301) {
            my $line;
            $line = $urirep;
            $line .= " " . $r->status_line . ": " . $ruri->as_string
                if $VERBOSE > 0;
            print "$line\n";
            return (undef, $ruri->as_string);
        }
        
        $SUCCESS++;
        # Warn when not quiet
        print "$urirep " . $r->status_line . ": " . $ruri->as_string . "\n"
                if $VERBOSE > 1;
        return (undef, $ruri->as_string);
    
    # Bad response that we cannot go on
    } elsif (grep($r->code == $_, (400, 403, 404, 410, 500)) > 0) {
        my $line;
        $line = $urirep;
        $line .= " " . $r->status_line if $VERBOSE > 0;
        print "$line\n";
        return (undef, undef);
    
    # Bad response that we should not keep requesting on a same server
    } elsif ($r->code == 503) {
        my $line;
        $line = $urirep;
        $line .= " " . $r->status_line if $VERBOSE > 0;
        print "$line\n";
        $NOREQ_HOSTS{noreq_key($uritext)} = $uritext;
        return (undef, undef);
    }
    
    # Unable to process
    print "$urirep " . $r->status_line . ": Unable to process this response\n";
    return (undef, undef);
}

# get_links: Extract the links of a document and process each of them
#   Takes the HTML source and the text of the URI it was fetched from.
#   Every attribute value that HTML::LinkExtor reports as a link is handed
#   to proc_link_val() together with that base URI.  Returns nothing.
sub get_links($$) {
    my ($html, $base) = @_;
    my (@values, $extor);
    
    $extor = HTML::LinkExtor->new;
    $extor->parse($html);
    
    # Each link is reported as [tag, attribute => value, ...]; the tag
    # name is dropped and only the attribute values are collected
    @values = qw();
    foreach my $link ($extor->links) {
        my ($tag, %attrs) = @$link;
        push @values, values %attrs;
    }
    
    foreach my $value (@values) {
        proc_link_val($value, $base);
    }
    return;
}

# proc_link_val: Process a found link value
#   Takes the link value as found in the document, and the text of the URI
#   of that document (a key of %URIS).  Makes the link absolute and
#   canonical, records it in %URIS and its referer in %REFS, and appends
#   it to @LOCALS or @REMOTES when it has not been found before.  Links
#   with unsupported schemes, file links that come from a remote page or
#   that are not on localhost, and -- unless --parent -- local links
#   outside of the start directory are not queued.  Returns nothing.
sub proc_link_val($$) {
    my ($value, $base, $uri, $key);
    ($value, $base) = @_;
    
    $uri = URI->new($value);
    
    # Make it canonical
    $uri = $uri->abs($URIS{$base}) if !defined $uri->scheme;
    # Skip schemes that are not supported
    if (!exists $ALLOWED_SCHEMES{$uri->scheme}) {
        print STDERR rep_uri($uri) . " (" . rep_uri($URIS{$base}) . "): "
                . "Skipping unsupported scheme \"" . $uri->scheme . "\"\n"
            if $VERBOSE > 3;
        return;
    }
    # Skip local file URI referred from remote
    if ($uri->scheme eq "file" && $URIS{$base}->scheme ne "file") {
        print STDERR rep_uri($uri) . " (" . rep_uri($URIS{$base}) . "): "
                . "Skipping file URI referred from remote\n"
            if $VERBOSE > 3;
        return;
    }
    # Skip file URI that is not local
    #   A file URI without a host is taken as local
    if ($uri->scheme eq "file") {
        my $host;
        $host = $uri->host;
        if (defined $host && $host ne "localhost" && $host ne "") {
            print STDERR rep_uri($uri) . " (" . rep_uri($URIS{$base}) . "): "
                    . "Skipping file URI that is not on localhost\n"
                if $VERBOSE > 3;
            return;
        }
    }
    $uri->host($URIS{$base}->host) if !defined $uri->host || $uri->host eq "";
    $uri->path("/") if $uri->path eq "";
    $uri->fragment(undef);
    # Use its canonical form
    $uri = $uri->canonical;
    $key = $uri->as_string;
    
    # Save it
    $URIS{$key} = $uri if !exists $URIS{$key};
    $REFS{$key} = [] if !exists $REFS{$key};
    push @{$REFS{$key}}, [rep_uri($base)];
    
    # Local
    if (get_target($key) eq $LOCAL_TARGET) {
        # Skip found URLs
        return if exists $LFOUND{$key};
        # Skip parent directories
        #   The start directory is a literal path but not a pattern, so it
        #   is quoted before it is matched
        return if !$PARENT && $uri->path !~ /^\Q$STARTDIR\E/;
        push @LOCALS, $key;
        $LFOUND{$key} = 1;
    
    # Remote
    } else {
        # No remote
        return if $NOREMOTE;
        # Skip found URLs
        return if exists $RFOUND{$key};
        push @REMOTES, $key;
        $RFOUND{$key} = 1;
    }
    
    return;
}

# is_excluded: If this page is excluded
#   Takes the text of the URI of the page (a key of %URIS).  The rules in
#   @EXCLUDES are tried in order and the first rule whose path matches the
#   beginning of the path of the page decides.  A rule is "-" (exclude) or
#   "+" (include) followed by the path.
#   Returns 1 if the page is excluded, or 0 if it is included or if no
#   rule matches.
sub is_excluded($) {
    my ($page, $pagepath);
    $page = $_[0];
    
    return 0 if @EXCLUDES == 0;
    $pagepath = $URIS{$page}->path;
    
    foreach my $rule (@EXCLUDES) {
        my ($path, $match);
        $path = substr $rule, 1;
        # The path is what the user typed, a literal path but not a
        # pattern, so it is quoted before it is matched
        # Directory match
        if ($rule =~ /\/$/) {
            $match = ($pagepath =~ /^\Q$path\E/);
        # File or directory match -- the path must end here, or be followed
        #   by a slash or a question mark
        } else {
            $match = ($pagepath =~ /^\Q$path\E(?![^\/?])/);
        }
        # minus (-) means "exclude"
        return ($rule =~ /^-/? 1: 0) if $match;
    }
    
    return 0;
}

# get_ua: Return a user agent for the request
#   Takes the text of the URI to request.  One LWP::RobotUA user agent is
#   kept in %UA for each request target (see get_target()); it is created
#   when that target is requested for the first time.  All the user agents
#   share the cookie jar $COOKIE_JAR.
#   Returns the user agent.
sub get_ua($) {
    my ($target, $login);
    
    # Get the target of the request
    $target = get_target($_[0]);
    
    # User agent cached
    return $UA{$target} if exists $UA{$target};
    
    # Find the user name for the From: address
    #   getlogin returns undef when there is no controlling terminal, for
    #   example when running from cron.  Fall back to the password
    #   database; getpwuid is not implemented on every platform, so the
    #   call is guarded with eval.
    $login = getlogin;
    if (!defined $login || $login eq "") {
        local $@;
        $login = eval { (getpwuid($<))[0] };
    }
    $login = "unknown" if !defined $login || $login eq "";
    
    # Create a new user agent
    $UA{$target} = LWP::RobotUA->new(
        "agent"         => $UASTR,
        "from"          => $login . "@" . hostname,
        "keep_alive"    => 1,
        "cookie_jar"    => $COOKIE_JAR,
    );
    # No waiting between the requests to a same server
    $UA{$target}->delay(0);
    #$LWP::Debug::current_level{"debug"} = 1;
    $UA{$target}->default_header("Accept", "application/xhtml+xml,text/html;q=0.9,*/*;q=0.1");
    return $UA{$target};
}

# get_target: Return the target of the request
#   Takes the text of an URI (a key of %URIS).  The target is an URI that
#   is made of the scheme, the user information, the host and the port of
#   that URI only; the port is left out when it is the default port of the
#   scheme.  URIs with a same target are on a same site.  The result is
#   cached in %TARGETS.
#   Returns the target, as a canonical URI object.
#   Dies when the URI text is not defined.
sub get_target($) {
    my ($uritext, $uri, $scheme, $userinfo, $user, $pass, $host, $port);
    my ($default_port, $target);
    $uritext = $_[0];
    
    # Report where the bad call came from
    die sprintf("%s %s", (caller 2)[1,2]) if !defined $uritext;
    
    # Target cached
    return $TARGETS{$uritext} if exists $TARGETS{$uritext};
    
    # Construct the target of our request
    $uri = $URIS{$uritext};
    $scheme = $uri->scheme;
    $userinfo = $uri->can("userinfo")? $uri->userinfo: undef;
    if (!defined $userinfo) {
        ($user, $pass) = (undef, undef);
    # User name only, without a password
    } elsif ($userinfo !~ /^([^:]+):(.+)$/) {
        ($user, $pass) = (uri_unescape($userinfo), undef);
    # User name and password
    } else {
        ($user, $pass) = (uri_unescape($1), uri_unescape($2));
    }
    $host = $uri->host;
    undef $port;
    $port = $uri->port if $uri->can("port");
    $default_port = $uri->can("default_port") && defined $uri->default_port?
        $uri->default_port: undef;
    # The default port is not a part of the target
    undef $port
        if defined $port && defined $default_port && $default_port eq $port;
    
    # Construct the target URI
    $target = URI->new("");
    $target->scheme($scheme);
    if (defined $user) {
        $userinfo = uri_escape($user);
        $userinfo .= ":" . uri_escape($pass) if defined $pass;
        $target->userinfo($userinfo);
    }
    $target->host($host);
    $target->port($port) if defined $port;
    
    # Cache it
    $TARGETS{$uritext} = $target->canonical;
    return $TARGETS{$uritext};
}

# noreq_key: Obtain the key to check if we should keep requesting on this server
#   Takes the text of an URI (a key of %URIS).  The key is made of the
#   scheme, the IP address of the host and the port, so that different
#   names of a same server share one key in %NOREQ_HOSTS.
#   Returns the key, as a string.
sub noreq_key($) {
    my ($orig, $host, $packed, $key);
    $orig = $URIS{$_[0]};
    
    # Use the IP address in its dotted text form.  inet_aton() returns the
    # address as 4 packed bytes, which are not a host name, and returns
    # undef when the name cannot be resolved; keep the host name then.
    $host = $orig->host;
    undef $packed;
    $packed = inet_aton($host) if defined $host && $host ne "";
    $host = Socket::inet_ntoa($packed) if defined $packed;
    
    $key = URI->new("");
    $key->scheme($orig->scheme);
    $key->host($host);
    $key->port($orig->port) if $orig->can("port");
    
    return $key->canonical->as_string;
}

# rep_uri: Return the text to report an URI with
#   Takes an URI object or the text of an URI.  An URI that does not start
#   with "file://" is returned as its text.  A file URI is returned as a
#   local path instead, built with catdir() for this platform.
sub rep_uri($) {
    my $text = $_[0];
    
    # Accept URI objects as well as plain text
    if (ref($text) =~ /^URI\b/) {
        $text = $text->as_string;
    }
    
    if ($text =~ s/^file:\/\///) {
        # Drop the slash before something similar to a volume ID, for MSWin32
        $text =~ s/^\/([a-z]:)/$1/i;
        my @parts = split /\//, $text;
        return catdir(@parts);
    }
    
    return $text;
}

__END__

=head1 NAME

chklinks - A non-threaded perl link checker

=head1 SYNOPSIS

   chklinks [options] URL1 [URL2 [URL3 ...]]

=head1 DESCRIPTION

F<chklinks> is a perl link checker.  It helps you find broken links
on your website.

F<chklinks> differs from C<linkchecker> in that F<chklinks> is non-
threaded.  It does not open many simultaneous connections to do its
job.  It won't run out of resources and crash your system in a
moment.  This is certainly more desirable for most webmasters and
users.

F<chklinks> follows robots.txt rules.  If you disallow robots from
your website and experience problems, you need to allow F<chklinks>.
Add the following lines to your robots.txt file to allow F<chklinks>:

  User-agent: chklinks
  Disallow:

F<chklinks> uses L<LWP::RobotUA(3)|LWP::RobotUA/3> and supports the
following schemes: http, https, ftp, gopher, file.  You can also
specify a local file.  (To use https, you need to install
L<Crypt::SSLeay(3)|Crypt::SSLeay/3>.)

F<chklinks> supports cookies.

=head1 OPTIONS

=over

=item -1,--onelevel

Check the links on this page and stops.

=item -r,--recursive

Recursively check through this website.  This is the default.

=item -b,--below

Only check the links below this directory.  This is the default.

=item -p,--parent

Trace back to the parent directories.

=item -l,--local

Only check the links on this same host.

=item -s,--span

Check the links to other hosts (without recursion).  This is the
default.

=item -e,--exclude path

Exclude this path.  Check for their existence but not check the links
on them, just like they are on a foreign site.  Multiple C<--exclude>
are OK.

=item -i,--include path

Include this path.  An opposite of C<--exclude> that cancels its
effect.  The latter specified has a higher priority.

=item -d,--debug

Display debug messages.  Multiple C<--debug> to debug more.

=item -q,--quiet

Disable debug messages.  An opposite that cancels the effect of
C<--debug>.

=item -h,--help

Display the help message and exit.

=item -v,--version

Output version information and exit.

=item URL1, URL2, URL3

The URLs of the website to check for.

=back

=head1 BUGS

F<chklinks> does not support authentication yet.  W3C-LinkChecker
seems to support this.  As a workaround, you can use the
syntax http://user:pass@some.where.com/some/path for Basic
Authentication, but this does not work with Digest Authentication.
This practice is not encouraged, either.  Your password would be visible
to anyone on this system using ps, including hidden intruders.  Also,
what you type in your shell will be saved to your shell history file.

C<mailto:> URLs should be supported by checking the validity of its
DNS/MX record.

Local file checking has only been tested on Unix and MSWin32.  More
platforms should be tested, especially VMS and Mac.

F<chklinks> does not obey C<Crawl-delay:> in F<robots.txt> yet.
This is a problem in L<WWW::RobotRules(3)|WWW::RobotRules/3>, but
not F<chklinks> itself.

=head1 SUPPORT

F<chklinks> is hosted on SourceForge, CPAN and Tavern IMACAT's.  For
the latest information, see http://chklinks.sourceforge.net/ ,
http://sourceforge.net/projects/chklinks/ ,
http://search.cpan.org/dist/chklinks/ or
http://www.imacat.idv.tw/tech/chklinks.html .

F<chklinks> has a user discussion mailing list hosted at
SourceForge: chklinks-users@lists.sourceforge.net .  It is a mailman
mailing list.  For information on how to join or leave, see:
http://lists.sourceforge.net/lists/listinfo/chklinks-users .
Alternatively, you can send a mail to:
chklinks-users-request@lists.sourceforge.net with the subject C<help>
for a list of available commands.

=head1 SEE ALSO

L<LWP::UserAgent(3)|LWP::UserAgent/3>,
L<LWP::RobotUA(3)|LWP::RobotUA/3>, L<WWW::RobotRules(3)|WWW::RobotRules/3>,
L<URI(3)|URI/3>, L<HTML::LinkExtor(3)|HTML::LinkExtor/3>,
Bastian Kleineidam's L<linkchecker(1)|linkchecker/1>.
W3C-LinkChecker L<checklink(1)|checklink/1>.

=head1 AUTHOR

imacat <imacat@mail.imacat.idv.tw>.

=head1 COPYRIGHT

Copyright (c) 2003-2006 imacat.

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but I<WITHOUT ANY WARRANTY>; without even the implied warranty of
I<MERCHANTABILITY> or I<FITNESS FOR A PARTICULAR PURPOSE>.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

=cut
