#! /usr/bin/perl -w
# A non-threaded perl link checker

# Copyright (c) 2003-2006 imacat.
# 
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

# 版權所有 (c) 2003-2006 依瑪貓
# 
# 本程式為自由軟體，您可以依據自由軟體基金會（ Free Software Foundation
# ）所發佈的 GNU 通用公共授權條款（ GNU General Public License ）第二版
# 或其後版本的規定，修改和重新發佈本程式，或者自由選擇使用的版本。
# 
# 發佈本程式的目的是希望它有用，但沒有任何擔保；甚至沒有適售性或特定
# 目的適用性的暗示擔保。詳細情形請參閱 GNU 通用公共授權條款。
# 
# 您應已經和本程式一起收到一份 GNU 通用公共授權條款的副本。如果還沒有，
# 請寫信至： Free Software Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA.

# First written 2003-04-12

use 5.006;
use strict;
use Config qw();
use Cwd qw(getcwd);
use File::Spec qw();
use Getopt::Long qw(GetOptions);
use HTML::LinkExtor qw();
use HTTP::Request qw();
use HTTP::Cookies qw();
use LWP::RobotUA qw();
use Socket qw(inet_aton);
use Sys::Hostname qw(hostname);
use URI qw();
use URI::Escape qw(uri_escape uri_unescape);
# Prototype declaration
sub main();
sub parse_args();
sub check_links($);
sub fetch_redir($);
sub fetch_noredir($);
sub get_links($$);
sub proc_link_val($$);
sub is_excluded($);
sub get_ua($);
sub get_target($);
sub noreq_key($);
sub rep_uri($);

use vars qw($THIS_FILE $VERSION);
# The base name of this script, for use in messages
$0 =~ /[^\/]*$/;
$THIS_FILE = $&;
$VERSION = "3.02";

# Global working state
use vars qw(%UA %TARGETS %URIS %REFS $COOKIE_JAR);
use vars qw($LOCAL_TARGET @LOCALS %LFOUND $STARTDIR @REMOTES %RFOUND);
use vars qw($TOTAL $SUCCESS);
use vars qw(%NOREQ_HOSTS);
%UA = qw();             # user agents, keyed by request target
%TARGETS = qw();        # cached request targets, keyed by URI text
%URIS = qw();           # URI objects, keyed by their canonical text
%REFS = qw();           # referer chains, keyed by URI text
%NOREQ_HOSTS = qw();    # hosts that previously returned 503
$COOKIE_JAR = new HTTP::Cookies;

# Command-line options and their defaults
use vars qw($RECURSIVE $PARENT $NOREMOTE @EXCLUDES $VERBOSE);
$RECURSIVE = 1;
$PARENT = 0;
$NOREMOTE = 0;
@EXCLUDES = qw();
$VERBOSE = 1;

use vars qw($UASTR $VERSTR $SHORTHELP $HELPMSG);
$UASTR = "$THIS_FILE-robot/$VERSION (using " . (LWP::RobotUA->can("_agent")?
    LWP::RobotUA->_agent: "libwww-perl/" . $LWP::VERSION) . ")";
$SHORTHELP = "Try `$THIS_FILE --help' for more information.";
# FIX: use $THIS_FILE here instead of $& -- relying on $& this far from the
# regex match that set it is fragile; any intervening match would clobber it.
$VERSTR = "$THIS_FILE v$VERSION by imacat <imacat\@mail.imacat.idv.tw>";
$HELPMSG = << "EOT";
Usage: $THIS_FILE [options] url1 [url2 [url3 ...]]
Check links on the specific website.

  -1,--onelevel      Check the links on this page and stop.
  -r,--recursive     Recursively check through this site. (default)
  -b,--below         Only check the links below this directory. (default)
  -p,--parent        Trace back to the parent directories.
  -l,--local         Only check the links on this same host.
  -s,--span          Check the links to other hosts (without recursion).
                     (default)
  -e,--exclude path  Exclude this path.  Check for their existence but not
                     check the links on them, just like they are on a foreign
                     site.  Multiple --exclude are OK.
  -i,--include path  Include this path.  An opposite of --exclude that cancels
                     its effect.  The latter specified has a higher priority.
  -d,--debug         Display debug messages.  Multiple --debug to debug more.
  -q,--quiet         Disable debug messages.  An opposite that cancels the
                     effect of --debug.
  -h,--help          Display this help.
  -v,--version       Display version number.
  url1, url2, url3   The URLs of the websites to check against.

EOT

# The URI schemes this checker knows how to request
use vars qw(%ALLOWED_SCHEMES);
%ALLOWED_SCHEMES = map { $_ => 1 } qw(http https ftp file gopher);

main;
exit 0;


# main: Main program -- parse the arguments, then check each starting URI
sub main() {
    local ($_, %_);
    
    my @start_uris = parse_args;
    foreach my $start (@start_uris) {
        check_links $start;
    }
    
    # Report the total running time unless quiet
    print STDERR "Done.  " . (time - $^T) . " seconds elapsed.\n"
        if $VERBOSE > 0;
    return;
}

# parse_args: Parse the command-line arguments
# Fills in the option globals and %URIS as a side effect, and returns the
# list of canonical starting URI strings.  Dies when no usable URI remains.
sub parse_args() {
    local $_;
    my @starts;
    
    # Get the arguments; promote any Getopt::Long warning to a fatal error
    eval {
        local $SIG{__WARN__} = sub { die $_[0]; };
        Getopt::Long::Configure(qw(no_auto_abbrev bundling));
        GetOptions( "onelevel|1"=>sub { $RECURSIVE = 0; },
                    "recursive|r"=>sub { $RECURSIVE = 1; },
                    "below|b"=>sub { $PARENT = 0; },
                    "parent|p"=>sub { $PARENT = 1; },
                    "local|l"=>sub { $NOREMOTE = 1; },
                    "span|s"=>sub { $NOREMOTE = 0; },
                    "exclude|e=s"=>sub { unshift @EXCLUDES, "-" . $_[1]; },
                    "include|i=s"=>sub { unshift @EXCLUDES, "+" . $_[1]; },
                    "debug|d"=>sub { $VERBOSE++; },
                    "quiet|q"=>sub { $VERBOSE-- if $VERBOSE > 0; },
                    "help|h"=>sub { print $HELPMSG; exit 0; },
                    "version|v"=>sub { print "$VERSTR\n"; exit 0; });
    };
    die "$THIS_FILE: ERROR: $@" if $@ ne "";
    
    # Show progress immediately when debugging -- disable output buffering
    $| = 1 if $VERBOSE > 2;
    
    @starts = qw();
    foreach my $uritext (@ARGV) {
        my $uri;
        $uri = new URI($uritext);
        # Single letter scheme is assumed to be a volume label
        # This is for MSWin32
        $uri->scheme(undef) if defined $uri->scheme && $uri->scheme =~ /^[a-z]$/i;
        # URI without a scheme is assumed to be a local file
        if (!defined $uri->scheme) {
            # Systems with symbolic links -- rel2abs() is not trustworthy, use URI->abs().
            if ($Config::Config{"d_symlink"}) {
                my $base;
                $base = getcwd;
                $base =~ s/\/$//;
                $base =~ s/^\///;
                $base = new URI("file://localhost/$base/");
                $uri = new URI(join("/", File::Spec->splitdir($uritext)));
                $uri = $uri->abs($base);
            # Systems without symbolic links -- we assume rel2abs() is trustworthy.
            # Like MSWin32
            } else {
                $uri = new URI("file://localhost/" . join("/", File::Spec->splitdir(File::Spec->rel2abs($uritext))));
                $uri->scheme("file");
                $uri->host("localhost");
            }
        }
        if (!exists $ALLOWED_SCHEMES{$uri->scheme}) {
            warn "$THIS_FILE: WARNING: $uritext: Skipping URI with unsupported scheme " . $uri->scheme . "\n"
                if $VERBOSE > 0;
            next;
        }
        # file scheme is OK without a host -- it is localhost
        if ($uri->scheme eq "file") {
            $uri->host("localhost") if !defined $uri->host || $uri->host eq "";
            if ($uri->host ne "localhost") {
                # FIX: warn according to the verbosity level, consistent with
                # every other warning here.  The condition used to be
                # `if $uri->host`, which is always true on this branch.
                warn "$THIS_FILE: WARNING: $uritext: Skipping file URI that is not on localhost\n"
                    if $VERBOSE > 0;
                next;
            }
        # schemes other than file require a host
        } else {
            if (!defined $uri->host || $uri->host eq "") {
                warn "$THIS_FILE: WARNING: $uritext: Skipping URI without a host\n"
                    if $VERBOSE > 0;
                next;
            }
        }
        if ($uri->path eq "") {
            warn "$THIS_FILE: WARNING: $uritext: URI not canonical\n"
                if $VERBOSE > 0;
            $uri->path("/");
        }
        $uri->fragment(undef);
        # Use its canonical form
        $uri = $uri->canonical;
        if (exists $URIS{$uri->as_string}) {
            warn "$THIS_FILE: WARNING: Skipping duplicated URI $uritext\n"
                if $VERBOSE > 0;
            next;
        }
        $URIS{$uri->as_string} = $uri;
        push @starts, $uri->as_string;
    }
    
    die "$THIS_FILE: ERROR: Please specify the URL to check\n"
        if @starts == 0;
    
    return @starts;
}

# check_links: Check all links reachable from a starting URL
# Crawls the local queue (@LOCALS) recursively and the remote queue
# (@REMOTES) one level deep, then prints the per-site summary.
sub check_links($) {
    local ($_, %_);
    my ($start, $page, $doc);
    $start = $_[0];
    
    # Initialize the per-site working state.
    # FIX: this must hold the *value* of $start.  The original read
    # `qw($start)`, which queues the literal string '$start' -- qw() does
    # not interpolate.  (It only worked by accident, because fetch_noredir
    # shifts the queue before the element is ever examined.)
    @LOCALS = ($start);
    %LFOUND = ($start => 1);
    $LOCAL_TARGET = get_target $start;
    @REMOTES = qw();
    ($TOTAL, $SUCCESS) = (0, 0);
    # The starting directory, for the --below/--parent check
    $STARTDIR = $URIS{$start}->path;
    $STARTDIR =~ s/[^\/]+$//;
    
    $page = $start;
    ($page, $doc) = fetch_redir $page;
    return if !defined $doc;
    get_links $doc, $page;
    while (@LOCALS > 0 || @REMOTES > 0) {
        # Always process locals first
        if (@LOCALS > 0) {
            @LOCALS = sort @LOCALS;
            $page = $LOCALS[0];
            ($page, $doc) = fetch_redir $page;
            # Skip the failed request
            next if !defined $doc;
            # Skip if non-recursive
            next if !$RECURSIVE;
            # Skip if it is excluded
            next if is_excluded $page;
            get_links $doc, $page;
        
        # Then the remote
        } else {
            @REMOTES = sort @REMOTES;
            $page = $REMOTES[0];
            fetch_redir $page;
            # We are not parsing remote documents
        }
    }
    
    print rep_uri($start) . ": $TOTAL checked, $SUCCESS success\n"
        if $VERBOSE > 0;
    return;
}

# fetch_redir: Fetch a document, following any chain of redirections
# Returns (final URI text, document body), where the body is undef when
# the request failed, was not parseable HTML, or the redirection target
# was already scheduled.
sub fetch_redir($) {
    local ($_, %_);
    my $current = $_[0];
    
    while (1) {
        my ($body, $location) = fetch_noredir $current;
        
        # Not redirected: report the URI we ended up at
        return ($current, $body) unless defined $location;
        
        # The redirection target is already scheduled elsewhere
        return ($current, undef)
            if exists $LFOUND{$location} || exists $RFOUND{$location};
        
        # Record the referer chain leading to the redirection target
        my $chain = [rep_uri $current];
        push @$chain, @{$REFS{$current}[0]} if exists $REFS{$current};
        $REFS{$location} = [] unless exists $REFS{$location};
        push @{$REFS{$location}}, $chain;
        
        # Follow the redirection
        $current = $location;
    }
}

# fetch_noredir: Fetch a document, without following redirection
# Takes the URI text to fetch; removes it from its pending queue, requests
# it, reports failures, and returns a two-element list:
#   (document body, undef)   -- 200 with an (X)HTML content type
#   (undef, redirect target) -- 3XX with a Location header
#   (undef, undef)           -- anything else
sub fetch_noredir($) {
    # Localize $_/%_ like the sibling subs, since we scribble on $_ below
    local ($_, %_);
    my ($uritext, $uri, $ua, $r, $urirep);
    $uritext = $_[0];
    
    # Remove it from the pending list
    if (get_target($uritext) eq $LOCAL_TARGET) {
        shift @LOCALS;
    } else {
        shift @REMOTES;
    }
    # Set the target URI to report, with its referer chain when known
    $urirep = rep_uri $uritext;
    $urirep .= " (Referer: " . join(", ", map join(" -> ", reverse @$_), @{$REFS{$uritext}}) . ")"
        if exists $REFS{$uritext};
    
    # Add the counter
    $TOTAL++;
    
    # Skip if this host has returned 503 Service Unavailable previously
    if (exists $NOREQ_HOSTS{noreq_key $uritext}) {
        $_ = $urirep;
        $_ .= " skipped for service unavailable in previous request to "
                . $NOREQ_HOSTS{noreq_key $uritext}
             if $VERBOSE > 0;
        print "$_\n";
        return (undef, undef);
    }
    
    $uri = $URIS{$uritext};
    
    print STDERR "Requesting $urirep... " if $VERBOSE > 2;
    $ua = get_ua $uritext;
    # HTTP or HTTPS -- use HTTP::Request for more sophisticated configuration
    if ($uri->scheme =~ /^https?$/) {
        my ($request, $w, $sigwarn);
        $request = new HTTP::Request(GET => $uri->as_string);
        # Set the referer
        $request->header("Referer", ${${$REFS{$uritext}}[0]}[0])
            if exists $REFS{$uritext};
        # Gracefully deal with robots.txt warnings
        $sigwarn = $SIG{"__WARN__"};
        undef $w;
        local $SIG{"__WARN__"} = sub { $w = $_[0]; };
        $r = $ua->simple_request($request);
        $SIG{"__WARN__"} = $sigwarn;
        if (defined $w) {
            # Malformed robots.txt warnings
            if ($w =~ /^RobotRules <[^\n]+>: Unexpected line: /) {
                # Skip malformed robots.txt warnings on remote sites
                # We are assumed to have no control on the remote sites
                if (get_target($uritext) ne $LOCAL_TARGET) {
                
                # Skip warnings about Crawl-delay:
                # As of writing this, WWW::RobotRules 1.33 in libwww-perl 5.805
                #   does not recognize the Crawl-delay: rule yet.
                } elsif ($w =~ /^RobotRules <[^\n]+>: Unexpected line: Crawl-delay:/i) {
                
                # Warn on malformed lines in our local robots.txt
                } else {
                    warn $w;
                }
            
            # Other warnings
            } else {
                warn $w;
            }
        }
    
    } else {
        $r = $ua->get($uri->as_string);
    }
    print STDERR "OK\n" if $VERBOSE > 2;
    
    # 204 No Content
    if ($r->code == 204) {
        $SUCCESS++;
        return (undef, undef);
    
    # 200 OK
    } elsif ($r->code == 200) {
        $SUCCESS++;
        # Only return the body if text/html
        # Fix the content type of XHTML in file URI
        if ($uri->scheme eq "file" && $uri->as_string =~ /\.xhtml$/) {
            $_ = "application/xhtml+xml";
        # Parse the content type -- keep the last one when multiple
        } else {
            # Guard against a missing Content-Type header, which would
            # otherwise raise uninitialized-value warnings below
            my $ctype = $r->header("Content-Type");
            $ctype = "" if !defined $ctype;
            @_ = split /,\s*/, $ctype;
            $_ = @_ > 0? $_[$#_]: "";
        }
        return ($r->content, undef)
            if /^text\/html\b/ || /^application\/xhtml\+xml\b/;
        return (undef, undef);
    
    # 3XX Redirect
    } elsif (grep($r->code == $_, (301, 302, 303, 307)) > 0) {
        my ($is_full_uri, $ruri, $rtarget);
        # Redirect without location is an error
        if (!defined $r->header("Location")) {
            print STDERR "$urirep " . $r->status_line . ": Redirect without location\n";
            return (undef, undef);
        }
        
        $ruri = new URI($r->header("Location"));
        
        # Deal with non-canonical location
        $is_full_uri = 1;
        # Non-absolute location should be warned
        if (!defined $ruri->scheme) {
            $is_full_uri = 0;
            $ruri->scheme($uri->scheme);
        }
        # Skip redirections to schemes that are not supported.
        # FIX: test the scheme of the redirection target $ruri -- the
        # original tested $uri, the URI we just requested, whose scheme is
        # always supported, so e.g. a redirect to mailto: slipped through.
        return (undef, undef) if !exists $ALLOWED_SCHEMES{$ruri->scheme};
        if (!defined $ruri->host || $ruri->host eq "") {
            $is_full_uri = 0;
            $ruri->host($uri->host);
        }
        if ($ruri->path eq "") {
            $is_full_uri = 0;
            $ruri->path("/");
        }
        warn "$THIS_FILE: WARNING: $uritext: " . $r->status_line . ": "
                . "Location not canonical: " . $r->header("Location") . "\n"
            if !$is_full_uri && $VERBOSE > 1;
        $ruri->fragment(undef);
        # Use its canonical form
        $ruri = $ruri->canonical;
        
        # Save it
        $URIS{$ruri->as_string} = $ruri if !exists $URIS{$ruri->as_string};
        
        # 301 Moved Permanently should be treated as an error, because we need
        #   to change the reference from now on.
        if ($r->code == 301) {
            $_ = $urirep;
            $_ .= " " . $r->status_line . ": " . $ruri->as_string
                if $VERBOSE > 0;
            print "$_\n";
            return (undef, $ruri->as_string);
        }
        
        $SUCCESS++;
        # Warn when not quiet
        print "$urirep " . $r->status_line . ": " . $ruri->as_string . "\n"
                if $VERBOSE > 1;
        return (undef, $ruri->as_string);
    
    # Bad response that we cannot go on
    } elsif (grep($r->code == $_, (400, 403, 404, 410, 500)) > 0) {
        $_ = $urirep;
        $_ .= " " . $r->status_line if $VERBOSE > 0;
        print "$_\n";
        return (undef, undef);
    
    # Bad response that we should not keep requesting on a same server
    } elsif ($r->code == 503) {
        $_ = $urirep;
        $_ .= " " . $r->status_line if $VERBOSE > 0;
        print "$_\n";
        $NOREQ_HOSTS{noreq_key $uritext} = $uritext;
        return (undef, undef);
    
    # Unable to process
    } else {
        # Unable to process
        print "$urirep " . $r->status_line . ": Unable to process this response\n";
        return (undef, undef);
    }
    
    return;
}

# get_links: Parse an HTML document and schedule every link found in it
# Takes the document text and the URI text of the page it came from.
sub get_links($$) {
    local ($_, %_);
    my ($html, $base) = @_;
    
    # Extract every link attribute value from the document
    my $extractor = new HTML::LinkExtor;
    $extractor->parse($html);
    my @values;
    foreach my $link ($extractor->links) {
        # Each link is [tag, attr1 => value1, attr2 => value2, ...]
        my ($tag, %attrs) = @$link;
        push @values, values %attrs;
    }
    
    # Schedule each value relative to the page it appeared on
    proc_link_val($_, $base) foreach @values;
    return;
}

# proc_link_val: Process a single link value found on a page
# Takes the raw link text and the URI text of the page it appeared on;
# canonicalizes the link, records its referer, and queues it on @LOCALS
# or @REMOTES as appropriate.
sub proc_link_val($$) {
    local ($_, %_);
    my ($base, $uri);
    ($_, $base) = @_;
    
    $uri = new URI($_);
    
    # Make relative links absolute against the page they appeared on
    $uri = $uri->abs($URIS{$base}) if !defined $uri->scheme;
    # Skip schemes that are not supported
    # (A dead `$_ = rep_uri $URIS{$base};` was removed here -- the value
    # was never read.)
    if (!exists $ALLOWED_SCHEMES{$uri->scheme}) {
        print STDERR rep_uri($uri) . " (" . rep_uri($URIS{$base}) . "): "
                . "Skipping unsupported scheme \"" . $uri->scheme . "\"\n"
            if $VERBOSE > 3;
        return;
    }
    # Skip local file URI referred from remote
    if ($uri->scheme eq "file" && $URIS{$base}->scheme ne "file") {
        print STDERR rep_uri($uri) . " (" . rep_uri($URIS{$base}) . "): "
                . "Skipping file URI referred from remote\n"
            if $VERBOSE > 3;
        return;
    }
    # Skip file URI that is not local
    if ($uri->scheme eq "file" && $uri->host ne "localhost" && $uri->host ne "") {
        print STDERR rep_uri($uri) . " (" . rep_uri($URIS{$base}) . "): "
                . "Skipping file URI that is not on localhost\n"
            if $VERBOSE > 3;
        return;
    }
    # Fill in the missing parts and canonicalize
    $uri->host($URIS{$base}->host) if !defined $uri->host || $uri->host eq "";
    $uri->path("/") if $uri->path eq "";
    $uri->fragment(undef);
    # Use its canonical form
    $uri = $uri->canonical;
    
    # Save it, and record the page that referred to it
    $URIS{$uri->as_string} = $uri if !exists $URIS{$uri->as_string};
    $REFS{$uri->as_string} = [] if !exists $REFS{$uri->as_string};
    push @{$REFS{$uri->as_string}}, [rep_uri $base];
    
    # Local
    if (get_target($uri->as_string) eq $LOCAL_TARGET) {
        # Skip found URLs
        return if exists $LFOUND{$uri->as_string};
        # Skip parent directories unless --parent
        # NOTE(review): $STARTDIR is interpolated into the pattern
        # unescaped -- regex metacharacters in the path act as a pattern.
        return if !$PARENT && $uri->path !~ /^$STARTDIR/;
        push @LOCALS, $uri->as_string;
        $LFOUND{$uri->as_string} = 1;
    
    # Remote
    } else {
        # No remote
        return if $NOREMOTE;
        # Skip found URLs
        return if exists $RFOUND{$uri->as_string};
        push @REMOTES, $uri->as_string;
        $RFOUND{$uri->as_string} = 1;
    }
    
    return;
}

# is_excluded: Whether this page matches an --exclude rule
# @EXCLUDES entries are "-path" (exclude) or "+path" (include); the most
# recently specified option sits first, so the first match wins.  Returns
# true when the page's path is excluded.
sub is_excluded($) {
    local ($_, %_);
    # (Removed the declared-but-unused $excpath variable.)
    my ($page, $path, $match);
    $page = $_[0];
    
    foreach (@EXCLUDES) {
        $path = substr $_, 1;
        # Directory match: a trailing slash matches the whole subtree
        # NOTE(review): $path is interpolated unescaped, so regex
        # metacharacters in an exclude path act as a pattern -- confirm
        # whether that is intended before quoting with \Q...\E.
        if (/\/$/) {
            $match = ($URIS{$page}->path =~ /^$path/);
        # File or directory match
        } else {
            $match = ($URIS{$page}->path =~ /^$path(?![^\/?])/);
        }
        # A leading minus means "exclude"
        return /^-/ if $match;
    }
    
    return 0;
}

# get_ua: Return the user agent for a request, one per request target
# User agents are cached in %UA so a same server reuses its agent (and
# its keep-alive connection, robots.txt rules and cookies).
sub get_ua($) {
    local ($_, %_);
    my $target = get_target $_[0];
    
    # Reuse the cached user agent for this target when we have one
    return $UA{$target} if exists $UA{$target};
    
    # Create and configure a fresh robot user agent
    my $ua = new LWP::RobotUA(
        "agent"         => $UASTR,
        "from"          => getlogin . "@" . hostname,
        "keep_alive"    => 1,
        "cookie_jar"    => $COOKIE_JAR,
    );
    $ua->delay(0);
    #$LWP::Debug::current_level{"debug"} = 1;
    $ua->default_header("Accept", "application/xhtml+xml,text/html;q=0.9,*/*;q=0.1");
    
    # Cache it for the next request to this target
    $UA{$target} = $ua;
    return $ua;
}

# get_target: Return the target of the request
# The target is the canonical scheme://[user[:pass]@]host[:port] part of
# the URI -- everything that identifies which server (and credentials) a
# request goes to.  Results are cached in %TARGETS.
sub get_target($) {
    local ($_, %_);
    my ($uritext, $uri, $scheme, $user, $pass, $host, $port);
    $uritext = $_[0];
    
    # Debugging aid: report where an undefined URI text came from
    die sprintf("%s %s", (caller 2)[1,2]) if !defined $uritext;
    
    # Target cached
    return $TARGETS{$uritext} if exists $TARGETS{$uritext};
    
    # Take the request-relevant parts of the URI apart
    $uri = $URIS{$uritext};
    $scheme = $uri->scheme;
    if (!$uri->can("userinfo") || !defined($_ = $uri->userinfo)) {
        ($user, $pass) = (undef, undef);
    # Userinfo without a colon is a user name alone.
    # FIX: this used to read `$_ =~ !/^...$/`, which negates the match
    # result ("" or 1) and then matches $_ against *that* as a pattern
    # (the empty pattern reuses the last successful match).  The intent
    # is clearly the non-binding operator `!~`.
    } elsif ($_ !~ /^([^:]+):(.+)$/) {
        ($user, $pass) = (uri_unescape($_), undef);
    } else {
        # $1/$2 are still set when the `!~` above is false (i.e. matched)
        ($user, $pass) = (uri_unescape($1), uri_unescape($2));
    }
    $host = $uri->host;
    # Drop the port when it is just the scheme's default port
    undef $port;
    $port = $uri->port if $uri->can("port");
    $_ = $uri->can("default_port") && defined $uri->default_port?
        $uri->default_port: undef;
    undef $port if defined $_ && $_ eq $port;
    
    # Construct the target URI
    $uri = new URI("");
    $uri->scheme($scheme);
    undef $_;
    if (defined $user) {
        $_ = uri_escape($user);
        $_ .= ":" . uri_escape($pass) if defined $pass;
        $uri->userinfo($_);
    }
    $uri->host($host);
    $uri->port($port) if defined $port;
    
    # Cache it
    $TARGETS{$uritext} = $uri->canonical;
    return $TARGETS{$uritext};
}

# noreq_key: Obtain the key to check if we should keep requesting on this server
# Builds a scheme/host/port key for %NOREQ_HOSTS so that requests to the
# same server can be skipped after a 503 Service Unavailable.
sub noreq_key($) {
    local ($_, %_);
    my $uri;
    $_ = $_[0];
    
    $uri = new URI("");
    $uri->scheme($URIS{$_}->scheme);
    # The host part is the *packed* address from inet_aton (4 raw bytes),
    # so host name aliases resolving to one address share a key.
    # NOTE(review): inet_aton returns undef when the name does not resolve
    # -- presumably such hosts never reach this point; confirm.
    $uri->host(inet_aton($URIS{$_}->host));
    $uri->port($URIS{$_}->port) if $URIS{$_}->can("port");
    
    # The canonical string is used only as an internal hash key
    return $uri->canonical->as_string;
}

# rep_uri: Report an URI -- a wrapper over URI->as_string that turns
# file:// URIs into plain local paths for display
sub rep_uri($) {
    local ($_, %_);
    my $text = $_[0];
    # Stringify URI objects
    $text = $text->as_string if ref($text) =~ /^URI\b/;
    if ($text =~ s/^file:\/\///) {
        # Restore something similar to a volume ID, for MSWin32
        $text =~ s/^\/([a-z]:)/$1/i;
        return File::Spec->catdir(split(/\//, $text));
    }
    # Non-file URIs are reported verbatim
    return $text;
}

__END__

=head1 NAME

chklinks - A non-threaded Perl link checker

=head1 SYNOPSIS

   chklinks [options] URL1 [URL2 [URL3 ...]]

=head1 DESCRIPTION

F<chklinks> is a Perl link checker.  It helps finding broken links
on your website.

F<chklinks> differs from C<linkchecker> in that F<chklinks> is non-
threaded.  It does not raise many simultaneous connections for its
job.  It will not run out of resources and crash your system in a
moment.  This is certainly more desirable for most webmasters and
users.

F<chklinks> follows F<robots.txt> rules.  If you disallow robots
from your website and experience problems, you need to allow
F<chklinks>.  Add the following lines to your F<robots.txt> file to
allow F<chklinks>:

  User-agent: chklinks
  Disallow:

F<chklinks> uses L<LWP::RobotUA(3)|LWP::RobotUA/3> and supports the
following schemes: http, https, ftp, gopher and file.  You can also
specify a local file.  (To use https, you need to install
L<Crypt::SSLeay(3)|Crypt::SSLeay/3>.  This is a requirement of
L<LWP::RobotUA(3)|LWP::RobotUA/3>.)

F<chklinks> supports cookies.

=head1 OPTIONS

=over

=item -1,--onelevel

Check the links on this page and stop.

=item -r,--recursive

Recursively check through this website.  This is the default.

=item -b,--below

Only check the links below this directory.  This is the default.

=item -p,--parent

Trace back to the parent directories.

=item -l,--local

Only check the links on this same host.

=item -s,--span

Check the links to other hosts (without recursion).  This is the
default.

=item -e,--exclude path

Exclude this path.  Check for their existence but not check the links
on them, just like they are on a foreign site.  Multiple C<--exclude>
are OK.

=item -i,--include path

Include this path.  An opposite of C<--exclude> that cancels its
effect.  The latter specified has a higher priority.

=item -d,--debug

Display debug messages.  Multiple C<--debug> to debug more.

=item -q,--quiet

Disable debug messages.  An opposite that cancels the effect of
C<--debug>.

=item -h,--help

Display the help message and exit.

=item -v,--version

Output version information and exit.

=item URL1, URL2, URL3

The URLs of the websites to check against.

=back

=head1 BUGS

F<chklinks> does not support authentication yet.  W3C-LinkChecker
has support for this.  As a workaround, you can use the syntax
http://user:pass@some.where.com/some/path for Basic Authentication,
but this does not work with Digest Authentication.  This practice is
not encouraged.  Your password would be visible to anyone on this
system using F<ps>, including hidden intruders.  Also, what you type
in your shell will be saved to your shell history file.

C<mailto:> URLs should be supported by checking the validity of their
DNS/MX records.  Bastian Kleineidam's L<linkchecker(1)|linkchecker/1>
has support for this.

Local file checking has only been tested on Unix and MSWin32.  More
platforms should be tested, especially VMS and Mac.

F<chklinks> does not obey C<Crawl-delay:> in F<robots.txt> yet.
This is a problem in L<WWW::RobotRules(3)|WWW::RobotRules/3>, but
not F<chklinks> itself.

=head1 SUPPORT

F<chklinks> is hosted on SourceForge, CPAN and Tavern IMACAT's.  For
the latest information, see http://chklinks.sourceforge.net/ ,
http://sourceforge.net/projects/chklinks/ ,
http://search.cpan.org/dist/chklinks/ or
http://www.imacat.idv.tw/tech/chklinks.html .

F<chklinks> has a user discussion mailing list hosted at
SourceForge: chklinks-users@lists.sourceforge.net .  It is a mailman
mailing list.  For information on how to join or leave, see:
http://lists.sourceforge.net/lists/listinfo/chklinks-users .
Alternatively, you can send a mail to:
chklinks-users-request@lists.sourceforge.net with the subject C<help>
for a list of available commands.

=head1 SEE ALSO

L<LWP::UserAgent(3)|LWP::UserAgent/3>,
L<LWP::RobotUA(3)|LWP::RobotUA/3>, L<WWW::RobotRules(3)|WWW::RobotRules/3>,
L<URI(3)|URI/3>, L<HTML::LinkExtor(3)|HTML::LinkExtor/3>,
Bastian Kleineidam's L<linkchecker(1)|linkchecker/1>,
W3C-LinkChecker L<checklink(1)|checklink/1>.

=head1 AUTHOR

imacat <imacat@mail.imacat.idv.tw>.

=head1 COPYRIGHT

Copyright (c) 2003-2006 imacat.

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but I<WITHOUT ANY WARRANTY>; without even the implied warranty of
I<MERCHANTABILITY> or I<FITNESS FOR A PARTICULAR PURPOSE>.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

=cut
