=head1 NAME

HTTP::Link::Parser - parse HTTP Link headers

=head1 SYNOPSIS

  use HTTP::Link::Parser ':standard';
  use LWP::UserAgent;
  
  my $ua = LWP::UserAgent->new;
  my $response = $ua->get("http://example.com/foo");
  
  # Parse link headers into an RDF::Trine::Model.
  my $model = parse_links_into_model($response);

  # Find data about <http://example.com/foo>.
  my $iterator = $model->get_statements(
    RDF::Trine::Node::Resource->new('http://example.com/foo'),
    undef,
    undef);

  while (my $statement = $iterator->next)
  {
     # Skip data where the value is not a resource (i.e. link)
     next unless $statement->object->is_resource;

     printf("Link to <%s> with rel=\"%s\".\n",
        $statement->object->uri,
        $statement->predicate->uri);
  }

=cut

package HTTP::Link::Parser;

use 5.006;
use strict;
# NOTE(review): warnings are disabled for the remainder of this file, so
# operations on undefined values further down stay silent.
no warnings;

# Function-based interface: symbols are exported through Exporter.
require Exporter;
our @ISA = qw(Exporter);
# ':all' names every exportable function; ':standard' names only the two
# high-level parsers.
our %EXPORT_TAGS = (
	'all'      => [ qw(parse_links_into_model parse_links_to_rdfjson parse_links_to_list parse_single_link relationship_uri) ],
	'standard' => [ qw(parse_links_into_model parse_links_to_rdfjson) ]
	);
# Everything in ':all' may be imported on request ...
our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
# ... while the ':standard' functions are exported by default.
our @EXPORT    = ( @{ $EXPORT_TAGS{'standard'} } );

=head1 VERSION

0.05

=cut

our $VERSION = '0.05';

use Encode qw(decode encode_utf8);
use RDF::Trine 0.112;
use URI;
use URI::Escape;

=head1 DESCRIPTION

HTTP::Link::Parser parses HTTP "Link" headers found in an
HTTP::Response object. Headers should conform to the format
described in the forthcoming IETF specification.

=head2 Functions

To export all functions:

  use HTTP::Link::Parser ':all';

=over 4

=item C<< $model = parse_links_into_model($response, [$existing_model]) >>

Takes an L<HTTP::Response> object (or in fact, any L<HTTP::Message> object)
and returns an L<RDF::Trine::Model> containing link data extracted from the
response. Dublin Core is used to encode 'hreflang', 'title' and 'type' link
parameters.

C<$existing_model> is an RDF::Trine::Model to add data to. If omitted, a
new, empty model is created.

=cut

# Add the link data found in $response to an RDF::Trine::Model and return
# that model. The optional second argument is an existing model to extend;
# when it is missing (or false) a model backed by a temporary DBI store is
# created first.
sub parse_links_into_model
{
	my ($response, $model) = @_;
	
	unless ($model)
	{
		$model = RDF::Trine::Model->new( RDF::Trine::Store::DBI->temporary_store );
	}
	
	# The RDF/JSON-style hashref can be loaded into the model directly.
	my $data = parse_links_to_rdfjson($response);
	$model->add_hashref($data);
	
	return $model;
}

=item C<< $data = parse_links_to_rdfjson($response) >>

C<$data> is a hashref with a structure inspired by the RDF/JSON
specification. This can be thought of as a shortcut for:

  use RDF::Trine 0.112;
  $model = parse_links_into_model($response);
  $data  = $model->as_hashref;

But it's faster as no intermediate model is built.

=cut

# Build an RDF/JSON-style hashref ({ subject => { predicate => [ objects ] } })
# from the Link headers of $response.
#
# For each link returned by parse_links_to_list():
#   * every 'rel' yields  <subject> <rel> <target>, where the subject is the
#     link's 'anchor' if one was given and the response base URI otherwise;
#   * every 'rev' yields  <target> <rev> <subject>;
#   * 'title', 'title*', 'hreflang' and 'type' yield Dublin Core terms
#     statements about the target.
#
# Returns the hashref, which is empty when no usable links were found.
sub parse_links_to_rdfjson
{
	my $response = shift;
	my $base     = URI->new($response->base);
	my $links    = parse_links_to_list($response);
	my $rv       = {};
	
	my $dc = 'http://purl.org/dc/terms/';
	
	# One half ("type" or "subtype") of a media type. Compiled once, outside
	# the loop.
	my $token = qr/[A-Z0-9\!\#\$\&\.\+\-\^\_]{1,127}/i;
	
	# Append one object description to $rv->{subject}{predicate}. Subjects
	# may be URI objects; they are stringified when used as hash keys.
	my $add = sub
	{
		my ($s, $p, $o) = @_;
		push @{ $rv->{ $s }->{ $p } }, $o;
	};
	
	foreach my $link (@$links)
	{
		my $object = $link->{'URI'};
		
		# parse_single_link() returns an empty hashref for a header it could
		# not parse; there is nothing to say about such a link.
		next unless defined $object;
		
		my $subject = defined $link->{'anchor'} ? $link->{'anchor'} : $base;
		
		foreach my $r (@{ $link->{'rel'} || [] })
		{
			$add->($subject, relationship_uri($r),
				{
					'value'    => "$object",
					'type'     => 'uri',
				});
		}
		
		# A reverse relationship swaps the roles of subject and target.
		foreach my $r (@{ $link->{'rev'} || [] })
		{
			$add->($object, relationship_uri($r),
				{
					'value'    => "$subject",
					'type'     => 'uri',
				});
		}
		
		if (defined $link->{'title'})
		{
			$add->($object, $dc.'title',
				{
					'value'    => $link->{'title'},
					'type'     => 'literal',
				});
		}
		
		# Each 'title*' entry is a PlainLiteral object carrying its own
		# language tag.
		foreach my $t (@{ $link->{'title*'} || [] })
		{
			$add->($object, $dc.'title',
				{
					'value'    => encode_utf8("$t"),
					'type'     => 'literal',
					'lang'     => $t->lang,
				});
		}
		
		foreach my $lang (@{ $link->{'hreflang'} || [] })
		{
			$add->($object, $dc.'language',
				{
					'value'    => 'http://www.lingvoj.org/lingvo/' . uri_escape(lc $lang),
					'type'     => 'uri',
				});
		}
		
		# This must be an ordinary match. The previous m?...? form is Perl's
		# match-once operator: it succeeds a single time until reset() is
		# called, so only the first typed link ever seen by the process
		# received a format statement.
		if (defined $link->{'type'} and $link->{'type'} =~ m{($token)/($token)})
		{
			my $type    = lc $1;
			my $subtype = lc $2;
			$add->($object, $dc.'format',
				{
					'value'    => 'http://www.iana.org/assignments/media-types/'.uri_escape($type).'/'.uri_escape($subtype),
					'type'     => 'uri',
				});
		}
	}
	
	return $rv;
}

=item C<< $list = parse_links_to_list($response) >>

This function is not exported by default. 

C<$list> is an arrayref of hashrefs. Each hashref contains keys
corresponding to the link parameters of the link, and a key called
'URI' corresponding to the target of the link.

The 'rel' and 'rev' keys are arrayrefs containing lists of
relationships. If the Link used the short form of a registered
relationship, then the short form is present on this list. Short
forms can be converted to long forms (URIs) using the
C<relationship_uri> function.

The structure returned by this function should not be considered
stable.

=cut

# Parse every Link header value of $response with parse_single_link() and
# return the resulting hashrefs as an arrayref, in header order. The
# response's base URI is passed along for resolving relative references.
sub parse_links_to_list
{
	my ($response) = @_;
	my $base_uri   = URI->new($response->base);
	
	my @parsed = map { parse_single_link($_, $base_uri) }
	             $response->header('link');
	
	return \@parsed;
}

=item C<< $data = parse_single_link($link, $base) >>

This function is not exported by default. 

This parses a single Link header (minus the "Link:" bit itself) into a hashref
structure. A base URI must be included in case the link contains relative URIs.

The structure returned by this function should not be considered
stable.

=cut

# Parse the value of one Link header into a hashref.
#
# Arguments:
#   $hdrv - the header value, starting with the <target> part
#   $base - base URI used to resolve the target and any 'anchor' parameter
#
# Returns a hashref with:
#   URI             - absolute target (from URI->new_abs)
#   rel, rev        - arrayrefs of whitespace-separated relationship names
#   anchor          - absolute anchor URI (first occurrence wins)
#   title, type     - plain strings (first occurrence wins)
#   title*          - arrayref of HTTP::Link::Parser::PlainLiteral objects
#   anything else   - arrayref of raw values (e.g. hreflang)
# An empty hashref is returned when the value does not start with <...>.
sub parse_single_link
{
	my $hdrv = shift;
	my $base = shift;
	my $rv   = {};
	
	# The value must begin with the link target in angle brackets.
	return $rv
		unless $hdrv =~ /^(\s*<([^>]*)>\s*)/;
	
	my $uri = $2;
	$hdrv   = substr($hdrv, length($1));
	
	$rv->{'URI'} = URI->new_abs($uri, $base);
	
	# Consume one ";key=value" parameter per iteration from the front of the
	# remaining string; parsing stops at the first thing that is not one.
	while ($hdrv =~ /^(\s*\;\s*(\/|[a-z0-9-]+\*?)\s*\=\s*("[^"]*"|[^\s\"\;\,]+)\s*)/i)
	{
		$hdrv = substr($hdrv, length($1));
		my $key = lc $2;
		my $val = $3;
		
		# Strip the surrounding quotes from a quoted value.
		$val =~ s/(^"|"$)//g if ($val =~ /^".*"$/);
		
		if ($key eq 'rel' or $key eq 'rev')
		{
			# split ' ' ignores leading/trailing whitespace and treats any
			# run of whitespace as a single separator.
			my @rels = split ' ', $val;
			push @{ $rv->{ $key } }, @rels
				if @rels;
		}
		elsif ($key eq 'anchor')
		{
			$rv->{'anchor'} = URI->new_abs($val, $base)
				unless defined $rv->{'anchor'};
		}
		elsif ($key eq 'title' or $key eq 'type')
		{
			$rv->{ $key } = $val
				unless defined $rv->{ $key };
		}
		elsif ($key eq 'title*')
		{
			# Extended value: charset'language'percent-encoded-text. The
			# limit keeps any further apostrophes inside the text part.
			my ($charset, $lang, $string) = split /\'/, $val, 3;
			
			# decode() croaks on an unknown charset. The header comes from a
			# remote server, so a bad value is skipped rather than allowed
			# to kill the caller. $@ is localised to leave the caller's
			# error state alone.
			my $decoded = do
			{
				local $@;
				eval { decode($charset, uri_unescape($string)) };
			};
			
			if (defined $decoded)
			{
				my $lit = bless [$decoded, undef, lc $lang], 'HTTP::Link::Parser::PlainLiteral';
				push @{ $rv->{'title*'} }, $lit;
			}
		}
		else # hreflang, plus any extended types.
		{
			push @{ $rv->{ $key } }, $val;
		}
	}
	
	return $rv;
}

=item C<< $long = relationship_uri($short) >>

This function is not exported by default. 

It may be used to convert short strings identifying relationships,
such as "next" and "prev", into longer URIs identifying the same
relationships, such as "http://www.iana.org/assignments/relation/next"
and "http://www.iana.org/assignments/relation/prev".

If passed a string which is a URI already, simply returns it as-is.

=cut

# Convert a relationship name into a URI string.
#
# A string that starts with a URI scheme followed by a colon is treated as
# an absolute URI and returned unchanged. Anything else is lower-cased and
# appended to 'http://www.iana.org/assignments/relation/'.
sub relationship_uri
{
	my $str = shift;
	
	# Seems to be an absolute URI, so can safely return "as is".
	return $str
		if $str =~ /^[a-z][a-z0-9\+\.\-]*\:/i;
	
	return 'http://www.iana.org/assignments/relation/' . lc($str);
}

1;

package HTTP::Link::Parser::PlainLiteral;

# A string value with an optional language tag. Instances are blessed
# arrayrefs: slot 0 holds the string and slot 2 the language tag.
#
# Overloads:
#   ""  - stringifies to the string value
#   eq  - against another PlainLiteral: equal when the values match and the
#         language tags match case-insensitively;
#         against anything else: plain string comparison with the value.
#         (Previously the other operand was always dereferenced as an
#         arrayref, so comparing with an ordinary string died under strict.)
use overload
	'""' => sub { $_[0]->[0] },
	'eq' => sub
		{
			my ($self, $other) = @_;
			
			unless (ref $other and UNIVERSAL::isa($other, __PACKAGE__))
			{
				return $self->[0] eq (defined $other ? "$other" : '');
			}
			
			return ($self->[0] eq $other->[0] and lc $self->[2] eq lc $other->[2]);
		};

# The string value.
sub value
{
	return $_[0]->[0];
}

# The language tag, or undef when none was given. An explicit undef is
# returned (never an empty list) so that the call stays a single element
# when used inside a hash constructor.
sub lang
{
	my $lang = $_[0]->[2];
	return defined($lang) && length($lang) ? $lang : undef;
}

1;
__END__

=back

=head1 BUGS

Please report any bugs to L<http://rt.cpan.org/>.

=head1 SEE ALSO

L<http://www.mnot.net/drafts/draft-nottingham-http-link-header-07.txt>

L<RDF::Trine>, L<RDF::TrineShortcuts>, L<HTTP::Response>.

L<http://n2.talis.com/wiki/RDF_JSON_Specification>.

L<http://www.perlrdf.org/>.

=head1 AUTHOR

Toby Inkster E<lt>tobyink@cpan.orgE<gt>.

=head1 COPYRIGHT AND LICENCE

Copyright (C) 2009-2010 by Toby Inkster

=head2 a.k.a. "The MIT Licence"

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

=cut
