package HTML::Mail;

use strict;
use warnings;

require Exporter;

our @ISA = qw(Exporter MIME::Lite HTML::Parser);

our $VERSION = '0.01_05';
$VERSION = eval $VERSION;    # see L<perlmodstyle>

# Preloaded methods go here.
use LWP::UserAgent;
use URI;
use HTML::Parser;
use MIME::Lite;
use Carp;

use vars qw($SIMPLE_CID);

sub new {
	my $package = shift;
	my %params  = @_;

	my $self = MIME::Lite->new(@_, Type => 'multipart/alternative');

	if (!(exists($params{'HTML'}) || exists($params{'Text'}))) {
		croak "No HTML or Text parameter send";
	}
	if (exists($params{'HTML'})) {
		$self->{'HTML'} = $params{'HTML'};
	}
	if (exists($params{'Text'})) {
		$self->{'Text'} = $params{'Text'};
	}
	$self->{'html_charset'} = $params{'html_charset'} ||'iso-8859-15';
	$self->{'text_charset'} = $params{'html_charset'} || 'iso-8859-15';

	%params = (
	  %params, useragent => 'HTML::Mail',
	  timeout => 60
	);

	$self->{'_ua'} = LWP::UserAgent->new();

	$self->{'_ua'}->agent($params{'useragent'});
	$self->{'_ua'}->timeout($params{'timeout'});

	# Some servers give back a 206 Partial Content even though they served all the content
	#$self->{'_ua'}->max_size(1024 * 1024);    #One megabyte of content limit (just playing safe)

	my $response = $self->{_ua};

	return bless $self, $package;

}

sub build {
	my $self = shift;

	$self->_parse_html();
	$self->_attach_media();
	$self->_attach_text();
	$self->_build_all();
}

sub _parse_html {
    my $self = shift;

    #set up the HTML parser
    $self->init(
        api_version => 3,
        start_h     => [ \&_tag_start, 'self, tag, attr, attrseq' ],
        end_h       => [ \&_tag_end, 'self, tag, attr, attrseq' ],
        text_h      => [ \&_tag_end, 'self, text' ],
    );

    #clean any possible links that exists
    $self->_reset_links();
    $self->_reset_html();

    eval {
        $self->get( $self->{'HTML'} );
    };
    if (@_ or not $self->{'_response'}->is_success) {
        delete( $self->{'_html_base'} );
        if ( $self->{'HTML'} =~ /html/i ) {
            $self->parse( $self->{'HTML'} );
        } else {
            die @_;
        }
    }else {
        $self->{'_html_base'} = $self->{'_response'}->base();
        $self->parse( $self->{'_response'}->content );
    }
}

#Makes a GET request and returns the content

sub get {
	my $self = shift;
	my $uri  = shift;

	if (!$self || !$self->{'_ua'}) {
		die "User agent not defined";
	}

	if (!$uri) {
		die "uri not defined";
	}

	my $response = $self->{'_ua'}->get($uri);
	$self->{'_response'} = $response;

	if (!$response->is_success) {
		croak "Error while making request [", $response->request->uri, "]\n", $response->status_line;
	}

	return $response->content;
}

sub _add_html {
	my ($self, $tag, $attr, $attrseq) = @_;
	if ($#_ == 1) {
		$self->{'html_content'} .= $tag;    #actually just text
	}
	else {
		$self->{'html_content'} .= "<$tag";
		$self->{'html_content'} .= " $_=\"$attr->{$_}\"" for (@$attrseq);
		$self->{'html_content'} .= ">";
	}
}

sub _get_html {
	my $self = shift;
	return $self->{'html_content'};
}

sub _add_link {
	my $self = shift;
	($#_ == 0) or die "Can only add one link";

	my $uri = URI->new_abs($_[0], $self->{'_html_base'});

	$self->{'links'}->{$uri} ||= $self->_generate_cid();
	return $self->{'links'}->{$uri};
}

sub _get_links {
	my $self = shift;
	return $self->{'links'};
}

sub _reset_links {
	my $self = shift;
	$self->{'links'} = {};
	$self->{'cid'} = 0;
}

sub _reset_html {
	my $self = shift;
	$self->{'html_content'} = '';
}

sub _tag_start {
	my $self = shift;
	my ($tag, $attr, $attrseq) = @_;


	if ($tag eq 'base' and not exists($self->{'_html_base'})) {
		$self->{'_html_base'} = $attr->{'href'};
	}
	
	$self->_tag_filter_link($attr, 'href') if (($tag eq 'link') and (exists($attr->{'rel'}) and $attr->{'rel'} eq 'stylesheet'));
	$self->_tag_filter_link($attr, 'background');
	$self->_tag_filter_link($attr, 'src') if ($tag ne 'script');
	$self->_add_html(@_);
}

sub _tag_filter_link {
	my ($self, $attrs, $attr) = @_;
	if (exists($attrs->{$attr})) {
		my $link = $attrs->{$attr};
		$attrs->{$attr} = "cid:" . $self->_add_link($link);
	}
	return;
}

sub _tag_end {
	my $self = shift;
	$self->_add_html(@_);
}

sub _tag_text {
	my $self = shift;
	$self->_add_html(@_);
}

sub _generate_cid {
	my $self = shift;
	return ($SIMPLE_CID ? '': rand(10000)) . "_" . $self->{'cid'}++;
}

sub _attach_media {
	my $self = shift;

    my $related = MIME::Lite->new(
        'Type'        => 'multipart/related',
        'Datestamp'   => undef,
        'Disposition' => 'inline',
    );

    my $html_part = MIME::Lite->new(
        'Type'        => 'text/html',
        'Encoding'    => 'quoted-printable',
        'Data'        => $self->_get_html,
        'Disposition' => 'inline',
        'Datestamp'   => undef,
    );

	$html_part->attr('content-type.charset' => $self->{'html_charset'});

	#attach the html part
	$related->attach($html_part);

	while (my ($link, $cid) = each(%{ $self->_get_links })) {
		$related->attach($self->_get_media($link, $cid));
	}
	$self->{'related_part'} = $related;

	#some cleanup
	delete($self->{'_response'});
	$self->_reset_html;
}

sub _get_media {
	my $self = shift;
	my $link = shift;
	my $cid  = shift;

	$self->get($link);

	my $response = $self->{'_response'};    #holds the response of the previous get

	my $part = MIME::Lite->new(
	  'Encoding'    => 'base64',
	  'Disposition' => 'attachment',
	  'Data'        => $self->{'_response'}->content,
	  'Datestamp'   => undef,
	);

	$part->attr('Content-type' => $self->{'_response'}->content_type);
	$part->attr('Content-ID'   => "<$cid>");

	return $part;
}

sub _attach_text {
	my $self    = shift;
	my $text    = $self->{'Text'};
	my $content = $text;

	#If it fails, Text is the actual text and not an URI
	eval { $content = $self->get($text); };

    my $text_part = new MIME::Lite(
        'Type'        => 'TEXT',
        'Encoding'    => 'quoted-printable',
        'Disposition' => 'inline',
        'Data'        => $content,
        'Datestamp'   => undef,
    );
	$text_part->attr('content-type.charset' => $self->{'text_charset'});

	return $self->{'text_part'} = $text_part;
}

sub _build_all {
	my $self = shift;

	$self->attach($self->{'text_part'});
	$self->attach($self->{'related_part'});

}

1;
__END__
# Below is stub documentation for your module. You'd better edit it!

=head1 NAME

HTML::Mail - Perl extension for sending emails with embeded HTML and media

=head1 SYNOPSIS

  use HTML::Mail;

  ### initialisation
  my $html_mail = HTML::Mail->new(
  HTML    => 'http://www.cpan.org',
  Text    => 'This is the text representation of the webpage http://www.cpan.org',
  From    => 'me@myhost.org',
  To      => 'you@yourhost.org',
  Subject => 'CPAN webpage');
  
  ### Build the message
  $html_mail->build();

  ### Send the email (inherited from MIME::Lite)
  $html_mail->send();

  ### Dump as string (inherited from MIME::Lite)
  my $sting = $html_mail->as_string();

=head1 DESCRIPTION

B<HTML::Mime> is supposed to help with the task of sending emails with html amd images (or other media) embeded.
It uses B<MIME::Lite> for all MIME related jobs, B<HTML::Parser> to see related files and change the URIs and B<LWP> to retrieve the related files.

=head2 Attributes

All attributes are B<case sensitive>.

Constructor supports these attributes:

=over 4

=item HTML [URI or STRING]

The URL of HTML data to send in email.
Most comons URLs are either I<http://www.site.org> or I<file:///home/user/page.html>

If you prefer, you can use it to specify the actual HTML data as a string 
HTML=>'<html><body><h1>Welcome to HTML::Mail</h1></body></html>'

=item Text [URI or STRING]

The URL of Text data to send in email. Similar to the HTML attribute.

=item From, To, Subject

Inherited from B<MIME::Lite>. Sender, Recipient and Subject of the message.

=item html_charset

Charset of the HTML part of the email. Defaults to I<iso-8859-15>.

=item text_charset

Charset of the text part of the email. Defaults to I<iso-8859-15>.

=item useragent

Useragent sent wen using LWP to retrieve remote documents

=item timeout

Timeout of LWP useragent

=back

=head1 EXPORT

None by default.

=head1 SEE ALSO

B<MIME::Lite> (this module inherits from it)

B<HTML::Parser> (used in this module to parse html)

B<LWP> (used to fetch content)

=head1 AUTHOR

Cludio Valente, E<lt>ClaudioV@technologist.comE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright 2003 by Cludio Valente

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself. 

=cut

