package Finance::Bank::Norisbank;

=head1 NAME

Finance::Bank::Norisbank - Automated account interaction for customers of Norisbank GMBH.

=cut

$VERSION=0.01;
use warnings;
use strict;
use HTML::TreeBuilder;
use URI;
use URI::QueryParam;
BEGIN {
    if (0 && require Data::Dump::Streamer) {
	'Data::Dump::Streamer'->import('Dumper');
    } else {
	require Data::Dumper;
	'Data::Dumper'->import('Dumper');
    }
}
use Time::Local;
use WWW::Mechanize;
use URI::URL;
use HTML::TableExtract;
use POSIX; # for ceil and floor
use Locale::Object; # for visa/foreign currency
$|=1;

=item new

  Finance::Bank::Norisbank->new('1234567890', '12345');

Connects to the given account number, using the given PIN.  

Norisbank account numbers are ten digits, and PINs are five digits.
Remember that even though account numbers and PINs look like numbers,
they aren't numbers.  Quote them as strings when they appear as
literals in code, or the account number will become a float and lose
precesion on the right, or lose insignificant zeros on the left.

FIXME: More idiomatic interface.

Automatically logs in to check if the account information is valid.

Returns the object on success, or dies on failure.

=cut

sub new {
    my ($class, $accountnr, $pin, $callback) = @_;
    
    my $agent = WWW::Mechanize->new(
	autocheck=>1, # Die on fetch errors
	
    ) or die "Couldn't create WWW::Mechanize agent";
    $agent->env_proxy;
    $callback ||= sub {};
    my $self = bless {accountnr=>$accountnr, pin=>$pin, agent=>$agent, callback=>$callback}, $class;
    
    $self->login;
}

sub status {
    my $self=shift;
    $self->{callback}->($self, @_);
}

sub login {
    my $self = shift;
    my $agent=$self->{agent};
    
    $self->status('Getting login form...');
    $agent->get('https://onlinebanking.norisbank.de/login/login.jsp');
    $self->status('Logging in...');

    $agent->form_name('login');
    $agent->current_form->value('kontonummer', $self->{accountnr});
    $agent->current_form->value('pin', $self->{pin});
    
    $agent->submit();

    if(my $error = parse_login_error ($agent->content))
    {
	die $error;               # parse_login_error($agent->content)
    }

    $self->{last_logged_in}=time;

    $self->{page} = $self->get_page_name($agent->content);
#    $self->{frame} = 1 if($self->{page});
    # We are now on the BankStatus page.
    
    $self->status('Logged in.');

    return $self;
}

# Given the content of the page you get after logging in, extracts the
# error, or returns undef/emptylist if there has been no error.
sub parse_login_error {
    my $content = shift;
    $content=$content->content if ref $content;
    
    my $tree=HTML::TreeBuilder->new;
    $tree->no_space_compacting(1); # because norisbank is annoying.
    $tree->parse($content);
    $tree->eof;
    $tree->elementify;
    
    my $error = $tree->look_down(_tag=>'td', class=>'loginfailed');
    if (!$error) {
      $tree->delete;
      return;
    }
    $error = $error->look_down(_tag=>'div')->as_text;
    $error =~ s/^\s*//;
    $error =~ s/\s*$//;
    
    $tree->delete;
    
    if ($error =~ /Die eingegebene Kontonummer ist ung.ltig/) {
      return "Invalid account number";
    } elsif ($error =~ /Die eingegebene webBanking PIN enth.lt unerlaubte Zeichen bzw\. hat eine unzul.ssige L.nge\./) {
      return "PIN wrong length or contains invalid characters";
    } elsif ($error =~ /Die eingegebene PIN ist falsch\./) {
      return "incorrect PIN";
    }
    
    return $error;
}


=item new

  $account->get_transactions($datefrom, $dateto);

Gets all transactions that occoured (C<bank_date>) between datefrom and dateto,
which should be C<time_ts> (that is, standard unix-ish timestamps).  For your 
convience, 0 is defined as the current date when used as dateto, and 
100 days prior to dateto when used as datefrom.

Note that both ends are rounded to day resolution -- the from date is rounded 
down to midnight at the beginning of the day, and to to midnight at the end of 
the day.  This means that the range used will always completely enclose, but
may exceed, the range asked for.

(This means that C<$account->get_transactions()> gets transactions from 100 
days ago through today.)

FIXME: Make the next paragraph true.

The return in list context is a list of Finance::Bank::Transactions::DE 
objects, least recent first.  If there is an error fetching the information, 
the method will signal an exception (C<die>).  
Return in scalar context is not defined, and subject to change without notice.

=cut

sub get_transactions
{
    my $self     = shift;
    my $datefrom = shift;
    my $dateto   = shift;
    my $agent    = $self->{agent};
    
    print "($datefrom, $dateto)\n";

    $dateto = timelocal(0,0,0,$dateto->[1],$dateto->[0]-1,104) 
      if ref($dateto) eq "ARRAY";
    $datefrom = timelocal(0,0,0,$datefrom->[1],$datefrom->[0]-1,104) 
      if ref($datefrom) eq "ARRAY";

    print "($datefrom, $dateto)\n";
    
    $dateto = time() if !$dateto;
    print "($datefrom, $dateto)\n";
    $datefrom = $dateto-89*24*60*60 if !$datefrom;
    print "($datefrom, $dateto)\n";
    
    foreach ($datefrom, $dateto) {
	local $ENV{TZ}="Europe/Rome";
	print scalar localtime $_;
	my (undef, undef, undef, $day, $mon) = localtime($_);
	$_=sprintf("%02d\.%02d", $day, $mon+1);
	
	print " => $_\n";
    }
    
    $self->status('Getting transactions list...', 0, 10);
    if($self->{page} ne 'bankstatus')
    {
	$self->find_page('bankstatus', $agent->content);
    }
    $self->status('Getting transactions list...', 1, 10);

    $agent->follow('1') or
        warn $self->parse_error($agent);
    $self->status('Getting transactions list...', 2, 10);
    $agent->follow('Auswahl Girokonten') or
        warn $self->parse_error($agent);
    $self->status('Getting transactions list...', 3, 10);
    
    $agent->follow('1') or
        warn $self->parse_error($agent);
    $self->status('Getting transactions list...', 4, 10);
    $agent->follow('Umsatzauskunft') or
        warn $self->parse_error($agent);
    $self->status('Getting transactions list...', 5, 10);

    $agent->follow('1') or
        warn $self->parse_error($agent) or
    $self->status('Getting transactions list...', 6, 10);
    $agent->follow('2') or
        warn $self->parse_error($agent);
    $self->status('Getting transactions list...', 7, 10);

#    print $agent->content, "\n\n";
    $agent->form_number(0);
    $agent->current_form->value('datumvon', $datefrom);
    $agent->current_form->value('datumbis', $dateto);
    $agent->click('');
    $self->status('Getting transactions list...', 8, 10);
    $agent->follow(1) or
        warn $self->parse_error($agent);
    $self->status('Getting transactions list...', 9, 10);
    $agent->follow(1) or
        warn $self->parse_error($agent);
    $self->status('Getting transactions list...', 10, 10);
    
    my $html = $agent->content;
#    print $html;
    die $self->parse_error($html) if $self->parse_error($html);
    
    $self->status("Parsing transactions list");
    my $tree=HTML::TreeBuilder->new;
    $tree->no_space_compacting(1); # because norisbank is annoying.
    $tree->parse($html);
    $tree->eof;
    $tree->elementify;
    
    my @bghabens = $tree->look_down(_tag=>'a');
    
    print $tree->dump;
    
    if (!@bghabens) {
	print $agent->content;
	die "Huh, no transactions?";
    }
    
    my @trs = map {$_->parent->parent} @bghabens;
    {
	my %seen;

	@trs = grep {
#	    print $_->address, " ", $seen{$_->address}||0, "\n";
	    !$seen{$_->address}++
	} @trs;
    }
    
    print "Count \@trs: ".@trs."\n";
    print Dumper \@trs;
    
    $self->status("Parsing transactions...", 0, 0+@trs);

    my $n_trans;
    my @transactions = map {
	$self->status("Parsing transactions...", ++$n_trans, 0+@trs);
	my $tr=$_;
	my %trans;
	
#	print "\n", $tr->address, "\n";
#	print $tr->dump;
	
	$trans{bank_date}       = cleandate($tr->address('.0.0'));
	$trans{effective_date}  = cleandate($tr->address('.1.0'), $trans{bank_date});
	$trans{type}            = $tr->address('.2.0.0');
	$trans{thirdparty_name} = $tr->address('.2.0.2');
	my $comments_str        = $tr->address('.2.0.4');
	$trans{comments_str}    = $comments_str; # debugging
	$trans{amount}          = cleanamount($tr->address('.3.0'));
	$trans{balance}         = cleanamount($tr->address('.4.0'));
	$trans{infourl}         = $tr->address('.2.0')->attr('href');
print Dumper(\%trans);	
	my $badcomments=0;
	# Split comments_str out into a lines
	while (length $comments_str) {
	    warn "Oddly-wrapped comments: <$comments_str>"
	      and $badcomments++
	      unless substr($comments_str, 0, 1, '') eq ' ';
	    my $this =substr($comments_str, 0, 27, '');
	    $this .= " " x (27-length($this));
	    push @{$trans{comments}}, $this;
	}
	
	# Because of bugs on the server, this doesn't always fetch data for
	# the correct transaction.  (And wrong is worse then incomplete.)
	if ($badcomments) {
	    $agent->get($trans{infourl});
	    $agent->follow(1) or      # frame name=
	      warn $self->parse_error($agent);
	    my $tree=HTML::TreeBuilder->new;
	    $tree->no_space_compacting(1); # because norisbank is annoying.
	    print "AC: " . $agent->content, "\n";
	    $tree->parse($agent->content);
	    $tree->eof;
	    $tree->elementify;
	    print $tree->dump;
	    
	    my @data = $tree->look_down(_tag=>'td', class=>qr/label|lightgrey|bgsoll/);
#	    print "Bad data: " .Dumper(\@data);
	    
	    my @labels;
	    my $thislabel;
	    foreach my $d (@data) {
		print "Bad data: ";
		$d->dump;
		push @labels, $d->as_text and next if $d->attr('class') eq 'label';
		$thislabel = shift @labels if @labels;
		
		$d = $d->as_text;
		s/\xA0//;
		
		push @{$trans{extradata}{$thislabel}}, $d;
	    }
	    
	    $trans{firstparty_name}=$trans{extradata}{"Auftraggeber/Zahlungspflichtiger"}[0];
	    $trans{firstparty_blz} =$trans{extradata}{Bankleitzahl}[0];
	    $trans{thirdparty_blz} =$trans{extradata}{Bankleitzahl}[1];
	    $trans{firstparty_accntnum}=$trans{extradata}{Kontonummer}[0];
	    $trans{thirdparty_accntnum}=$trans{extradata}{Kontonummer}[1];
	    $trans{comments}=$trans{extradata}{Verwendungszweck};
	    # Other data from the details page is also on the summary, or (currency) is unreliable and useless.
	    $tree->delete;
	    $self->find_page('umsatz_auskunft_giro')
	      or die "Couldn't find page";
	}
	
	# Translate transaction types
	local $_=$trans{type};
	s/^.berweisung$/liability transfer (local init)/i;      # Customer-initiated transfer
	s/^Lastschrift $/liability transfer (remote init)/i;   # Remote-initiated transfer
	s|^Gehalt/Rente$|payment|i;                  # Remote paying customer
	s/^Gutschrift/asset transfer/i;
	s/^Dauerauftrag/recuring liability transfer/i;          # We told bank to pay them N much every X often.
	s/^Abschlu./banking fee/i;                    # "Closing"
	s/^Euroscheck/euro-check/i;                   # eurocheck
	s/^Scheck/check/i;                            # check
	$trans{type}=$_;
	
	$trans{misc2} and ($trans{misc2} =~ s/^ // 
	    or warn "No leading space on misc2?");
	
	return unless defined ($trans{comments}[0]);
	grok_cardreader(\%trans);
	grok_longnumber(\%trans);
	grok_visa(\%trans);

	\%trans;	
    } @trs;

    $self->status("Parsed transactions.");
    return @transactions;
#    return reverse @transactions;
}

# Aus Sicherheitgr.nden wurden Sie vom System abgemeldet.

# Given a page with a possible error message on it, return the error
# message (or undef if no error). Can also be given a HTTP::Response or
# WWW::Mechanize agent, for your convience.
sub parse_error {
    my $self = shift;
    my $content = shift;
    if (!defined $content) {
	warn "Undefined content in parse_error (pre ref) called from ".join('/', caller);
    }
    $content=$content->content if ref $content;
    if (!defined $content) {
	warn "Undefined content in parse_error (post ref) called from ".join('/', caller);
    }
    
    my $tree=HTML::TreeBuilder->new;
    $tree->no_space_compacting(1); # because norisbank is annoying.
    $tree->parse($content);
    $tree->eof;
    $tree->elementify;

    # Some pages seem to give a <div class="error"><span class="error">, others just a <div class="error">.
    # We want only the tag that has actual text.
    my $error = $tree->look_down(_tag=>'span', class=>'error');
    $error ||=  $tree->look_down(_tag=>'div',  class=>'error');
    
    $tree->delete, return undef unless $error;
    $error = $error->as_text;
    $tree->delete;
    return undef if $error !~ m/\S/;

    $error =~ s/Die eingegebenen Datumsangaben sind nicht plausibel\./The date range is not possible/;
    $error =~ s/Aus Sicherheitgr.nden wurden Sie vom System abgemeldet\./Login Timeout/;
    $self->{error} = $error;  
    
    return $error;
}

# Given the content of the frameset page, figure out what the name of the page 
# is.
sub get_page_name
{
    my $self = shift;
    my $content = shift;
    
    my $tree = HTML::TreeBuilder->new();
    $tree->no_space_compacting(1); # because norisbank is annoying.
    $tree->parse($self->{agent}->content);
    $tree->eof;
    $tree->elementify;
    
    my $page = $tree->look_down(_tag => 'frame', name => 'topFrame');
    
    return undef if(!$page);
    
    $page = $page->attr('src');
    $page =~ s/.*screen=(\w+).*/$1/;
    
    print "get_page_name: $page\n";
    
    return lc($page||'');
}


# Finds the frameset, then goes back (via the link at the side) until we find
# the page asked for.
sub find_page
{
    my $self = shift;
    my $name = shift;
    
    # Make sure we start with a frameset, or get_page_name will punt.
    $self->find_frameset;
    return if($self->get_page_name eq $name);
    
    print "Finding page: $name\n";
    
    $self->find_frameset;
    $self->{agent}->follow_link(name=>'rightFrame') or
      warn $self->parse_error($self->{agent});
    
    my $tree = HTML::TreeBuilder->new();
    $tree->no_space_compacting(1); # because norisbank is annoying.
    $tree->parse($self->{agent}->content);
    $tree->eof;
    $tree->elementify;
    
#    print "find_page, top: ", $tree->dump, "\n";
    print "find_page, top: ", $tree->dump, "\n";
    
    my @hrefs = $tree->look_down(_tag => 'a');
    my $page = '';
    
    print "find_page: found", 0+@hrefs, "\n";
    
    foreach my $href (@hrefs)
    {
	my $uri = URI->new($href->attr('href'));
	print "find_page: uri=$uri\n";
	my $screen = $uri->query_param('screen');
	
	next unless $screen =~ /^$name/i;
	
	# FIXME: Doesn't get($href->attr('href')) DTRT?
	$self->{agent}->follow_link(url => $href->attr('href'));
	
	if($self->parse_error($self->{agent}->content))
	{
	    if($self->{error} =~ /Login Timeout/)
	    {
		# FIXME: This won't work.  We're at the beginning, we can't
		# go backward to get forward.
		$self->login();
		return $self->find_page($name, $self->{agent}->content);
	    }
	    else
	    {
		return undef;
	    }
	}
	
	return 1;
    }
    
    # If we havn't exited yet, then no a tags were found.  This is a very
    # bad thing -- perhaps we were somehow on a framset?
    return undef;
}

# Finds the main frameset by calling ->{agent}->back until we get there.
sub find_frameset
{
    my $self = shift;
    my $content = $self->{agent}->content;
    
    my $tree = HTML::TreeBuilder->new();
    $tree->no_space_compacting(1); # because norisbank is annoying.
    $tree->parse($content);
    $tree->eof;
    $tree->elementify;
    
    my $frameset;

#    until($frameset = $tree->look_down('_tag', 'frameset') &&
    until($frameset = $tree->look_down('_tag' => 'frame', 'name' => 'topFrame'))
    {
	$tree->dump;
	$self->{agent}->back();
	$tree->delete;
	$tree = HTML::TreeBuilder->new();
	$tree->no_space_compacting(1); # because norisbank is annoying.
	$tree->parse($self->{agent}->content);
	$tree->eof;
	$tree->elementify;
    
	if($self->parse_error($self->{agent}->content))
	{
	    if($self->{error} =~ /Login Timeout/)
	    {
		$self->login();
	    }
	    else
	    {	      
		return undef;
	    }
	}
    }
    print "Found frameset: $frameset <" . $frameset->dump . ">\n";
    $tree->delete;
    
    $self->{page} = $self->get_page_name($self->{agent}->content());
    print "Page: ", $self->{page}, "\n";
}
    

sub grok_longnumber {
  my $trans=shift;
  
  return unless $trans->{comments}[0] =~ /^\d{27}$/;
  
  # 012345678901234567890123456
  # DDMMHHMM...........SSSSLLLL
  
  local $_=$trans->{comments}[0];
  my $day   = substr($_, 0, 2);
  my $month = substr($_, 2, 2);
  my $hour  = substr($_, 4, 2);
  my $min   = substr($_, 6, 2);
  # FIXME: Format does not provide for year (?), so assume this year.
  # FIXME: Assume same year as bank_date
  my $year  = (localtime)[5];
  local $ENV{TZ}="Europe/Rome"; # middle-european time.
  
  # Since this format doesn't give the timezone, it's rather unreliable.
  # If the sale_date is already known, don't overwrite it.
  if (!$trans->{sale_date}) {
    $trans->{sale_date}=timelocal(0,$min,$hour,$day,$month-1,$year);
  }
  
  # Bunch of ??? digits here
  $trans->{__longnumber_unknown} = substr($_, 8, 11);
  
  $trans->{store_number}   =substr($_, 19, 4)+0;
  $trans->{checkout_number}=substr($_, 23, 4)+0;
}

sub grok_cardreader {
  my $trans=shift;
  
  foreach (@{$trans->{comments}}) {
    if ($_ =~ m/^(EC |POZ|OLV)(\d{8}) (\d\d)\.(\d\d) (\d\d).(\d\d) (...)$/) {
      my %types = (
		   'EC ' => 'Electronic Cash',
		   'POZ' => 'POZ',
		   'OLV' => 'OLV',
		   
		  );
      $trans->{ccard_type} = $types{$1};
      $trans->{cardreader} = $1.$2;
      
      my ($day, $mon, $hour, $min) = ($3,$4,$5,$6);
      my $tz = $7;
      $tz =~ s|ME0|Europe/Rome|;
      local $ENV{TZ}=$tz;
      $trans->{sale_date} = timelocal(0,$min,$hour,$day,$mon-1,(localtime)[5]);
      
      return 1;
    }
  }
}

sub grok_visa {
    my $trans=shift;
    
    return unless (my($card_number) = $trans->{thirdparty_name} =~
	m/^VISAUMSATZ (\d{16})/);
    $trans->{ccard_type}='VISA';
    $trans->{ccard_number}=$card_number;
    
    return unless (my($misc1, $day, $mon) = $trans->{comments}[0] =~ 
	m/^(.*) *(\d\d)\.(\d\d)\.$/);
    return unless (my($misc2, $country, $amt) = $trans->{comments}[1] =~
	m/^(.{11}) (..) *?(\d*,\d*)$/);
    
    $trans->{misc}[0]=$misc1;
    local $ENV{TZ}='Europe/Rome';
    $trans->{sale_date} = timelocal(0,0,0,$day,$mon-1,(localtime)[5]);
    
    $trans->{misc}[1]=$misc2;
    $trans->{country}=$country;
    my $currency = Locale::Object::Country
      ->new(code_alpha2=>$country)
      ->currency
      ->code;
    $trans->{currency} = $currency;
    $trans->{base_amount} = cleannumber($amt);
    
    if (my ($rate) = $trans->{comments}[2] =~
	m/^KURS ZU EURO *?(\d*,\d*)$/) {
	    $trans->{exchange_rate}=cleannumber($rate);
	}
    
    if (my ($fees) = $trans->{comments}[3] =~
	# Note: Does not end in a $.
	m/^GEBUEHR (\d*,\d\d)/) {
	    $trans->{fees}=cleannumber($fees);
	}
}

    
##   <frame name="topFrame" scrolling="NO" noresize src="/jsps/frame_oben.jsp?screen=bankstatus&syd=151317734" >

# Amount allows for thousands seperators, and thus only accepts german style.
sub cleanamount {
    local $_=shift;
    tr/,./.,/;
    tr/-0-9.//cd;
    $_+=0; # Nummify to clean up leading 0s.
    return $_;
}

# Number allows for both german and english style, and thus does not support
# thousands seperators.
sub cleannumber {
    local $_=shift;
    tr/,./../;
    tr/-0-9.//cd;
    $_+=0;
    return $_;
}

sub cleandate {
    local $_=shift;
#    print "Date: $_\n";
    tr|.-/|---|;
    tr|-0-9||cd;
    my ($day,$mon,$year)=split(/-/);
    if (not length $year) {
	$year = (localtime)[5];
    }
    $_=timelocal(0,0,0,$day,$mon-1,$year);
#    print "Date: $_\n";
    return $_;
}

1;

=head1 AUTHOR

James Mastros, <james@mastros.biz>.  theorbtwo on perlmonks.org.
  
=head1 BUGS

Known bugs: we aren't always able to get the verwindungszweck from the bank.  
This is because Norisbank's online banking is highly obnixious.  Working code 
to fix this is very much welcome.

The grok_* routines are based on guesswork.  There are still many unknowns, purticularly in the "longnumber" style.  If you think you have them figured out, or you think there is an error in my interpretation of them, please let me know.  If you find a standard for them, please, please, please let me know.

The best place to tell me about bugs or feature requests is probably on rt.cpan.org.
  
=cut

