# !/usr/bin/perl
$|++;
use warnings;
use strict;
use HTML::TreeBuilder;
use Data::Dumper;
use Time::Local;

use strict;
use WWW::Mechanize;
use WWW::Mechanize::FormFiller;
use URI::URL;
use HTML::TableExtract;

my $agent = WWW::Mechanize->new( autocheck => 1 );
my $formfiller = WWW::Mechanize::FormFiller->new();
$agent->env_proxy();

$agent->get('https://onlinebanking.norisbank.de/login/login.jsp');
$agent->form(1) if $agent->forms and scalar @{$agent->forms};
$agent->form_name('login');
#  { local $^W; $agent->current_form->value('kontonummer', '7967197003'); };
$agent->current_form->value('kontonummer', '7967197003');
#  { local $^W; $agent->current_form->value('pin', '46613'); };
$agent->current_form->value('pin', '46613');
$agent->submit();

# logged in 

$agent->follow('1');  # mainFrame 
$agent->follow('Auswahl Girokonten') or
  die parse_error($agent);

$agent->follow('1'); #  mainFrame
$agent->follow('Umsatzauskunft');

$agent->follow('1'); #  mainFrame
$agent->follow('2');
$agent->form_number(1);
$agent->current_form->value('datumvon', '01.04');
$agent->current_form->value('datumbis', '24.06');
$agent->click('');
$agent->follow(1);
$agent->follow(1);

my $html = $agent->content;
die parse_error($agent) if parse_error($agent);
  
my $tree=HTML::TreeBuilder->new;
$tree->no_space_compacting(1); # because norisbank is annoying.
$tree->parse($html);
$tree->eof;
$tree->elementify;

my @bghabens = $tree->look_down(class=>'bghaben');

if (!@bghabens) {
    print $agent->content;
    die "Huh, no transactions?";
}

my @trs = map {$_->parent} @bghabens;
{
    my %seen;
    
    @trs = grep {
	print $_->address, " ", $seen{$_->address}, "\n";
	!$seen{$_->address}++
    } @trs;
}
my @transactions = map {
    my $tr=$_;
    my %trans;
    
    print "\n", $tr->address, "\n";
    
#    print $tr->dump;
    
    $trans{bank_date}     = cleandate($tr->address('.0.0'));
    $trans{effective_date}= cleandate($tr->address('.1.0'));
    $trans{type}          = $tr->address('.2.0.0');
    $trans{misc1}         = $tr->address('.2.0.2');
    $trans{misc2}         = $tr->address('.2.0.4');
    $trans{amount}        = cleanamount($tr->address('.3.0'));
    $trans{balance}       = cleanamount($tr->address('.4.0'));
    $trans{infourl}       = $tr->address('.2.0')->attr('href');
    
    $agent->get($tr->address('.2.0')->attr('href'));
    $agent->follow(1); # frame name=
    my $tree=HTML::TreeBuilder->new;
    $tree->no_space_compacting(1); # because norisbank is annoying.
    $tree->parse($agent->content);
    $tree->eof;
    $tree->elementify;
    print $tree->dump;
    
    my @data = $tree->look_down(_tag=>'td', class=>qr/label|lightgrey|bgsoll/);
    my @labels;
    my $thislabel;
    foreach (@data) {
	push @labels, $_->as_text and next if $_->attr('class') eq 'label';
	$thislabel = shift @labels if @labels;
	
	$_ = $_->as_text;
	s/\xA0//;
	
	push @{$trans{extradata}{$thislabel}}, $_;
    }
    $tree->delete;
    
    # Translate transaction types
    local $_=$trans{type};
    s/^.berweisung$/liability transfer (local init)/i;      # Customer-initiated transfer
    s/^Lastschrift $/liability transfer (remote init)/i;   # Remote-initiated transfer
    s|^Gehalt/Rente$|payment|i;                  # Remote paying customer
    s/Gutschrift/asset transfer/i;
    s/Dauerauftrag/recuring liability transfer/i;          # We told bank to pay them N much every X often.
    s/Abschlu./banking fee/i;                    # "Closing"
    s/Euroscheck/euro-check/i;                   # eurocheck
    s/Scheck/check/i;                            # check
    $trans{type}=$_;
    
    $trans{misc2} and ($trans{misc2} =~ s/^ // 
      or warn "No leading space on misc2?");
    
    $trans{groked}=0;
    grok_ga(\%trans);
    grok_minimal(\%trans);
    grok_visa(\%trans);
    grok_ec(\%trans);
    grok_poz(\%trans);
    
    print Dumper \%trans;
    \%trans;
} @trs;

# Amount allows for thousands seperators, and thus only accepts german style.
sub cleanamount {
    local $_=shift;
    tr/,./.,/;
    tr/0-9.//cd;
    $_+=0; # Nummify to clean up leading 0s.
    return $_;
}

# Number allows for both german and english style, and thus does not support
# thousands seperators.
sub cleannumber {
    local $_=shift;
    tr/,./../;
    tr/0-9.//cd;
    $_+=0;
    return $_;
}

sub cleandate {
    local $_=shift;
#    print "Date: $_\n";
    tr|.-/|---|;
    tr|-0-9||cd;
    my ($day,$mon,$year)=split(/-/);
    if (not length $year) {
	$year = (localtime)[5];
    }
    $_=timelocal(0,0,0,$day,$mon-1,$year);
#    print "Date: $_\n";
    return $_;
}

sub grok_ga {
    my $trans=shift;
    return if not $trans->{misc1};
    if ($trans->{misc1} =~ /^GA NR(\d{8}) BLZ(\d{8}) (\d)$/) {
	$trans->{type} = 'ATM';
	$trans->{atm_number}=$1;
	$trans->{atm_blz}=$2;
	$trans->{atm_unknown}=$3;
	# The meaning of the * vs space before the GEB is unknown, but
	# probably just a different ATM being obnixious.
	# The lack of non-EUR currency is a bug.
	$trans->{'misc2'} =~ m<^(\d\d)\.(\d\d)/(\d\d)\.(\d\d)UHR\ (.*?) EUR([\d ]{8},\d\d)[* ]GEB\.EUR([\d ]\d,\d\d)$>
	  or warn "Strange ATM transaction?" and return;
	my ($day,$month,$hour,$minute)=($1,$2,$3,$4);
	# $trans->{bill_date}
	$trans->{atm_desc}=$5;
	$trans->{atm_amount}=cleanamount($6);
	$trans->{atm_fees}=cleanamount($7);
	
	$trans->{groked}++;
    }
}

# Called "minimal" because that's the place we shop the most that uses this
# comment-info format.  
# Also used by Karstadt, HL, Media Markt, C&A...
sub grok_minimal {
    my $trans=shift;
    return unless $trans->{misc2};
    return unless ($trans->{misc2} =~ m<^(\d{27})>);
    $trans->{__minimal_info}++;
    
    local $_=$1;
    my $day   = substr($_, 0, 2);
    my $month = substr($_, 2, 2);
    my $hour  = substr($_, 4, 2);
    my $min   = substr($_, 6, 2);
    # FIXME: Format does not provide for year (?), so assume this year.
    # FIXME: Assume not in future?
    my $year  = (localtime)[5];
    local $ENV{TZ}="MET"; # middle-european time.
    
    # Scalar localtime for debugging
    $trans->{bill_date}=timelocal(0,$min,$hour,$day,$month-1,$year);

    # Bunch of ??? digits here
    
    $trans->{store_number}   =substr($_, 19, 4)+0;
    $trans->{checkout_number}=substr($_, 23, 4)+0;
    
    $trans->{groked}++;
}

sub grok_visa {
    my $trans=shift;
    
    # VISA transactions
    return unless $trans->{misc1};
    return unless ($trans->{misc1} =~ /^VISAUMSATZ (\d{16})$/);
    $trans->{type}='credit card';
    $trans->{ccard_type}='VISA';
    $trans->{ccard_number}=$1;
    
    # CAFE PRESS.COM       26.02. 877-809-165 US        22,71 KURS ZU EURO       1,237300 GEBUEHR 00000000,32
    # USAIR       03741773 21.04. BERLIN      DE         0,00 0,000000 
    return unless ($trans->{misc2} =~ /^(.{20}) (\d\d)\.(\d\d)\. (.{11}) (..) ([ \d]{9},\d\d) (.*)/);
    $trans->{info1}=$1;
    my $day=$2;
    my $mon=$3;
    # FIXME: Format does not provide for year (?), so assume this year.
    # FIXME: Assume not in future?
    my $year  = (localtime)[5];
    local $ENV{TZ}="MET"; # middle-european time.
    $trans->{bill_date}=timelocal(0,0,0,$day,$mon-1,$year);
    $trans->{info2}=$4;
    $trans->{country}=$5;
    $trans->{base_amount}=cleanamount($6) || $trans->{amount};
    $7 =~ m/ *(KURS ZU EURO ([ \d]{7},\d{6}))? *(GEBUEHR ([ \d]{8}[.,]\d\d))?/;
    $trans->{exchangerate} = cleannumber $2 || 1;
    $trans->{fees} = cleannumber $4 || 0;
    
    # Amount = amount_remote_currency/exchangerate + fees
    my $err = $trans->{amount} - 
      ($trans->{base_amount}/$trans->{exchangerate} +
       $trans->{fees});
    warn "Error $err" if abs($err)>0.005;

    $trans->{groked}++;
}

sub grok_ec {
    my $trans = shift;
    
    return unless $trans->{misc2} 
      and ($trans->{misc2} =~ m/^(?:EC |ELV|POZ)(\d{8}) (\d\d)\.(\d\d) (\d\d)\.(\d\d) (...)$/);
    # print "Past EC regex.\n";
    #                               1       2       3      4       5      6
    # $trans->{???} = $1; # ???
    # print $trans->{misc2}, "\n";
    my ($day, $mon, $hour, $min) = ($2,$3,$4,$5);
    my $tz = $6;
    $tz =~ s/ME0/MET/;
    local $ENV{TZ}=$tz;
    $trans->{sale_date} = timelocal(0,$min,$hour,$day,$mon-1,(localtime)[5]);

    $trans->{groked}++;
}

# This is the version of POZ transactions that do not begin with a 27 digit
# number.
sub grok_poz {
    my $trans=shift;
    
    return unless $trans->{misc2};
    # If the "minimal" 27-digit number is there, don't try to do this
    # (far more heuristic) parsing.
    return if ($trans->{groked});
    
    return unless $trans->{misc2} =~ /((?:POZ|ELV)\d{8}) (\d\d)\.(\d\d) (\d\d)\.(\d\d) (...)/;
    my ($reader, $day, $month, $hour, $minute, $tz) = ($1,$2,$3,$4,$5,$6);
    
    $tz =~ s/ME0/MET/;
    local $ENV{TZ}=$tz;
    my $date = timelocal(0,$minute,$hour,$day,$month-1,(localtime)[5]);
    $trans->{bill_date}=$date;
#    $trans->{type}=$type;
    $trans->{card_reader}=$reader;
    
    $trans->{groked}++;
}

# Given a page with a possible error message on it, return the error
# message (or undef if no error). Can also be given a HTTP::Response or
# WWW::Mechanize agent, for your convience.
sub parse_error {
    my $content = shift;
    $content=$content->content if ref $content;
    
    my $tree=HTML::TreeBuilder->new;
    $tree->no_space_compacting(1); # because norisbank is annoying.
    $tree->parse($content);
    $tree->eof;
    $tree->elementify;

    # Some pages seem to give a <div class="error"><span class="error">, others just a <div class="error">.
    # We want only the tag that has actual text.
    my $error = $tree->look_down(_tag=>'span', class=>'error');
    $error ||=  $tree->look_down(_tag=>'div',  class=>'error');
    
    $tree->delete, return undef unless $error;
    $error = $error->as_text;
    $tree->delete;
    return undef if $error !~ m/\S/;
    
    $error =~ s/Die eingegebenen Datumsangaben sind nicht plausibel\./The date range is not possible/;
    
    return $error;
}
