#!/usr/bin/perl
use strict;
use lib './lib';
use Test::Simple 'no_plan';
use LEOCHARRE::CLI2 ':all';
use File::PathInfo::Ext;
use PDF::API2;
use CAM::PDF;
use PDF::Burst;
use PDF::GetImages;
use PDF::OCR2;
# Script version, built from the numeric parts of the revision-control
# keyword below: first number as-is, second number zero-padded to two digits.
our $VERSION = sprintf( "%d.%02d", q$Revision: 1.2 $ =~ /(\d+)/g );

ok_part('initial file tests');

# The PDF to examine is the first command line argument.
# Test definedness and length so that a file literally named "0" is accepted.
my $f = shift @ARGV;
ok( ( defined $f and length $f ), 'have file argument') or exit;
ok( -f $f, "file is on disk") or exit;


# Run an external command and return its output with one trailing newline
# removed. The list form of the pipe open hands the arguments straight to the
# program without a shell, so quotes or other metacharacters in the file name
# cannot break (or inject into) the command line.
# Returns an empty string when the command cannot be started or prints nothing.
my $capture = sub {
   my @cmd = @_;
   open( my $pipe, '-|', @cmd ) or return '';
   my $out = do { local $/; <$pipe> };
   close $pipe;
   $out = '' unless defined $out;
   chomp $out;
   return $out;
};

# '--' ends option parsing, so a file name starting with '-' is not taken
# for a switch.
my $lslha = $capture->( 'ls', '-lha', '--', $f );
ok( $lslha,"got ls -lha :\n$lslha") or exit;

my $file = $capture->( 'file', '--', $f );
ok( $file,"got 'file' output :\n$file") or exit;

ok($file=~/pdf/i,"file output has PDF") or exit;

# Take the capture from the match itself instead of reading $1 afterwards;
# $pdf_version stays undef when the 'file' output carries no version.
my ($pdf_version) = $file =~ /version\s*([\d\.]+)/;
ok( defined $pdf_version,'matched version into file output');

ok($pdf_version,"pdf version: " . ( defined $pdf_version ? $pdf_version : '' ));

# Without the object every following check would die on an undefined
# invocant, so stop here like the other critical checks do.
my $p;
ok( $p = File::PathInfo::Ext->new($f), "instanced File::PathInfo::Ext") or exit;



# Each of these accessors is expected to return a true value for the file.
for my $att (qw/mode size filesize_pretty md5_hex ext/){
   my $val = $p->$att;
   ok( $val,"Got att '$att' : '$val'");
}

ok( lc( $p->ext ) eq 'pdf',"ext is pdf");

ok( $p->filesize,"filesize() (has size)") or exit;





ok_part('PDF::API2');
my $pc1;
my $papi_works;
# Run the eval on its own so the error text in $@ can be saved before any
# other code runs; a failed open is then reported instead of being swallowed.
my $papi = eval { PDF::API2->open($f) };
my $papi_error = $@;
if( ok( $papi,"PDF::API2->open()") ){
   $papi_works = 1;
   $pc1 = $papi->pages;
   ok( $pc1, "pages() got page count $pc1");
}
elsif( $papi_error ){
   warn("PDF::API2->open() failed: $papi_error");
}

   
ok_part('CAM::PDF');
my $camp_works;
my $pc2;
my $camp = eval { CAM::PDF->new($f) };
# CAM::PDF->new may return undef without dying; in that case the reason is
# expected in $CAM::PDF::errstr (see the CAM::PDF documentation).
my $camp_error = $@ || $CAM::PDF::errstr;
if( ok( $camp, "instanced CAM::PDF") ){
   $camp_works = 1;
   $pc2 = $camp->numPages;
   ok( $pc2,"numPages() got $pc2");
}
elsif( $camp_error ){
   warn("CAM::PDF->new() failed: $camp_error");
}









# Only compare when both libraries could open the file. Both counts must be
# defined: two undefined counts would otherwise compare as numerically equal.
if($camp_works and $papi_works ){
   ok_part("compate PDF::API2 and CAM::PDF output");
   my $same_count = ( defined $pc1 and defined $pc2 and $pc1 == $pc2 );
   ok( $same_count,"PDF::API2 pagecount [$pc1] == CAM::PDF pagecount [$pc2] ");
}






# Burst backends to try, in order. The same list is used for the plain run
# and for the debug re-run below.
my @burst_methods = qw/CAM_PDF PDF_API2 pdftk/;

my @working =();
for my $burst_method ( @burst_methods ){
   ok_part("can we burst method: $burst_method");
   $PDF::Burst::BURST_METHOD = $burst_method;

   # eval keeps a dying backend from ending the script; the error is saved
   # right away so it can be shown even when another backend succeeds.
   my @files = eval { PDF::Burst::pdf_burst($f) };
   my $burst_error = $@;

   unless( ok( scalar @files,"pdf_burst() method '$burst_method'") ){
      warn("pdf_burst() method '$burst_method' failed: $burst_error") if $burst_error;
      next;
   }

   push @working, $burst_method;

   my $pagefiles_count = scalar @files;
   ok($pagefiles_count,"got $pagefiles_count pages bursted");

   # Every page file that was returned must exist with a non-zero size.
   for my $page ( @files ){
      my $fi;
      ok( $fi = File::PathInfo::Ext->new($page),"File::PathInfo::Ext instanced for:\n$page");
      ok( $fi->filesize,"got filesize()");
   }

}

my $countw = scalar @working;
unless( ok( $countw,"busrt methods that work: [$countw] @working") ){
   warn("no PDF::Burst methods worked.. the next run will likely croak on purpose to see output..\n");

   # Deliberately not wrapped in eval: from here on a croak is wanted so its
   # full output is visible.
   ok( PDF::API2->open($f),"PDF::API2->open()");


   $PDF::Burst::DEBUG = 1;


   for my $burst_method ( @burst_methods ){

      $PDF::Burst::BURST_METHOD = $burst_method;
      PDF::Burst::pdf_burst($f);
   }
}







ok_part("can we do it all and get ocr? $f");

# Final step: extract text with PDF::OCR2 and, when there is any, save it
# next to the input file as "<file>.textoutput.txt".
my $po;
if( ok( $po= PDF::OCR2->new($f),'instanced PDF::OCR2') ){
   $PDF::OCR2::DEBUG = 1;
   my $text = $po->text;
   if( ok($text,"got text output.") ){
      my $o = "$f.textoutput.txt";

      # Lexical handle instead of a global bareword one. As before, an
      # output file that cannot be opened ends the script after a warning.
      my $out;
      unless( open( $out, '>', $o ) ){
         warn("cant open $o for writing, $!");
         exit;
      }

      # Buffered write errors may only surface at close, so check both and
      # announce the saved file only when the text really made it to disk.
      if( ( print {$out} $text ) and close( $out ) ){
         print STDERR "Saved text output to:\n $o\n";
      }
      else {
         warn("could not write text output to $o, $!");
      }
   }
}















# End of the test run; only subroutine definitions follow.
exit;





# Print a section banner on STDERR: two blank lines, a rule of 60 '='
# characters, then the upper-cased arguments (joined by spaces and prefixed
# with "= "), followed by a blank line.
sub ok_part {
   my @labels = @_;
   my $rule   = '=' x 60;
   my $title  = uc( "= @labels" );
   printf STDERR "\n\n%s\n%s\n\n", $rule, $title;
}












# Return the help text for this script as one string; $0 (the name the script
# was started with) is interpolated into the examples.
# NOTE(review): nothing in this file calls usage() directly - presumably the
# LEOCHARRE::CLI2 option handling uses it for -h; confirm.
# The reporting example redirects STDERR as well as STDOUT, because the
# section banners and warnings of this script are written to STDERR.
sub usage {
   return qq{$0 [OPTION].. [FILE]...
Test a pdf file for ability to pass ocr.

   -h       help
   -v       version

This tests problem docs. 
These are pdf documents that for some reason are not spitting out text, and you think they should.
If so, place them inside ./t/problemdocs and run this test.

Usage example:

   perl $0 ./file.pdf

Part of PDF::OCR2 - parent package.

If the test fails, and you think this is an error- that the file should pass- then please
contact leocharre at cpan dot org, with the output and the test file.

   $0 ./file.pdf > output.txt 2>&1

Attach the output.txt file, the file.pdf, and any comments- and mail to leocharre at cpan dot org.
};
}


