#!/pro/bin/perl

# csv-check: Check validity of CSV file and report
#	   (m)'08 [10 Aug 2008] Copyright H.M.Brand 2007-2009

use strict;
use warnings;

our $VERSION = "1.2";

sub usage
{
    my $err = shift and select STDERR;
    print <<EOU;
usage: csv-check [-s <sep>] [-q <quot>] [file.csv]
       -s <sep>   use <sep>   as seperator char. Auto-detect, default = ','
       -q <quot>  use <quot>  as quotation char. Default = '"'
EOU
    exit $err;
    } # usage

use Getopt::Long qw(:config bundling nopermute passthrough);
my $sep;	# Set after reading first line in a flurry attempt to auto-detect
my $quo = '"';

GetOptions (
    "help|?"	=> sub { usage (0); },

    "c|s=s"	=> \$sep,
    "q=s"	=> \$quo,
    ) or usage (1);

use Text::CSV_XS;

if (@ARGV && -f $ARGV[0] && !-s $ARGV[0]) {
    print STDERR "$ARGV[0] is empty\n";
    exit 0;
    }

my ($bin, $rows, %cols, $firstline) = (0, 0);
unless ($sep) { # No sep char passed, try to auto-detect;
    while (<>) {
	m/\S/ or next;	# Skip empty leading blank lines
	$sep = m/["\d],["\d,]/ ? ","  :
	       m/["\d];["\d;]/ ? ";"  :
	       m/["\d]\t["\d]/ ? "\t" :
	       # If neither, then for unquoted strings
	       m/\w,[\w,]/     ? ","  :
	       m/\w;[\w;]/     ? ";"  :
	       m/\w\t[\w]/     ? "\t" :
				 ","  ;
	$firstline = $_;
	$rows++;
	last;
	}
    }

my $csv = Text::CSV_XS-> new ({
    sep_char       => $sep,
    quote_char     => $quo,
    binary         => 1,
    keep_meta_info => 1,
    });

sub done
{
    print "Checked with $0 $VERSION using Text::CSV_XS $Text::CSV_XS::VERSION\n";
    my @diag = $csv->error_diag;
    if ($diag[0] == 2012 && $csv->eof) {
	my @coll = sort { $a <=> $b } keys %cols;
	local $" = ", ";
	my $cols = @coll == 1 ? $coll[0] : "(@coll)";
	print "OK: rows: $rows, columns: $cols\n";
	print "    sep = <$sep>, quo = <$quo>, bin = <$bin>\n";
	exit 0;
	}

    if ($diag[2]) {
	print "$ARGV line $./$diag[2] - $diag[0] - $diag[1]\n";
	my $ep  = $diag[2] - 1; # diag[2] is 1-based
	my $err = $csv->error_input . "         ";
	substr $err, $ep + 1, 0, "*";
	substr $err, $ep,     0, "*";
	($err = substr $err, $ep - 5, 12) =~ s/ +$//;
	print "    |$err|\n";
	}
    else {
	print "$ARGV line $. - $diag[1]\n";
	}
    exit $diag[0];
    } # done

sub stats
{
    my $r = shift;
    $cols{scalar @$r}++;
    grep { $_ & 0x0002 } $csv->meta_info and $bin = 1;
    } # stats

if ($firstline) {
    $csv->parse ($_) or done;
    stats [ $csv->fields ];
    }

while (my $row = $csv->getline (*ARGV)) {
    $rows++;
    stats $row;
    }
done;
