# script to install usf norms on local machine via usf website: install_norms_usf.pl
use 5.10.0;
use strict;
use warnings;
use Benchmark qw(cmpthese :hireswallclock); # cmpthese($count, { 'Name1' => sub { ...code1... }, 'Name2' => sub { ...code2... },});
use Carp qw(carp croak);
use Config;
use feature qw(say state switch); # given(state $foo){when(/^ac/){say "$foo has a or c";}when([3,5]){say "$foo is 3 or 5";}default{say "Don't know foo"; }}
use FindBin qw/$Bin/;
use File::Basename;# basename, dirname
use File::Compare; # compare("file1","file2") == 0?
use File::Copy;    # copy("file1","file2"); copy("Copy.pm",\*STDOUT); move("fileA","fileB");
use File::Find;    # find(\&wanted, @dirs_to_search);    sub wanted {-T && say "text file: $File::Find::name";}
use File::Path qw(make_path remove_tree); # make_path(list of paths), remove_tree('/tmp', '0ormore', {keep_root => 1})
use File::Slurp;   # $text|@lines = read_file('path'); write_file|append_file('path', @lines); prepend_file( $file, $header|\@lines ) ;@files = read_dir('path')
use File::Spec;    # catfile, catdir, splitdir
use File::stat;    # $st = stat($file); say "$file is exe with links" if(($st->mode & 0111) && $st->nlink > 1); uid, size, atime, mtime, ctime, blksize
use List::AllUtils qw(first max maxstr min minstr reduce shuffle sum);
use String::Util qw(:all); # hascontent, nocontent, crunch, randword(length, strip_vowels => 1, numerals => 0), fullchomp, jsquote, repeat($string, count)
#use lib dirname (__FILE__) . "/modulesdir";

sub _run_install {
    require File::Path;
    require File::Temp;
    my $put_dir = File::Temp::tempdir(CLEANUP => 0);
    _download_db($put_dir);
    _compose_db($put_dir);
    my $install_dir = _install_db($put_dir);
    #unlink_on_destroy
	#File::Temp::remove_tree($put_dir);
    return _identify_db($install_dir) ; # check again
}

sub _download_db {    # Download the files, copying to tmp dir:
    my $put_dir = shift;
    $| = 1;
    say "Attempting to download database files:\n\tfrom:\t'http://w3.usf.edu/FreeAssociation/AppendixA/'\n\tto:\t'$put_dir'\n";

    my @file_names = (qw/A-B C D-F G-K L-O P-R S T-Z/);

    require LWP::UserAgent;
    my $ua = LWP::UserAgent->new;
    $ua->agent('Perl Lingua::Norms::USF DB downloader');

    foreach my $fn(@file_names) {
        print "\t$fn file:\t";
        my $req = HTTP::Request->new(POST => 'http://w3.usf.edu/FreeAssociation/AppendixA/Cue_Target_Pairs.' . $fn);
        my $res = $ua->request($req);
        if ($res->is_success) {
            write_file(File::Spec->catfile($put_dir, $fn), $res->content) || do { warn "\t$fn file: Could not write file; check permissions."; next;};
            say 'ok';
        }
        else {
            say $res->status_line, ': Check URL and/or internet connection.';
        }
    }
    
    say 'Finished downloading files from w3.usf.edu.';
    return scalar(read_dir($put_dir)) == scalar(@file_names) ? 1 : 0; # check got as many files written as requested
}

sub _compose_db {
    my $put_dir = shift;
    
    $| = 1;
    say "Composing database files within '$put_dir'\n(Allow some few minutes for processing each downloaded file.)\n";
    
    my @src_files = read_dir($put_dir);
    my $last_letter;
    my $headings = join(',', qw/CUE TARGET NORMED GN PN FSG BSG MSG OSG MN MMIAS ON OMIAS QSS QFR QCON QH QPS QMC QPR QRSG QUC TSS TFR TCON TH TPS TMC TPR TRSG TUC/);
    foreach my $letter('A' .. 'Z') {
        write_file(File::Spec->catfile($put_dir, $letter . '.csv'), "$headings\n");
    }
   
    my @file_names = (qw/A-B C D-F G-K L-O P-R S T-Z/);
    foreach my $fn(@file_names) {
        my @lines = read_file(File::Spec->catfile($put_dir, $fn)) or die 'Raw datafiles not available - ensure files can be downloaded.';
        my $i;
        say "\tProcessing $fn file:\t";
        for ($i = 0; $i < scalar(@lines); $i++) {
            my $line = $lines[$i];
            $line =~ s/,\s+?/,/g;
            next if nocontent($line) or $line =~ /^</ or $line =~ /^CUE,\s?TARGET,\s?NORM/; # skip heading, tagged and empty lines
            
            # Substitutions for part-of-speech field:
            $line =~ s/^(([^,]*,){17})ADJ/$1AJ/;# most rows for adjectives give AJ, some give ADJ
            $line =~ s/^(([^,]*,){26})ADJ/$1AJ/; 
            $line =~ s/^(([^,]*,){17})PRP/$1PP/;# most rows for preopositions give PP, some give PRP
            $line =~ s/^(([^,]*,){26})PRP/$1PP/;
            $line =~ s/^(([^,]*,){17})N,/$1NN,/;
            $line =~ s/^(([^,]*,){26})N,/$1NN,/;
            $line =~ s/^(([^,]*,){17})C,/$1CJ,/;
            $line =~ s/^(([^,]*,){26})C,/$1CJ,/;
            $line =~ s/^(([^,]*,){17})I,/$1IJ,/;
            $line =~ s/^(([^,]*,){26})I,/$1IJ,/;
            $line =~ s/^(([^,]*,){17})P,/$1PN,/;
            $line =~ s/^(([^,]*,){26})P,/$1PN,/;
            $line =~ s/^(([^,]*,){17})AD,/$1AV,/;
            $line =~ s/^(([^,]*,){26})AD,/$1AV,/;
            $line =~ s/^(([^,]*,){17})V,/$1VB,/;
            $line =~ s/^(([^,]*,){26})V,/$1VB,/;
            
            # write this line to the appropriate file (based on first letter of cue):
            my $letter = substr($line, 0, 1);# isolate the first letter of the cue in this line  
            next if nocontent($letter);            
            append_file(File::Spec->catfile($put_dir, $letter . '.csv'), $line); #has newline
         }
		 print "ok\n";
    }
}

sub _install_db {
    my $put_dir = shift;
    my $save_dir = File::Spec->catdir($Config{'installsitelib'}, qw/Lingua Norms USF db/); # within local Perl architecture
    if (!-e $save_dir) {
		make_path($save_dir) || die "Could not make install directory '$put_dir'";
    }
    my $fname;
    require File::Copy;
    foreach my $letter('A' .. 'Z') {
        $fname = $letter . '.csv';
        File::Copy::move(File::Spec->catfile($put_dir, $fname), File::Spec->catfile($save_dir, $fname));
    }
}

sub _make_relationaldb { # not implemented
    my $db_dir = shift;
    my $tmp_dir = '';
    
    # make cue-only & assoc-only tables
    my $cue_headings = join(',', qw/CUE QSS QFR QCON QH QPS QMC QPR QRSG QUC/);
    my $ass_headings = join(',', qw/CUE TARGET NORMED GN PN FSG BSG MSG OSG MN MMIAS ON OMIAS TISR/); # if TGT is a resonator
    my $tgt_headings = join(',', qw/TARGET TSS TFR TCON TH TPS TMC TPR TRSG TUC/);
    
    foreach my $letter('A' .. 'Z') {
        write_file(File::Spec->catfile($tmp_dir, $letter . '.csv'), "$cue_headings\n");
    }
        
    my @file_names = (qw/A-B C D-F G-K L-O P-R S T-Z/);
    foreach my $fn(@file_names) {
        my @lines = read_file(File::Spec->catfile($tmp_dir, $fn));
        my $i;
        say "\tProcessing $fn file";
        for ($i = 0; $i < scalar(@lines); $i++) {
            my $line = crunch($lines[$i]);
            $line =~ s/\s,/,/g;
            next if nocontent($line) or $line =~ /^</ or $line =~ /^CUE,\s?TARGET,\s?NORM/; # skip heading, tagged and empty lines
            my $letter = substr($line, 0, 1);# isolate the first letter of the cue in this line  
            
                        
            # QPR, QRSG and QUC have a non-alphanumeric char if the target is not normed, although
            # these have values for other targets - replace the char with a proper value: 
            # (or let this indicate that this target does not figure in the calculation of the statistic
         }
     }
}

sub _db_filename {
    my $fn = shift;
    my @ari = split/-/, $fn;
    my $new_fn = scalar@ari > 1 ? join('_', ($ari[0] .. $ari[1])): $fn;
    return $new_fn;
 }

 # $dir = _identify_db(); # will check environment variable 'USFNORMS_DB', $Config install path and local doc folder
# $dir = _identify_db(dbdir => 'adir'); # will check given directory first, and then as above
# $dir = _identify_db(dbdir => ['adir', 'bdir']); # will check these dirs first, and then as above

sub _identify_db {
    my (%args) = @_;
    my $dir = 0;
    my $dirs = [];
    if (!defined($args{'dbdir'})) {
        if (keys(%ENV) && exists($ENV{'USFNORMS_DB'})) {
            unshift @{$dirs}, _dirpath_ok($ENV{'USFNORMS_DB'});
        }
    }
    else {
       $dirs = ref $args{'dbdir'} ? $args{'dbdir'} : [$args{'dbdir'}];
    }
    foreach (@{$dirs}) {
        last if $dir = _dirpath_ok($_);
    }
    if (!$dir) {
        $dirs = _candidate_dirs();
        foreach (@{$dirs}) {
            last if $dir = _dirpath_ok($_);
        }
    }
    #if ($dir) {
    #    say "Path to potential DB found: Please add/change the ENVIRONMENT VARIABLE named USFNORMS_DB to the value '$dir' (on Windows, search 'environment' to pull up the relevant dialog box).";
    #}
    return $dir; # path or 0
}

sub _dirpath_ok {
    my $dirpath = shift;
    return undef if nocontent($dirpath); # check again
    return undef if ! -d $dirpath; # is a directory?
    my $files_aref = read_dir($dirpath) || return undef; # is readable dir?
    my $nfiles = scalar(@{$files_aref});
    return undef if !$nfiles or $nfiles < 26; # readable dir has files, & enough files?
    # check that the tables are readable (as text files) and not empty, and have comma as delimiter:
    my ($path, $fname, $table) = ();
    foreach $fname('A' .. 'Z') {
        $fname .= '.csv';
        $table = File::Spec->catfile($dirpath, $fname);
        return undef if !-e $table or !-r $table or !-T $table or -z $table;
        return undef if get_separator(path => $table, lucky => 1) ne ',';
    }
    return $dirpath;
}

sub _candidate_dirs {
    require File::HomeDir;
    return [
        File::Spec->catdir($INC[0], 'Lingua', 'Norms', 'USF', 'db'),
        File::Spec->catdir($Config{'installsitelib'}, 'Lingua', 'Norms', 'USF', 'db'), # within local Perl architecture
        File::Spec->catdir(File::HomeDir->my_documents(), 'Lingua', 'Norms', 'USF', 'db'),
    ];
}
 
1;
__END__
