#!/usr/bin/perl -w
#
#   Usage: ./mrsclient -h
#
#   martin.senger@gmail.com
#   February 2010
#-----------------------------------------------------------------------------

use strict;
use warnings;
use FindBin;
use lib "$FindBin::Bin/../lib";
use lib "$FindBin::Bin/../lib/perl5";
use MRS::Client;

# Print all given items followed by a single newline;
# returns whatever the underlying print returns.
sub say {
    return print (@_, "\n");
}

# Return the help text (a multi-line string) describing all documented
# command-line options. The text is a non-interpolating here-document,
# so it is returned exactly as written.
sub get_usage {
    return <<'END_OF_USAGE';
Usage:
   mrsclient [options] [query,...]

where query are terms, possibly with boolean operators;
    terms without any boolean operators are combined with AND;
    if there are no query arguments, options -a, -o and -q are
    considered (to define what query to make)

where 'options' are:
   -e <endpoint>  URL of the MRS search service
   -S <name>      service name of the MRS search service
   -H <hostname>  endpoint host (with standard ports)
   -E             list all endpoints and service names
   -l             list of all available databanks IDs
   -L             list of all available databanks details
   -C             list entry counts of all databanks

   -d <databank>  info about a databank,
                  or a databank that will be searched
                     if -a, -o, -q or text given
   -i <entry id>  show the given entry
   -a <terms>     query terms (whitespace separated)
                  that will be combined with logical AND
   -o <terms>     query terms (whitespace separated)
                  that will be combined with logical OR
   -q <expr>      boolean expression that will be used
                  either on its own, or as a filter for
                  results obtained by -a or -o
   -f <format>    format of the output databank entries
                  (header, title, plain, html, fasta, sequence)
   -A <algorithm> a scoring algorithm computing result
                  document relevance (Vector, Dice, Jaccard)
   -m <maxsize>   show only 'maxsize' number of entries
                  (used for queries: option -a, -o, -q)
   -s <start>     start showing entries only from the given
                  position; first entry has number 1
                  (used for queries: option -a, -o, -q)
   -c             show only document count of given query
                  (if there was any query defined),
                  or, show document count of a given databank
                  (if there was a -d option specified)
   -p             show properties of just executed query
                  (if there was any query defined)
   -n             do NOT show individual entries found by a query

   If there was a query specified (option -a, -o, -q or one or
   more free arguments), and the -n option was not given,
   then all found entries will be printed (in the given format).

   -h             this help
   -v             show version

END_OF_USAGE
}

# Prepare for command-line options/arguments. The original argument list
# is remembered before parsing so that an invocation without any arguments
# can still be recognised afterwards.
my @all_args = @ARGV;
use Getopt::Std;

# Getopt::Std puts the value of each switch into the package variable
# $opt_<letter>; they are declared with 'our' (rather than with the
# obsolete 'use vars') so that they can be used under 'use strict'.
our ($opt_h, $opt_v);                           # general
our ($opt_e, $opt_S, $opt_E, $opt_H, $opt_X);   # endpoints
our ($opt_a, $opt_o, $opt_q, $opt_f, $opt_A);   # query
our ($opt_p, $opt_m, $opt_n, $opt_s);
our ($opt_d, $opt_l, $opt_L, $opt_C, $opt_c);   # databanks
our ($opt_i, $opt_x);
my $switches = 'aAdefHimoqsSxX';   # switches taking an argument
getopt ($switches);

# help wanted? (also shown when no arguments were given at all)
if ($opt_h or @all_args == 0) {
    print get_usage ();
    exit 0;
}

# print version and exit
if ($opt_v) {
    print "$0 using MRS::Client version $MRS::Client::VERSION\n";
    exit 0;
}

# Output may contain UTF8 because some databanks use it
# (e.g. OMIM definitely does).
binmode (STDOUT, ':utf8');

# Create the main worker. Only the options that were really given on the
# command line are passed to the constructor, in this fixed order.
my @args = map { defined $_->[1] ? @{$_} : () } (
    [ search_url     => $opt_e ],
    [ admin_url      => $opt_X ],
    [ host           => $opt_H ],
    [ search_service => $opt_S ],
);
our $client = MRS::Client->new (@args);

# Option -E: print the environment (where to find the server, etc.).
# Each group lists the URL, the service name and the WSDL of one service;
# a value is printed only if it is not empty, and the groups are separated
# by an empty line.
if (defined $opt_E) {
    my @groups = (
        [ [ 'Search URL:           ', 'search_url'      ],
          [ 'Search service name:  ', 'search_service'  ],
          [ 'Search WSDL:          ', 'search_wsdl'     ] ],
        [ [ 'Blast URL:            ', 'blast_url'       ],
          [ 'Blast service name:   ', 'blast_service'   ],
          [ 'Blast WSDL:           ', 'blast_wsdl'      ] ],
        [ [ 'Clustal URL:          ', 'clustal_url'     ],
          [ 'Clustal service name: ', 'clustal_service' ],
          [ 'Clustal WSDL:         ', 'clustal_wsdl'    ] ],
        [ [ 'Admin URL:            ', 'admin_url'       ],
          [ 'Admin service name:   ', 'admin_service'   ],
          [ 'Admin WSDL:           ', 'admin_wsdl'      ] ],
    );
    foreach my $index (0 .. $#groups) {
        say ('') if $index > 0;
        foreach my $row (@{ $groups[$index] }) {
            my ($label, $accessor) = @{$row};
            say ($label . $client->$accessor) if $client->$accessor;
        }
    }
}

# Option -l: IDs of all databanks, one per line
if (defined $opt_l) {
    my @ids = map { $_->id } $client->db;
    say (join ("\n", @ids));
}

# Option -L: details (info) of all databanks, divided by a dashed line
if (defined $opt_L) {
    my $divider = "\n-------------------------------\n";
    say (join ($divider, $client->db));
}

# Option -C: ID, entry count and version of all databanks, one per line
if (defined $opt_C) {
    my @rows = ();
    foreach my $databank ($client->db) {
        push (@rows, sprintf ("%-15s %9d  %s",
                              $databank->id, $databank->count, $databank->version));
    }
    say (join ("\n", @rows));
}

# Info about a single databank: shown only when option -d is given without
# any free arguments and without any of the options -a, -o, -q and -i.
# With option -c only the databank's entry count is printed.
if ($opt_d and @ARGV == 0
    and not $opt_a and not $opt_o and not $opt_q and not $opt_i) {
    say ($opt_c ? $client->db ($opt_d)->count : $client->db ($opt_d));
}

# query
#
# The query is defined either by the free command-line arguments or, if
# there are none, by the options -a, -o and -q. Options -f, -A, -m and -s
# are added to the query arguments. Nothing is searched when no query
# argument was collected or when an entry was asked for by option -i.
my @and = ();
my @or = ();
if (@ARGV > 0) {
    # use remaining command-line arguments: an argument recognized by
    # MRS::Operator->contains becomes the boolean expression (the last
    # one wins), all others are terms combined with AND
    foreach my $text (@ARGV) {
        if (MRS::Operator->contains ($text)) {
            $opt_q = $text;
        } else {
            push (@and, $text);
        }
    }
} else {
    # use values from options -a, -o, and -q;
    # splitting by ' ' (unlike by /\s+/) ignores leading whitespace, so
    # a value such as " human insulin" does not produce an empty term
    @and = split (' ', $opt_a) if $opt_a;
    @or = split (' ', $opt_o) if $opt_o;
}
my @find_args = ();
push (@find_args, 'and' => \@and) if @and > 0;
push (@find_args, 'or' => \@or) if @or > 0;
push (@find_args, query => $opt_q) if $opt_q;
push (@find_args, format => $opt_f) if $opt_f;
push (@find_args, algorithm => $opt_A) if $opt_A;
push (@find_args, max_entries => $opt_m) if $opt_m;
push (@find_args, start => $opt_s) if $opt_s;
if (@find_args > 0 and not $opt_i) {
    # make query (in the given databank, or in all of them)
    my $find;
    if ($opt_d) {
        $find = $client->db ($opt_d)->find (@find_args);
    } else {
        $find = $client->find (@find_args);
    }
    # show results (unless suppressed by option -n)
    unless ($opt_n) {
        while (my $record = $find->next) {
            say $record;
        }
    }
    # show counts: a single number for one databank,
    # otherwise one line per databank
    if ($opt_c) {
        if ($opt_d) {
            say $find->count;
        } else {
            my %counts = $find->db_counts;
            foreach my $db (sort keys %counts) {
                printf ("%-15s %9d\n", $db, $counts{$db});
            }
        }
    }
    if ($opt_p) {
        say "Query properties:\n" . $find;
    }
}

# get entry (option -i); the entry is looked up in the databank given by
# option -d and printed in the format given by option -f (if any)
if ($opt_i) {
    if ($opt_d) {
        say $client->db ($opt_d)->entry ($opt_i, $opt_f);
    } else {
        # without a databank nothing can be fetched; tell the user
        # instead of silently printing nothing
        warn "Option -i (entry ID) is ignored without a databank (option -d)\n";
    }
}

#
# Print the parser script of a given databank.
#
# Not supported generally. It is disabled at the default server.
# The options used here (not listed in the usage text) are:
#  -X <admin-endpoint>
#  -x <databank>  show parser script for the given databank
#
if ($opt_x) {
    my $databank = $client->db ($opt_x);
    my @parser_of_databank = $databank->parser;
    print $client->parser (@parser_of_databank);
}


__END__
TBD:
* admin calls

