package Search::OpenSearch::Engine::Lucy;
use strict;
use warnings;
use Carp;
use base qw( Search::OpenSearch::Engine );
use SWISH::Prog::Lucy::Indexer;
use SWISH::Prog::Lucy::Searcher;
use SWISH::Prog::Doc;
use Lucy::Object::BitVector;
use Lucy::Search::Collector::BitCollector;
use Data::Dump qw( dump );
use Scalar::Util qw( blessed );
use Module::Load;
use Path::Class::Dir;
use SWISH::3 qw(:constants);

our $VERSION = '0.12';

__PACKAGE__->mk_accessors(
    qw(
        aggregator_class
        auto_commit
        )
);

use Rose::Object::MakeMethods::Generic ( 'scalar --get_set_init' => 'indexer',
);

sub init {
    my $self = shift;
    $self->SUPER::init(@_);
    $self->{aggregator_class} ||= 'SWISH::Prog::Aggregator';
    $self->{array_field_values} = 1;
    $self->{auto_commit} = 1 unless defined $self->{auto_commit};
    load $self->{aggregator_class};
    return $self;
}

sub init_searcher {
    my $self     = shift;
    my $index    = $self->index or croak "index not defined";
    my $searcher = SWISH::Prog::Lucy::Searcher->new(
        invindex => $index,
        debug    => $self->debug,
        %{ $self->searcher_config },
    );
    if ( !$self->fields ) {
        $self->fields( $searcher->get_propnames );
    }
    return $searcher;
}

sub build_facets {
    my $self    = shift;
    my $query   = shift or croak "query required";
    my $results = shift or croak "results required";
    if ( $self->debug and $self->logger ) {
        $self->logger->log(
            "build_facets check for self->facets=" . $self->facets );
    }
    my $facetobj = $self->facets or return;

    my @facet_names = @{ $facetobj->names };
    my $sample_size = $facetobj->sample_size || 0;
    if ( $self->debug and $self->logger ) {
        $self->logger->log( "building facets for "
                . dump( \@facet_names )
                . " with sample_size=$sample_size" );
    }
    my $searcher      = $self->searcher;
    my $lucy_searcher = $searcher->{lucy};
    my $query_parser  = $searcher->{qp};
    my $bit_vec       = Lucy::Object::BitVector->new(
        capacity => $lucy_searcher->doc_max + 1 );
    my $collector
        = Lucy::Search::Collector::BitCollector->new( bit_vector => $bit_vec,
        );

    $lucy_searcher->collect(
        query     => $query_parser->parse("$query")->as_lucy_query(),
        collector => $collector
    );

    # find the facets
    my %facets;
    my $doc_id = 0;
    my $count  = 0;
    my $loops  = 0;
    while (1) {
        $loops++;
        $doc_id = $bit_vec->next_hit( $doc_id + 1 );
        last if $doc_id == -1;
        last if $sample_size and ++$count > $sample_size;
        my $doc = $lucy_searcher->fetch_doc($doc_id);
        for my $name (@facet_names) {

            # unique-ify
            my %val = map { $_ => $_ }
                split( m/\003/,
                ( defined $doc->{$name} ? $doc->{$name} : '' ) );
            for my $value ( keys %val ) {
                $facets{$name}->{$value}++;
            }
        }
    }

    if ( $self->debug and $self->logger ) {
        $self->logger->log(
            "got " . scalar( keys %facets ) . " facets in $loops loops" );
    }

    # turn the struct inside out a bit, esp for XML
    my %facet_struct;
    for my $f ( keys %facets ) {
        for my $n ( keys %{ $facets{$f} } ) {
            push @{ $facet_struct{$f} },
                { term => $n, count => $facets{$f}->{$n} };
        }
    }
    return \%facet_struct;
}

sub has_rest_api {1}

sub get_allowed_http_methods {
    my $self = shift;
    if ( $self->auto_commit ) {
        return qw( GET POST PUT DELETE );
    }
    return qw( GET POST PUT DELETE COMMIT ROLLBACK );
}

sub _massage_rest_req_into_doc {
    my ( $self, $req ) = @_;

    #dump $req;
    my $doc;

    if ( !blessed($req) ) {
        $doc = SWISH::Prog::Doc->new(
            version => 3,
            %$req
        );
    }
    else {

        #dump $req->headers;

        # $req should act like a HTTP::Request object.
        my %args = (
            version => 3,
            url     => $req->uri->path,        # TODO test
            content => $req->content,
            size    => $req->content_length,
            type    => $req->content_type,

            # type
            # action
            # parser
            # modtime
        );

        #dump \%args;

        $doc = SWISH::Prog::Doc->new(%args);

    }

    # use set_parser_from_type so that SWISH::3 does the Right Thing
    # instead of looking at the original mime-type.
    my $aggregator
        = $self->aggregator_class->new( set_parser_from_type => 1 );
    $aggregator->swish_filter($doc);

    return $doc;
}

sub init_indexer {
    my $self = shift;

    # unlike a Searcher, which has an array of invindex objects,
    # the Indexer wants only one. We take the first by default,
    # but a subclass could do more subtle logic here.

    my $indexer = SWISH::Prog::Lucy::Indexer->new(
        invindex => $self->index->[0],
        debug    => $self->debug,
        %{ $self->indexer_config },
    );
    return $indexer;
}

# PUT only if it does not yet exist
sub PUT {
    my $self = shift;
    my $req  = shift or croak "request required";
    my $doc  = $self->_massage_rest_req_into_doc($req);
    my $uri  = $doc->url;

    # edge case: index might not yet exist.
    my $exists;
    my $index = $self->index or croak "index not defined";
    if (   -d $index->[0]
        && -s Path::Class::Dir->new( $index->[0] )
        ->file( SWISH_HEADER_FILE() ) )
    {
        $exists = $self->GET($uri);
        if ( $exists->{code} == 200 ) {
            return { code => 409, msg => "Document $uri already exists" };
        }
    }

    my $indexer
        = $self->auto_commit
        ? $self->init_indexer()
        : $self->indexer();
    $indexer->process($doc);

    if ( !$self->auto_commit ) {
        my $total = 1;
        return { code => 202, total => 1, };
    }

    my $total = $indexer->finish();
    $exists = $self->GET( $doc->url );
    if ( $exists->{code} != 200 ) {
        return { code => 500, msg => 'Failed to PUT doc' };
    }
    return { code => 201, total => $total, doc => $exists->{doc} };
}

# POST allows new and updates
sub POST {
    my $self = shift;
    my $req  = shift or croak "request required";
    my $doc  = $self->_massage_rest_req_into_doc($req);
    my $uri  = $doc->url;
    my $indexer
        = $self->auto_commit
        ? $self->init_indexer()
        : $self->indexer();
    $indexer->process($doc);

    if ( !$self->auto_commit ) {
        my $total = 1;
        return { code => 202, total => 1, };
    }

    my $total  = $indexer->finish();
    my $exists = $self->GET( $doc->url );

    if ( $exists->{code} != 200 ) {
        return { code => 500, msg => 'Failed to POST doc' };
    }
    return { code => 200, total => $total, doc => $exists->{doc} };
}

sub COMMIT {
    my $self = shift;
    if ( $self->auto_commit ) {
        return { code => 400 };
    }
    my $indexer = $self->indexer();
    if ( my $total = $indexer->count() ) {
        $indexer->finish();

        # MUST invalidate current indexer
        $self->indexer(undef);

        return { code => 200, total => $total };
    }
    else {
        return { code => 204 };
    }
}

sub ROLLBACK {
    my $self = shift;
    if ( !$self->auto_commit ) {
        my $reverted = $self->indexer->count;
        $self->indexer->abort();
        $self->indexer(undef);
        return { code => 200, total => $reverted };
    }
    else {
        return { code => 400 };
    }
}

sub DELETE {
    my $self     = shift;
    my $uri      = shift or croak "uri required";
    my $existing = $self->GET($uri);
    if ( $existing->{code} != 200 ) {
        return {
            code => 404,
            msg  => "$uri cannot be deleted because it does not exist"
        };
    }
    my $indexer
        = $self->auto_commit
        ? $self->init_indexer()
        : $self->indexer;
    $indexer->get_lucy->delete_by_term(
        field => 'swishdocpath',
        term  => $uri,
    );

    if ( !$self->auto_commit ) {
        return { code => 202 };
    }

    $indexer->finish();
    return { code => 200, };
}

sub _get_swishdocpath_analyzer {
    my $self = shift;
    return $self->{_uri_analyzer} if exists $self->{_uri_analyzer};
    my $qp    = $self->searcher->{qp};         # TODO expose this as accessor?
    my $field = $qp->get_field('swishdocpath');
    if ( !$field ) {

        # field is not defined as a MetaName, just a PropertyName,
        # so we do not analyze it
        $self->{_uri_analyzer} = 0;    # exists but false
        return 0;
    }
    $self->{_uri_analyzer} = $field->analyzer;
    return $self->{_uri_analyzer};
}

sub _analyze_uri_string {
    my ( $self, $uri ) = @_;
    my $analyzer = $self->_get_swishdocpath_analyzer();

    #warn "uri=$uri";

    if ( !$analyzer ) {
        return $uri;
    }
    else {
        return grep { defined and length } @{ $analyzer->split($uri) };
    }
}

sub GET {
    my $self = shift;
    my $uri = shift or croak "uri required";

    # use internal Lucy searcher directly to avoid needing MetaName defined
    my $q = Lucy::Search::PhraseQuery->new(
        field => 'swishdocpath',
        terms => [ $self->_analyze_uri_string($uri) ]
    );

    #warn "q=" . $q->to_string();

    my $lucy_searcher = $self->searcher->get_lucy();
    my $hits = $lucy_searcher->hits( query => $q );

    #warn "$q total=" . $hits->total_hits();
    my $hitdoc = $hits->next;

    if ( !$hitdoc ) {
        return { code => 404, };
    }

    #dump $hitdoc;

    # get all fields
    my %doc;
    my $fields = $self->fields;
    for my $field (@$fields) {
        my $str = $hitdoc->{$field};
        $doc{$field} = [ split( m/\003/, defined $str ? $str : "" ) ];
    }
    $doc{title}   = $hitdoc->{swishtitle};
    $doc{summary} = $hitdoc->{swishdescription};

    my $ret = {
        code => 200,
        doc  => \%doc,
    };

    #dump $ret;

    return $ret;
}

1;

__END__

=head1 NAME

Search::OpenSearch::Engine::Lucy - Lucy server with OpenSearch results

=head1 SYNOPSIS

 use Search::OpenSearch::Engine::Lucy;
 my $engine = Search::OpenSearch::Engine::Lucy->new(
    index       => [qw( path/to/index1 path/to/index2 )],
    facets      => {
        names       => [qw( color size flavor )],
        sample_size => 10_000,
    },
    fields      => [qw( color size flavor )],   # result attributes in response
    indexer_config  => {
        somekey => somevalue,
    },
    searcher_config => {
        anotherkey => anothervalue,
    },
    aggregator_class => 'MyAggregator', # defaults to SWISH::Prog::Aggregator
    cache           => CHI->new(
        driver           => 'File',
        dir_create_mode  => 0770,
        file_create_mode => 0660,
        root_dir         => "/tmp/opensearch_cache",
    ),
    cache_ttl       => 3600,
    do_not_hilite   => [qw( color )],
    snipper_config  => { as_sentences => 1 },        # see Search::Tools::Snipper
    hiliter_config  => { class => 'h', tag => 'b' }, # see Search::Tools::HiLiter
    parser_config   => {},                           # see Search::Query::Parser
    
 );
 my $response = $engine->search(
    q           => 'quick brown fox',   # query
    s           => 'rank desc',         # sort order
    o           => 0,                   # offset
    p           => 25,                  # page size
    h           => 1,                   # highlight query terms in results
    c           => 0,                   # count total only (same as f=0 r=0)
    L           => 'field|low|high',    # limit results to inclusive range
    f           => 1,                   # include facets
    r           => 1,                   # include results
    t           => 'XML',               # or JSON
    u           => 'http://yourdomain.foo/opensearch/',
    b           => 'AND',               # or OR
 );
 print $response;

=head1 METHODS

=head2 aggregator_class

Passed as param to new(). This class is used for filtering
incoming docs via the aggregator's swish_filter() method.

=head2 auto_commit( 0 | 1 )

Set this in new().

If true, a new indexer is spawned via init_indexer() for
each POST, PUT or DELETE.

If false, the same indexer is re-used in POST, PUT or DELETE
calls, until COMMIT or ROLLBACK is called.

Default is true (on).

=head2 init

Overrides base method to load the I<aggregator_class> and other
Engine-specific construction tasks.

=head2 init_searcher

Returns a SWISH::Prog::Lucy::Searcher object.

=head2 init_indexer

Returns a SWISH::Prog::Lucy::Indexer object (used by the REST API).

=head2 build_facets( I<query>, I<results> )

Returns hash ref of facets from I<results>. See Search::OpenSearch::Engine.

=head2 process_result( I<args> )

Overrides base method to preserve multi-value fields as arrays.

=head2 has_rest_api

Returns true.

=head2 get_allowed_http_methods

Returns array (not an array ref) of supported HTTP method names.
These correspond to the UPPERCASE method names below.

B<NOTE> that COMMIT and ROLLBACK are not official HTTP/1.1 method
names.

=head2 PUT( I<doc> )

=head2 POST( I<doc> )

=head2 DELETE( I<uri> )

=head2 GET( I<uri> )

=head2 COMMIT

If auto_commit is false, use this method to conclude a transaction.

=head2 ROLLBACK

If auto_commit is false, use this method to abort a transaction.

=head1 AUTHOR

Peter Karman, C<< <karman at cpan.org> >>

=head1 BUGS

Please report any bugs or feature requests to C<bug-search-opensearch-engine-lucy at rt.cpan.org>, or through
the web interface at L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Search-OpenSearch-Engine-Lucy>.  I will be notified, and then you'll
automatically be notified of progress on your bug as I make changes.

=head1 SUPPORT

You can find documentation for this module with the perldoc command.

    perldoc Search::OpenSearch::Engine::Lucy


You can also look for information at:

=over 4

=item * RT: CPAN's request tracker

L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=Search-OpenSearch-Engine-Lucy>

=item * AnnoCPAN: Annotated CPAN documentation

L<http://annocpan.org/dist/Search-OpenSearch-Engine-Lucy>

=item * CPAN Ratings

L<http://cpanratings.perl.org/d/Search-OpenSearch-Engine-Lucy>

=item * Search CPAN

L<http://search.cpan.org/dist/Search-OpenSearch-Engine-Lucy/>

=back

=head1 COPYRIGHT & LICENSE

Copyright 2010 Peter Karman.

This program is free software; you can redistribute it and/or modify it
under the terms of either: the GNU General Public License as published
by the Free Software Foundation; or the Artistic License.

See http://dev.perl.org/licenses/ for more information.


=cut
