#! /usr/bin/perl
# PODNAME: dpath
# ABSTRACT: cmdline tool around Data::DPath

use 5.008;
use strict;
use warnings;

use App::Rad;
use Data::DPath 'dpath';
use Scalar::Util 'reftype';

######################################################################
#
# App::Rad interface
#
######################################################################

BEGIN {
        my $default_cmd = "search";
        unshift @ARGV, $default_cmd unless $ARGV[0] && $ARGV[0] =~ /^(search|help)$/;
}

App::Rad->run();

sub setup
{
        my $c = shift;
        $c->unregister_command("help");
        $c->register_commands("help", "search");
}

sub help
{
        my ($c) = @_;

        return "dpath [-ios] [--fb] [ --fi] <DPath>

        -o
        --outtype        - output format
                           [yaml(default), json, dumper, xml]
        -i
        --intype         - input format
                           [yaml(default), yaml10, json, dumper, xml, tap, ini]
        -s
        --separator      - sub entry separator for output format 'flat'
                           (default=;)
        --fb             - on output format 'flat' use [brackets] around
                           outer arrays
        --fi             - on output format 'flat' prefix outer array lines
                           with index

        See 'perldoc Data::DPath' for how to specify a DPath.
";
}

sub search :Help(search incoming data by DPath (default commmand))
{
        my ($c) = @_;

        _getopt($c);

        my $path    = $c->argv->[0];
        my $file    = $c->argv->[1] || '-';
        my $data    = _read_in( $c, $file );

        my $out;
        foreach my $datum (ref $data eq ref [] ? @$data : $data) {
            my $result = _match($c, $datum, $path);
            $out .= _write_out($c, $result);
        }
        return $out;
}

sub default { search(@_) }

######################################################################
#
# Implementation
#
######################################################################

sub _read_in
{
        my ($c, $file) = @_;

        my $opt = $c->options;

        my $intype  = $opt->{intype}  || 'yaml';
        my $data;
        my $filecontent;
        {
                local $/;
                if ($file eq '-') {
                        $filecontent = <STDIN>;
                }
                else
                {
                        open (my $FH, "<", $file) or die "dpath: cannot open input file $file.\n";
                        $filecontent = <$FH>;
                        close $FH;
                }
        }

        if (not defined $filecontent or $filecontent !~ /[^\s\t\r\n]/ms) {
                die "dpath: no meaningful input to read.\n";
        }

        if ($intype eq "yaml") {
                require YAML::Any;
                @YAML::Any::_TEST_ORDER=(qw(YAML::XS YAML::Old YAML YAML::Tiny)); # no YAML::Syck
                $data = [YAML::Any::Load($filecontent)];
        }
        elsif ($intype eq "yaml10") {
                require YAML::Syck;
                $data = [YAML::Syck::Load($filecontent)];
        }
        elsif ($intype eq "json") {
                require JSON;
                $data = JSON::decode_json($filecontent);
        }
        elsif ($intype eq "xml")
        {
                require XML::Simple;
                my $xs = new XML::Simple;
                $data  = $xs->XMLin($filecontent, KeepRoot => 1);
        }
        elsif ($intype eq "ini") {
                require Config::INI::Serializer;
                my $ini = Config::INI::Serializer->new;
                $data = $ini->deserialize($filecontent);
        }
        elsif ($intype eq "cfggeneral") {
                require Config::General;
                my %data = Config::General->new(-String => $filecontent,
                                                -InterPolateVars => 1,
                                               )->getall;
                $data = \%data;
        }
        elsif ($intype eq "dumper") {
                eval '$data = my '.$filecontent;
        }
        elsif ($intype eq "tap") {
                require TAP::DOM;
                require TAP::Parser;
                $data = new TAP::DOM( tap => $filecontent, $TAP::Parser::VERSION > 3.22 ? (version => 13) : () );
        }
        else
        {
                die "dpath: unrecognized input format: $intype.\n";
        }
        return $data;
}

sub _match
{
        my ($c, $data, $path) = @_;

        if (not $data) {
                die "dpath: no input data to match.\n";
        }

        my @resultlist = dpath($path)->match($data);
        return \@resultlist;
}

sub _format_flat_inner_scalar
{
    my ($c, $result) = @_;

    no warnings 'uninitialized';

    return "$result";
}

sub _format_flat_inner_array
{
        my ($c, $result) = @_;

        no warnings 'uninitialized';

        my $opt = $c->options;

        return
         join($opt->{separator},
              map {
                   # only SCALARS allowed (where reftype returns undef)
                   die "dpath: unsupported innermost nesting (".reftype($_).") for 'flat' output.\n" if defined reftype($_);
                   "".$_
                  } @$result);
}

sub _format_flat_inner_hash
{
        my ($c, $result) = @_;

        no warnings 'uninitialized';

        my $opt = $c->options;

        return
         join($opt->{separator},
              map { my $v = $result->{$_};
                    # only SCALARS allowed (where reftype returns undef)
                    die "dpath: unsupported innermost nesting (".reftype($v).") for 'flat' output.\n" if defined reftype($v);
                    "$_=".$v
                  } keys %$result);
}

sub _format_flat_outer
{
        my ($c, $result) = @_;

        no warnings 'uninitialized';

        my $opt = $c->options;

        my $output = "";
        die "dpath: can not flatten data structure (undef) - try other output format.\n" unless defined $result;

        my $A = ""; my $B = ""; if ($opt->{fb}) { $A = "["; $B = "]" }
        my $fi = $opt->{fi};

        if (!defined reftype $result) { # SCALAR
                $output .= $result."\n"; # stringify
        }
        elsif (reftype $result eq 'ARRAY') {
                for (my $i=0; $i<@$result; $i++) {
                        my $entry  = $result->[$i];
                        my $prefix = $fi ? "$i:" : "";
                        if (!defined reftype $entry) { # SCALAR
                                $output .= $prefix.$A._format_flat_inner_scalar($c, $entry)."$B\n";
                        }
                        elsif (reftype $entry eq 'ARRAY') {
                                $output .= $prefix.$A._format_flat_inner_array($c, $entry)."$B\n";
                        }
                        elsif (reftype $entry eq 'HASH') {
                                $output .= $prefix.$A._format_flat_inner_hash($c, $entry)."$B\n";
                        }
                        else {
                                die "dpath: can not flatten data structure (".reftype($entry).").\n";
                        }
                }
        }
        elsif (reftype $result eq 'HASH') {
                my @keys = keys %$result;
                foreach my $key (@keys) {
                        my $entry = $result->{$key};
                        if (!defined reftype $entry) { # SCALAR
                                $output .= "$key:"._format_flat_inner_scalar($c, $entry)."\n";
                        }
                        elsif (reftype $entry eq 'ARRAY') {
                                $output .= "$key:"._format_flat_inner_array($c, $entry)."\n";
                        }
                        elsif (reftype $entry eq 'HASH') {
                                $output .= "$key:"._format_flat_inner_hash($c, $entry)."\n";
                        }
                        else {
                                die "dpath: can not flatten data structure (".reftype($entry).").\n";
                        }
                }
        }
        else {
                die "dpath: can not flatten data structure (".reftype($result).") - try other output format.\n";
        }

        return $output;
}

sub _format_flat
{
        my ($c, $resultlist) = @_;

        my $opt = $c->options;

        my $output = "";
        $opt->{separator} = ";" unless defined $opt->{separator};
        $output .= _format_flat_outer($c, $_) foreach @$resultlist;
        return $output;
}

sub _write_out
{
        my ($c, $resultlist) = @_;

        my $opt = $c->options;

        my $output = "";
        my $outtype = $opt->{outtype} || 'yaml';
        if ($outtype eq "yaml")
        {
                require YAML::Any;
                @YAML::Any::_TEST_ORDER=(qw(YAML::XS YAML::Old YAML YAML::Tiny)); # no YAML::Syck
                $output .= YAML::Any::Dump($resultlist);
        }
        elsif ($outtype eq "yaml10")
        {
                require YAML::Syck;
                $output .= YAML::Syck::Dump($resultlist);
        }
        elsif ($outtype eq "json")
        {
                eval "use JSON -convert_blessed_universally";
                my $json = JSON->new->allow_nonref->pretty->allow_blessed->convert_blessed;
                $output .= $json->encode($resultlist);
        }
        elsif ($outtype eq "ini") {
                require Config::INI::Serializer;
                my $ini = Config::INI::Serializer->new;
                $output .= $ini->serialize($resultlist);
        }
        elsif ($outtype eq "dumper")
        {
                require Data::Dumper;
                $output .= Data::Dumper::Dumper($resultlist);
        }
        elsif ($outtype eq "xml")
        {
                require XML::Simple;
                my $xs = new XML::Simple;
                $output .= $xs->XMLout($resultlist, AttrIndent => 1, KeepRoot => 1);
        }
        elsif ($outtype eq "flat") {
                $output .= _format_flat( $c, $resultlist );
        }
        else
        {
                die "dpath: unrecognized output format: $outtype.";
        }
        return $output;
}

sub _getopt
{
        my ($c) = @_;

        $c->getopt( "faces|f=i",
                    "times|t=i",
                    "intype|i=s",
                    "outtype|o=s",
                    "separator|s=s",
                    "fb",
                    "fi",
                  )
         or help() and return undef;
        if (not $c->argv->[0]) {
                die "dpath: please specify a dpath.\n";
        }
}

__END__

=pod

=encoding UTF-8

=head1 NAME

dpath - cmdline tool around Data::DPath

=head1 SYNOPSIS

Query some input data with a DPath to stdout.

Default data format (in and out) is YAML, other formats can be
specified.

  $ dpath '//some/dpath' data.yaml

Use it as filter:

  $ dpath '//some/dpath' < data.yaml > result.yaml
  $ cat data.yaml | dpath '//some/dpath' > result.yaml
  $ cat data.yaml | dpath '//path1' | dpath '//path2' | dpath '//path3'

Specify that output is YAML(default), JSON or Data::Dumper:

  $ dpath -o yaml   '//some/dpath' data.yaml
  $ dpath -o json   '//some/dpath' data.yaml
  $ dpath -o dumper '//some/dpath' data.yaml

Input is JSON:

  $ dpath -i json '//some/dpath' data.json

Input is INI:

  $ dpath -i ini '//some/dpath' data.ini

Input is TAP:

  $ dpath -i tap '//some/dpath' data.tap
  $ perl t/some_test.t | dpath -i tap '//tests_planned'

Input is JSON, Output is Data::Dumper:

  $ dpath -i json -o dumper '//some/dpath' data.json

=head2 Input formats

The following B<input formats> are allowed, with their according
modules used to convert the input into a data structure:

 yaml   - YAML::Any (default; not using YAML::Syck)
 yaml10 - YAML::Syck
 json   - JSON
 xml    - XML::Simple
 ini    - Config::INI::Serializer
 dumper - Data::Dumper (including the leading $VAR1 variable assignment)
 tap    - TAP::DOM

=head2 Output formats

The following B<output formats> are allowed:

 yaml   - YAML::Any (default; not using YAML::Syck)
 yaml10 - YAML::Syck
 json   - JSON
 xml    - XML::Simple
 ini    - Config::INI::Serializer
 dumper - Data::Dumper (including the leading $VAR1 variable assignment)
 flat   - pragmatic flat output for typical unixish cmdline usage

=head2 The 'flat' output format

The C<flat> output format is meant to support typical unixish command
line uses. It is not a strong serialization format but works well for
simple values nested max 2 levels.

Output looks like this:

=head3 Plain values

 Affe
 Tiger
 Birne

=head3 Outer hashes

One outer key per line, key at the beginning of line with a colon
(C<:>), inner values separated by semicolon C<;>:

=head4 inner scalars:

 coolness:big
 size:average
 Eric:The flat one from the 90s

=head4 inner hashes:

Tuples of C<key=value> separated by semicolon C<;>:

 Affe:coolness=big;size=average
 Zomtec:coolness=bit anachronistic;size=average

=head4 inner arrays:

Values separated by semicolon C<;>:

 Birne:bissel;hinterher;manchmal

=head3 Outer arrays

One entry per line, entries separated by semicolon C<;>:

=head4 inner scalars:

 single report string
 foo
 bar
 baz

=head4 inner hashes:

Tuples of C<key=value> separated by semicolon C<;>:

 Affe=amazing moves in the jungle;Zomtec=slow talking speed;Birne=unexpected in many respects

=head4 inner arrays:

Entries separated by semicolon C<;>:

 line A-1;line A-2;line A-3;line A-4;line A-5
 line B-1;line B-2;line B-3;line B-4
 line C-1;line C-2;line C-3

=head3 Additional markup for arrays:

 --fb            ... use [brackets] around outer arrays
 --fi            ... prefix outer array lines with index
 --separator=;   ... use given separator between array entries (defaults to ";")

Such additional markup lets outer arrays look like this:

 0:[line A-1;line A-2;line A-3;line A-4;line A-5]
 1:[line B-1;line B-2;line B-3;line B-4]
 2:[line C-1;line C-2;line C-3]
 3:[Affe=amazing moves in the jungle;Zomtec=slow talking speed;Birne=unexpected in many respects]
 4:[single report string]

=head1 SEE ALSO

For more information about the DPath syntax, see

 perldoc Data::DPath

=head1 AUTHOR

Steffen Schwigon <ss5@renormalist.net>

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2015 by Steffen Schwigon.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut
