#!perl
# PODNAME: es-storage-data.pl
# ABSTRACT: Index pattern-aware elasticsearch storage statistics
use strict;
use warnings;
use feature qw(state);

use App::ElasticSearch::Utilities qw(es_request es_pattern es_nodes es_indices es_index_strip_date);
use CLI::Helpers qw(:output);
use Getopt::Long::Descriptive;
use Pod::Usage;

#------------------------------------------------------------------------#
# Argument Collection
#
# --sort and --view are validated by callbacks.  Each alternation is wrapped
# in a non-capturing group so the anchors apply to every alternative; a bare
# /^name|size$/ means "starts with name" OR "ends with size" and would let
# values such as "names" or "oversize" through, which then match none of the
# string comparisons made later in the script.
my ($opt,$usage) = describe_options('%c %o',
    ['all',     "Scan all indices instead of processing the --base/--days parameters"],
    ['sort:s',  "sort by name or size, default: name",
            { default => 'name', callbacks => { 'must be name or size' => sub { $_[0] =~ /\A(?:name|size)\z/ } } }
    ],
    ['view:s',  "Show by index, base or node, default: node",
          { default => 'node', callbacks => { 'must be base, index, or node' => sub { $_[0] =~ /\A(?:base|index|node)\z/ } } }
    ],
    ['asc',     "Sort ascending  (default by name)"],
    ['desc',    "Sort descending (default by size)"],
    ['limit:i', "Limit to showing only this many, ie top N", { default => 0 }],
    ['raw',     "Display numeric data without rollups"],
    [],
    ['help|h',  "Display this help", { shortcircuit => 1 }],
    ['manual|m',"Display the full manual", { shortcircuit => 1 }],
);

#------------------------------------------------------------------------#
# Documentation requests are answered here, before any request is made.
# --help prints the generated usage text; --manual renders the POD at the
# end of this file.  Both leave the program with exit status 0.
if( $opt->help ) {
    print $usage->text;
    exit 0;
}
if( $opt->manual ) {
    pod2usage(-exitstatus => 0, -verbose => 2);
}

# Get the pattern we're using
my $PATTERN = es_pattern();

# Indices and Nodes
my @INDICES = es_indices($opt->all ? ( _all => 1 ) : ());
my %NODES   = es_nodes();

# Accumulators filled in by the loop below:
#   %indices  - per index: size and docs (read from the response's
#               {primaries} section) plus the per-shard/per-node breakdown
#   %nodes    - per node: size, shard count and docs of the shards it holds
#   %bases    - per base name (index name with the date stripped): size, docs
#   %overview - totals over everything scanned
my %indices = ();
my %nodes = ();
my %bases = ();
my %overview = (
    shards  => 0,
    indices => 0,
    docs    => 0,
    size    => 0,
);
foreach my $index (@INDICES) {
    verbose({color=>'green'}, "$index - Gathering statistics");

    $overview{indices}++;

    # NOTE(review): the response is read as {indices}{$index}{primaries};
    # confirm that the '_status' request issued through es_request() returns
    # that layout for the cluster versions in use.
    my $result = es_request('_status', { index => $index });
    if( !defined $result ) {
        output({color=>'magenta',indent=>1}, "+ Unable to fetch index status!");
        next;
    }
    verbose({indent=>1}, "+ Succesful");
    my $status = $result->{indices}{$index}{primaries};
    debug("index_status( $index ");
    debug_var($status);

    # Counters missing from the response count as zero, so the totals stay
    # numeric without having to silence warnings.
    my $index_size = $status->{store}{size_in_bytes} // 0;
    my $index_docs = $status->{docs}{count} // 0;

    # Grab Index Data
    $indices{$index} = {
        size        => $index_size,
        docs        => $index_docs,
    };

    # Update the Overview
    $overview{size} += $index_size;
    $overview{docs} += $index_docs;

    # Roll the index up into its base name
    my $base = es_index_strip_date($index);
    $bases{$base} ||=  { size => 0, docs => 0 };
    $bases{$base}->{size} += $index_size;
    $bases{$base}->{docs} += $index_docs;

    # Per-shard rows: sizes are requested in bytes, rows as JSON
    my $shards = es_request("_cat/shards/$index",
        { uri_param => { qw(bytes b format json) }}
    );

    # Only a list of rows can be tallied; anything else is reported and the
    # index keeps an empty shard breakdown.
    my @rows = ();
    if( ref($shards) eq 'ARRAY' ) {
        @rows = @{ $shards };
    }
    else {
        output({color=>'magenta',indent=>1}, "+ Unable to fetch shard data!");
    }

    my %shards = ();
    foreach my $s (@rows) {
        # Every reported shard counts towards the overview
        $overview{shards}++;

        # A row without a node (presumably an unassigned shard - confirm
        # against the _cat/shards output) cannot be attributed to any node,
        # so it is left out of the per-node tallies instead of creating an
        # entry with an empty name.
        next unless defined $s->{node};

        # Only the first whitespace-delimited token of the node column is
        # used as the node name.
        my ($node) = ($s->{node} =~ /^(\S+)/);
        next unless defined $node;

        my $shard_size = $s->{store} // 0;
        my $shard_docs = $s->{docs}  // 0;

        # The same shard number can show up more than once on a node;
        # accumulate rather than overwrite.
        $shards{$s->{shard}}{$node}{size} += $shard_size;
        $shards{$s->{shard}}{$node}{docs} += $shard_docs;

        $nodes{$node} ||= { size => 0, shards => 0, docs => 0 };
        $nodes{$node}->{size} += $shard_size;
        $nodes{$node}->{shards}++;
        $nodes{$node}->{docs} += $shard_docs;
    }
    $indices{$index}->{shards} = \%shards;
}

# Report header, followed by one section per item of the selected view
output({color=>'white'}, sprintf "Storage data for %s from indices matching '%s'", $opt->view, $PATTERN->{string});

my $selected_view = $opt->view;
my $max_items     = $opt->limit;    # 0 means "show everything"

if( $selected_view eq 'index' || $selected_view eq 'base' ) {
    # indices_by() looks sizes up in %indices, so the base view swaps the
    # per-base totals into that hash before sorting.
    %indices = %bases if $selected_view eq 'base';

    my @names = sort indices_by keys %indices;
    splice @names, $max_items if $max_items > 0 && @names > $max_items;

    foreach my $name (@names) {
        my $entry = $indices{$name};
        output({color=>"magenta",indent=>1}, $name);
        output({color=>"cyan",kv=>1,indent=>2}, 'size', pretty_size( $entry->{size} ));
        output({color=>"cyan",kv=>1,indent=>2}, 'docs', $entry->{docs});
    }
}
elsif( $selected_view eq 'node' ) {
    my @names = sort nodes_by keys %nodes;
    splice @names, $max_items if $max_items > 0 && @names > $max_items;

    foreach my $name (@names) {
        my $entry = $nodes{$name};
        output({color=>"magenta",indent=>1}, $name);
        output({color=>"cyan",kv=>1,indent=>2}, 'size',   pretty_size( $entry->{size} ));
        output({color=>"cyan",kv=>1,indent=>2}, 'shards', $entry->{shards});
        output({color=>"cyan",kv=>1,indent=>2}, 'docs',   $entry->{docs});
    }
}

# Totals over everything that was scanned, regardless of view or limit
output({color=>'white',clear=>1},"Total for scanned data");
my %total_for = (
    size   => pretty_size( $overview{size} ),
    shards => $overview{shards},
    docs   => $overview{docs},
);
foreach my $field (qw(size shards docs)) {
    output({color=>"cyan",kv=>1,indent=>1}, $field, $total_for{$field});
}


exit 0;

# Format a byte count for display.
#
#   $size - number of bytes; an undefined value is treated as 0
#
# With --raw the number is returned as is.  Otherwise it is divided by 1024
# until it drops below 1024 (or the units run out) and returned as
# "<value with two decimals> <unit>", the unit being kb, mb, gb or tb, or
# empty when the value never reached 1024.
sub pretty_size {
    my ($size) = @_;
    $size //= 0;

    return $size if $opt->raw;

    my @indicators = qw(kb mb gb tb);
    my $indicator = '';

    # ">=" so that exact multiples roll over to the next unit:
    # 1024 bytes is "1.00 kb", not "1024.00 ".
    while( $size >= 1024 && @indicators ) {
        $indicator = shift @indicators;
        $size /= 1024;
    }

    return sprintf('%0.2f %s', $size, $indicator);
}

# sort() comparator for the keys of %indices (operates on $a and $b).
# Sorting by name is ascending unless --desc is given; sorting by size is
# descending unless --asc is given.
sub indices_by {
    if( $opt->sort ne 'size' ) {
        return $opt->desc ? $b cmp $a : $a cmp $b;
    }

    # Pick the operand order once, then do a single numeric comparison
    my ($first, $second) = $opt->asc ? ($a, $b) : ($b, $a);
    return $indices{$first}->{size} <=> $indices{$second}->{size};
}

# sort() comparator for the keys of %nodes (operates on $a and $b).
# Sorting by name is ascending unless --desc is given; sorting by size is
# descending unless --asc is given.
sub nodes_by {
    if( $opt->sort ne 'size' ) {
        return $opt->desc ? $b cmp $a : $a cmp $b;
    }

    # Pick the operand order once, then do a single numeric comparison
    my ($first, $second) = $opt->asc ? ($a, $b) : ($b, $a);
    return $nodes{$first}->{size} <=> $nodes{$second}->{size};
}

__END__

=pod

=encoding UTF-8

=head1 NAME

es-storage-data.pl - Index pattern-aware elasticsearch storage statistics

=head1 VERSION

version 5.6

=head1 SYNOPSIS

es-storage-data.pl --local --pattern logstash-* shards

Options:

    --help              print help
    --manual            print full manual
    --view              Show by index, base, or node, default node
    --raw               Display numeric data without rollups (default is pretty sizes)
    --all               Scan all indices instead of processing the --base/--days parameters
    --sort              Sort by, name(default) or size
    --limit             Show only the top N, default no limit
    --asc               Sort ascending (default when sorting by name)
    --desc              Sort descending (default when sorting by size)

From App::ElasticSearch::Utilities:

    --local         Use localhost as the elasticsearch host
    --host          ElasticSearch host to connect to
    --port          HTTP port for your cluster
    --proto         Defaults to 'http', can also be 'https'
    --http-username HTTP Basic Auth username
    --http-password HTTP Basic Auth password (if not specified, and --http-username is, you will be prompted)
    --password-exec Script to run to get the users password
    --noop          Any operations other than GET are disabled, can be negated with --no-noop
    --timeout       Timeout to ElasticSearch, default 30
    --keep-proxy    Do not remove any proxy settings from %ENV
    --index         Index to run commands against
    --base          For daily indexes, reference only those starting with "logstash"
                     (same as --pattern logstash-* or logstash-DATE)
    --datesep       Date separator, default '.' also (--date-separator)
    --pattern       Use a pattern to operate on the indexes
    --days          If using a pattern or base, how many days back to go, default: all

See also the "CONNECTION ARGUMENTS" and "INDEX SELECTION ARGUMENTS" sections from App::ElasticSearch::Utilities.

From CLI::Helpers:

    --data-file         Path to a file to write lines tagged with 'data => 1'
    --color             Boolean, enable/disable color, default use git settings
    --verbose           Incremental, increase verbosity (Alias is -v)
    --debug             Show developer output
    --debug-class       Show debug messages originating from a specific package, default: main
    --quiet             Show no output (for cron)
    --syslog            Generate messages to syslog as well
    --syslog-facility   Default "local0"
    --syslog-tag        The program name, default is the script name
    --syslog-debug      Enable debug messages to syslog if in use, default false

=head1 DESCRIPTION

This script allows you to view the storage statistics of the ElasticSearch cluster.

Usage:

    # Show usage data for nodes with logstash indices
    $ es-storage-data.pl --local --pattern logstash-*

    # Show the top 10 largest indices
    $ es-storage-data.pl --local --view index --limit 10 --sort size

    # Show the "newest" logstash index
    $ es-storage-data.pl --local --view index --limit 1

=head1 OPTIONS

=over 8

=item B<help>

Print this message and exit

=item B<manual>

Print the full manual and exit

=item B<view>

Default view is by node, but can also be index to see statistics by index, or base to see statistics by index base name

=item B<sort>

How to sort the data, by its name (the default) or size

=item B<limit>

Show only the first N items, or everything if N == 0

=item B<asc>

Sort ascending, the default for name

=item B<desc>

Sort descending, the default for size

=back

=head1 AUTHOR

Brad Lhotsky <brad@divisionbyzero.net>

=head1 COPYRIGHT AND LICENSE

This software is Copyright (c) 2012 by Brad Lhotsky.

This is free software, licensed under:

  The (three-clause) BSD License

=cut
