##########################################################
## This script is part of the Devel::NYTProf distribution
##
## Copyright, contact and other information can be found
## at the bottom of this file, or by going to:
## http://search.cpan.org/~akaplan/Devel-NYTProf
##
###########################################################
package Devel::NYTProf::Reader;

use warnings;
use strict;
use Carp;
use vars qw/$VERSION/;

BEGIN {
	our $VERSION = '1.10'; # must match NYTProf.pm b/c dumb MakeMaker

	require XSLoader;
	XSLoader::load('Devel::NYTProf', $Devel::NYTProf::Reader::VERSION);

	# Provides Devel::NYTProf::Reader::process();
}

# These control the limits for what the script will consider ok to severe times
# specified in standard deviations from the mean time
use constant SEVERITY_SEVERE			=> 2.0; # above this deviation, a bottleneck
use constant SEVERITY_BAD					=> 1.0;
use constant SEVERITY_GOOD				=> 0.5; # within this deviation, okay

sub new {
	my $class = shift;
	my $self = {
			file => 'nytprof.out',
			output_dir => '.',
			suffix => '.csv',
			header => 
				"# Profile data generated by Devel::NYTProf::Reader\n"
				."# Version: v$VERSION\n"
				."# Author: Adam Kaplan. More information at "
				."http://search.cpan.org/~akaplan\n"
			  ."# Format: time,calls,time/call,code\n",
			datastart => '',
			line => [
				{},
				{ value => 'time',			end => ',', default => '0'},
				{ value => 'calls',			end => ',', default => '0'},
				{ value => 'time/call',	end => ',', default => '0'},
				{ value => 'source',		end => '',	default => '' },
				{ end => "\n" }
			],
			dataend => '',
			footer => '',
			taintmsg => "# WARNING!\n"
				."# The source file used in generating this report has been modified\n"
				."# since generating the profiler database.  It might be out of sync\n",
			# -- OTHER STUFF --
			replacements => [
				{pattern => '!~FILENAME~!', 
					replace => "\$FILE"},
				{pattern => '!~DEV_CALLS~!', 
					replace => "\$statistics{calls}->[0]"},
				{pattern => '!~DEV_TIME~!', 
					replace => "\$statistics{time}->[0]"},
				{pattern => '!~DEV_TIME/CALL~!', 
					replace => "\$statistics{'time/calls'}"},
				{pattern => '!~MEAN_CALLS~!', 
					replace => "\$statistics{calls}->[1]"},
				{pattern => '!~MEAN_TIME~!', 
					replace => "\$statistics{time}->[1]"},
				{pattern => '!~MEAN_TIME/CALLS~!', 
					replace => "\$statistics{'time/calls'}->[1]"},
				{pattern => '!~TOTAL_CALLS~!', 
					replace => "\$self->{filestats}->{\$filestr}->{'calls'}"},
				{pattern => '!~TOTAL_TIME~!', 
					replace => "\$self->{filestats}->{\$filestr}->{'time'}"},
			],
			callsfunc => undef,
			timefunc => undef,
			'time/callsfunc' => undef,
			numeric_precision => {
				time				=> 7,
				calls				=> 0,
				'time/call'	=> 7
			},
		};

	if (defined $_[0]) {
		$self->{file} = $_[0];
	}

	bless($self, $class);
	$self->{data} = process($self->{file});
	$self->{profile_db_time} = getDatabaseTime();
	return $self;
}

##
sub setParam {
	my ($self, $param, $value) = @_;

	if ($param eq 'linestart') {
		$self->{line}->[0] = $value;
	} elsif ($param eq 'column1') {
		$self->{line}->[1] = $value;
	} elsif ($param eq 'column2') {
		$self->{line}->[2] = $value;
	} elsif ($param eq 'column3') {
		$self->{line}->[3] = $value;
	} elsif ($param eq 'column4') {
		$self->{line}->[4] = $value;
	} elsif ($param eq 'lineend') {
		$self->{line}->[5] = $value;
	} elsif (!exists $self->{$param}) {
		confess "Attempt to set $param to $value failed: $param is not a valid "
						."parameter\n";
	} else {
		return $self->{$param} unless defined ($value);
		$self->{$param} = $value;
	}
	undef;
}

##
sub addRegexp {
	my ($self, $pattern, $replace) = @_;
	push (@{$self->{user_regexp}}, {pattern => $pattern, replace => $replace});
}

# calculate the standard deviation for this data set
# NOT USED.  This will work very well in code that is normalized (i.e. the same
# time is spent in every file) but is fairly useless in practice...
# use it if you want.
sub calculate_sd {
	my $stats = shift;
	$stats = [sort {$a <=> $b} @$stats];

	my $sum;
	map ($sum += $_, @$stats);
	my $mean = $sum / scalar @$stats;

	$sum = 0;
	map ($sum += ($_ - $mean) ** 2, @$stats);
	return [sqrt (abs ($sum / scalar @$stats)), $mean];
}

## Use average distance from the median value. Highly resistant to extremes
sub calculate_median_absolute_deviation {
	my $stats = shift;
	$stats = [sort {$a <=> $b} @$stats];
	my $median = $stats->[int (scalar(@$stats) / 2)];
	my $sum;
	map ($sum += abs($_ - $median), @$stats);
	return [$sum / scalar @$stats, $median];
}

##
sub _test_file {
	my $self = shift;
	my $file = shift;
	unless (-f $file) {
		carp "Unable to locate source file: $file\n";
		return 0; 
	}
	return 1 if (stat $file)[9] > $self->{profile_db_time};
	0;
}

##
sub _output_additional {
	my ($self, $fname, $content) = @_;
	open (OUT, '>', "$self->{output_dir}/$fname")
		or confess "Unable to open $self->{output_dir}/$fname for writing; $!\n";
	print OUT @$content;
	close OUT;
}

##
sub getFileStats {
	my $self = shift;
	return $self->{filestats};
}

##
sub outputDir {
	my ($self, $dir) = @_;
	return $self->{output_dir} unless defined($dir);
	if (!mkdir $dir) {
		confess "Unable to create directory $dir: $!\n" if !$! =~ /exists/;
	}
	$self->{output_dir} = $dir;
}

##
sub report {
	my $self = shift;

	foreach my $filestr (keys %{$self->{data}}) {

		# test file modification date. Files that have been touched after the
		# profiling was done may very well produce useless output since the source
		# file might differ from what it looked like before.
		my $tainted = $self->_test_file($filestr);

		my %totalsAccum; # holds all line times. used to find median
		my %totalsByLine; # holds individual line stats
		my $runningTotalTime; # holds the running total 
													# (should equal sum of $totalsAccum)
		my $runningTotalCalls; # holds the running total number of calls.

		foreach my $key (keys %{$self->{data}->{$filestr}}) {
			my $a = $self->{data}->{$filestr}->{$key};

			if (0 == $a->[1]) {
				# The debugger cannot stop on BEGIN{...} lines.  A line in a begin
				# may set a scalar reference to something that needs to be eval'd later.
        # as a result, if the variable is expanded outside of the BEGIN, we'll
        # see the original BEGIN line, but it won't have any calls or times
        # associated. This will cause a divide by zero error.
        $a->[1] = 1;
      }

			push(@{$totalsAccum{'time'}}, $a->[0]);
			push(@{$totalsAccum{'calls'}}, $a->[1]);
			push(@{$totalsAccum{'time/call'}}, $a->[0] / $a->[1]);

			$totalsByLine{$key}->{'time'} += $a->[0];
			$totalsByLine{$key}->{'calls'} += $a->[1];
			$totalsByLine{$key}->{'time/call'} =
				$totalsByLine{$key}->{'time'} / $totalsByLine{$key}->{'calls'};

			$runningTotalTime += $a->[0];
			$runningTotalCalls += $a->[1];
		}

		$self->{filestats}->{$filestr}->{'time'} = $runningTotalTime;
		$self->{filestats}->{$filestr}->{'calls'} = $runningTotalCalls;
		$self->{filestats}->{$filestr}->{'time/call'} = 
				$runningTotalTime / $runningTotalCalls;

		# Use Median Absolute Deviation Formula to get file deviations for each of
		# calls, time and time/call values
		my %statistics = (
			'calls' => calculate_median_absolute_deviation($totalsAccum{'calls'}),
			'time' => calculate_median_absolute_deviation($totalsAccum{'time'}),
			'time/call' => 
					calculate_median_absolute_deviation($totalsAccum{'time/call'}),
		);

		# discover file path
		my $fname = $filestr;
		foreach (@INC) {
			$_ = '\.' if ($_ eq '.');
			$fname =~ s/^$_//;
		}
		$fname =~ s#^[/\\]##o;		# nuke leading / or \
		$fname =~ s#[/\\]#-#go; # replace / and \ with html safe -

		$self->{filestats}->{$filestr}->{html_safe} = $fname;

		# localize header and footer for variable replacement
		my $header = $self->{header};
		my $footer = $self->{footer};
		my $taintmsg = $self->{taintmsg};
		my $datastart = $self->{datastart};
		my $dataend = $self->{dataend};
		my $FILE = $filestr;
		foreach my $transform (@{$self->{replacements}}) {
			my $pattern = $transform->{pattern};
			my $replace = $transform->{replace};

			if ($pattern =~ m/^!~\w+~!$/) {
				# replace variable content
				$replace = eval $replace;
				$header =~ s/$pattern/$replace/g;
				$footer =~ s/$pattern/$replace/g;
				$taintmsg =~ s/$pattern/$replace/g;
				$datastart =~ s/$pattern/$replace/g;
				$dataend =~ s/$pattern/$replace/g;
			}
		}

		# open output file
		open (OUT, "> $self->{output_dir}/$fname$self->{suffix}") or
			confess "Unable to open $self->{output_dir}/$fname$self->{suffix} "
						."for writing: $!\n";

		# begin output
		print OUT $header;
		print OUT $taintmsg if $tainted;
		print OUT $datastart;

		if (! open (IN, $filestr)) {
			confess "Unable to open $filestr for reading: $!\n"
			.'Try running again in the same directory as you ran Devel::NYTProf,'
			."or ensure \@INC is correct.\n";
		}

		my $LINE = 1;	# actual line number. PATTERN variable, DO NOT CHANGE
		foreach my $line (<IN>) {
			chomp $line;
			foreach my $regexp (@{$self->{user_regexp}}) {
				$line =~ s/$regexp->{pattern}/$regexp->{replace}/g;
			}

			# can we get the main package for this file from this line?
			{ 
				local $1;
				$line =~ m/^\s*package\s+([\S]*)\s*;/;
				if (defined $1) {
					my $p = $1;
					$p =~ s/\:\:/-/g;
					my $t = substr($fname, -1 * (length $p) - 3, length $p);
					if($p eq $t) {
						$self->{data}->{$filestr}->{package} = $1;
					}
				}
			}

			# begin output
			foreach my $hash (@{$self->{line}}) {

				# If a function reference is provided, it will control ALL output.
				if (defined $hash->{func}) {
					if ($hash->{value}) {
						print OUT $hash->{func}($hash->{value}, 
											$totalsByLine{$LINE}->{$hash->{value}},
											$statistics{$hash->{value}});
					} else {
						print OUT $hash->{func}($hash->{value});
					}
					next;
				}

				print OUT $hash->{start} if defined $hash->{start};
				if (defined $hash->{value}) {
					if ($hash->{value} eq 'source') {
						print OUT $line; # from source rather than profile db
					} elsif ($hash->{value} eq 'line') {
						print OUT $LINE;
					} elsif (exists $self->{data}->{$filestr}->{$LINE}) {
						print OUT 
								sprintf("%0.".$self->{numeric_precision}->{$hash->{value}}."lf",
												$totalsByLine{$LINE}->{$hash->{value}});
					} else {
						print OUT $hash->{default};
					}
				}
				print OUT $hash->{end} if defined $hash->{end};
			}

			# Increment line number counters
			$LINE++;
		}
		print OUT $dataend;
		print OUT $footer;
		close OUT;
	}
}

1;
__END__
=head1 NAME

Devel::NYTProf::Reader - Tranforms L<Devel::NYTProf> output into comprehensive, easy to read reports in (nearly) arbitrary format.

=head1 SYNOPSIS

 # This module comes with two scripts that implement it:
 #
 # nytprofhtml - create an html report with statistics highlighting
 # nytprofcsv - create a basic comma delimited report
 #
 # They are in the bin directory of your perl path, so add that to your PATH.
 #
 # The csv script is simple, and really only provided as a starting point
 # for creating other custom reports. You should refer to the html script
 # for advanced usage and statistics.

 # First run some code through the profiler to generate the nytprof database.
 perl -d:NYTProf some_perl.pl

 # To create an HTML report in ./profiler 
 nytprofhtml

 # To create a csv report in ./profiler 
 nytprofcsv

 # Or to generate a simple comma delimited report manually
 use Devel::NYTProf::Reader;
 my $reporter = new Devel::NYTProf::Reader('nytprof.out');

 # place to store the output
 $reporter->outputDir($file);

 # set other options and parameters
 $reporter->addRegexp('^\s*', ''); # trim leading spaces

 # generate the report
 $reporter->report();

 # many configuration options exist.  See nytprofhtml, advanced example.

=head1 HISTORY

A bit of history and a shameless plug...

NYTProf stands for 'New York Times Profiler'. Indeed, this module was developed
by The New York Times Co. to help our developers quickly identify bottlenecks in
large Perl applications.  The NY Times loves Perl and we hope the community will benefit from our work as much as we have from theirs.

Please visit L<http://open.nytimes.com>, our open source blog to see what we are up to, L<http://code.nytimes.com> to see some of our open projects and then 
check out L<htt://nytimes.com> for the latest news!

=head1 DESCRIPTION

L<Devel::NYTProf> is a speedy line-by-line code profiler for Perl, written in C.
This module is a complex framework that processes the output file generated by 
L<Devel::NYTProf>

It is capable of producing reports of arbitrary format and varying complexity.

Basically, for each line of code that was executed and reported, this module 
will provide the following statistics:

=over

=item *
 Total calls

=item *
 Total time

=item *
 Average time per call

=item *
 Deviation of all of the above

=item *
 Line number

=item *
 Source code

=back

C<Devel::NYTProf::Reader> will process each source file that it can find in 
your C<@INC> one-by-one.  For each line it processes, it will preform 
transformation and output based instructions that you can optionally provide.  
The configuration is very robust, supporting variations in field ordering, 
pattern substitutions (like converting ascii spaces to html spaces), and user 
callback functions to give you total control.

=head1 CONSTRUCTOR

=over 4

=item $reporter = Devel::NYTProf::Reader->new( );

=item $reporter = Devel::NYTProf::Reader->new( $FILE );

This method constructs a new C<Devel::NYTProf::Reader> object, parses $FILE
and return the new object.  By default $FILE will evaluate to './nytprof.out'.

See: L<Devel::NYTProf> for how the profiler works.

=back

=head1 PARAMETERS

Numerous parameters can be set to modify the behavior of 
C<Devel::NYTProf::Reader>.  The following methods are provided:

=over 4

=item $reporter->outputDir( $output_directory );

Set the directory that generated files should be placed in. [Default: .]

=item $reporter->addRegexp( $pattern, $replace );

Add a regular expression to the top of the pattern stack.  Ever line of output 
will be run through each entry in the pattern stack.  

For example, to replace spaces, < and > with html entities, you might do:

	$reporter->addRegexp(' ', '&nbsp;');
	$reporter->addRegexp('<', '&lt;');
	$reporter->addRegexp('>', '&gt;');

=item $reporter->setParam( $parameter, $value );

Changes the internal value of $parameter to $value.  If $value is omitted, 
returns the current value of parameter.

Basic Parameters:

  Paramter       Description
  ------------   --------------
  suffix         The file suffix for the output file
  header         Text printed at the start of the output file
  taintmsg       Text printed ONLY IF source file modification date is 
                   later than the profile database modification date.
                   Printed just after header
  datastart      Text printed just before report output and after
                   taintmsg
  dataend        Text printed just after report output
  footer         Text printed at the very end of report output
  callsfunc      Reference to a function which must accept a scalar
                   representing the total calls for a line and returns the
                   output string for that field
  timesfunc      Reference to a function which must accept a scalar
                   representing the total time for a line and returns the
                   output string for that field
  time/callsfunc Reference to a function which must accept a scalar
                   representing the average time per call for a line and 
                   returns the output string for that field

Advanced Parameters:

  Paramter         Description
  --------------   --------------
  linestart        Printed at the start of each report line
  lineend          Printed at the end of each report line
  column1          |
  column2          | The four parameters define what to print in each of
  column3          | the four output fields. See below
  column4          |

 Each of these parameters must be set to a hash reference with any of the
 following key/value pairs:

  Key              Value
  -------------    -------------
  start            string printed at the start of the field
  end              string printed at the end of the field
  value            identifier for the value that this field will hold
                     (can be: time, calls, time/calls, source)
  default          string to be used when there is no value for the field
                     specified in the 'value' key

Basic Parameters Defaults:

  Parameter         Default
  --------------   --------------
  suffix           '.csv'
  header           "# Profile data generated by Devel::NYTProf::Reader 
                   v.$VERSION\n # Author: Adam Kaplan. More information at 
                   http://search.cpan.org/~akaplan\n# Format: time,calls,
                   code\n"
  taintmsg         "# WARNING!\n# The source file used in generating this 
                   report has been modified\n# since generating the profiler 
                   database.  It might be out of sync\n"
  datastart        ''
  dataend          ''
  footer           ''
  callsfunc        undef
  timefunc         undef
  time/callsfunc   undef

Advanced Parameters Defaults:

  Parameter         Default
  --------------   --------------
  linestart        {}
  lineend          { end => "\n" }
  column1          { value => 'time',      end => ',', default => '0'}
  column2          { value => 'calls',      end => ',', default => '0'}
  column3          { value => 'time/call',  end => ',', default => '0'}
  column4          { value => 'source',    end => '',  default => '' }

=back

=head1 SUBROUTINES

=over

=item $reporter->report( );

Trigger data processing and report generation. This method will die with 
a message if it fails.  The return value is not defined.  This is where all of
the work is done.

=item $reporter->getFileStats( );

When called after calling C<$reporter-E<gt>report()>, will return a hash containing the cumulative totals for each file.

 my $stats = $reporter->getStats();
 $stats->{FILENAME}->{time}; # might hold 0.25, the total runtime of this file>>

Fields are time, calls, time/call, html-safe.

=item Devel::NYTProf::Reader::calculate_sd( @stats );

Calculates the standard deviation and mean of the values in @stats, returns 
them as a list.

=item Devel::NYTProf::Reader::calculate_median_absolute_deviation( @stats );

Calculates the absolute median deviation and mean of the values in @stats, 
returns them as a list.

=item $reporter->_output_additional( $file, @data );

If you need to create a static file in the output directory, you can use this
subroutine.  It is currently used to dump the CSS file into the html output

=item $reporter->getDatabaseTime(  );

Implemented in XS.  This function returns the time that the NYTProf database 
was created. You should not need to use this.

=item $reporter->process( $file );

Implemented in XS. This function tell does the actual parsing of the database.
It's fairly complicated and you're better off letting 
L<<Devel::NYTProf::Reader->new()>> invoke it for you. The return value is a
series of nested hash refs and array refs containing the parsed data.

=back

=head1 EXPORT

None by default. Object Oriented.

=head1 SEE ALSO

Mailing list and discussion at L<http://groups.google.com/group/develnytprof-dev>

Take a look at the scripts which implement this module, L<nytprofhtml> and
L<nytprofcsv>.  They are probably all that you will need and provide an
excellent jumping point into writing your own custom reports.

You'll need to install and run L<Devel::NYTProf> before you can use anything 
that implements this module... but this is easy (see L<"SYNOPSIS">)

=head1 AUTHOR

Adam Kaplan, akaplan at nytimes dotcom

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2008 by Adam Kaplan and The New York Times Company.

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.8 or,
at your option, any later version of Perl 5 you may have available.

=cut
