#!/usr/bin/env perl

use strict;
use warnings;
use feature qw/state say/;
use 5.010;

use Getopt::Declare;
use MIME::Base64;

# Parse the command line.  Getopt::Declare builds its parser from the
# specification string below; inside that string the parameter syntax and
# its description are separated by tab characters, so the whitespace must
# be kept exactly as it is.
#
# The constructor is called with the explicit Class->new(...) form instead
# of the indirect-object form ("new Class ..."), which is ambiguous to parse.
#
# If the constructor returns a false value the script exits with a
# non-zero status.
my $args = Getopt::Declare->new(q{
  [strict]
  -r[ange] <from:0+n> .. <to:0+n>	write only scans with numbers between <from> and <to>
  <file>				input file
})
  or exit(-1);


# If an input file was named, validate it and re-open STDIN on it, so that
# the main loop can read the file through the <> operator.
# NOTE(review): this presumes that Getopt::Declare has consumed the file
# name from @ARGV (so that <> falls back to STDIN) -- confirm.
#
# The test is on the length of the name rather than on its truth value, so
# that a file literally called "0" is not silently ignored.
if ( length( my $file = $args->{"<file>"} // '' ) ) {
  -e $file or die "file '$file' does not exist";
  -f $file or die "'$file' is not a plain file";
  -s $file or die "'$file' has zero size";

  # Three-argument open: the name is taken literally.  With the two-argument
  # form, surrounding whitespace is stripped and names such as "&3" or "-"
  # are interpreted as dup / standard-stream specifications.
  open STDIN, '<', $file or die "can't open '$file': $!";
}

# Inclusive window of scan numbers to extract.  Without -r the window is
# 0 .. 2**31 - 1; with -r both bounds come from the parsed option.
my ($from, $to) = (0, 2**31 - 1);
if ( exists $args->{'-r'} ) {
  ($from, $to) = @{ $args->{'-r'} }{ '<from>', '<to>' };
}
die "inverted range: [$from .. $to]" if $from > $to;

# Main loop.  The input is read in records terminated by "</peaks>", so each
# record that contains a <peaks ...> element consists of the text preceding
# the element, its opening tag and its base64 payload.  For every such
# record whose scan number lies within [$from, $to], the preceding text and
# the opening tag are copied to the output, the payload is written as one
# tab-separated pair of numbers per line, and the closing tag is restored.
# Dies if a record holding a <peaks> element carries no scan number.
{
  # Confine the custom record separator to this block.
  local $/ = "</peaks>";

  RECORD:
  while ( my $record = <> ) {
    # Remove the "</peaks>" separator before looking at the payload.  If it
    # were left in place, the characters "/peaks" would be taken for base64
    # data whenever the payload carries no "=" padding (an empty payload
    # included), yielding a spurious trailing value.
    chomp $record;

    # Split off the opening tag and the payload; /s allows the payload to be
    # wrapped over several lines.
    unless ( $record =~ s/(<peaks[^>]+>)(.*)$//s ) {
      # NOTE(review): records without a <peaks> element (normally the text
      # following the last scan) are discarded, because the pass-through
      # below is disabled.  The output therefore lacks the end of the input
      # document -- confirm this is intended.
      #print $record;
      next RECORD;
    }
    my ($tag, $data) = ($1, $2);
    $record =~ s/\n$//;

    my ($scan_number) = $record =~ /scan num="(\d+)"/
      or die "cannot determine scan number";
    next RECORD unless $scan_number >= $from and $scan_number <= $to;

    say "$record$tag";

    # The payload is decoded as a sequence of big-endian 32-bit floats,
    # the inverse of
    #   pack("NN", unpack("VV", pack("ff", @$peak)))
    # "f>" performs the byte-order conversion independently of the host's
    # native byte order.
    # NOTE(review): the attributes of the <peaks> tag are not inspected;
    # 32-bit precision is assumed -- confirm for the files being processed.
    my @spec = unpack "f>*", decode_base64($data);

    # Print complete pairs only; an unpaired trailing value is ignored.
    for my $i ( 0 .. int( @spec / 2 ) - 1 ) {
      say join("\t", @spec[2*$i, 2*$i + 1]);
    }

    say "</peaks>";
  }
}

__END__
=head1 NAME

mzxml-unpack - unpack the base64-encoded peak data in an mzXML file

=head1 SYNOPSIS

mzxml-unpack [options] <file>

 Options:

  -r[ange] <from:0+n> .. <to:0+n>  write only scans with numbers between <from> and <to>
  <file>                           input file

=head1 OPTIONS

=over 4

=item B<-r[ange] E<lt>from:0+nE<gt> .. E<lt>to:0+nE<gt>>

extract only scans with numbers between E<lt>fromE<gt> and E<lt>toE<gt>

B<Note:> this option breaks the structure of the output file (the parts preceding and following the selected range of scans are not written). It is mainly useful in checking the XML syntax and the contents of a small number of scans. For extracting the scan data in tabular format, there is a more suitable tool, B<uf-scan>.

=back

=head1 DESCRIPTION

B<mzxml-unpack> will read the given input file and unpack the contents of the B<scan.peaks> element. MzXML uses base64 encoding to save space; unpacking this encoding makes the data human-readable. It does not otherwise change the file structure (unless option -r is used), so in principle, it can be packed again.

=head1 SEE ALSO

uf-scan
uf-mzxml

=head1 EXAMPLE

  mzxml-unpack sample.mzXML > sample-unpacked.mzXML

=cut
