#!/usr/bin/perl -w

use strict;
use warnings;
use Getopt::Long;

# rsup - Upgrade RiveScript 1.x code to 2.x standards.
# Usage: rsup --out ./outdirectory <rs docs or directories>
our $VERSION = '0.01';
my $help = 0;
my $out  = '';
my $ext  = '.rs';
my $bak  = 0;
my $fixobj = 0;
my $opts = GetOptions (
	"help|h"        => \$help,
	"backup|bak|b"  => \$bak,
	"dontfixperl|p" => \$fixobj,
	"out|o=s"       => \$out,
	"ext|x=s"       => \$ext,
);
our @warnings = ();

# Asking for help?
if ($help) {
	&help();
}

# Verify that the output directory is writable.
if (length $out) {
	if (!-d $out) {
		die "Output directory $out doesn't exist!";
	}
	if (!-w $out) {
		die "Output directory $out is not writable!";
	}
}

# Collect the rest of the arguments.
my @in = @ARGV;
if (scalar(@in) == 0) {
	&usage();
}

# Process each argument.
foreach my $item (@in) {
	if (-d $item) {
		# This is a directory, so open it.
		opendir (DIR, $item);
		foreach my $file (sort(readdir(DIR))) {
			if ($file =~ /\~$/) {
				if ($bak == 0) {
					# Skip backup files~
					next;
				}
			}
			if ($file =~ /\Q$ext\E/i) {
				&parseFile("$item/$file");
			}
		}
		closedir (DIR);
	}
	elsif (-f $item) {
		# This is a file.
		if ($item =~ /\Q$ext\E/i) {
			&parseFile($item);
		}
	}
}

# Any warnings?
if (scalar(@warnings)) {
	print "\n";
	print "=" x 60;
	print "\n"
		. "The following warnings were found during execution:\n\n"
		. join("\n",@warnings) . "\n";
}

sub parseFile {
	my $file = shift;

	print "<= Reading $file\n";

	open (FILE, $file);
	my @read = <FILE>;
	close (FILE);
	chomp @read;

	# Create a buffer for the new file.
	my @new = (
		"// Converted to RiveScript 2 by rsup v. $VERSION",
		"// Generated on " . localtime(time()),
		'',
		"! version = 2.0",
		'',
	);

	my $lineno = 0;
	my $skippedLast = 0;
	my $inComment = 0;
	my $inObject = 0;
	foreach my $line (@read) {
		$lineno++;

		# See if we're inside an object.
		if ($inObject) {
			if ($line =~ /^<\s*object/i) {
				# Ends the object.
				$inObject = 0;
				push (@new, "< object");
				next;
			}

			# Attempt to fix up the Perl code if we can parse it.
			if ($fixobj == 0) {
				if ($line =~ /my \((.+?)\) = \@_/i) {
					$line =~ s/my \((.+?)\) = \@_/my (\$rs,$1) = \@_/ig;
					print "\tFixed obj. line: $line\n";
				}
			}
			push (@new,$line);
			next;
		}

		if ($inComment) {
			if ($line =~ /\*\//) {
				$inComment = 0;
				push (@new,$line);
				next;
			}
			push (@new,$line);
			next;
		}

		# Further chomp the line.
		$line =~ s/^(\t|\x0a|\x0d|\s)+//g;
		$line =~ s/(\t|\x0a|\x0d|\s)+$//g;

		# Blank lines?
		if (length $line == 0) {
			push (@new,$line);
			next;
		}

		if ($line =~ /^\#/) {
			# Single-line comment.
			push (@new,$line);
			next;
		}
		elsif ($line =~ /^\/\//) {
			# Single-line // comment.
			push (@new,$line);
			next;
		}
		elsif ($line =~ /^\/\*/) {
			# Start of a multi-line comment.
			if ($line =~ /\*\//) {
				# It ends on the same line.
				push (@new,$line);
				next;
			}
			push (@new,$line);
			$inComment = 1;
			next;
		}
		elsif ($line =~ /\*\//) {
			# End of a multi-line comment.
			push (@new,$line);
			$inComment = 0;
			next;
		}

		# Convert &object.syntax() to <call> syntax.
		if ($line !~ /^\&/) {
			while ($line =~ /\&([A-Za-z0-9\.\s]+)\((.+?)\)/) {
				my $before = '&' . $1 . '(' . $2 . ')';
				my (@cmds) = split(/\./, $1);
				my $cmd = join(" ",@cmds);
				my $args = $2;
				$line =~ s/\&(.+?)\((.+?)\)/<call>$cmd $args<\/call>/ig;
				$line =~ s/<call>(.+?)\s+?<\/call>/<call>$1<\/call>/ig;
				print "\tConverted object call format at $file line $lineno.\n"
					. "\t\t$before  =>  $line\n";
			}
		}

		# Separate the command from the data.
		my ($cmd) = $line =~ /^(.)/i;
		$line =~ s/^([^\s]+)\s+//i;

		# Skipping this line?
		my $skip = 0;

		# Process the command.
		if ($cmd eq '^') {
			# This is a continue command. If we've skipped the line it continues, skip this too.
			if ($skippedLast) {
				next;
			}
		}
		elsif ($cmd eq '!') {
			my @fields = split(/\s+/, $line);
			my $type = $fields[0];

			# Make sure this isn't a RS version line.
			if ($type =~ /version/i) {
				my $v = $fields[2];
				if (int($v) >= 2) {
					print "\tSkipping file: it's already RiveScript v. 2 or greater.\n";
					return;
				}
			}

			# Obsolete types:
			if ($type =~ /(addpath|include|syslib)/i) {
				print "\tRemoving obsolete definition type \"$type\" at $file line $lineno.\n";
				$skip = 1;
			}
		}
		elsif ($cmd eq '>') {
			my @fields = split(/\s+/, $line);
			my $type = $fields[0];

			# Objects are slightly different now.
			if ($type =~ /^object/i) {
				my $name = $fields[1];
				if (length $name) {
					my $before = $line;
					$line = "object $name perl";
					$inObject = 1;
					print "\tUpdated object declaration at $file line $lineno.\n"
						. "\t\t$before  ==>  $line\n";
				}
				else {
					print "\tWarning: found object at $file line $lineno but can't determine its name.\n";
					push (@warnings,"Found object at $file line $lineno but can't determine its name.\n"
						. "\t$cmd $line");
					$inObject = 1;
				}
			}
		}
		elsif ($cmd eq '*') {
			my ($cond,$do) = ('','');
			my $before = $line;
			if ($line =~ /=\>/) {
				($cond,$do) = split(/=\>/, $line, 2);
			}
			elsif ($line =~ /::/) {
				($cond,$do) = split(/::/, $line, 2);
			}
			else {
				print "\tWarning: can't parse conditionals at $file line $lineno.\n";
				push (@warnings,"Can't parse conditionals at $file line $lineno:\n"
					. "\t$cmd $line");
				next;
			}

			$cond =~ s/^\s+//g;
			$cond =~ s/\s+$//g;
			$do =~ s/^\s+//g;
			$do =~ s/\s+$//g;

			my ($left,$eq,$right) = ($cond =~ /^(.+?)\s*(=|\!=|\<|\<=|\>|\>=|\?)\s*(.+?)$/i);

			if ($eq eq '=') {
				if ($right =~ /^[0-9]+$/) {
					$line = "<get $left> == $right => $do";
				}
				else {
					$line = "<get $left> eq $right => $do";
				}
			}
			elsif ($eq eq '!=') {
				if ($right =~ /^[0-9]+$/) {
					$line = "<get $left> != $right => $do";
				}
				else {
					$line = "<get $left> ne $right => $do";
				}
			}
			elsif ($eq eq '?') {
				$line = "<get $left> != undefined => $do";
			}
			else {
				$line = "<get $left> $eq $right => $do";
			}

			print "\tConverted conditionals at $file line $lineno.\n"
				. "\t\tBefore: $before\n"
				. "\t\tAfter:  $line\n";
		}
		elsif ($cmd eq '&') {
			# This command is obsolete.
			print "\tSkipping obsolete Perl command (&) at $file line $lineno.\n";
			$skip = 1;
		}

		# Skipping this line?
		if ($skip) {
			$skippedLast = 1;
			next;
		}

		$skippedLast = 0;
		if ($cmd =~ /^(\!|>|\+|\-|\%|\^|\@|\*|\#)$/i) {
			push (@new,join(" ",$cmd,$line));
		}
		else {
			push (@new,join("",$cmd,$line));
		}
	}

	# Cut off the directory.
	my $name = $file;
	if (length $out) {
		my @parts = split(/(\/|\\)/, $file);
		$name = pop(@parts);
	}

	# Save the file.
	if (length $out) {
		print "=> Writing $out/$name\n";
		open (WRITE, ">$out/$name");
		print WRITE join("\n",@new);
		close (WRITE);
	}
	else {
		print "=> Writing $name\n";
		open (WRITE, ">$name");
		print WRITE join("\n",@new);
		close (WRITE);
	}
}

sub usage {
	print "Usage: rsup [--out --ext --backup --dontfixperl] <docs or directories>\n"
		. "Try `rsup --help` for more information.\n";
	exit(0);
}

sub help {
	exit(0);
}

=head1 NAME

rsup - Upgrade RiveScript 1.x documents to the new 2.x standards.

=head1 SYNOPSIS

  rsup [--out --ext --backup --dontfixperl] <files or folders>

=head1 DESCRIPTION

When RiveScript was rewritten to a new standard, a certain areas of backwards
compatibility became broken. See the L<RiveScript::WD> document for details of
the incompatible changes.

This command-line tool can upgrade obsolete RiveScript code to fix these
incompatibilities and allow it to be parsed by a RiveScript 2 interpreter.

=head1 OPTIONS

=over 4

=item --out <directory>

=item -o

Specify a directory to output the new documents. If not specified, the files
being read from will be replaced with the new documents.

=item --ext <extension=.rs>

=item -x

For any arguments that are directories, all files in that directory ending with
this extension are read. Default is C<.rs>

=item --backup

=item --bak

=item -b

Specify this flag if you want backup files (such as those created by Emacs
or gEdit) to be processed. The default is to B<not> read these files.

=item --dontfixperl

=item -p

When reading in Perl objects, C<rsup> will, by default, attempt to fix the C<@_>
lines to include C<$rs>, the reference to the RS instance. Since this will
modify the code of your object, you can specify this flag to disable this
feature.

=item directories or documents

After specifying command-line arguments, give C<rsup> a list of directories or
files to work on. For directories, they are opened and any RiveScript documents
inside are automatically processed. For individual files, just these files will
be processed.

=back

=head1 CAVEATS

This program is still under development. It tries its best to upgrade old
RiveScript code to the new standards, but it's not perfect. It will output
everything it changes to the terminal, but you may need to go through and make
some custom tweaks to fix anything that it didn't translate properly.

=head1 AUTHOR

Casey Kirsle, http://www.rivescript.com/

=cut
