#!/usr/bin/perl
#
# psh - Perl Shell
#
# TODO: Use more obsure file handle names, or use lexical FileHandle objects,
# so that the user cannot get them and their names don't interfere with user
# code.
#
# TODO: Behave more like a shell so that, e.g. `use Shell qw(vi); vi "foo";'
# does what one would expect.
#
# Copyright (C) 1999 Gregor N. Purdy. All rights reserved.
# This script is free software. It may be copied or modified according
# to the same terms as Perl itself.
#

package psh;

use vars qw($VERSION);
$VERSION   = '0.00205';

use Cwd;
use Cwd 'chdir';

use Getopt::Std;

my %opt;

getopts('dr:', \%opt);


#
# TODO: Figure out how to make this useful, including abandoning the current
# input even when it is under the control of ReadLine...
#
# $SIG{'INT'} = sub { print "\n!!!\n"; };
#


#
# Deal with debug mode:
#

if ($opt{'d'}) {
	print "DEBUGGING!\n";

	$^W = 1;    # Simulate the -w command-line switch
	use strict; # Turned on when debugging
}


#
# Global Variables:
#
# $prompt is intended to be accessible to the user via $psh::prompt, 
# which can be set to a string, or to a subroutine ref that returns
# a prompt string.
#
# The other variables are private, lexical variables.
#

use vars qw($bin $news_file $cmd $prompt $smart @wday @mon %prompt_vars %built_ins);
my $default_prompt = '\s\$ ';

my $input;


#
# Set up ReadLine:
#

my $term;

eval "use Term::ReadLine;";

if ($@) {
	$term = undef;
	print "Term::ReadLine not available.\n" if $opt{'d'};
} else {
	$term = Term::ReadLine->new('psh');
	$term->MinLine(0);   # We will handle history adding ourselves (undef causes trouble).
	print "Using ReadLine: ", $term->ReadLine(), "\n" if $opt{'d'};
}


#
# read_until()
#
# Get successive lines via calls to &$get() until one of those
# lines contains exactly the text $terminator. Used to implement
# the `<<EOF` multiline quoting construct;
#
# TODO: Undo any side effects of, e.g., m//.
#

sub read_until
{
	my ($get, $terminator) = @_;
	my $input;
	my $temp;

	$input = '';
	while (1) {
		$temp = &$get();
		last unless defined($temp);
		last if $temp =~ m/^$terminator$/;
		$input .= $temp;
	}

	return $input;
}


#
# process()
#
# Process a source of lines until it returns undef.
#
# The $get argument is a subroutine ref, each call to which
# returns a line of input, or undef if there are no more lines.
#
# The $put argument is a subroutine ref, each call to which
# prints output. It is important for the output routine to
# ensure that output is flushed at each call, since it is
# used to print the prompt.
#
# The $prompt argument is a boolean which indicates whether or
# not to print the prompt. When sourcing files (like .pshrc),
# it is important to not print the prompt string, but for
# interactive use, it is important to print it.
#
# TODO: Undo any side effects, e.g. done by m//.
#

sub which
{
	my $cmd  = shift;
	my @path = split(':', $ENV{PATH});

	foreach my $dir (@path) {
		my $try = "$dir/$cmd";

		if ((-r $try) and (-x $try) and (!-d $try)) { return $try; }
	}

	return undef;
}

%built_ins = (
	'exit'  => sub { exit; },
	'cd'    => sub { chdir @_; },
	'which' => sub {
		my $cmd   = shift;
		if (!defined($cmd) or $cmd eq '') {
			print STDERR "which: requires one argument\n";
		}

		my $which;
		if ($built_ins{$cmd}) { $which = '(built-in)'; }
		else                  { $which = which $cmd;   }

		if (!defined($which)) { print STDERR "which: no $cmd in ($ENV{PATH})\n"; }
		print "$which\n";
	}
);

sub process
{
	my ($get, $put, $prompt) = @_;
	local $cmd;

#	print "[[PROCESSING GENERIC INPUT]]\n";

	while (1) {
		if ($prompt) {
			$input = &$get(prompt_string());
		} else {
			$input = &$get();
		}

		$cmd++;

		last unless defined($input);

		chomp $input;

		if      ($input =~ m/^\s*\#.*$/) {
			undef $input;
		} elsif ($input =~ m/^\s*$/) {
			undef $input;
		} elsif ($input =~ m/^\s*<<([a-zA-Z_0-9\-]*)\s*$/) {
			$input = read_until($get, $1);
		} elsif ($input =~ m/^\s*\.\s+(.+)\s*$/) {
			process_file($1);
			undef $input;
		} elsif ($input =~ m/^\s*!(.+)\s*$/) {
			system($1);
			undef $input;

		#
		# TODO: Really what we need is a perl
		# quoting mechanism that produces an array of arguments using
		# shell-like parsing rules. The idea would be you give it a
		# string which is expected to be a sequence of scalars without
		# commas between them. This is much like qw(), except that it
		# allows internal quoting. Note that the handling of the string
		# sent to system() probably has some of the necessary logic.
		#

		} elsif ($input =~ m/^\s*([a-zA-Z0-9_-]+)(\s+(.*)$|($))/) {
			my $cmd = $1;
			my $arg = $3;

			if (ref $psh::built_ins{$cmd} eq 'CODE') {
				print "Using built-in '$cmd'.\n" if $opt{'d'};
				&{$psh::built_ins{$cmd}}($arg);
				undef $input;
			} elsif ($psh::smart) {
				my $which = which($cmd);

				if (defined($which)) {
					print "Using smart '$which'.\n" if $opt{'d'};
					system("$which $arg");
					undef $input;
				}
			}
		}

		next unless defined($input);

		#
		# TODO: Is this the best way to manage the package context?
		#
		# Consider:
		#
		#     my $pkg = package;
		#     package psh;
		#     ...
		#     package $pkg;
		#     eval ...
		#     package $psh;
		#
		# The idea here is to not force "package main" as it does now.
		#

		my $result;
		package main;
		$result = eval $input;
		package psh;

		my $msg = $@;
		chomp $msg;

		if (!defined($result)) {
			# TODO: This *should* be an indication of trouble, too!
			# But, sub { ... }; and use ...; both have undef results from eval!
#			print STDERR "Error ($msg) evaluating input: `$input'!\n";
		}

		if ($msg) {
			print STDERR "Error ($msg)!\n";
		}
	}
}


#
# process_file()
#

sub process_file
{
	my ($path) = @_;

#	print "[[PROCESSING FILE $path]]\n";

	if (!-r $path) {
		print STDERR "$bin: Cannot read script `$path'\n";
		return;
	}

	if (!open(FILE, $path)) {
		print STDERR "$bin: Cannot open script `$path'\n";
		return;
	}

	process(sub { return <FILE>; });

	close(FILE);

#	print "[[FINISHED PROCESSING FILE $path]]\n";
}


#
# prompt_string()
#
# Construct a prompt string.
#
# TODO: Should we have an entry for '\'?
#

%prompt_vars = (
	'd' => sub {
			my ($wday, $mon, $mday) = (localtime)[6, 4, 3];
			$wday = $wday[$wday];
			$mon  = $mon[$mon];
			return "$wday $mon $mday";
		},
	'h' => sub {
			my $host = `uname -n`;
			chomp $host;
			return $host;
		},
	's' => sub {
			my $shell = $bin;
			$shell =~ s/^.*\///;
			return $shell;
		},
	'n' => sub { return "\n"; },
	't' => sub {
			my ($hour, $min, $sec) = (localtime)[2, 1, 0];
			return sprintf("%02d:%02d:%02d", $hour, $min, $sec);
		},
	'u' => sub {
			# Camel, 2e, p. 172: 'getlogin'.
			return getlogin || (getpwuid($>))[0] || "uid$>";
		},
	'w' => sub { return cwd; },
	'W' => sub { my $dir = cwd; $dir =~ s/^.*\///; return $dir },
	'#' => sub { return $cmd; },
	'$' => sub { return ($> ? '$' : '#'); }
);

sub prompt_string
{
	my $temp;

	#
	# First, get the prompt string from a subroutine or from the default:
	#

	if (ref($prompt) eq 'CODE') { # If it is a subroutine,
		$temp = &$prompt();
	} elsif (ref($prompt)) {      # If it isn't a scalar
		print "psh: Warning: $psh::prompt is neither a SCALAR nor a CODE reference.\n";
		$temp = $default_prompt;
	} else {
		$temp = $prompt;
	}

	#
	# Now, subject it to substitutions:
	#

	while ($temp =~ m/^(.*)\\(.)(.*)$/) {
		my $sub;

		my $var = $prompt_vars{$2};

		if (ref $var eq 'CODE') {
			$sub = &$var();
		} else {
			print "psh: Warning: \$psh::prompt (`$temp') contains unknown escape sequence `\\$2'.\n";
			$sub = ''
		}

		{
			local $1;
			if ($sub =~ m/\\(.)/) {
				print "psh: Warning: Expansion of `\\$2' in prompt string yielded\n";
				print "     string containing `$1'. Stripping escape sequences from\n";
				print "     substitution.\n";
				$sub =~ s/\\(.)//g;
			}
		}

		$temp = $1 . $sub . $3
	}

	return $temp;
}


#
# iget()
#
# Interactive line getting routine. If we have a
# Term::ReadLine instance, use it and record the
# input into the history buffer. Otherwise, just
# grab an input line from STDIN.
#
# readline() returns a line WITHOUT a "\n" at the
# end, and <STDIN> returns one WITH a "\n", UNLESS
# the end of the input stream occurs after a non-
# newline character. So, first we chomp() the
# output of <STDIN> (if we aren't using readline()),
# and then we tack the newline back on in both
# cases. Other code later strips it off if necessary.
#

sub iget
{
	my $prompt = shift;
	my $line;

	if ($term) {
		$line = $term->readline($prompt);
	} else {
		print $prompt;
		$line = <STDIN>;
	}

	chomp $line;
	$line =~ s/^\s+//;
	$line =~ s/\s+$//;

	if ($term and $line) { $term->addhistory($line); }

	return $line . "\n";         # This is expected by other code.
}


#
# news
#

sub news 
{
	if (-r $news_file) {
		return `cat $news_file`;
	} else {
		return '';
	}
}


#
# initialize()
#

sub initialize
{
	$|         = 1;                # Set ouput autoflush on
	$prompt    = $default_prompt;  # Set default prompt
	$smart     = 0;
	$cmd       = 1;

	$bin       = $0;
	$bin       =~ s/.*\///;

	$news_file = "$bin.NEWS";

	#
	# TODO: Internationalize!
	#
	# Although, right now they can be overridden in the .pshrc file
	# via @psh::wday = qw(Dom Lun Mar Mie Jue Vie Sab).
	#

	@wday      = qw(Sun Mon Tue Wed Thu Fri Sat);
	@mon       = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
}


#
# process_rc()
#
# Search for and process .pshrc files.
#

sub process_rc
{
	my @rc;
	my $rc_name = ".pshrc";

#	print "[ LOOKING FOR .pshrc ]\n";

	if ($opt{'r'}) {
		push @rc, $opt{'r'};
	} else {
		if ($ENV{HOME}) { push @rc, "$ENV{HOME}/$rc_name"; }
		push @rc, "$rc_name" unless $ENV{HOME} eq cwd;
	}

	foreach my $rc (@rc) {
		if (-r $rc) {
#			print "[ PROCESSING $rc ]\n";
			process_file($rc);
		}
	}
}


#
# process_args()
#
# Process files listed on command-line.
#

sub process_args
{
	my @rc;
	my $rc_name = ".pshrc";

#	print "[ PROCESSING @ARGV FILES ]\n";

	foreach my $arg (@ARGV) {
		if (-r $arg) {
#			print "[ PROCESSING $arg ]\n";
			process_file($arg);
		}
	}
}


#
# main_loop()
#
# Determine whether or not we are operating interactively,
# set up the input routine accordingly, and process the
# input.
#

sub main_loop
{
	my $interactive = (-t STDIN) and (-t STDOUT);
	my $get;

	#print "[[STARTING MAIN LOOP]]\n";

	if ($interactive) { $get = \&iget;                  }
	else              { $get = sub { return <STDIN>; }; }

	process($get, sub { print @_; }, $interactive);
}


#
# symbols()
#
# Print out the symbols of each type used by a package. Note: in testing,
# it bears out that the filehandles are present as scalars, and that arrays
# are also present as scalars. The former is not particularly surprising,
# since they are implemented as tied objects. But, use vars qw(@X) causes
# both @X and $X to show up in this display. This remains mysterious.
#

sub symbols
{
	my $pack = shift;
	my (@ref, @scalar, @array, @hash, @code, @glob, @handle);
	my @sym;

	{
		no strict qw(refs);
		@sym = keys %{*{"${pack}::"}};
	}

	for my $sym (sort @sym) {
		next unless $sym =~ m/^[a-zA-Z]/; # Skip some special variables
		next if     $sym =~ m/::$/;       # Skip all package hashes

		{
			no strict qw(refs);

			push @ref,    "\$$sym" if ref *{"${pack}::$sym"}{SCALAR} eq 'REF';
			push @scalar, "\$$sym" if ref *{"${pack}::$sym"}{SCALAR} eq 'SCALAR';
			push @array,  "\@$sym" if ref *{"${pack}::$sym"}{ARRAY}  eq 'ARRAY';
			push @hash,   "\%$sym" if ref *{"${pack}::$sym"}{HASH}   eq 'HASH';
			push @code,   "\&$sym" if ref *{"${pack}::$sym"}{CODE}   eq 'CODE';
#			push @glob,   "\*$sym" if ref *{"${pack}::$sym"}{GLOB}   eq 'GLOB';
			push @handle, "$sym"   if ref *{"${pack}::$sym"}{FILEHANDLE};
		}
	}

	print "Reference: ", join(' ', @ref),    "\n";
	print "Scalar:    ", join(' ', @scalar), "\n";
	print "Array:     ", join(' ', @array),  "\n";
	print "Hash:      ", join(' ', @hash),   "\n";
	print "Code:      ", join(' ', @code),   "\n";
#	print "Glob:      ", join(' ', @glob),   "\n";
	print "Handle:    ", join(' ', @handle), "\n";
}


#
# MAIN:
#

initialize;
process_rc;

if (@ARGV) {
	process_args;
} else {
	main_loop;
}

exit 0;

__END__

=pod

=head1 NAME

psh - Perl Shell


=head1 SYNOPSIS

A fairly simple read-eval loop. The C<-w> flag and 'C<use strict>' are
not employed so that the user is not bound by their stipulations.
Setting C<$^W = 1> will turn on warnings, and calling 'C<use strict>'
will (almost) do the usual thing if called by the user (see LIMITATIONS,
below).


=head1 DESCRIPTION

Each line of input is read and immediately evaluated.

Multiline input may be entered by starting with a line like C<E<lt>E<lt>XXX>,
followed by lines of input not having C<XXX> on a line by itself,
followed by such a line. If C<XXX> is not specified, then the first
blank line terminates the input.

An input line beginning with `!' will be given as a parameter to
the C<system()> Perl function.

An input line beginning with `.' followed by a space and a file name
will cause the contents of the specified file to be read in and
evaluated.

If C<$ENV{HOME}> is set, and the file C<$ENV{HOME}/.pshrc> is present,
it will be read in and evaluated before processing begins. If not,
but C<.pshrc> is present in the current directory, it will be
read and executed.


=head2 PROMPT STRINGS

Setting the variable C<$psh::prompt> to a string will cause that string
to be used as the prompt-string. Setting it to a subroutine reference
causes the result of running that subroutine to be used each time.
For example,

  $psh::prompt = sub { $i++; "psh [$i]\$ "; }

will cause the prompt to be C<psh [1]$> followed by C<psh [2]$>, and so on.

C<psh> uses some of the same ``prompting variables'' as C<bash>. They are
accessed by placing a backslash followed by the code in the prompt string,
either hard coded, or as returned by the prompt string function. The
variables supported are:

=over 4

=item d The date in ``Weekday Month Day'' format

=item h The hostname

=item n A carriage return and line feed

=item s The name of the shell

=item t The current time in HH:MM:SS format

=item u The username of the current user

=item w The current working directory

=item W The basename of the current working directory

=item # The command number of the current command

=item $ `#' if the effective UID is zero, else `$'

=back

Custom prompting variables may be added by adding entries to the array
C<%psh::prompt_vars> keyed by the single character code. The entries
should be subroutine references that return the replacment string.


=head2 BUILT-IN FUNCTIONS

The following functions are built into C<psh>:

=over 4

=item * C<cd>

Change the working directory.

=item * C<exit>

Exit out of the shell.

=item * C<which>

Search the directories in the C<PATH> environment variable for an executable
of the name given. Return the path of the first such executable found.

=back


=head1 LIMITATIONS

The loop inside C<psh> will clobber C<$1> and other variables because
it uses matches to implement some of its special functions.

Very little error checking is done.

The effect of `use foo' is not as expected. Such statements, when
sent to Perl's eval() function, cause eval() to return undef, which is
supposed to indicate an error. Simple detection of isolated use statements
could be hacked in, but it would not be general, and would therefore
be fragile.


=head1 OTHER PERL SHELLS

Larry Wall exhibits the simple Perl shell C<while (E<lt>E<gt>) { eval; print $@; }> on
page 161 of the Camel Book (2nd Edition).

Rich Graves E<lt>F<rcgraves@brandeis.edu>E<gt> posted a comment to the original
psh-0.001 announcement on C<http://freshmeat.net>, which contained this
gem that leverages the Perl debugger: C<perl -d -e 1>;


=head1 FILES

C<psh> - The Perl Shell executable script.

C<.pshrc> - The user's Perl Shell `profile'. May be in C<$HOME> or the
current directory.


=head1 AUTHOR

Gregor N. Purdy, E<lt>F<gregor@focusresearch.com>E<gt>


=head1 CREDITS

The following people made contributions to this project.

=over 4

=item ReadLine Support

Code examples showing how to apply the Term::ReadLine package were contributed
by Billy Naylor E<lt>F<billy.naylor@eu.net>E<gt> (in his C<pash.pl> program,
which is his own Perl shell).

=item Symbol Table Dumping

Billy Naylor E<lt>F<billy.naylor@eu.net>E<gt> also had an example of a symbol
table printing function that was used as the starting point for the C<psh>
function C<psh::symbols()>. The C<psh> version adds the ability to specify a
package name, and it also filters out some special variables. The implementation
technique is also different from Billy's.

=item Prompt String Variables

Matthew D. Allen E<lt>F<s2mdalle@titan.vcu.edu>E<gt> contributed an enhanced
prompt string handling routine that emulates the C<bash> prompt variables. This
was expanded into the form now present.

=item Typo Spotting

Allan Kelly E<lt>F<akelly@holyrood.ed.ac.uk>E<gt> found some problems with the
generated documentation.

=back


=head1 COPYRIGHT

Copyright (C) 1999 Gregor N. Purdy. All rights reserved.
This script is free software. It may be copied or modified according
to the same terms as Perl itself.

=cut


#
# End of file.
#

