#! /usr/local/bin/perl -w
#
# naive - generate a regular expression that will lex regular expressions
# it is naive in the sense that it can't lex certain patterns, such as a
# pattern that contains nested parentheses (e.g. /ab(?:cd(?:ef)?gh)+ij/ )
#
# The emitted regular expression is the default pattern that Regexp::Assemble
# uses to pick apart a string into tokens suitable for being assembled. If it
# isn't sufficiently sophisticated, you will have to supply your own lexer.
#
# Copyright (C) David Landgren 2004-2005

use strict;

# Every fragment below is a piece of regular-expression *source text* that is
# interpolated, unchanged, into the here-document at the bottom of the script.
# Inside q{} and qq{} a doubled backslash collapses to one, so q{\\\\} emits
# the two characters \\ -- which, in the emitted pattern, match one literal
# backslash in the string being lexed.
#
# Beware of \{ and \} inside q{...}: the backslash is consumed as a delimiter
# escape and only the bare brace survives. To emit an escaped brace, write
# \\{ ... \\} (the braces then nest and must balance), as $modifier does.

my $meta   =  q{\\\\[aefnrtdDwWsS]};      # backslash + one of a e f n r t d D w W s S
my $compl  =  q{[^\\w*+?@]};              # any char that is not \w, *, +, ? or @
my $punct  = qq{\\\\$compl};              # backslash + one $compl character
my $single =  q{\\\\[^\\w\\/{|}-]};       # backslash + char not in \w / { | } -
my $octal  =  q{\\\\0\d{2}};              # backslash, 0, two digits
my $hex    =  q{\\\\x[\da-fA-F]{2}};      # backslash, x, two hex digits
my $ctrl   =  q{\\\\c.};                  # backslash, c, any one character

# The literal braces are emitted escaped (\{ and \}): an unescaped literal
# left brace in a pattern is deprecated from Perl 5.22 on and rejected in some
# positions by later releases, whereas the escaped form is accepted everywhere
# and matches exactly the same text.
my $named  =  q{\\\\N\\{\\w+\\}};         # backslash, N, {name}

# The braced alternative has to come first. Alternation is tried left to
# right, so with "." in front the "{" of \p{Lu} was taken as the single
# character form and the property was split into \p{ L u }.
my $prop   =  q{\\\\[Pp](?:\\{\\w+\\}|.)}; # backslash, p or P, then {name} or one char

# Shortest run from an opening bracket/paren to a closing one that is not
# immediately preceded by a backslash. Nothing here tracks nesting.
my $class  =  q{\\[.*?(?<!\\\\)\\]};
my $paren  =  q{\\(.*?(?<!\\\\)\\)};

# Everything that may carry a quantifier; the trailing "." is the catch-all
# for any single character not claimed by an earlier alternative.
my $modifiable = qq{$meta|$punct|$octal|$hex|$ctrl|$named|$prop|$class|$paren|.};

# A quantifier: * + ? {n} {n,} {n,m}, optionally followed by a further ?.
my $modifier   = q{(?:(?:[*+?]|\\{\\d+(?:,\\d*)?\\})\\??)};

# Backslash + one of b l u A B C E G L Q U X Z; emitted without a quantifier.
my $directive  = q{\\\\[bluABCEGLQUXZ]};

# Emit the two definitions as Perl source on standard output. \$ keeps the
# variable names (and the end-of-string anchor) literal in the output, while
# the fragment variables are interpolated.
print <<PATTS;
# The following patterns were generated with eg/naive
\$Default_Lexer = qr/$directive|(?:$modifiable)$modifier?/;

\$Single_Char   = qr/^(?:$meta|$single|$octal|$hex|$ctrl|.)\$/;

# unmeta = $compl
# -end-
PATTS

__END__
$Default_Lexer = qr/(?:\\[bluABCEGLUXZ]|(?:\\[aefnrtdDwWsS]|\\[^\w*+?@]|\\0\d{2}|\\x(?:[\da-fA-F]{2}|{[\da-fA-F]{4}})|\\c.|\\N{\w+}|\\[Pp](?:.|{\w+})|\[.*?(?<!\\)\]|\(.*?(?<!\\)\)|.)(?:(?:[*+?]|\{\d+(?:,\d*)?\})\??)?)/;

# Character class candidates
$Single_Char = qr/^(?:\\[aefnrtdDwWsS]|\\[^\w\/{|}-]|\\0\d{2}|\\x(?:[\da-fA-F]{2}|{[\da-fA-F]{4}})|\\c.|.)$/;
