package CodeGen::Cpppp;
use v5.20;
use warnings;
use Carp;

our $VERSION = '0.001_03'; # TRIAL VERSION
# ABSTRACT: The C Perl-Powered Pre-Processor


sub new {
   my ($class, %attrs)= @_;
   bless \%attrs, $class;
}


our $next_pkg= 1;
sub compile_template {
   my ($self, $in, $filename, $line)= @_;
   my $parse= $self->_parse_cpppp($in, $filename, $line);
   my $pkg= 'CodeGen::Cpppp::_Template'.$next_pkg++;
   my $perl= "package $pkg;\n"
      ."use v5.20;\n"
      ."use warnings;\n"
      ."no warnings 'experimental::lexical_subs', 'experimental::signatures';\n"
      ."use feature 'lexical_subs', 'signatures';\n"
      ."use CodeGen::Cpppp::Template -setup;\n"
      ."sub process(\$self) {\n"
      ."$parse->{code};\n"
      ."}\n"
      ."1\n";
   unless (eval $perl) {
      my $err= "$@";
      STDERR->print($perl);
      die $err;
   }
   $pkg->_set_parse_data($parse);
   return $pkg;
}

sub _parse_cpppp {
   my ($self, $in, $filename, $line)= @_;
   my $line_ofs= $line? $line - 1 : 0;
   if (ref $in eq 'SCALAR') {
      my $tmp= $in;
      utf8::encode($tmp) if utf8::is_utf8($tmp);
      undef $in;
      open($in, '<', $tmp) or die;
      defined $in or die;
   }
   $self->{cpppp_parse}= {
      autocomma => 1,
      autostatementline => 1,
      autoindent => 1,
   };
   my ($perl, $tpl_start_line, $cur_tpl);
   my $end_tpl= sub {
      if ($cur_tpl =~ /\S/) {
         my $parsed= $self->_parse_code_block($cur_tpl, $filename, $tpl_start_line);
         $perl .= $self->_gen_perl_call_code_block($parsed);
      }
      $cur_tpl= undef;
   };
   while (<$in>) {
      if (/^#!/) { # ignore #!
      }
      elsif (/^##(?!#)/) { # full-line of perl code
         if (defined $cur_tpl) {
            &$end_tpl;
            $perl .= '# line '.($.+$line_ofs).qq{ "$filename"\n};
         }
         elsif (!defined $perl) {
            $perl= '# line '.($.+$line_ofs).qq{ "$filename"\n};
         }
         s/^##\s?//;
         my $pl= $_;
         $perl .= $self->_process_template_perl($pl);
      }
      elsif (/^(.*?) ## ?((?:if|unless) .*)/) { # perl conditional suffix, half tpl/half perl
         my ($tpl, $pl)= ($1, $2);
         &$end_tpl if defined $cur_tpl;
         $tpl_start_line= $. + $line_ofs;
         $cur_tpl= $tpl;
         &$end_tpl;
         $perl =~ s/;\s*$//; # remove semicolon
         $pl .= ';' unless $pl =~ /;\s*$/; # re-add it if user didn't
         $perl .= qq{\n# line }.($.+$line_ofs).qq{ "$filename"\n    $pl\n};
      }
      else { # default is to assume a line of template
         if (!defined $cur_tpl) {
            $tpl_start_line= $. + $line_ofs;
            $cur_tpl= '';
         }
         $cur_tpl .= $_;
      }
   }
   &$end_tpl if defined $cur_tpl;
   $self->{cpppp_parse}{code}= $perl;
   delete $self->{cpppp_parse};
}

sub _process_template_perl {
   my ($self, $pl)= @_;
   # If user declares "sub NAME(", convert that to "my sub NAME" so that we
   # can grab a ref to it later.
   if ($pl =~ /\b sub \s* (\w+) \s* \(/x) {
      push @{$self->{cpppp_parse}{named_subs}}, $1;
      # look backward and see if it already started with 'my'
      my $pos= rindex($pl, "my", $-[0]);
      if ($pos == -1) {
         substr($pl, $-[0], 0, 'my ');
      }
   }
   # If user declares "##define name(", convert that to both a method and a define
   if ($pl =~ /\b define \s* (\w+) (\s*) \(/x) {
      push @{$self->{cpppp_parse}{named_subs}}, $1;
      substr($pl, $-[2], $+[2]-$-[2], '=> \$self->{define}{'.$1.'}; my sub '.$1);
   }
   $pl;
}

sub _gen_perl_call_code_block {
   my ($self, $parsed)= @_;
   my $codeblocks= $self->{cpppp_parse}{code_block_templates} ||= [];
   push @$codeblocks, $parsed;
   my $code= '$self->_render_code_block('.$#$codeblocks;
   my %cache;
   my $i= 0;
   my $cur_line= 0;
   for my $s (@{$parsed->{subst}}) {
      if (defined $s->{eval}) {
         # No need to create more than one anonsub for the same expression
         if (defined $cache{$s->{eval}}) {
            $s->{eval_idx}= $cache{$s->{eval}};
            next;
         }
         $cache{$s->{eval}}= $s->{eval_idx}= $i++;
         my $sig= $s->{eval} =~ /self|output/? '($self, $output)' : '';
         if ($s->{line} == $cur_line) {
            $code .= qq{, sub${sig}{ $s->{eval} }};
         } elsif ($s->{line} == $cur_line+1) {
            $cur_line++;
            $code .= qq{,\n  sub${sig}{ $s->{eval} }};
         } else {
            $code .= qq{,\n# line $s->{line} "$parsed->{file}"\n  sub${sig}{ $s->{eval} }};
            $cur_line= $s->{line};
            $cur_line++ for $s->{eval} =~ /\n/g;
         }
      }
   }
   $code .");\n";
}

sub _parse_code_block {
   my ($self, $text, $file, $orig_line)= @_;
   $text .= "\n" unless substr($text,-1) eq "\n";
   if ($text =~ /^# line (\d+) "([^"]+)"/) {
      $orig_line= $1-1;
      $file= $2;
   }
   local our $line= $orig_line || 1;
   local our $start;
   local our @subst;
   local $_= $text;
   # Parse and record the locations of the embedded perl statements
   ()= m{
      (?(DEFINE)
         (?<BALANCED_EXPR> (?>
              \{ (?&BALANCED_EXPR) \}
            | \[ (?&BALANCED_EXPR) \]
            | \( (?&BALANCED_EXPR) \)
            | [^[\](){}\n]+
            | \n (?{ $line++ })
         )* )
      )
      [\$\@] (?{ $start= -1+pos }) 
      (?:
        \{ (?&BALANCED_EXPR) \}           # 
        | [\w_]+                          # plain variable
         (?:                              # maybe followed by ->[] or similar
            (?: -> )?
            (?: \{ (?&BALANCED_EXPR) \} | \[ (?&BALANCED_EXPR) \] )
         ) *                       
      ) (?{ push @subst, { pos => $start, len => -$start+pos, line => $line };
            
          })
      | \n     (?{ $line++ })
   }xg;
   
   for (0..$#subst) {
      my $s= $subst[$_];
      # Special cases
      my $expr= substr($text, $s->{pos}, $s->{len});
      if ($expr eq '$trim_comma') {
         # Modify the text being created to remove the final comma
         $s->{fn}= sub { ${$_[1]} =~ s/,(\s*)$/$1/; '' };
      } elsif ($expr =~ /^ \$\{\{ (.*) \}\} $/x) {
         # Notation ${{ ... }} is a shortcut for @{[do{ ... }]}
         $s->{eval}= $1;
      } else {
         $s->{eval}= $expr; # Will need to be filled in with a coderef
      }
   }
   # Detect columns.  Look for any location where two spaces occur.
   local our %cols;
   local our $linestart= 0;
   $line= $orig_line || 1;
   pos= 0;
   while (m{\G(?>
        \n (?{ ++$line; $linestart= pos })
      | [ ][ ]+ (?{ push @{$cols{-$linestart + pos}}, { pos => pos, len => 0, line => $line  } })
      | .
   )}xcg) {}
   warn "BUG: failed to parse columns" unless pos == length($text);
   # Delete all column markers that occur inside of code substitutions
   for my $s (@subst) {
      for my $col (grep $_ > $s->{pos} && $_ < $s->{pos} + $s->{len}, keys %cols) {
         my $markers= $cols{$col};
         @$markers= grep $_->{pos} > $s->{pos}+$s->{len} || $_->{pos} < $s->{pos},
            @$markers;
      }
   }
   # Detect the actual columns from the remaining markers
   my $colgroup= 0;
   for my $col (sort { $a <=> $b } keys %cols) {
      # Find out which column markers are from adjacent lines
      my $lines= $cols{$col};
      my @adjacent= [ $lines->[0] ];
      for (1..$#$lines) {
         if ($adjacent[-1][-1]{line} + 1 == $lines->[$_]{line}) {
            push @{ $adjacent[-1] }, $lines->[$_];
         } else {
            push @adjacent, [ $lines->[$_] ];
         }
      }
      # Need at least 2 adjacent lines to count as a colum.
      for (grep @$_ > 1, @adjacent) {
         # At least one of the lines must have text to the left of it
         my $has_left= 0;
         for (@$_) {
            my $linestart= rindex($text, "\n", $_->{pos})+1;
            if (substr($text, $linestart, $_->{pos}-$linestart) =~ /\S/) {
               $has_left= 1;
               last;
            }
         }
         next unless $has_left;
         # this is a new linked column group
         ++$colgroup;
         # add one column marker per line in this group
         push @subst, map +{ colgroup => $colgroup, pos => $_->{pos}, len => 0, line => $_->{line} }, @$_;
      }
   }
   # Now merge the column markers into the substitutions in string order
   @subst= sort { $a->{pos} <=> $b->{pos} or $a->{len} <=> $b->{len} } @subst;
   
   { text => $text, subst => \@subst, file => $file }
}

package CodeGen::Cpppp::Template;
$INC{'CodeGen/Cpppp/Template.pm'}= 1;
use v5.20;
use warnings;
use Carp;

sub import {
   my $class= shift;
   my $caller= caller;
   for (@_) {
      if ($_ eq '-setup') {
         no strict 'refs';
         push @{$caller.'::ISA'}, $class;
      } else { croak "$class does not export $_" }
   }
}

sub _set_parse_data {
   my ($class, $parse)= @_;
   no strict 'refs';
   ${$class.'::_parse_data'}= $parse;
}

sub new {
   my $class= shift;
   no strict 'refs';
   bless {
      %{${$class.'::_parse_data'}},
      out => {
         public => '',
         protected => '',
         private => '',
         decl => '',
         impl => '',
      }
   }, $class;
}

sub render {
   my $self= shift;
   $self->{process_result} //= $self->process;
   return $self->{out}{impl};
}

sub _render_code_block {
   my ($self, $i, @expr_subs)= @_;
   my $block= $self->{code_block_templates}[$i];
   my $text= $block->{text};
   my $newtext= '';
   my $at= 0;
   my %colmarker;
   my $prev_colmark;
   # First pass, perform substitutions and record new column markers
   my sub str_esc{ join '', map +(ord($_) > 0x7e || ord($_) < 0x21? sprintf("\\x{%X}",ord) : $_), split //, $_[0] }
   for my $s (@{$block->{subst}}) {
      $newtext .= substr($text, $at, $s->{pos} - $at);
      if ($s->{colgroup}) {
         my $mark= $colmarker{$s->{colgroup}} //= join '', "\x{200A}", map chr(0x2000+$_), split //, $s->{colgroup};
         $newtext .= $mark;
         $prev_colmark= $s;
      }
      elsif (defined $s->{fn}) {
         $newtext .= $s->{fn}->($self, \$newtext);
      }
      elsif (defined $s->{eval_idx}) {
         my $fn= $expr_subs[$s->{eval_idx}]
            or die;
         # Avoid using $_ up to this point so that $_ pases through
         # from the surrounding code into the evals
         my @out= $fn->($self, \$newtext);
         # Expand arrayref and coderefs in the returned list
         @out= @{$out[0]} if @out == 1 && ref $out[0] eq 'ARRAY';
         ref eq 'CODE' && ($_= $_->($self, \$newtext)) for @out;
         # Now decide what to join them with.
         my $join_sep= $";
         my $indent= '';
         my ($last_char)= ($newtext =~ /(\S) (\s*) \Z/x);
         my $cur_line= substr($newtext, rindex($newtext, "\n")+1);
         my $inline= $cur_line =~ /\S/;
         if ($self->{autoindent}) {
            ($indent= $cur_line) =~ s/\S/ /g;
         }
         # Special handling if the user requested a list substitution
         if (ord $s->{eval} == ord '@') {
            $last_char= '' unless defined $last_char;
            if ($self->{autocomma} && ($last_char eq ',' || $last_char eq '(')) {
               if (@out) {
                  $join_sep= $inline? ', ' : ",\n";
                  @out= grep /\S/, @out; # remove items that are only whitespace
               }
               # If no items, or the first nonwhitespace character is a comma,
               # remove the previous comma
               if (!@out || $out[0] =~ /^\s*,/) {
                  $newtext =~ s/,(\s*)\Z/$1/;
               }
            } elsif ($self->{autostatementline} && ($last_char eq '{' || $last_char eq ';')) {
               @out= grep /\S/, @out; # remove items that are only whitespace
               $join_sep= $inline? "; " : ";\n";
            } elsif ($self->{autoindent} && !$inline && $join_sep !~ /\n/) {
               $join_sep .= "\n";
            }
         }
         my $out= join $join_sep, @out;
         # Autoindent: if new text contains newline, add current indent to start of each line.
         if ($self->{autoindent} && $indent) {
            $out =~ s/\n/\n$indent/g;
         }
         $newtext .= $out;
      }
      $at= $s->{pos} + $s->{len};
   }
   $text= $newtext . substr($text, $at);
   # Second pass, adjust whitespace of all column markers so they line up.
   # Iterate from leftmost column rightward.
   autoindent: for my $group_i (sort { $a <=> $b } keys %colmarker) {
      my $token= $colmarker{$group_i};
      # Find the longest prefix (excluding trailing whitespace)
      my $maxcol= 0;
      my ($linestart, $col);
      while ($text =~ /[ ]*$token/mg) {
         $linestart= rindex($text, "\n", $-[0])+1;
         $col= $-[0] - $linestart;
         $maxcol= $col if $col > $maxcol;
      }
      $text =~ s/[ ]*$token/
         $linestart= rindex($text, "\n", $-[0])+1;
         " "x(1 + $maxcol - ($-[0] - $linestart))
         /ge;
   }
   $self->{out}{impl} .= $text;
}

package CodeGen::Cpppp::Template::Imports;
use Exporter;
our @EXPORT_OK= qw( PUBLIC PROTECTED PRIVATE );
our %EXPORT_TAGS= ( all => \@EXPORT_OK );

sub PUBLIC {}
sub PROTECTED {}
sub PRIVATE {}

1;

=pod

=encoding UTF-8

=head1 NAME

CodeGen::Cpppp - The C Perl-Powered Pre-Processor

=head1 VERSION

version 0.001_03

=head1 SYNOPSIS

I<Does that mean it's more powerful?  ...Is it more powerful?>

I<Well, it's one layer of abstraction higher, isn't it?  It's not C<m4>.  You see, most
blokes gonna be templating with C<cpp> or C<m4>, you're on C<m4> here all the way up,
all the way up, Where can you go from there? Where?>

I<Nowhere!  Exactly.>

I<What we do is if we need that extra, push, over the cliff, you know what we do?>

I<C<perl>, exactly.>

I<These go to C<perl>.>

B<Input:>

  #! /usr/bin/env cpppp
  ## for (my $bits= 8; $bits <= 16; $bits <<= 1) {
  struct tree_node_$bits {
    uint${bits}_t  left:  ${{$bits-1}},
                   color: 1,
                   right: ${{$bits-1}};
  };
  ## }

B<Output:>

  struct tree_node_8 {
    uint8_t left:  7,
            right: 7,
            color: 1;
  };
  struct tree_node_16 {
    uint16_t left:  15,
             right: 15,
             color: 1;
  };

B<Input:>

  ## my @extra_args;
  extern int fn( char *format, @extra_args );
  ## for ('int a', 'int b') {
  ##   push @extra_args, $_;
  extern int fn_$_( char *format, @extra_args );
  ## }

B<Output:>

  extern int fn( char *format  );
  extern int fn_a( char *format, int a );
  extern int fn_b( char *format, int a, int b );

=head1 DESCRIPTION

B<WARNING: this API is completely and totally unstable>.

This module is a preprocessor for C, or maybe more like a perl template engine
that specializes in generating C code.  Each input file gets translated to Perl
in a way that declares a new OO class, and then you can create instances of that
class with various parameters to generate your C output, or call methods on it
like automatically generating headers or function prototypes.

For the end-user, there is a 'cpppp' command line tool that behaves much like
the 'cpp' tool.

If you have an interest in this, contact me, because I could use help
brainstorming ideas about how to accommodate the most possibilities, here.

B<Possible Future Features:>

=over

=item *

Scan existing headers to discover available macros, structs, and functions on the host.

=item *

Pass a list of headers through the real cpp and analyze the macro output.

=item *

Shell out to a compiler to find 'sizeof' information for structs.

=item *

Directly perform the work of inlining one function into another.

=back

=head1 CONSTRUCTOR

Bare-bones for now, it accepts whatever hash values you hand to it.

=head1 METHODS

=head2 compile_template

  $cpppp->compile_template($input_fh, $filename);
  $cpppp->compile_template(\$scalar_tpl, $filename, $line_offset);

This reads the input file handle (or scalar-ref) and builds a new perl template
class out of it (and dies if there are syntax errors in the template).

Yes, this 'eval's the input, and no, there are not any guards against
malicious templates.  But you run the same risk any time you run someone's
'./configure' script.

=head1 AUTHOR

Michael Conrad <mike@nrdvana.net>

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2023 by Michael Conrad.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut

__END__
sub patch_header($self, $fname, $patch_markers=undef) {
   $patch_markers //= "GENERATED ".uc($self->namespace)." HEADERS";
   $self->_patch_file($fname, $patch_markers,
      join '', map { chomp; "$_\n" } $self->public_decl->@*, $self->public_type->@*, $self->public_impl->@*);
}

sub patch_source($self, $fname, $patch_markers=undef) {
   $patch_markers //= "GENERATED ".uc($self->namespace)." IMPLEMENTATION";
   $self->_patch_file($fname, $patch_markers,
      join '', map { chomp; "$_\n" } $self->private_decl->@*, $self->private_type->@*, $self->private_impl->@*);
}

sub patch_xs_boot($self, $fname, $patch_markers=undef) {
   $patch_markers //= "GENERATED ".uc($self->namespace)." XS BOOT";
   $self->_patch_file($fname, $patch_markers,
      join '', map { chomp; "$_\n" } $self->xs_boot->@*);
}

sub _slurp_file($self, $fname) {
   open my $fh, '<', $fname or die "open($fname): $!";
   my $content= do { local $/= undef; <$fh> };
   $fh->close or die "close: $!";
   $content;
}

sub _patch_file($self, $fname, $patch_markers, $new_content) {
   open my $fh, '+<', $fname or die "open($fname): $!";
   my $content= do { local $/= undef; <$fh> };
   $content =~ s{(BEGIN \Q$patch_markers\E[^\n]*\n).*?(^[^\n]+?END \Q$patch_markers\E)}
      {$1$new_content$2}sm
      or croak "Can't find $patch_markers in $fname";
   $fh->seek(0,0) or die "seek: $!";
   $fh->print($content) or die "write: $!";
   $fh->truncate($fh->tell) or die "truncate: $!";
   $fh->close or die "close: $!";
}

1;
