#!/usr/bin/env perl6
#
use v6;

sub MAIN ( Str $fn where $fn.IO ~~ :r = 'name-list.txt',
           Str $fn-stm = 'name-list.stm'
         ) {

  my Str $fn-tm = $fn;
  $fn-tm ~~ s/ '.' \w+ $ /.tm/;

  ( my Array $ordered-name-list, my Array $sex, my Array $real-names,
    my Array $other-names
  ) = get-ordered-values($fn);
  my Str $tme = create-topicmap-entries( $ordered-name-list, $sex,
                                         $real-names, $other-names
                                       );

  my Str $topicmap-text = qq:to/EOTXT/;
  #!/home/test/bin/sxml2xml.pl6
  #
  ---
  #option/xml-prelude/show:        0;
  #option/doctype/show:            0;

  output/filepath:                .;
  output/program/xml:             | xmllint --format - > $fn-tm;
  output/program/cat:             | cat > t.xml;

  ---
  \$topicMap version=2.0
	    xmlns=http://www.topicmaps.org/xtm/
	    xmlns:xsd=http://www.w3.org/2001/XMLSchema-datatypes
	    [

    \$!SxmlCore.comment [
    ==============================================================================
      Start of definitions
    ==============================================================================
    ]

    \$!SxmlCore.comment [ Classes to describe text types ]
    \$topic id=utf8 [ ]

    \$topic id='description' [
      \$instanceOf [ \$topicRef href='#utf8' [ ] ]
      \$name [
        \$type [ \$topicRef href=#description [ ] ]
        \$value [ Text to describe the value or variant of a topic ]
      ]
    ]

    \$!SxmlCore.comment [ Classes to describe human beings ]
    \$topic id=person [
      \$name [
        \$type [ \$topicRef href=#description [ ] ]
        \$value [ Human being with earth origins ]
      ]
    ]

    \$!SxmlCore.comment [ Classes to reference things, URL, IRI, URN ]
    \$topic id=IRI [
      \$name [
        \$type [ \$topicRef href=#description [ ] ]
        \$value [ Pointers to items using relative or absolute reference ]
      ]
    ]

    \$!SxmlCore.comment [ Subclass 'man' of person ]
    \$topic id=man [
      \$instanceOf [ \$topicRef href='#person' [ ] ]

    ]

    \$!SxmlCore.comment [ Subclass 'woman' of person ]
    \$topic id=woman [
      \$instanceOf [ \$topicRef href='#person' [ ] ]

    ]

    \$!SxmlCore.comment [ Classes used as a name type of a persons name topic ]
    \$!SxmlCore.comment [ Desciption of persons full name as subclass of utf8 ]
    \$topic id=full-name [
      \$instanceOf [ \$topicRef href='#utf8' [ ] ]
    ]

    \$!SxmlCore.comment [ Desciption of any persons other names
      as a subclass of full-name. Also used as a name type of a persons
      name topic
    ]
    \$topic id=other-name [
      \$instanceOf [ \$topicRef href='#full-name' [ ] ]
    ]

    \$!SxmlCore.comment [ Classes to describe any image ]
    \$topic id=image [
      \$name [
        \$type [ \$topicRef href='#description' [ ] ]
        \$value [ Picture of any type ]
      ]
    ]

    \$!SxmlCore.comment [ Pictures of a person ]
    \$topic id=person-image [
      \$instanceOf [ \$topicRef href='#image' [ ] ]
    ]


    \$!SxmlCore.comment [ Pictures of a persons face ]
    \$topic id=person-face [
      \$instanceOf [ \$topicRef href='#image' [ ] ]
    ]

  $tme
  ]
  EOTXT

  spurt( $fn-stm, $topicmap-text);
}

#-------------------------------------------------------------------------------
# Get names from the file in $fn and return an ordered list of names
# Program dies if file not found or not readable.
#
sub get-ordered-values ( $fn --> List ) {

  my @entries;
  my @names;
  my @other-names;
  my @real-names;
  my @sex;

  # Gather names info
  #
  if $fn.IO ~~ :r {
    my Str $names-text = slurp($fn);
    for $names-text.split(/\s*\n\s*/) -> $entry {

      # Skip empty lines
      #
      next if $entry ~~ /^\s*$/;

      @entries.push($entry);
    }
  }

  else {
    die "File '$fn' not found";
  }

  # Split up info, first sort all entries based on the lastname.
  #
  my $prev-entry;

  for @entries ==> sort {
    $^a ~~ m/ (<-[,]>*) /;              # Split on field separators
    my $nl-a = $/[0];                   # Take first field == name
    my @nl-a = $nl-a.split(/\s+/);      # Split on spaces
    @nl-a[*-1];                         # Return lastname
  } -> $entry {

say "E: $entry";

    (my $name, my $sex, my $real, my $other, *) = $entry.split(/ \s* ',' \s* /);

    # Make unique entries
    #
    next if ?$prev-entry and $prev-entry eq $entry;
    $prev-entry = $name;
    @names.push($name);
    @sex.push(lc $sex);
    @real-names.push($real); # When first is e.g. actor name or mostly used name
    @other-names.push($other);
  }

  return list( [@names], [@sex], [@real-names], [@other-names]);
}

#-------------------------------------------------------------------------------
#
sub create-topicmap-entries (
  Array $names, Array $sex, Array $real-names, $other-names
  --> Str ) {

  my Str $topicmap-text = qq:to/EOHEADER/;
    \$!SxmlCore.comment [
    ==============================================================================
      Start of names
    ==============================================================================
    ]

  EOHEADER

  # Go through all arrays using length of the names list. Remember created id's
  # to prevent clashes.
  #
  my %ids;
  for 0..($names.elems - 1) -> $index {

    my Str $name = $names[$index];
    my Str $other-name = $other-names[$index];

    # Use real-name so swap with name if defined
    #
    my Str $real-name;
    if ?$real-names[$index] {
      $real-name = $name;
      $name = $real-names[$index];
    }

    # Convert sexcode <m w ?> to <man woman person>
    #
    my Str $person-sex;
    if ?$sex[$index] {
      if $sex[$index] eq 'm' {
        $person-sex = 'man';
      }

      elsif $sex[$index] eq 'w' {
        $person-sex = 'woman';
      }

      else {
        $person-sex = 'person';
      }
    }

    else {
      $person-sex = 'person';
    }

    # Devise the id for the topic entry: 'Firstname something Lastname'
    # will become 'F.S.Lastname'.
    #
    my $id;
    my @id = $name.split(/\s+/);
    for @id[0 .. (@id.elems - 2)] -> $i {
      $i ~~ m/^(.)/;
      $id ~= uc $/[0] ~ '.' if ?$/[0];
    }
    $id ~= @id[*-1];

    # Generate another id if clash
    #
    my $count = 1;
    while %ids{$id}:exists {
      $id ~= '.' ~ $count.fmt('%02d');
      $count++;
    }

    %ids{$id} = 1;

    # Create variant structures from real(swapped with name)
    # and other name
    #
    my Str $variant = '';
    if ?$real-name {
      $variant ~= qq:to/EOVARIANT/;
          \$variant [ 
            \$resourceData datatype=#other-name [ $real-name ]
          ]
      EOVARIANT
    }

    if ?$other-name {
      my @onames = $other-name.split(';');
      for @onames -> $oname {
        $variant ~= qq:to/EOVARIANT/;
            \$variant [ 
              \$resourceData datatype=#other-name [ $oname ]
            ]
        EOVARIANT
      }
    }

    say "Processing: $name, $id";

    $topicmap-text ~= qq:to/EONAME/;
      \$topic id=$id [
        \$instanceOf [ \$topicRef href=#$person-sex [ ] ]
        \$type [ \$topicRef href=#full-name [ ] ]
        \$name [ \$value [ $name ]]
    $variant
      ]

    EONAME
  }

  return $topicmap-text;
}


=finish
#-------------------------------------------------------------------------------
# two parameter sort -> no swartsian transform!! Above sort now with one
# parameter and it sorts twice as fast!
#
sub sort-on-lastname ( Str $name-a, Str $name-b ) {

  $name-a ~~ m/ (<-[,]>*) /;
  my $nl-a = $/[0];

  $name-b ~~ m/ (<-[,]>*) /;
  my $nl-b = $/[0];

  my @nl-a = $nl-a.split(/\s+/);
  my @nl-b = $nl-b.split(/\s+/);

  return @nl-a[*-1] cmp @nl-b[*-1];
}
