#!/usr/local/bin/perl
# file: mkmnemos

# Filter: read table lines of the form
#   " <mnemonic> <hex code> <CHARACTER NAME>[ (<comment>)]"
# from the files named on the command line (or stdin) and print one line
# per accepted entry:
#   "<mnemonic>",<tabs>UCS2_<identifier>,<tabs>/* <hex code> */
# Lines that do not match the pattern are silently ignored.  The comments
# below suggest the input is the RFC 1345 character table and that the
# identifiers are meant to match Unicode 1.1 names -- TODO confirm.
while (<>)
{
  # Leading space, mnemonic, hex code, name (everything up to an optional
  # " (...)" trailer), then the line's newline.
  if (/^ ([^ ]+) +([^ ]+) +([^(]+)( \(.*)?\n/)
  {
    my ($mnemo, $ucs2, $name) = ($1, $2, $3);

    # Escape double quotes: the mnemonic is printed inside "...".
    $mnemo =~ s/"/\\"/g;
    # Lower-case the name and turn spaces and hyphens into underscores.
    $name =~ tr/A-Z -/a-z__/;

    # Mnemonic overrides for two specific code points.
    $mnemo = 'n->' if ($ucs2 eq '1e4b');
    $mnemo = 's.-.' if ($ucs2 eq '1e69');

    # characters deleted in unicode 1.1:
    next if ($name =~ /^greek_small_letter_(stigma|digamma|koppa|sampi)$/);

    # not in 1.1_Character_List.txt (what happened to these?):
    next if ($name =~ /^double_low_line$/);
    my $code = hex ($ucs2);
    next if (0x0080 <= $code && $code <= 0x009f);	# 0080..009f
    next if (($code & 0xf000) == 0xe000);		# e000..efff

    $name =~ s/^degree_centigrade$/degrees_centigrade/;
    $name =~ s/^angstroem_sign$/angstrom_sign/;
    $name =~ s/^latin_small_ligature_ft$/latin_small_ligature_long_s_t/;
    # Keep whatever follows "ae" (e.g. "_with_acute"): that is capture
    # group 3.  There is no group 4, so referring to it dropped the suffix.
    $name =~ s/^(latin_(capital|small)_letter_)ae(.*)$/${1}a_e$3/;
    $name =~ s/^(latin_small_letter_)i_dotless$/${1}dotless_i/;
    # greek:
    #  RFC 1345 `acute' == UCS2 `tonos'?
    #  RFC 1345 `diaeresis' == UCS2 `dialytika'?
    $name =~ s/^(greek_(capital|small)_letter_.*_with_)acute_and_diaeresis$/${1}dialytika_and_tonos/;
    $name =~ s/^acute_accent_and_diaeresis$/greek_spacing_diaeresis_tonos/;
    $name =~ s/^(greek_(capital|small)_letter_.*_)with_acute$/${1}tonos/;
    $name =~ s/^(greek_(capital|small)_letter_.*_with_)diaeresis$/${1}dialytika/;
    $name =~ s/^greek_iota_below$/greek_spacing_iota_below/;
    $name =~ s/^superscript_arabic_letter_alef$/arabic_alef_above/;
    $name =~ s/^parenthesized_hangul_ju$/parenthesized_hangul_cieuc_u/;

    # matched via their hex codes:
    $name =~ s/^greek_dasia_and_acute_accent$/greek_small_letter_alpha_with_psili/;
    $name =~ s/^greek_psili_and_acute_accent$/greek_small_letter_alpha_with_dasia/;
    $name =~ s/^greek_diaeresis_and_varia$/greek_small_letter_alpha_with_psili_and_perispomeni/;
    $name =~ s/^greek_diaeresis_and_perispomeni$/greek_small_letter_alpha_with_dasia_and_perispomeni/;
    $name =~ s/^ideographic_ditto_mark$/japanese_industrial_standard_symbol/;

    # characters unified:
    $name =~ s/^greek_non_spacing_dasia_pneumata$/non_spacing_reversed_comma_above/;
    $name =~ s/^greek_non_spacing_psili_pneumata$/non_spacing_comma_above/;

    # Control-character names: normalise four spellings first, then prefix
    # every name in the list below with "symbol_for_".
    $name =~ s/^character(_tabulation)$/horizontal$1/;
    $name =~ s/^line(_tabulation)$/vertical$1/;
    $name =~ s/^(data)(link_escape)$/${1}_$2/;
    $name =~ s/^(sync)(ronous_idle)$/${1}h$2/;
    $name =~ s/^(null|start_of_heading|start_of_text|end_of_text|end_of_transmission|enquiry|acknowledge|bell|backspace|horizontal_tabulation|line_feed|vertical_tabulation|form_feed|carriage_return|shift_out|shift_in|data_link_escape|device_control_one|device_control_two|device_control_three|device_control_four|negative_acknowledge|synchronous_idle|end_of_transmission_block|cancel|end_of_medium|substitute|escape|file_separator|group_separator|record_separator|unit_separator|delete)$/symbol_for_$1/;

    # Six code points get a positional-form suffix appended to their name.
    $name .= '_isolated_form' if ($ucs2 eq 'fef5' || $ucs2 eq 'fef7' || $ucs2 eq 'fef9');
    $name .= '_final_form' if ($ucs2 eq 'fef6' || $ucs2 eq 'fef8' || $ucs2 eq 'fefa');

    # Pad with tabs so the columns line up.  The repeat count is truncated
    # to an integer, and a zero or negative count yields no tabs.
    print "  \"$mnemo\",", ("\t" x ((18-length ($mnemo))/8));
    print "UCS2_$name,", ("\t" x ((65-length ($name))/8));
    print "/* $ucs2 */\n";
  }
}
