/*
 * Decompiled with CFR 0.152.
 */
package gov.nih.nlm.nls.lvg.Flows;

import com.ibm.icu.text.Normalizer;
import gov.nih.nlm.nls.lvg.Flows.Transformation;
import gov.nih.nlm.nls.lvg.Lib.Configuration;
import gov.nih.nlm.nls.lvg.Lib.GlobalBehavior;
import gov.nih.nlm.nls.lvg.Lib.LexItem;
import gov.nih.nlm.nls.lvg.Util.UnicodeUtil;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Hashtable;
import java.util.StringTokenizer;
import java.util.Vector;

/**
 * Transformation that strips diacritics from a term, one {@code char} at a time.
 *
 * <p>For each character the following rule is applied, in order:
 * <ol>
 *   <li>if the character is a key of the supplied diacritic map, it is
 *       replaced by the mapped character (operation tag {@code "MP"});</li>
 *   <li>otherwise, if its NFD decomposition is longer than one {@code char}
 *       and contains a character in U+0300..U+036F, it is replaced by the
 *       first {@code char} of that decomposition (operation tag
 *       {@code "NFD"});</li>
 *   <li>otherwise it is left unchanged (operation tag {@code "NO"}).</li>
 * </ol>
 */
public class ToStripDiacritics
extends Transformation
implements Cloneable {
    private static final String INFO = "Strip Diacritics";
    private static final String NO_OPERATION = "NO";
    private static final String MAPPING = "MP";
    private static final String NORM_NFD = "NFD";
    /** Lowest numeric value treated as a diacritic by {@link #ContainDiacritics} (U+0300). */
    private static final int DIACRITIC_RANGE_START = 0x0300;
    /** Highest numeric value treated as a diacritic by {@link #ContainDiacritics} (U+036F). */
    private static final int DIACRITIC_RANGE_END = 0x036F;

    /**
     * Strips diacritics from the source term of a LexItem.
     *
     * @param in           the input LexItem; its source term is transformed
     * @param diacriticMap explicit character-to-character replacements that
     *                     take precedence over NFD-based stripping
     * @param detailsFlag  when true, the details string "Strip Diacritics" is
     *                     attached to the result; otherwise null is passed
     * @param mutateFlag   when true, a per-character operation string is
     *                     attached to the result; otherwise null is passed
     * @return a vector holding exactly one resulting LexItem
     */
    public static Vector<LexItem> Mutate(LexItem in, Hashtable<Character, Character> diacriticMap, boolean detailsFlag, boolean mutateFlag) {
        return ToStripDiacritics.StripDiacritics(in, diacriticMap, INFO, detailsFlag, mutateFlag);
    }

    /**
     * Loads the diacritic mapping table from the file named by the
     * configuration values {@code LVG_DIR} + {@code LVG_DIACRITICS_FILE}.
     *
     * <p>The file is read as UTF-8. Empty lines and lines starting with
     * {@code '#'} are skipped. Every other line is split on {@code '|'}: the
     * first field is converted with {@code UnicodeUtil.UnicodeHexToNum} to the
     * character to be replaced, and the first character of the second field is
     * the replacement. A line is rejected with a warning on stderr (and
     * loading continues) when it has fewer than two fields, when the source
     * character is ASCII, or when the replacement is not ASCII.
     *
     * <p>Any exception while opening or reading the file is reported on
     * stderr; the entries loaded so far are still returned. The reader is
     * always closed.
     *
     * @param config configuration supplying the directory and file name
     * @return the mapping table; possibly empty or partial, never null
     */
    public static Hashtable<Character, Character> GetDiacriticMapFromFile(Configuration config) {
        String fName = config.GetConfiguration("LVG_DIR") + config.GetConfiguration("LVG_DIACRITICS_FILE");
        Hashtable<Character, Character> diacriticMap = new Hashtable<Character, Character>();
        BufferedReader in = null;
        try {
            in = new BufferedReader(new InputStreamReader(new FileInputStream(fName), "UTF-8"));
            String line = null;
            while ((line = in.readLine()) != null) {
                if (line.length() <= 0 || line.charAt(0) == '#') {
                    continue;
                }
                StringTokenizer buf = new StringTokenizer(line, "|");
                // Without this guard nextToken() would throw and the outer
                // catch would abandon every remaining line of the file.
                if (buf.countTokens() < 2) {
                    ToStripDiacritics.WarnIllegalFormat(fName, line);
                    continue;
                }
                char inChar = (char)UnicodeUtil.UnicodeHexToNum(buf.nextToken());
                char mapChar = buf.nextToken().charAt(0);
                // Only non-ASCII -> ASCII mappings are accepted.
                if (UnicodeUtil.IsAsciiChar(inChar) || !UnicodeUtil.IsAsciiChar(mapChar)) {
                    ToStripDiacritics.WarnIllegalFormat(fName, line);
                    continue;
                }
                diacriticMap.put(Character.valueOf(inChar), Character.valueOf(mapChar));
            }
        }
        catch (Exception e) {
            System.err.println("** ERR: problem of opening/reading diacritics file: '" + fName + "'.");
            System.err.println("Exception: " + e.toString());
        }
        finally {
            // Close on every path so a failed read does not leak the file handle.
            if (in != null) {
                try {
                    in.close();
                }
                catch (Exception e) {
                    System.err.println("** ERR: problem of closing diacritics file: '" + fName + "'.");
                    System.err.println("Exception: " + e.toString());
                }
            }
        }
        return diacriticMap;
    }

    /**
     * Strips the diacritic from a single character.
     *
     * @param inChar       the character to transform
     * @param diacriticMap explicit replacements, consulted first
     * @return the mapped character if {@code inChar} is a key of the map;
     *         otherwise the first {@code char} of its NFD decomposition when
     *         that decomposition is longer than one {@code char} and contains
     *         a character in U+0300..U+036F; otherwise {@code inChar} itself
     */
    public static char StripDiacritic(char inChar, Hashtable<Character, Character> diacriticMap) {
        // Hashtable never stores null values, so a null result means "no mapping".
        Character mapped = diacriticMap.get(Character.valueOf(inChar));
        if (mapped != null) {
            return mapped.charValue();
        }
        String decomposed = ToStripDiacritics.DecomposeWithDiacritics(inChar);
        return decomposed != null ? decomposed.charAt(0) : inChar;
    }

    /**
     * Strips diacritics from every character of a string.
     *
     * <p>Characters for which {@code UnicodeUtil.IsAsciiChar} is true are
     * copied unchanged without consulting the map; all others go through
     * {@link #StripDiacritic(char, Hashtable)}.
     *
     * @param inStr        the string to transform
     * @param diacriticMap explicit replacements for non-ASCII characters
     * @return a string of the same length with diacritics stripped
     */
    public static String StripDiacritics(String inStr, Hashtable<Character, Character> diacriticMap) {
        StringBuilder buffer = new StringBuilder(inStr.length());
        for (int i = 0; i < inStr.length(); ++i) {
            char curChar = inStr.charAt(i);
            if (UnicodeUtil.IsAsciiChar(curChar)) {
                buffer.append(curChar);
            } else {
                buffer.append(ToStripDiacritics.StripDiacritic(curChar, diacriticMap));
            }
        }
        return buffer.toString();
    }

    /**
     * Test driver: loads the diacritic map from the "data.config.lvg"
     * configuration, runs the flow on the test string (default
     * "resum\u00e9") and prints the results.
     *
     * @param args command-line arguments forwarded to {@code GetTestStr}
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration("data.config.lvg", true);
        String testStr = ToStripDiacritics.GetTestStr(args, "resum\u00e9");
        Hashtable<Character, Character> diacriticMap = ToStripDiacritics.GetDiacriticMapFromFile(conf);
        LexItem in = new LexItem(testStr);
        Vector<LexItem> outs = ToStripDiacritics.Mutate(in, diacriticMap, true, true);
        ToStripDiacritics.PrintResults(in, outs);
    }

    /**
     * Core of the flow: transforms the source term of {@code in} and records,
     * when requested, which operation was applied to each character.
     *
     * <p>Unlike {@link #StripDiacritics(String, Hashtable)}, this method has
     * no ASCII shortcut: every character is looked up in the map first.
     *
     * @param in           the input LexItem
     * @param diacriticMap explicit replacements, consulted first
     * @param infoStr      details text used when {@code detailsFlag} is true
     * @param detailsFlag  whether to attach {@code infoStr} as details
     * @param mutateFlag   whether to attach the per-character operation string
     *                     ("NO", "MP" or "NFD", each followed by the global
     *                     field separator)
     * @return a vector holding the single resulting LexItem
     */
    private static Vector<LexItem> StripDiacritics(LexItem in, Hashtable<Character, Character> diacriticMap, String infoStr, boolean detailsFlag, boolean mutateFlag) {
        String details = detailsFlag ? infoStr : null;
        String inStr = in.GetSourceTerm();
        String fs = GlobalBehavior.GetFieldSeparator();
        StringBuilder termBuf = new StringBuilder(inStr.length());
        // Built with a StringBuilder to avoid quadratic String concatenation.
        StringBuilder mutateBuf = mutateFlag ? new StringBuilder() : null;
        for (int i = 0; i < inStr.length(); ++i) {
            char curChar = inStr.charAt(i);
            char outChar = curChar;
            String operation = NO_OPERATION;
            Character mapped = diacriticMap.get(Character.valueOf(curChar));
            if (mapped != null) {
                outChar = mapped.charValue();
                operation = MAPPING;
            } else {
                String decomposed = ToStripDiacritics.DecomposeWithDiacritics(curChar);
                if (decomposed != null) {
                    outChar = decomposed.charAt(0);
                    operation = NORM_NFD;
                }
            }
            termBuf.append(outChar);
            if (mutateBuf != null) {
                mutateBuf.append(operation).append(fs);
            }
        }
        String mutate = mutateBuf != null ? mutateBuf.toString() : null;
        Vector<LexItem> out = new Vector<LexItem>();
        // NOTE(review): 40 is presumably this flow's identifier and the two -1L
        // values presumably mean "leave category/inflection unchanged" --
        // confirm against Transformation.UpdateLexItem.
        LexItem temp = ToStripDiacritics.UpdateLexItem(in, out == null ? null : termBuf.toString(), 40, -1L, -1L, details, mutate);
        out.addElement(temp);
        return out;
    }

    /**
     * Returns the NFD decomposition of {@code c} when it is longer than one
     * {@code char} and contains a character in U+0300..U+036F; otherwise
     * returns null, meaning NFD-based stripping does not apply to {@code c}.
     */
    private static String DecomposeWithDiacritics(char c) {
        String normStr = Normalizer.normalize((int)c, Normalizer.NFD);
        if (normStr.length() > 1 && ToStripDiacritics.ContainDiacritics(normStr)) {
            return normStr;
        }
        return null;
    }

    /** Prints the two-line "illegal format" warning for a rejected line of the diacritics file. */
    private static void WarnIllegalFormat(String fName, String line) {
        System.err.println("** Warning: Illegal format in diacritics file: '" + fName + "'.");
        System.err.println(line);
    }

    /**
     * Tells whether the string contains at least one character whose numeric
     * value (per {@code UnicodeUtil.CharToNum}) lies in 0x0300..0x036F.
     */
    private static boolean ContainDiacritics(String inStr) {
        for (int i = 0; i < inStr.length(); ++i) {
            int curInt = UnicodeUtil.CharToNum(inStr.charAt(i));
            if (curInt >= DIACRITIC_RANGE_START && curInt <= DIACRITIC_RANGE_END) {
                return true;
            }
        }
        return false;
    }
}

