package edu.stanford.nlp.trees.international.arabic;

import edu.stanford.nlp.io.EncodingPrintWriter;
import edu.stanford.nlp.process.Function;
import edu.stanford.nlp.stats.Counter;
import edu.stanford.nlp.util.StringUtils;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashMap;

/* loaded from: input_file:edu/stanford/nlp/trees/international/arabic/Buckwalter.class */
/**
 * Character-by-character converter between Unicode Arabic text and its
 * Buckwalter (ASCII) transliteration.
 *
 * <p>The direction used by {@link #apply(String)} is fixed at construction time;
 * {@link #buckwalterToUnicode(String)} and {@link #unicodeToBuckwalter(String)}
 * always convert in their named direction. Characters with no table entry are
 * copied to the output unchanged and tallied in an internal counter, which
 * {@link #main(String[])} prints to standard error.
 *
 * <p>The two tables are paired by index and contain duplicates on both sides.
 * Both lookup maps are filled in table order, so a later entry replaces an
 * earlier one with the same key:
 * <ul>
 *   <li>Unicode to Buckwalter: U+0625, U+0623 and U+0624 map to {@code 'I'},
 *       {@code 'O'} and {@code 'W'} (not {@code '<'}, {@code '>'}, {@code '&'}).</li>
 *   <li>Buckwalter to Unicode: {@code ','} maps to U+066C, {@code '*'} to U+066D,
 *       {@code '.'} to U+06D4, {@code '0'..'9'} to U+0660..U+0669 and
 *       {@code '"'} to U+00BB.</li>
 * </ul>
 * NOTE(review): because of the {@code '*'} override, U+0630 does not survive a
 * Unicode -> Buckwalter -> Unicode round trip. Confirm whether that is intended
 * before changing the tables; existing behaviour is preserved here.
 *
 * <p>Instances mutate the unmappable-character counter on every conversion and
 * perform no synchronization.
 */
public class Buckwalter implements Function<String, String> {
    /** Unicode-side characters; entry {@code i} pairs with {@code BUCK_CHARS[i]}. */
    private static final char[] ARABIC_CHARS = {
        0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
        0x0628, 0x0629, 0x062A, 0x062B,
        0x062C, 0x062D, 0x062E, 0x062F,
        0x0630, 0x0631, 0x0632, 0x0633,
        0x0634, 0x0635, 0x0636, 0x0637, 0x0638, 0x0639, 0x063A,
        0x0640, 0x0641, 0x0642, 0x0643,
        0x0644, 0x0645, 0x0646, 0x0647,
        0x0648, 0x0649, 0x064A, 0x064B,
        0x064C, 0x064D, 0x064E, 0x064F,
        0x0650, 0x0651, 0x0652,
        0x0670, 0x0671,
        0x067E, 0x0686, 0x0698, 0x06A4, 0x06AF,
        0x0625, 0x0623, 0x0624,
        0x060C, 0x061B, 0x061F,
        0x066A, 0x066B, 0x066C, 0x066D, 0x06D4,
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
        0x0660, 0x0661, 0x0662, 0x0663, 0x0664, 0x0665, 0x0666, 0x0667, 0x0668, 0x0669,
        0x00AB, 0x00BB
    };

    /** Buckwalter-side characters; entry {@code i} pairs with {@code ARABIC_CHARS[i]}. */
    private static final char[] BUCK_CHARS = {
        '\'', '|', '>', '&', '<', '}', 'A',
        'b', 'p', 't', 'v',
        'j', 'H', 'x', 'd',
        '*', 'r', 'z', 's',
        '$', 'S', 'D', 'T', 'Z', 'E', 'g',
        '_', 'f', 'q', 'k',
        'l', 'm', 'n', 'h',
        'w', 'Y', 'y', 'F',
        'N', 'K', 'a', 'u',
        'i', '~', 'o',
        '`', '{',
        'P', 'J', 'R', 'V', 'G',
        'I', 'O', 'W',
        ',', ';', '?',
        '%', '.', ',', '*', '.',
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
        '\"', '\"'
    };

    /** Compile-time switch: when true, unmappable characters are counted and reported by main. */
    private static final boolean DEBUG = true;

    /** Compile-time switch: when true, characters below 127 bypass the Unicode-to-Buckwalter table. */
    private static final boolean PASS_ASCII_IN_UNICODE = true;

    /** Direction used by {@link #apply(String)}: true means Unicode to Buckwalter. */
    private final boolean u2b;

    /** Unicode character to Buckwalter character. */
    private final HashMap<Character, Character> a2b;

    /** Buckwalter character to Unicode character. */
    private final HashMap<Character, Character> b2a;

    /** Tally of input characters that had no mapping, keyed as {@code c[U+XXXX]}. */
    private final Counter<String> unmappable;

    /**
     * Creates a converter whose {@link #apply(String)} maps Buckwalter to Unicode.
     */
    public Buckwalter() {
        this(false);
    }

    /**
     * Creates a converter and builds both lookup maps from the paired tables.
     *
     * @param z direction for {@link #apply(String)}: {@code true} for Unicode to
     *          Buckwalter, {@code false} for Buckwalter to Unicode
     * @throws RuntimeException if the two tables differ in length
     */
    public Buckwalter(boolean z) {
        if (ARABIC_CHARS.length != BUCK_CHARS.length) {
            throw new RuntimeException("Buckwalter: Bad char arrays");
        }
        this.a2b = new HashMap<>(ARABIC_CHARS.length);
        this.b2a = new HashMap<>(BUCK_CHARS.length);
        for (int i = 0; i < ARABIC_CHARS.length; i++) {
            // valueOf instead of the deprecated Character constructor.
            Character arabic = Character.valueOf(ARABIC_CHARS[i]);
            Character buck = Character.valueOf(BUCK_CHARS[i]);
            // put() overwrites: for duplicated keys the later table entry wins.
            this.a2b.put(arabic, buck);
            this.b2a.put(buck, arabic);
        }
        this.unmappable = new Counter<>();
        this.u2b = z;
    }

    /**
     * Converts {@code str} in the direction selected at construction time.
     *
     * @param str text to convert
     * @return the converted text, with unmappable characters passed through
     */
    @Override // edu.stanford.nlp.process.Function
    public String apply(String str) {
        return convert(str, this.u2b);
    }

    /**
     * Converts Buckwalter transliteration to Unicode.
     *
     * @param str text to convert
     * @return the converted text, with unmappable characters passed through
     */
    public String buckwalterToUnicode(String str) {
        return convert(str, false);
    }

    /**
     * Converts Unicode to Buckwalter transliteration.
     *
     * @param str text to convert
     * @return the converted text, with unmappable characters passed through
     */
    public String unicodeToBuckwalter(String str) {
        return convert(str, true);
    }

    /**
     * Maps each UTF-16 code unit of {@code str} through the table for the
     * requested direction.
     *
     * @param str text to convert
     * @param z   {@code true} for Unicode to Buckwalter, {@code false} for the reverse
     * @return a string of the same length as {@code str}
     */
    private String convert(String str, boolean z) {
        final int length = str.length();
        final StringBuilder result = new StringBuilder(length);
        for (int i = 0; i < length; i++) {
            final char in = str.charAt(i);
            // Kept as a Character (never unboxed in a ternary) so a missing
            // table entry shows up as null rather than a NullPointerException.
            final Character out;
            if (!z) {
                out = this.b2a.get(in);
            } else if (PASS_ASCII_IN_UNICODE && in < 127) {
                out = Character.valueOf(in);
            } else {
                out = this.a2b.get(in);
            }
            if (out != null) {
                result.append(out.charValue());
                continue;
            }
            if (DEBUG) {
                this.unmappable.incrementCount(describe(in));
            }
            result.append(in);
        }
        return result.toString();
    }

    /** Formats a character as itself followed by its zero-padded code unit, e.g. {@code x[U+0078]}. */
    private static String describe(char c) {
        String hex = Integer.toString(c, 16).toUpperCase();
        return c + "[U+" + StringUtils.padLeft(hex, 4, '0') + "]";
    }

    /**
     * Command-line entry point. The first argument selects the direction
     * ({@code -a2b} or {@code -b2a}). Any further arguments are converted and
     * printed as {@code input -> output}; with no further arguments, lines are
     * read from standard input as UTF-8 and the converted lines are printed.
     * A summary of unmappable characters is then written to standard error.
     *
     * @param strArr command-line arguments
     * @throws IOException if reading standard input fails
     */
    public static void main(String[] strArr) throws IOException {
        final boolean toBuckwalter = strArr.length >= 1 && strArr[0].equals("-a2b");
        final boolean toUnicode = strArr.length >= 1 && strArr[0].equals("-b2a");
        if (!toBuckwalter && !toUnicode) {
            System.err.println("usage: java Buckwalter [-a2b|-b2a] words+ OR, as a filter, just [-a2b|-b2a]");
            return;
        }
        Buckwalter buckwalter = new Buckwalter(toBuckwalter);
        if (strArr.length == 1) {
            // Filter mode. The reader is deliberately not closed: closing it
            // would close System.in for any code that invokes main in-process.
            BufferedReader stdin = new BufferedReader(new InputStreamReader(System.in, "utf-8"));
            String line;
            while ((line = stdin.readLine()) != null) {
                EncodingPrintWriter.out.println(buckwalter.apply(line), "utf-8");
            }
        } else {
            for (int i = 1; i < strArr.length; i++) {
                EncodingPrintWriter.out.println(strArr[i] + " -> " + buckwalter.apply(strArr[i]), "utf-8");
            }
        }
        if (DEBUG) {
            if (buckwalter.unmappable.keySet().isEmpty()) {
                EncodingPrintWriter.err.println("All characters successfully converted!", "utf-8");
            } else {
                EncodingPrintWriter.err.println("Characters that could not be converted [passed through!]:", "utf-8");
                EncodingPrintWriter.err.println(buckwalter.unmappable.toString(), "utf-8");
            }
        }
    }
}
