package cc.mallet.share.mccallum.ner;

import cc.mallet.pipe.Pipe;
import cc.mallet.types.Instance;
import cc.mallet.types.LabelAlphabet;
import cc.mallet.types.LabelSequence;
import cc.mallet.types.Token;
import cc.mallet.types.TokenSequence;
import com.meaningcloud.LangRequest;
import java.util.Locale;
import java.util.regex.Pattern;

/* loaded from: input_file:cc/mallet/share/mccallum/ner/ConllNer2003Sentence2TokenSequence.class */
/**
 * Pipe that turns a sentence given as one {@code String} into a {@link TokenSequence}
 * (stored as the instance data) and a {@link LabelSequence} (stored as the instance target).
 *
 * <p>The input string is split on {@code "\n"}. Every non-empty line must consist of exactly
 * four separator-delimited fields: the word, a value emitted as a {@code T=} feature, a value
 * emitted as a {@code P=} feature, and the label. An empty line produces a boundary token
 * {@code -<S>-} with label {@code O}.
 *
 * <p>Labels matching {@code I-.*} are rewritten to start with {@code B} whenever the previous
 * line's label does not carry the same suffix (the text after the first two characters).
 */
public class ConllNer2003Sentence2TokenSequence extends Pipe {
    // NOTE(review): the declarations of the pre-existing fields below are intentionally left
    // untouched (names and modifiers); only private static members and private methods were
    // added. If Pipe is Serializable, changing field modifiers would alter the default
    // serialVersionUID -- confirm before tightening them.

    /** Word endings checked by the spelling features. */
    static final String[] endings = {"ing", "ed", "ogy", "s", "ly", "ion", "tion", "ity", "ies"};

    /** One compiled pattern per entry of {@link #endings}; filled by the static initializer. */
    static Pattern[] endingPatterns = new Pattern[endings.length];

    /**
     * Feature names indexed as {@code [sign][offset][ending]}; sign index 1 inserts a
     * {@code "-"} before the offset. Filled by the static initializer.
     */
    static final String[][][] endingNames = new String[2][3][endings.length];

    // NOTE(review): the separator comes from an unrelated library constant, which looks like a
    // decompiler substitution for a plain string literal -- confirm its value and inline it.
    private static final String FIELD_SEPARATOR = LangRequest.DEFAULT_SELECTION;

    /** Placeholder used for word, tag and phrase on an empty input line. */
    private static final String BOUNDARY_MARKER = "-<S>-";

    /** Matches labels of the form {@code I-...}. */
    private static final Pattern INSIDE_LABEL = Pattern.compile("I-.*");

    /**
     * Patterns tried in order by {@link #collapseDigits(String)}; the first full match wins.
     * Kept parallel to {@link #COLLAPSE_REPLACEMENTS}.
     */
    private static final Pattern[] COLLAPSE_PATTERNS = {
        Pattern.compile("19\\d\\d"),
        Pattern.compile("19\\d\\ds"),
        Pattern.compile("19\\d\\d-\\d+"),
        Pattern.compile("\\d+\\\\/\\d"),
        Pattern.compile("\\d[\\d,\\.]*"),
        // NOTE(review): "\\d--d" looks like a typo of the next pattern; kept for compatibility.
        Pattern.compile("19\\d\\d-\\d\\d-\\d--d"),
        Pattern.compile("19\\d\\d-\\d\\d-\\d\\d"),
        Pattern.compile(".*-led"),
        Pattern.compile(".*-sponsored"),
    };

    /** Replacement token for the pattern at the same index of {@link #COLLAPSE_PATTERNS}. */
    private static final String[] COLLAPSE_REPLACEMENTS = {
        "<YEAR>",
        "<YEARDECADE>",
        "<YEARSPAN>",
        "<FRACTION>",
        "<DIGITS>",
        "<DATELINEDATE>",
        "<DATELINEDATE>",
        "<LED>",
        // NOTE(review): "-sponsored" also maps to <LED>; possibly a copy/paste slip -- confirm.
        "<LED>",
    };

    // Build the shared lookup tables exactly once instead of on every constructor call.
    static {
        for (int e = 0; e < endings.length; e++) {
            endingPatterns[e] = Pattern.compile(".*" + endings[e] + "$");
            for (int offset = 0; offset < 3; offset++) {
                for (int sign = 0; sign < 2; sign++) {
                    endingNames[sign][offset][e] =
                            "W" + (sign == 1 ? "-" : "") + offset + "=<END" + endings[e] + ">";
                }
            }
        }
    }

    /** When true, the processed word/label lines are stored as the instance source. */
    boolean saveSource;
    /** Not read anywhere in this class. */
    boolean doConjunctions;
    /** Emit the second input field as a {@code T=} feature. */
    boolean doTags;
    /** Emit the third input field as a {@code P=} feature. */
    boolean doPhrases;
    /** Emit word-ending features from {@link #endings}. */
    boolean doSpelling;
    /** Replace years, numbers, dates and similar words by placeholder tokens. */
    boolean doDigitCollapses;
    /** Lower-case every word. */
    boolean doDowncasing;

    /**
     * Creates a pipe with tag features, phrase features and digit collapsing enabled;
     * equivalent to {@code new ConllNer2003Sentence2TokenSequence(true)}.
     */
    public ConllNer2003Sentence2TokenSequence() {
        this(true);
    }

    /**
     * Creates a pipe with one of two fixed configurations.
     *
     * @param z if {@code true}, tags, phrases and digit collapsing are enabled and downcasing is
     *     disabled; if {@code false}, all of those are disabled and only downcasing is enabled.
     *     Spelling features, conjunctions and source saving are disabled in both cases.
     */
    public ConllNer2003Sentence2TokenSequence(boolean z) {
        super(null, new LabelAlphabet());
        this.saveSource = false;
        this.doConjunctions = false;
        this.doSpelling = false;
        this.doTags = z;
        this.doPhrases = z;
        this.doDigitCollapses = z;
        this.doDowncasing = !z;
    }

    /**
     * Replaces the instance's {@code String} data by a token sequence and sets a matching label
     * sequence as its target.
     *
     * @param instance instance whose data is the newline-separated sentence text
     * @return the same instance, with data, target and (if {@code saveSource}) source replaced
     * @throws IllegalStateException if a non-empty line does not split into exactly four fields
     */
    @Override // cc.mallet.pipe.Pipe
    public Instance pipe(Instance instance) {
        String[] lines = ((String) instance.getData()).split("\n");
        TokenSequence tokens = new TokenSequence(lines.length);
        LabelSequence labels = new LabelSequence((LabelAlphabet) getTargetAlphabet(), lines.length);
        // StringBuffer (not StringBuilder) is kept because the object itself becomes the source.
        StringBuffer source = this.saveSource ? new StringBuffer() : null;
        String previousLabel = "NOLABEL";

        for (String line : lines) {
            String word;
            String tag;
            String phrase;
            String label;
            if (line.length() != 0) {
                String[] fields = line.split(FIELD_SEPARATOR);
                if (fields.length != 4) {
                    throw new IllegalStateException("Line \"" + line + "\" doesn't have four elements");
                }
                word = fields[0];
                tag = fields[1];
                phrase = fields[2];
                label = fields[3];
            } else {
                word = BOUNDARY_MARKER;
                tag = BOUNDARY_MARKER;
                phrase = BOUNDARY_MARKER;
                label = "O";
            }

            if (this.doDigitCollapses) {
                word = collapseDigits(word);
            }
            if (this.doDowncasing) {
                // Locale.ROOT keeps the result independent of the JVM's default locale.
                word = word.toLowerCase(Locale.ROOT);
            }

            Token token = new Token(word);
            if (this.doSpelling) {
                for (int e = 0; e < endings.length; e++) {
                    if (endingPatterns[e].matcher(word).matches()) {
                        token.setFeatureValue(endingNames[0][0][e], 1.0d);
                    }
                }
            }
            if (this.doTags) {
                token.setFeatureValue("T=" + tag, 1.0d);
            }
            if (this.doPhrases) {
                token.setFeatureValue("P=" + phrase, 1.0d);
            }

            String outputLabel = toBeginLabelIfNeeded(previousLabel, label);
            // The comparison on the next line must see the label as read, not the rewritten one.
            previousLabel = label;

            // The token is added as a Token; casting it to TokenSequence could never succeed.
            tokens.add(token);
            labels.add(outputLabel);
            if (source != null) {
                source.append(word);
                source.append(FIELD_SEPARATOR);
                source.append(outputLabel);
                source.append("\n");
            }
        }

        instance.setData(tokens);
        instance.setTarget(labels);
        if (source != null) {
            instance.setSource(source);
        }
        return instance;
    }

    /** Returns the placeholder for the first collapse pattern fully matching {@code word}, else {@code word}. */
    private static String collapseDigits(String word) {
        for (int p = 0; p < COLLAPSE_PATTERNS.length; p++) {
            if (COLLAPSE_PATTERNS[p].matcher(word).matches()) {
                return COLLAPSE_REPLACEMENTS[p];
            }
        }
        return word;
    }

    /** Rewrites an {@code I-X} label to {@code B-X} unless the previous label has the same suffix after its first two characters. */
    private static String toBeginLabelIfNeeded(String previousLabel, String label) {
        if (!INSIDE_LABEL.matcher(label).matches()) {
            return label;
        }
        boolean sameSuffixAsPrevious =
                previousLabel.length() >= 3 && previousLabel.substring(2).equals(label.substring(2));
        return sameSuffixAsPrevious ? label : "B" + label.substring(1);
    }
}
