package com.rapidminer.operator.preprocessing.ie.tokenizer.tools;

import cern.colt.matrix.impl.AbstractFormatter;
import de.unidortmund.pg520.fileworker.app.simpleregex.SimpleRegexFinder;
import java.util.ArrayList;

/* loaded from: input_file:com/rapidminer/operator/preprocessing/ie/tokenizer/tools/SentenceSplitter.class */
/**
 * Splits running text into sentences using token-level heuristics.
 *
 * <p>The input is broken into whitespace-separated tokens. Every token that ends in a
 * "suspicious" character (see {@link #suspicious}) is inspected together with its direct
 * neighbours to decide whether it closes a sentence. Sentence boundaries are marked by
 * appending {@link #end_tag}; with the default tag this puts each sentence on its own line
 * in the returned text.
 *
 * <p>An instance keeps the tokens of the text being processed in a field, so a single
 * instance must not be used from several threads at once.
 */
public class SentenceSplitter {
    /** Scratch list holding the tokens of the text currently being processed. */
    private final ArrayList<String> contents = new ArrayList<>();
    private final DictionaryLookup abbrevations = new DictionaryLookup("../data/dictionaries/abbreviations.txt");
    private final DictionaryLookup months = new DictionaryLookup("../data/dictionaries/months.txt");
    private final DictionaryLookup datepreq = new DictionaryLookup("../data/dictionaries/dateprequisites.txt");
    private final SimpleRegexFinder finder = new SimpleRegexFinder();

    // NOTE(review): the runs of '?' inside several character classes below look like non-ASCII
    // letters that were lost to an encoding problem (presumably German umlauts). As written
    // they match a literal '?'. Confirm against the original source before changing them.

    /** Token ends with a character that may close a sentence or mark a wrapped word. */
    static String suspicious = "[-!.?:)]$";
    /** Token ends with a hyphen. */
    static String wrapped_end = "[-]$";
    /** Token ends with an upper-case letter followed by a hyphen. */
    static String allcaps_wrapped = "[A-Z??????][-]$";
    /** Token starts with at least two upper-case letters. */
    static String allcaps_begins = "^[A-Z??????][A-Z??????]+";
    /** Token ends with a dot. */
    static String ends_dot = "[.]$";
    /** Token starts with a lower-case letter. */
    static String begins_small = "^[a-z????????]";
    /** Token starts with an upper-case letter. */
    static String begins_large = "^[A-Z??????]";
    /** Token contains a dot that has word characters on both sides. */
    static String innerdots = "[a-z????????A-Z??????0-9_]+[.][a-z????????A-Z??????0-9_,]+";
    /** Token contains digits directly followed by a dot. */
    static String number = "[0-9]+[.]";
    /** Token starts with a digit. */
    static String begins_number = "^[0-9]";
    /** Token ends with a question or exclamation mark. */
    static String sentence_end = "[?!]$";
    /** Token ends with a colon. */
    static String direct_speech = "[:]$";
    /** Token starts with an opening parenthesis. */
    static String begins_brace = "^[(]";
    /** Token ends with a closing parenthesis. */
    static String ends_brace = "[)]$";

    /** Bare sentence-end marker; {@link #cleanOutput} strips it from the end of each line. */
    public static String single_end_tag = "<end/>";
    /** Text appended after a token that closes a sentence; by default the marker plus a line break. */
    public static String end_tag = single_end_tag + System.getProperty("line.separator");

    /**
     * Splits the given text into sentences.
     *
     * <p>The first token is emitted without a boundary check (the check needs a predecessor)
     * and the last token always closes a sentence.
     *
     * @param str the text to split; {@code null} is treated like empty text
     * @return the cleaned result of {@link #cleanOutput}, or an empty string when the input
     *         contains no tokens
     */
    public String processString(String str) {
        if (str == null) {
            return "";
        }
        createWordwiseList(str);
        try {
            if (this.contents.isEmpty()) {
                return "";
            }
            int last = this.contents.size() - 1;
            StringBuilder out = new StringBuilder();
            // Only emit a separate first token when there is more than one token; otherwise
            // the single token would be written twice (once as first, once as last).
            if (last > 0) {
                out.append(this.contents.get(0)).append(AbstractFormatter.DEFAULT_COLUMN_SEPARATOR);
                for (int i = 1; i < last; i++) {
                    out.append(checkEnding(i));
                }
            }
            out.append(this.contents.get(last)).append(end_tag);
            return cleanOutput(out);
        } finally {
            // Always reset the scratch list so a failed call cannot leak tokens into the next one.
            this.contents.clear();
        }
    }

    /**
     * Drops blank lines, removes a trailing {@link #single_end_tag} from each remaining line,
     * trims it and terminates it with the platform line separator.
     *
     * @param raw the tagged text produced by {@link #processString}
     * @return the cleaned text
     */
    private String cleanOutput(CharSequence raw) {
        String lineSeparator = System.getProperty("line.separator");
        StringBuilder cleaned = new StringBuilder();
        for (String line : raw.toString().split(lineSeparator)) {
            if (line.trim().length() == 0) {
                continue;
            }
            if (line.endsWith(single_end_tag)) {
                // Cut exactly the trailing tag; searching from the front would truncate a
                // line that happens to contain the tag text earlier on.
                line = line.substring(0, line.length() - single_end_tag.length());
            }
            cleaned.append(line.trim()).append(lineSeparator);
        }
        return cleaned.toString();
    }

    /**
     * Appends all tokens of the given text to {@link #contents}. Tokens are separated by
     * whitespace; empty tokens and tokens consisting of a single dot are skipped.
     *
     * @param str the text to tokenize, must not be {@code null}
     */
    private void createWordwiseList(String str) {
        for (String line : str.split(System.getProperty("line.separator"))) {
            for (String token : line.trim().split("\\s+")) {
                if (!token.equals("") && !token.equals(".")) {
                    this.contents.add(token);
                }
            }
        }
    }

    /**
     * Decides how the token at {@code index} is written to the output.
     *
     * <p>The caller must guarantee that the token has both a predecessor and a successor in
     * {@link #contents}.
     *
     * @param index position of the token in {@link #contents}
     * @return the token followed by the column separator (sentence continues), the token
     *         followed by {@link #end_tag} (sentence ends), the bare token (wrapped word
     *         that is joined with its successor), or, for an opening parenthesis that is
     *         not closed by this or the next token, {@link #end_tag} and a line separator
     *         placed in front of the token
     */
    private String checkEnding(int index) {
        String current = this.contents.get(index);
        String continued = current + AbstractFormatter.DEFAULT_COLUMN_SEPARATOR;
        if (!matches(suspicious, current)) {
            return continued;
        }
        String next = this.contents.get(index + 1);
        String previous = this.contents.get(index - 1);

        // Wrapped words: no separator is emitted, so the token is glued to its successor.
        if (matches(allcaps_wrapped, current)) {
            return matches(allcaps_begins, next) ? removeLastHyphen(current) : current;
        }
        if (matches(wrapped_end, current)) {
            return current;
        }

        // A dot followed by a lower-case word, or a dot inside the token, is not a boundary.
        if ((matches(ends_dot, current) && matches(begins_small, next)) || matches(innerdots, current)) {
            return continued;
        }

        // NOTE(review): a token matching `number` is never treated as a boundary by the
        // condition below, whatever the two dictionary lookups return, so they currently
        // cannot change the result. Confirm whether stricter date handling was intended.
        if (matches(number, current) && this.datepreq.hitDict(previous)) {
            return continued;
        }
        if ((matches(number, current) && this.months.hitDict(next))
                || matches(begins_small, next)
                || matches(number, current)
                || this.abbrevations.hitDict(current)) {
            return continued;
        }

        String closed = current + end_tag;
        if (matches(sentence_end, current)) {
            return closed;
        }
        if (matches(direct_speech, current)) {
            // A colon directly followed by a number does not end the sentence.
            return matches(begins_number, next) ? continued : closed;
        }
        if (matches(begins_brace, current)) {
            if (matches(ends_brace, current) || matches(ends_brace, next)) {
                return continued;
            }
            // Otherwise the sentence is closed in front of the parenthesised token.
            return end_tag + System.getProperty("line.separator") + continued;
        }
        if (matches(ends_brace, current)) {
            return matches(begins_brace, previous) ? continued : closed;
        }
        return closed;
    }

    /** Returns whether {@link #finder} reports the given pattern in the given token. */
    private boolean matches(String pattern, String token) {
        return this.finder.findPattern(pattern, token);
    }

    /** Returns the token without its last hyphen, or the token unchanged if it has none. */
    private static String removeLastHyphen(String token) {
        int hyphen = token.lastIndexOf('-');
        if (hyphen < 0) {
            return token;
        }
        return token.substring(0, hyphen) + token.substring(hyphen + 1);
    }

    /**
     * Replaces the text appended after a sentence-closing token.
     *
     * <p>This assigns the static {@link #end_tag}, so the change affects every instance of
     * this class, not only the one the method is called on. {@link #cleanOutput} still
     * splits on the platform line separator and strips {@link #single_end_tag} only.
     *
     * @param str the new sentence end tag
     */
    public void setSentenceEndTag(String str) {
        end_tag = str;
    }
}
