package com.quantx1.operator.sentiment;

import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.text.Document;
import com.rapidminer.operator.text.Token;
import com.rapidminer.operator.text.io.stemmer.PorterStemming;
import com.rapidminer.operator.text.io.tokenizer.StringTokenizerOperator;
import com.rapidminer.operator.text.io.wordfilter.stopwordlists.StopWordListEnglish;
import com.rapidminer.parameter.ParameterType;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Locale;

/* loaded from: input_file:com/quantx1/operator/sentiment/TokenizerOperator.class */
/**
 * Tokenizer operator with a fixed, parameter-free processing pipeline.
 *
 * <p>{@link #doWork(Document)} applies, in this order: splitting on non-letter
 * characters, English stop-word removal, a token length filter and finally
 * lower-casing plus Porter stemming. The operator exposes no user parameters.
 */
public class TokenizerOperator extends StringTokenizerOperator {
    /** Minimum token length (inclusive) kept by {@link #filterLength(Document)}. */
    private static final int MIN_NUMBER_OF_CHARS = 4;

    /** Maximum token length (inclusive) kept by {@link #filterLength(Document)}. */
    private static final int MAX_NUMBER_OF_CHARS = 25;

    /**
     * Creates the operator.
     *
     * @param operatorDescription description handed through to the superclass constructor
     */
    public TokenizerOperator(OperatorDescription operatorDescription) {
        super(operatorDescription);
    }

    /**
     * Returns the parameters of this operator.
     *
     * @return always an empty list; the pipeline is not configurable
     */
    public List<ParameterType> getParameterTypes() {
        return Collections.emptyList();
    }

    /**
     * Runs the complete pipeline on the given document.
     *
     * <p>The length filter runs before stemming, so a stemmed token in the result
     * may be shorter than {@link #MIN_NUMBER_OF_CHARS}.
     *
     * @param document document whose token sequence is processed; it is modified in place
     * @return the same document instance with its token sequence replaced
     * @throws UserError if one of the processing steps fails
     */
    protected Document doWork(Document document) throws UserError {
        Document tokenized = tokenize(document);
        Document withoutStopWords = filterStopWords(tokenized);
        Document lengthFiltered = filterLength(withoutStopWords);
        return stem(lengthFiltered);
    }

    /**
     * Splits every token of the document into its maximal runs of letter characters.
     * Non-letter characters act as separators and are dropped; empty runs are skipped.
     *
     * @param document document whose token sequence is replaced
     * @return the same document instance
     */
    protected Document tokenize(Document document) {
        List<Token> pieces = new ArrayList<Token>();
        for (Token token : document.getTokenSequence()) {
            splitOnNonLetters(token, pieces);
        }
        document.setTokenSequence(pieces);
        return document;
    }

    /** Appends one new token to {@code out} for each maximal letter run found in {@code source}. */
    private static void splitOnNonLetters(Token source, List<Token> out) {
        char[] chars = source.getToken().toCharArray();
        int runStart = 0;
        for (int pos = 0; pos < chars.length; pos++) {
            if (Character.isLetter(chars[pos])) {
                continue;
            }
            if (pos > runStart) {
                out.add(new Token(new String(chars, runStart, pos - runStart), source));
            }
            // The separator itself is never part of a run.
            runStart = pos + 1;
        }
        // Trailing run that is not terminated by a separator.
        if (chars.length > runStart) {
            out.add(new Token(new String(chars, runStart, chars.length - runStart), source));
        }
    }

    /**
     * Lower-cases every token and replaces it with its Porter stem.
     *
     * @param document document whose token sequence is replaced
     * @return the same document instance
     * @throws UserError declared for subclasses; not thrown by the code visible here
     */
    protected Document stem(Document document) throws UserError {
        PorterStemming stemmer = new PorterStemming();
        List<Token> stemmed = new ArrayList<Token>(document.getTokenSequence().size());
        for (Token token : document.getTokenSequence()) {
            // Locale.ROOT keeps lower-casing independent of the JVM default locale.
            char[] chars = token.getToken().toLowerCase(Locale.ROOT).toCharArray();
            stemmer.add(chars, chars.length);
            stemmer.stem();
            String stem = new String(stemmer.getResultBuffer(), 0, stemmer.getResultLength());
            stemmed.add(new Token(stem, token));
        }
        document.setTokenSequence(stemmed);
        return document;
    }

    /**
     * Removes every token that the English stop-word list reports as a stop word.
     *
     * @param document document whose token sequence is replaced
     * @return the same document instance
     * @throws UserError declared for subclasses and for failures of the stop-word list
     */
    protected Document filterStopWords(Document document) throws UserError {
        StopWordListEnglish stopWords = new StopWordListEnglish();
        List<Token> kept = new ArrayList<Token>(document.getTokenSequence().size());
        for (Token token : document.getTokenSequence()) {
            // NOTE(review): in doWork this runs before tokens are lower-cased;
            // confirm that isStopword matches case-insensitively.
            if (stopWords.isStopword(token.getToken())) {
                continue;
            }
            kept.add(token);
        }
        document.setTokenSequence(kept);
        return document;
    }

    /**
     * Keeps only tokens whose length lies within
     * [{@link #MIN_NUMBER_OF_CHARS}, {@link #MAX_NUMBER_OF_CHARS}], both inclusive.
     *
     * @param document document whose token sequence is replaced
     * @return the same document instance
     * @throws UserError declared for subclasses; not thrown by the code visible here
     */
    protected Document filterLength(Document document) throws UserError {
        List<Token> kept = new ArrayList<Token>(document.getTokenSequence().size());
        for (Token token : document.getTokenSequence()) {
            int length = token.getToken().length();
            if (length >= MIN_NUMBER_OF_CHARS && length <= MAX_NUMBER_OF_CHARS) {
                kept.add(token);
            }
        }
        document.setTokenSequence(kept);
        return document;
    }
}
