package com.rapidminer.operator.text.io.tokenizer;

import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.text.Document;
import com.rapidminer.operator.text.Token;
import com.rapidminer.operator.text.io.AbstractTokenProcessor;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeInt;
import java.util.ArrayList;
import java.util.List;

/* loaded from: input_file:com/rapidminer/operator/text/io/tokenizer/NGramTokenizerOperator.class */
/**
 * Token processor that splits the tokens of a document into character n-grams.
 *
 * <p>Every token longer than the configured length is replaced by all of its contiguous
 * substrings of exactly that length, in left-to-right order. Tokens whose length is less than
 * or equal to the configured length are passed through as they are. When
 * {@link #PARAMETER_KEEP_TERMS} is enabled, the original token is additionally emitted after
 * its n-grams.
 */
public class NGramTokenizerOperator extends AbstractTokenProcessor {
    /** Key of the integer parameter that holds the n-gram length. */
    public static final String PARAMETER_LENGTH = "length";

    /** Key of the boolean parameter that controls whether original tokens are kept. */
    public static final String PARAMETER_KEEP_TERMS = "keep_terms";

    /**
     * Creates the operator and forwards the description to the superclass.
     *
     * @param operatorDescription the description handed to the superclass constructor
     */
    public NGramTokenizerOperator(OperatorDescription operatorDescription) {
        super(operatorDescription);
    }

    /**
     * Rebuilds the token sequence of the given document from character n-grams.
     *
     * <p>Example with length 3: the token {@code "hello"} yields {@code "hel"}, {@code "ell"},
     * {@code "llo"} (followed by {@code "hello"} itself if terms are kept), whereas the token
     * {@code "hi"} is emitted unchanged.
     *
     * @param document the document whose token sequence is read and then replaced
     * @return the same document instance, now carrying the new token sequence
     * @throws UserError declared by the overridden method; presumably raised when a parameter
     *         value cannot be read (confirm against the parameter accessors)
     */
    @Override // com.rapidminer.operator.text.io.AbstractTokenProcessor
    protected Document doWork(Document document) throws UserError {
        final int length = getParameterAsInt(PARAMETER_LENGTH);
        final boolean keepTerms = getParameterAsBoolean(PARAMETER_KEEP_TERMS);

        List<Token> result = new ArrayList<Token>();
        for (Token token : document.getTokenSequence()) {
            String text = token.getToken();
            boolean longerThanNGram = text.length() > length;

            if (longerThanNGram) {
                // Slide a window of the configured length across the token text.
                int windowCount = text.length() - length + 1;
                for (int start = 0; start < windowCount; start++) {
                    // The source token is passed along to the Token constructor
                    // (presumably so the n-gram inherits its attributes -- confirm in Token).
                    result.add(new Token(text.substring(start, start + length), token));
                }
            }

            // Tokens too short to be split are always preserved; longer ones only on request.
            if (!longerThanNGram || keepTerms) {
                result.add(new Token(text, token));
            }
        }

        document.setTokenSequence(result);
        return document;
    }

    /**
     * Extends the parameter list of the superclass with the two parameters of this operator:
     * the n-gram length ({@link #PARAMETER_LENGTH}) and the keep-terms switch
     * ({@link #PARAMETER_KEEP_TERMS}).
     *
     * @return the list obtained from the superclass with both parameter types appended
     */
    @Override
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> parameterTypes = super.getParameterTypes();
        // Numeric arguments are presumably (min, max, default) -- confirm against ParameterTypeInt.
        parameterTypes.add(new ParameterTypeInt(PARAMETER_LENGTH, "The length n of the n-grams.", 1, Integer.MAX_VALUE, 3, false));
        parameterTypes.add(new ParameterTypeBoolean(PARAMETER_KEEP_TERMS, "Indicates if the original terms (i.e. tokens) should be kept along with the created n-grams.", false, false));
        return parameterTypes;
    }
}
