package com.rapidminer.operator.text.io.transformer;

import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.text.Document;
import com.rapidminer.operator.text.Token;
import com.rapidminer.operator.text.io.AbstractTokenProcessor;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeString;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;

/* loaded from: input_file:com/rapidminer/operator/text/io/transformer/DocumentCleaningOperator.class */
/**
 * Token processor that deletes every part of each token's text that matches a
 * user-supplied regular expression.
 *
 * <p>The expression is read from the {@code deletion_regex} parameter and is
 * compiled with {@link Pattern#DOTALL}, so {@code .} also matches line
 * terminators. Tokens are never dropped: a token whose text is matched
 * completely is kept with an empty string as its text.
 */
public class DocumentCleaningOperator extends AbstractTokenProcessor {
    /** Key of the parameter holding the regular expression whose matches are deleted. */
    private static final String PARAMETER_DELETION_REGEX = "deletion_regex";

    /**
     * Creates the operator.
     *
     * @param operatorDescription description passed through to the superclass constructor
     */
    public DocumentCleaningOperator(OperatorDescription operatorDescription) {
        super(operatorDescription);
    }

    /**
     * Replaces the document's token sequence with a new sequence in which all
     * matches of the deletion regex have been removed from each token's text.
     *
     * @param document the document to clean; its token sequence is replaced in place
     * @return the same {@code document} instance that was passed in
     * @throws OperatorException declared by the overridden method; presumably raised
     *         by the parameter lookup - TODO confirm
     * @throws java.util.regex.PatternSyntaxException (unchecked) if the configured
     *         expression is not a valid regular expression
     */
    @Override // com.rapidminer.operator.text.io.AbstractTokenProcessor
    protected Document doWork(Document document) throws OperatorException {
        // Compile once per document rather than once per token.
        Pattern deletionPattern =
                Pattern.compile(getParameterAsString(PARAMETER_DELETION_REGEX), Pattern.DOTALL);

        List<Token> cleanedTokens = new ArrayList<>();
        for (Token token : document.getTokenSequence()) {
            String cleanedText = deletionPattern.matcher(token.getToken()).replaceAll("");
            // The original token is handed to the constructor alongside the new text;
            // presumably so its remaining attributes carry over - TODO confirm.
            cleanedTokens.add(new Token(cleanedText, token));
        }
        document.setTokenSequence(cleanedTokens);
        return document;
    }

    /**
     * Returns the superclass's parameter types with the {@code deletion_regex}
     * string parameter appended.
     *
     * @return the list obtained from the superclass, extended by one entry
     */
    @Override
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> parameterTypes = super.getParameterTypes();
        parameterTypes.add(new ParameterTypeString(PARAMETER_DELETION_REGEX, "This regular expression specifies the parts of the string, which are deleted.", false));
        return parameterTypes;
    }
}
