package org.webdatacommons.webtables.tools.cleaning;

import com.google.common.base.Joiner;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import weka.core.TestInstances;

/* loaded from: input_file:org/webdatacommons/webtables/tools/cleaning/Analysis.class */
/**
 * Static helpers that run a Lucene {@link Analyzer} over a string and return
 * the produced terms, either as a list or joined into a single string.
 */
public class Analysis {
    // NOTE(review): the separator is taken from Weka's TestInstances.DEFAULT_SEPARATORS;
    // presumably a decompiler substitution for a plain string constant — confirm the value.
    private static final Joiner JOINER = Joiner.on(TestInstances.DEFAULT_SEPARATORS);

    /**
     * Runs {@code str} through {@code analyzer} and collects the text of every
     * emitted token, in order.
     *
     * <p>This is best-effort: if the analyzer throws while tokenizing, the stack
     * trace is printed and the tokens collected up to that point are returned.
     *
     * @param analyzer the analyzer used to build the token stream
     * @param str the text to tokenize; {@code null} is treated like the empty string
     * @return a new mutable list of token strings, empty when {@code str} is
     *         {@code null} or empty
     */
    public static List<String> tokenize(Analyzer analyzer, String str) {
        List<String> tokens = new ArrayList<>();
        if (str == null || str.isEmpty()) {
            return tokens;
        }
        // try-with-resources guarantees close() even when reset()/incrementToken()/end()
        // throws; an unclosed stream can leave a reusable analyzer in an unusable state.
        try (TokenStream stream = analyzer.tokenStream((String) null, new StringReader(str))) {
            // The attribute instance is the same for the whole stream; look it up once.
            CharTermAttribute term = stream.getAttribute(CharTermAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                tokens.add(term.toString());
            }
            stream.end();
        } catch (Exception e) {
            // Deliberately best-effort: report the failure and return what was collected.
            e.printStackTrace();
        }
        return tokens;
    }

    /**
     * Tokenizes {@code str} with {@code analyzer} and joins the resulting tokens
     * with the class-wide separator.
     *
     * @param analyzer the analyzer used to build the token stream
     * @param str the text to analyze
     * @return the tokens of {@code str} joined into one string
     */
    public static String analyze(Analyzer analyzer, String str) {
        return JOINER.join(tokenize(analyzer, str));
    }
}
