package cc.mallet.pipe;

import cc.mallet.types.Instance;
import cc.mallet.util.Replacement;
import com.meaningcloud.LangRequest;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;

/* loaded from: input_file:cc/mallet/pipe/NGramPreprocessor.class */
/**
 * Pipe that normalizes the text of an instance and then rewrites it with one or
 * more sets of token-sequence replacements.
 *
 * <p>Each call to {@link #loadReplacements(String)} or {@link #loadDeletions(String)}
 * reads one file, builds one {@link ReplacementSet} from it and appends that set to
 * {@link #replacementSets}. {@link #pipe(Instance)} applies the sets in the order in
 * which they were added.
 *
 * <p>Tokens are separated with {@code LangRequest.DEFAULT_SELECTION}.
 * NOTE(review): that constant is presumably a single space and the reference looks
 * like a decompiler constant substitution -- confirm before replacing it with a literal.
 */
public class NGramPreprocessor extends Pipe implements Serializable {
    /** Replacement sets, applied by {@link #pipe(Instance)} in insertion order. */
    public ArrayList<ReplacementSet> replacementSets = new ArrayList<>();
    private static final long serialVersionUID = 1;
    private static final int CURRENT_SERIAL_VERSION = 1;

    /* loaded from: input_file:cc/mallet/pipe/NGramPreprocessor$ReplacementSet.class */
    /**
     * A group of replacements indexed by the first token of each replacement, so
     * that only the candidates starting with the current token are tried.
     */
    public class ReplacementSet implements Serializable {
        /** Maps a first token to the replacements that start with it, in insertion order. */
        HashMap<String, ArrayList<Replacement>> replacementIndex = new HashMap<>();
        private static final long serialVersionUID = 1;
        private static final int CURRENT_SERIAL_VERSION = 1;

        public ReplacementSet() {
        }

        /**
         * Registers a replacement under the token returned by
         * {@code replacement.getFirstToken()}.
         *
         * @param replacement the replacement to add to this set
         */
        public void addReplacement(Replacement replacement) {
            String firstToken = replacement.getFirstToken();
            // Single lookup instead of containsKey + get.
            ArrayList<Replacement> bucket = this.replacementIndex.get(firstToken);
            if (bucket == null) {
                bucket = new ArrayList<>();
                this.replacementIndex.put(firstToken, bucket);
            }
            bucket.add(replacement);
        }

        /**
         * Splits the input on the separator and walks the tokens from left to right.
         * For each position the replacements indexed under the current token are
         * tried in order until one returns a position greater than the current one.
         * When no replacement advances the position, the token is copied to the
         * output followed by the separator.
         *
         * <p>Every copied token is followed by a separator, so the result ends with
         * a separator whenever the last token is copied unchanged.
         *
         * @param str the text to rewrite
         * @return the rewritten text
         */
        public String applyReplacements(String str) {
            String[] tokens = str.split(LangRequest.DEFAULT_SELECTION);
            StringBuilder output = new StringBuilder();
            int position = 0;
            while (position < tokens.length) {
                String token = tokens[position];
                int start = position;
                ArrayList<Replacement> candidates = this.replacementIndex.get(token);
                if (candidates != null) {
                    for (Replacement candidate : candidates) {
                        // apply() receives the output buffer and returns the next position;
                        // presumably it appends its own text when it matches -- verify
                        // against Replacement.
                        position = candidate.apply(tokens, position, output);
                        if (position > start) {
                            break;
                        }
                    }
                }
                if (position == start) {
                    // Nothing advanced the position: keep the token as it is.
                    output.append(token).append(LangRequest.DEFAULT_SELECTION);
                    position++;
                }
            }
            return output.toString();
        }

        /** Writes the serial version followed by the replacement index. */
        private void writeObject(ObjectOutputStream objectOutputStream) throws IOException {
            objectOutputStream.writeInt(CURRENT_SERIAL_VERSION);
            objectOutputStream.writeObject(this.replacementIndex);
        }

        /** Reads the serial version (currently not inspected) and the replacement index. */
        @SuppressWarnings("unchecked") // the stream is expected to hold what writeObject wrote
        private void readObject(ObjectInputStream objectInputStream) throws IOException, ClassNotFoundException {
            objectInputStream.readInt();
            this.replacementIndex = (HashMap<String, ArrayList<Replacement>>) objectInputStream.readObject();
        }
    }

    /**
     * Reads one replacement per line from the given file, collects them in a new
     * {@link ReplacementSet} and appends that set to {@link #replacementSets}.
     * Each line is passed to {@code new Replacement(line)}.
     *
     * @param str path of the file to read
     * @return the number of lines read
     * @throws IOException if the file cannot be opened or read; the set is not added in that case
     */
    public int loadReplacements(String str) throws IOException {
        return loadSet(str, false);
    }

    /**
     * Reads one entry per line from the given file, collects them in a new
     * {@link ReplacementSet} and appends that set to {@link #replacementSets}.
     * Each line is passed to {@code new Replacement(line, "")}.
     *
     * @param str path of the file to read
     * @return the number of lines read
     * @throws IOException if the file cannot be opened or read; the set is not added in that case
     */
    public int loadDeletions(String str) throws IOException {
        return loadSet(str, true);
    }

    /**
     * Shared loader for {@link #loadReplacements(String)} and {@link #loadDeletions(String)}.
     * The reader is closed even when reading or constructing a replacement fails.
     * The file is decoded with the JVM's default charset, as {@link FileReader} does.
     */
    private int loadSet(String fileName, boolean asDeletions) throws IOException {
        ReplacementSet loaded = new ReplacementSet();
        int count = 0;
        try (BufferedReader reader = new BufferedReader(new FileReader(fileName))) {
            String line;
            while ((line = reader.readLine()) != null) {
                loaded.addReplacement(asDeletions ? new Replacement(line, "") : new Replacement(line));
                count++;
            }
        }
        // Only reached when the whole file was read and closed without error.
        this.replacementSets.add(loaded);
        return count;
    }

    /**
     * Normalizes the instance data and applies every replacement set to it.
     *
     * <p>The data is cast to {@code String}, lower-cased with the default locale, and
     * the entities {@code &apos;}, {@code &quot;}, {@code &gt;} and {@code &lt;} are
     * decoded. Every character that is not a letter, a digit, a hyphen or an
     * apostrophe is then replaced by the separator, and whitespace runs are collapsed
     * to one separator. The result is stored back into the instance.
     *
     * @param instance the instance whose data is rewritten in place
     * @return the same instance
     */
    @Override // cc.mallet.pipe.Pipe
    public Instance pipe(Instance instance) {
        String text = ((String) instance.getData()).toLowerCase();
        // Literal substitutions, so no regex is needed. "&gt;" and "&lt;" used to be
        // swapped; both characters are removed by the filter below, so the output is
        // the same, but the mapping is now the correct one.
        text = text.replace("&apos;", "'")
                .replace("&quot;", "\"")
                .replace("&gt;", ">")
                .replace("&lt;", "<");
        text = text.replaceAll("[^\\p{L}\\p{N}\\-\\']", LangRequest.DEFAULT_SELECTION);
        text = text.replaceAll("\\s+", LangRequest.DEFAULT_SELECTION);
        for (ReplacementSet set : this.replacementSets) {
            text = set.applyReplacements(text);
        }
        instance.setData(text);
        return instance;
    }

    /** Writes the serial version followed by the list of replacement sets. */
    private void writeObject(ObjectOutputStream objectOutputStream) throws IOException {
        objectOutputStream.writeInt(CURRENT_SERIAL_VERSION);
        objectOutputStream.writeObject(this.replacementSets);
    }

    /** Reads the serial version (currently not inspected) and the list of replacement sets. */
    @SuppressWarnings("unchecked") // the stream is expected to hold what writeObject wrote
    private void readObject(ObjectInputStream objectInputStream) throws IOException, ClassNotFoundException {
        objectInputStream.readInt();
        this.replacementSets = (ArrayList<ReplacementSet>) objectInputStream.readObject();
    }
}
