package cc.mallet.pipe;

import cc.mallet.types.Alphabet;
import cc.mallet.types.FeatureSequence;
import cc.mallet.types.Instance;
import java.io.Serializable;

/* loaded from: input_file:cc/mallet/pipe/FixedVocabTokenizer.class */
/**
 * Pipe that tokenizes a {@link CharSequence} and keeps only the tokens that are
 * already present in the pipe's {@link Alphabet}, replacing the instance data
 * with a {@link FeatureSequence} of their alphabet indices.
 *
 * <p>Tokenization is driven by Unicode general categories (see
 * {@link Character#getType(int)}):
 * <ul>
 *   <li>letters, marks and the underscore always extend (or start) a token;</li>
 *   <li>dashes and decimal digits extend a token that has already started, and
 *       are dropped otherwise;</li>
 *   <li>separators and the listed punctuation categories end the current token;</li>
 *   <li>code points of any other category (for example control characters or
 *       symbols) are skipped and do not end the current token.</li>
 * </ul>
 *
 * <p>{@link #pipe(Instance)} writes into the instance-level scratch buffers, so
 * concurrent calls on the same tokenizer would interfere with each other.
 */
public class FixedVocabTokenizer extends Pipe implements Serializable {
    /**
     * Length threshold for emitted tokens. A token is kept only when the index of
     * its last code point is {@code >= minimumLength}, i.e. when it contains at
     * least {@code minimumLength + 1} code points.
     * NOTE(review): given the field name this may be an off-by-one; the historical
     * comparison is preserved here so existing output does not change.
     */
    public int minimumLength;
    /** Scratch space for the alphabet indices of the tokens found in one instance. */
    int[] tokenBuffer;
    /** Scratch space for the code points of the token currently being built. */
    int[] characterBuffer;
    static final long serialVersionUID = 1;

    /** The underscore is treated like a letter even though it is connector punctuation. */
    private static final int UNDERSCORE = '_';

    /**
     * Creates a tokenizer restricted to the given vocabulary.
     *
     * @param alphabet the alphabet whose entries are the only tokens emitted
     */
    public FixedVocabTokenizer(Alphabet alphabet) {
        super(alphabet, null);
        this.minimumLength = 3;
        this.tokenBuffer = new int[100000];
        this.characterBuffer = new int[1000];
    }

    /**
     * Replaces the instance's character data with a {@link FeatureSequence} holding
     * the alphabet indices of the in-vocabulary tokens, in order of appearance.
     *
     * <p>At most {@code tokenBuffer.length} tokens are emitted; when the buffer is
     * about to fill up, a message is printed to standard error and scanning stops.
     *
     * @param instance the instance whose data must be a {@link CharSequence}
     * @return the same instance, with its data replaced
     * @throws IllegalArgumentException if the instance data is not a {@link CharSequence}
     */
    @Override // cc.mallet.pipe.Pipe
    public Instance pipe(Instance instance) {
        Alphabet alphabet = getAlphabet();
        Object data = instance.getData();
        if (!(data instanceof CharSequence)) {
            // Guard against null so the intended exception is raised instead of an NPE.
            String found = (data == null) ? "null" : String.valueOf(data.getClass());
            throw new IllegalArgumentException("Looking for a CharSequence, found a " + found);
        }
        CharSequence text = (CharSequence) data;

        int lastCharIndex = -1; // index of the last code point of the open token, -1 if none
        int tokenCount = 0;
        int length = text.length();
        int offset = 0;
        while (offset < length) {
            // One slot is kept free so a token still open at this point can be emitted below.
            if (tokenCount == this.tokenBuffer.length - 1) {
                System.err.println("Overflowed token buffer");
                break;
            }
            // Walk by char offset and advance by the code point's width so that
            // supplementary characters (surrogate pairs) are read exactly once.
            int codePoint = Character.codePointAt(text, offset);
            offset += Character.charCount(codePoint);
            int type = Character.getType(codePoint);

            if (codePoint == UNDERSCORE || isLetterOrMark(type)) {
                lastCharIndex = appendCodePoint(lastCharIndex, codePoint);
            } else if (type == Character.DASH_PUNCTUATION || type == Character.DECIMAL_DIGIT_NUMBER) {
                // Dashes and digits may continue a token but never start one.
                if (lastCharIndex != -1) {
                    lastCharIndex = appendCodePoint(lastCharIndex, codePoint);
                }
            } else if (isTokenBoundary(type)) {
                if (lastCharIndex != -1) {
                    tokenCount = emitToken(alphabet, lastCharIndex, tokenCount);
                    lastCharIndex = -1;
                }
            }
            // Any other category is ignored and leaves the open token untouched.
        }
        // Emit the token that was still open when the input (or the buffer) ran out.
        if (lastCharIndex != -1) {
            tokenCount = emitToken(alphabet, lastCharIndex, tokenCount);
        }

        int[] features = new int[tokenCount];
        System.arraycopy(this.tokenBuffer, 0, features, 0, tokenCount);
        instance.setData(new FeatureSequence(alphabet, features));
        return instance;
    }

    /** Returns true for the letter and mark categories that always extend a token. */
    private static boolean isLetterOrMark(int type) {
        return type == Character.UPPERCASE_LETTER
                || type == Character.LOWERCASE_LETTER
                || type == Character.TITLECASE_LETTER
                || type == Character.MODIFIER_LETTER
                || type == Character.OTHER_LETTER
                || type == Character.NON_SPACING_MARK
                || type == Character.ENCLOSING_MARK
                || type == Character.COMBINING_SPACING_MARK;
    }

    /** Returns true for the separator and punctuation categories that end a token. */
    private static boolean isTokenBoundary(int type) {
        return type == Character.SPACE_SEPARATOR
                || type == Character.LINE_SEPARATOR
                || type == Character.PARAGRAPH_SEPARATOR
                || type == Character.END_PUNCTUATION
                || type == Character.CONNECTOR_PUNCTUATION
                || type == Character.START_PUNCTUATION
                || type == Character.INITIAL_QUOTE_PUNCTUATION
                || type == Character.FINAL_QUOTE_PUNCTUATION
                || type == Character.OTHER_PUNCTUATION;
    }

    /**
     * Stores a code point after the current end of the open token, enlarging the
     * character buffer when it is full, and returns the new last index.
     */
    private int appendCodePoint(int lastCharIndex, int codePoint) {
        int next = lastCharIndex + 1;
        if (next >= this.characterBuffer.length) {
            int[] grown = new int[Math.max(next + 1, this.characterBuffer.length * 2)];
            System.arraycopy(this.characterBuffer, 0, grown, 0, this.characterBuffer.length);
            this.characterBuffer = grown;
        }
        this.characterBuffer[next] = codePoint;
        return next;
    }

    /**
     * Records the open token in the token buffer if it is in the alphabet and passes
     * the length threshold, and returns the updated token count.
     */
    private int emitToken(Alphabet alphabet, int lastCharIndex, int tokenCount) {
        String token = new String(this.characterBuffer, 0, lastCharIndex + 1);
        if (alphabet.contains(token) && lastCharIndex >= this.minimumLength) {
            this.tokenBuffer[tokenCount] = alphabet.lookupIndex(token);
            return tokenCount + 1;
        }
        return tokenCount;
    }
}
