package edu.stanford.nlp.process;

import cern.colt.matrix.impl.AbstractFormatter;
import edu.stanford.nlp.ling.BasicDocument;
import edu.stanford.nlp.ling.Document;
import edu.stanford.nlp.ling.Word;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Set;

/* loaded from: input_file:edu/stanford/nlp/process/StripTagsProcessor.class */
/**
 * List processor that drops markup tags from a list of {@link Word} tokens.
 *
 * <p>A token counts as a tag when its text starts with {@code <} and ends with
 * {@code >}. Tags are always removed from the output. When line-break marking
 * is enabled, a tag whose name is in {@link #blockTags} is replaced by a single
 * separator token; consecutive block tags yield only one separator.
 */
public class StripTagsProcessor extends AbstractListProcessor {
    /**
     * Lower-case names of the tags that produce a line-break marker.
     * Candidate names are lower-cased before lookup, so matching is
     * case-insensitive. The set is mutable and shared by all instances.
     */
    public static final Set<String> blockTags = new HashSet<String>(Arrays.asList("blockquote", "br", "div", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "li", "ol", "p", "pre", "table", "tr", "ul"));

    /** Whether block-level tags are replaced by a separator token instead of just being dropped. */
    private boolean markLineBreaks;

    /** Creates a processor that strips tags without marking line breaks. */
    public StripTagsProcessor() {
        this(false);
    }

    /**
     * Creates a processor with the given line-break behaviour.
     *
     * @param z {@code true} to emit a separator token for block-level tags
     */
    public StripTagsProcessor(boolean z) {
        setMarkLineBreaks(z);
    }

    /**
     * @return {@code true} if block-level tags are replaced by a separator token
     */
    public boolean getMarkLineBreaks() {
        return this.markLineBreaks;
    }

    /**
     * @param z {@code true} to emit a separator token for block-level tags
     */
    public void setMarkLineBreaks(boolean z) {
        this.markLineBreaks = z;
    }

    /**
     * Returns a new list holding the non-tag tokens of {@code list} in their
     * original order, with separator tokens inserted for block-level tags when
     * line-break marking is enabled. The input list is not modified.
     *
     * @param list tokens to filter; every element is cast to {@link Word}
     * @return a new list without tag tokens
     */
    @Override // edu.stanford.nlp.process.ListProcessor
    public List process(List list) {
        List<Word> kept = new ArrayList<Word>();
        // True while the most recent output token is a separator, so that a run
        // of block tags (e.g. "</p><p>") produces one separator rather than several.
        boolean lastWasLineBreak = false;
        for (Object item : list) {
            Word token = (Word) item;
            String text = token.word();
            boolean isTag = text.startsWith("<") && text.endsWith(">");
            if (!isTag) {
                kept.add(token);
                lastWasLineBreak = false;
                continue;
            }
            if (!this.markLineBreaks || lastWasLineBreak) {
                continue; // tag is simply dropped
            }
            String name = tagName(text);
            if (name != null && blockTags.contains(name)) {
                // NOTE(review): the separator comes from Colt's AbstractFormatter;
                // presumably a newline string - confirm, and consider a local constant.
                kept.add(new Word(AbstractFormatter.DEFAULT_ROW_SEPARATOR));
                lastWasLineBreak = true;
            }
        }
        return kept;
    }

    /**
     * Extracts the lower-cased name from a tag token such as {@code <p>},
     * {@code <br/>}, {@code </table>} or {@code <div class="x">}.
     *
     * <p>The name starts at the first letter after the opening bracket and runs
     * up to the first whitespace character (so attributes are ignored); trailing
     * characters that are neither letters nor digits are then trimmed.
     *
     * @param tag text of a token that starts with {@code <} and ends with {@code >}
     * @return the lower-cased tag name, or {@code null} if the tag contains no letter
     */
    private static String tagName(String tag) {
        int length = tag.length();
        int start = 1;
        while (start < length && !Character.isLetter(tag.charAt(start))) {
            start++;
        }
        if (start == length) {
            return null; // nothing but punctuation, e.g. "<>"
        }
        // Stop at the first whitespace so that attributes are not part of the name.
        int stop = start;
        while (stop < length && !Character.isWhitespace(tag.charAt(stop))) {
            stop++;
        }
        // Trim trailing punctuation such as "/" or ">".
        int last = stop - 1;
        while (last > start && !Character.isLetterOrDigit(tag.charAt(last))) {
            last--;
        }
        // Locale.ROOT keeps the lookup independent of the default locale
        // (e.g. Turkish lower-cases 'I' to a dotless i, breaking "DIV" and "LI").
        return tag.substring(start, last + 1).toLowerCase(Locale.ROOT);
    }

    /**
     * Small demonstration: prints a sample document before tag stripping, after
     * tag stripping with line-break marking, and after sentence splitting.
     *
     * @param strArr ignored
     */
    public static void main(String[] strArr) {
        BasicDocument init = new BasicDocument().init("top text <h1>HEADING text</h1> this is <p>new paragraph<br>next line<br/>xhtml break etc.");
        System.out.println("Before:");
        System.out.println(init);
        Document processDocument = new StripTagsProcessor(true).processDocument(init);
        System.out.println("After:");
        System.out.println(processDocument);
        Document processDocument2 = new WordToSentenceProcessor().processDocument(processDocument);
        System.out.println("Sentences:");
        System.out.println(processDocument2);
    }
}
