package com.rapidminer.operator.text.io.filereader;

import com.rapidminer.operator.text.io.filereader.reader.TagIgnoringReader;
import com.rapidminer.tools.LogService;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.io.StringWriter;
import java.nio.charset.Charset;
import org.ccil.cowan.tagsoup.Parser;
import org.jdom.CDATA;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.Text;
import org.jdom.input.SAXBuilder;
import org.jdom.output.Format;
import org.jdom.output.XMLOutputter;
import org.xml.sax.XMLReader;

/* loaded from: input_file:com/rapidminer/operator/text/io/filereader/HTMLFileReader.class */
/**
 * File reader for HTML input.
 *
 * <p>The raw text is first obtained through a {@link TextFileReader} and then parsed with a
 * TagSoup-backed {@link SAXBuilder}. Depending on the boolean flag passed to
 * {@link #readStream(InputStream, boolean, Charset)}, the result is either the pretty-printed
 * markup of the root element or only the text found inside the elements.
 */
public class HTMLFileReader extends AbstractFileReader {

    /**
     * Reads the stream and returns its content either as markup or as tag-free text.
     *
     * @param inputStream the stream to read; it is handed to {@link TextFileReader#readStream}
     * @param z           if {@code true}, the parsed root element is returned as pretty-printed
     *                    markup; if {@code false}, only the text content of the elements is returned
     * @param charset     the charset passed on to the {@link TextFileReader}
     * @return the pretty-printed markup or the extracted text; if the DOM cannot be built, the
     *         unparsed text (when {@code z} is {@code true}) or the result of
     *         {@link TagIgnoringReader#readText()} on the unparsed text (when {@code z} is
     *         {@code false})
     * @throws IOException if reading the stream, writing the output, or the fallback reader fails
     */
    @Override // com.rapidminer.operator.text.io.filereader.FileReader
    public String readStream(InputStream inputStream, boolean z, Charset charset) throws IOException {
        SAXBuilder builder = getSAXBuilder();
        builder.setValidation(false);
        String rawText = new TextFileReader().readStream(inputStream, z, charset);
        try {
            Document document = builder.build(new StringReader(rawText));
            StringWriter out = new StringWriter();
            if (z) {
                new XMLOutputter(Format.getPrettyFormat()).output(document.getRootElement(), out);
            } else {
                elementToString(out, document.getRootElement());
            }
            return out.getBuffer().toString();
        } catch (JDOMException e) {
            if (z) {
                LogService.getRoot().warning("Could not build DOM from XML! Just using plain text. Reason: " + e.getMessage());
                return rawText;
            }
            LogService.getRoot().warning("Could not build DOM from XML! Just removing tags. Reason: " + e.getMessage());
            // try-with-resources closes the reader exactly once and records a close() failure
            // as a suppressed exception instead of letting it replace the primary one.
            try (TagIgnoringReader tagStripper = new TagIgnoringReader(new StringReader(rawText))) {
                return tagStripper.readText();
            }
        }
    }

    /**
     * Creates the {@link SAXBuilder} used for parsing. The builder always hands out a fresh
     * TagSoup {@link Parser} and has {@link Parser#CDATAElementsFeature} switched off.
     *
     * @return the configured builder
     */
    protected SAXBuilder getSAXBuilder() {
        SAXBuilder builder = new SAXBuilder(Parser.class.getName()) {
            @Override // org.jdom.input.SAXBuilder
            protected XMLReader createParser() throws JDOMException {
                return new Parser();
            }
        };
        builder.setFeature(Parser.CDATAElementsFeature, false);
        return builder;
    }

    /**
     * Writes the text of the given element and, recursively, of all its descendant elements to
     * the writer, in content order. {@link CDATA} nodes and any content that is neither an
     * {@link Element} nor a {@link Text} are skipped.
     *
     * @param stringWriter the writer receiving the text
     * @param element      the element whose content is traversed
     */
    private void elementToString(StringWriter stringWriter, Element element) {
        for (Object node : element.getContent()) {
            if (node instanceof Element) {
                elementToString(stringWriter, (Element) node);
            } else if (node instanceof Text && !(node instanceof CDATA)) {
                // The CDATA check excludes CDATA sections even though they also pass the Text test.
                stringWriter.write(((Text) node).getText());
            }
        }
    }

    /**
     * Returns the file extension matching the kind of output produced by
     * {@link #readStream(InputStream, boolean, Charset)}.
     *
     * @param z the same flag as passed to {@code readStream}
     * @return {@code "html"} if {@code z} is {@code true}, otherwise {@code "txt"}
     */
    @Override // com.rapidminer.operator.text.io.filereader.FileReader
    public String getExtension(boolean z) {
        return z ? "html" : "txt";
    }
}
