package com.rapidminer.operator.text.io.transformer;

import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.text.Document;
import com.rapidminer.operator.text.io.AbstractTokenProcessor;
import com.rapidminer.tools.xml.text.XHTMLEntityResolver;
import java.io.IOException;
import java.io.StringReader;
import org.ccil.cowan.tagsoup.Parser;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;
import org.jdom.output.XMLOutputter;
import org.xml.sax.XMLReader;

/* loaded from: input_file:com/rapidminer/operator/text/io/transformer/HtmlToXml.class */
/**
 * Token processor that parses the token text of a document with the TagSoup
 * {@link Parser} and re-serializes the resulting JDOM tree as an XML string.
 */
public class HtmlToXml extends AbstractTokenProcessor {

    /**
     * Creates the operator.
     *
     * @param operatorDescription description handed straight to the superclass constructor
     */
    public HtmlToXml(OperatorDescription operatorDescription) {
        super(operatorDescription);
    }

    /**
     * Parses the token text of {@code document} and returns a new document whose
     * content is the XML serialization of the parsed tree.
     *
     * @param document document whose token text is parsed
     * @return a new {@link Document} wrapping the serialized XML string
     * @throws OperatorException a {@link UserError} with code 301 when parsing fails with an
     *         {@link IOException}, or a {@link UserError} keyed {@code "malformed_html"} when
     *         JDOM reports a {@link JDOMException}
     */
    @Override
    protected Document doWork(Document document) throws OperatorException {
        final SAXBuilder builder = newTagSoupBuilder();
        // Wrap whatever resolver the builder currently has in the XHTML entity resolver.
        builder.setEntityResolver(new XHTMLEntityResolver(builder.getEntityResolver()));

        try {
            final XMLOutputter outputter = new XMLOutputter();
            final StringReader htmlReader = new StringReader(document.getTokenText());
            final String xml = outputter.outputString(builder.build(htmlReader));
            return new Document(xml);
        } catch (IOException ioFailure) {
            final Object[] arguments = new Object[]{"document", ioFailure.getLocalizedMessage()};
            throw new UserError(this, ioFailure, 301, arguments);
        } catch (JDOMException parseFailure) {
            final Object[] arguments = new Object[]{parseFailure.getLocalizedMessage()};
            throw new UserError(this, parseFailure, "malformed_html", arguments);
        }
    }

    /**
     * Builds a {@link SAXBuilder} whose parser factory method always hands back a
     * fresh TagSoup {@link Parser} instance.
     */
    private static SAXBuilder newTagSoupBuilder() {
        final String driverClassName = Parser.class.getName();
        return new SAXBuilder(driverClassName) {
            @Override
            protected XMLReader createParser() throws JDOMException {
                return new Parser();
            }
        };
    }
}
