package org.apache.tika.parser.feed;

import com.rapidminer.extension.webtableextraction.microdataparser.BaseNodeVisitor;
import com.rometools.rome.feed.synd.SyndContent;
import com.rometools.rome.feed.synd.SyndEntry;
import com.rometools.rome.feed.synd.SyndFeed;
import com.rometools.rome.io.FeedException;
import com.rometools.rome.io.SyndFeedInput;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.commons.io.input.CloseShieldInputStream;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/* loaded from: input_file:org/apache/tika/parser/feed/FeedParser.class */
public class FeedParser extends AbstractParser {
    private static final long serialVersionUID = -3785361933034525186L;
    private static final Set<MediaType> SUPPORTED_TYPES = Collections.unmodifiableSet(new HashSet(Arrays.asList(MediaType.application("rss+xml"), MediaType.application("atom+xml"))));

    public Set<MediaType> getSupportedTypes(ParseContext parseContext) {
        return SUPPORTED_TYPES;
    }

    public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) throws IOException, SAXException, TikaException {
        try {
            SyndFeed build = new SyndFeedInput().build(new InputSource((InputStream) new CloseShieldInputStream(inputStream)));
            String stripTags = stripTags(build.getTitleEx());
            String stripTags2 = stripTags(build.getDescriptionEx());
            metadata.set(TikaCoreProperties.TITLE, stripTags);
            metadata.set(TikaCoreProperties.DESCRIPTION, stripTags2);
            XHTMLContentHandler xHTMLContentHandler = new XHTMLContentHandler(contentHandler, metadata);
            xHTMLContentHandler.startDocument();
            xHTMLContentHandler.element("h1", stripTags);
            xHTMLContentHandler.element("p", stripTags2);
            xHTMLContentHandler.startElement("ul");
            for (SyndEntry syndEntry : build.getEntries()) {
                String link = syndEntry.getLink();
                if (link != null) {
                    xHTMLContentHandler.startElement("li");
                    xHTMLContentHandler.startElement("a", BaseNodeVisitor.VALUE_EXTRACTION_ATTRIBUTE_HREF, link);
                    xHTMLContentHandler.characters(stripTags(syndEntry.getTitleEx()));
                    xHTMLContentHandler.endElement("a");
                    SyndContent description = syndEntry.getDescription();
                    if (description != null) {
                        xHTMLContentHandler.newline();
                        xHTMLContentHandler.characters(stripTags(description));
                    }
                    xHTMLContentHandler.endElement("li");
                }
            }
            xHTMLContentHandler.endElement("ul");
            xHTMLContentHandler.endDocument();
        } catch (FeedException e) {
            throw new TikaException("RSS parse error", e);
        }
    }

    private static String stripTags(SyndContent syndContent) {
        String value;
        if (syndContent == null || (value = syndContent.getValue()) == null) {
            return "";
        }
        String[] split = value.split("<[^>]*>");
        StringBuilder sb = new StringBuilder();
        for (String str : split) {
            sb.append(str);
        }
        return sb.toString().trim();
    }
}
