package com.rapidminer.operator.web.io.loganalysis;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.example.table.DoubleArrayDataRow;
import com.rapidminer.example.utils.ExampleSetBuilder;
import com.rapidminer.example.utils.ExampleSets;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.io.AbstractExampleSource;
import com.rapidminer.operator.ports.metadata.AttributeMetaData;
import com.rapidminer.operator.ports.metadata.ExampleSetMetaData;
import com.rapidminer.operator.ports.metadata.MetaData;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeDirectory;
import com.rapidminer.parameter.ParameterTypeFile;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.parameter.ParameterTypeList;
import com.rapidminer.parameter.ParameterTypeRegexp;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.tools.LogService;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileFilter;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.StringReader;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import org.apache.http.HttpStatus;
import org.jdom.JDOMException;
import org.polliwog.WeblogException;
import org.polliwog.data.Hit;
import org.polliwog.data.LogEntryFormat;
import org.polliwog.fields.RequestHeaderField;

/* loaded from: input_file:com/rapidminer/operator/web/io/loganalysis/LogFileSourceOperator.class */
/**
 * Example source that reads web server log files from a directory and emits one example per
 * accepted log entry ("hit").
 *
 * <p>Each example carries the attributes {@code session}, {@code ip}, {@code agent}, {@code uri},
 * the referer, {@code os_name}, {@code browser}, {@code language} and {@code time}. Entries are
 * parsed with a {@link LogEntryFormat} built from the configured format file, optionally filtered
 * (robot user agents, file types, HTTP status) and grouped into sessions per
 * {@code hostname:userAgent} pair using the configured session timeout.
 */
public class LogFileSourceOperator extends AbstractExampleSource {
    public static final String PARAMETER_LANGUAGE_EXPRESSION = "language_expression";
    public static final String PARAMETER_OS_EXPRESSION = "os_expression";
    public static final String PARAMETER_BROWSER_EXPRESSION = "browser_expression";
    public static final String PARAMETER_SESSION_TIMEOUT = "session_timeout";
    public static final String PARAMETER_LANGUAGE_MATCHER = "language_matcher";
    public static final String PARAMETER_OS_MATCHER = "os_matcher";
    public static final String PARAMETER_BROWSER_MATCHER = "browser_matcher";
    public static final String PARAMETER_ONLY_HTTP_200 = "only_HTTP_200";
    public static final String PARAMETER_FILETYPE_FILTER = "filetype_filter";
    public static final String PARAMETER_ROBOT_FILTER = "robot_filter";
    public static final String PARAMETER_DNS_LOOKUP = "dns_lookup";
    public static final String PARAMETER_LOG_DIR = "log_dir";
    public static final String PARAMETER_CONFIG_FILE = "config_file";
    public static final String PARAMETER_BROWSER = "browser";
    public static final String PARAMETER_LANGUAGE = "language";
    public static final String PARAMETER_OS = "operating_system";

    // NOTE(review): raw value type codes as found in the original code; presumably they
    // correspond to the nominal and numerical entries of RapidMiner's Ontology - confirm.
    private static final int VALUE_TYPE_TEXTUAL = 1;
    private static final int VALUE_TYPE_TIME = 2;

    // NOTE(review): user error code 302 is raised for every failure to load the format file;
    // presumably the "cannot read file" message of the UserError bundle - confirm.
    private static final int FILE_ERROR_CODE = 302;

    /** Divisor applied to the hit's millisecond timestamp before it is stored in "time". */
    private static final long MILLIS_PER_MINUTE = 60000;

    /** Category used when a matcher is configured but none of its expressions matches. */
    private static final String UNMATCHED_CATEGORY = "other";

    public LogFileSourceOperator(OperatorDescription operatorDescription) {
        super(operatorDescription);
    }

    /**
     * Describes the attributes of the generated example set.
     *
     * @return meta data listing the nine attributes in the order they are created at runtime
     */
    public MetaData getGeneratedMetaData() throws OperatorException {
        ExampleSetMetaData metaData = new ExampleSetMetaData();
        metaData.addAttribute(new AttributeMetaData("session", VALUE_TYPE_TEXTUAL));
        metaData.addAttribute(new AttributeMetaData("ip", VALUE_TYPE_TEXTUAL));
        metaData.addAttribute(new AttributeMetaData("agent", VALUE_TYPE_TEXTUAL));
        metaData.addAttribute(new AttributeMetaData("uri", VALUE_TYPE_TEXTUAL));
        metaData.addAttribute(new AttributeMetaData(RequestHeaderField.REFERER, VALUE_TYPE_TEXTUAL));
        metaData.addAttribute(new AttributeMetaData("os_name", VALUE_TYPE_TEXTUAL));
        metaData.addAttribute(new AttributeMetaData("browser", VALUE_TYPE_TEXTUAL));
        metaData.addAttribute(new AttributeMetaData("language", VALUE_TYPE_TEXTUAL));
        metaData.addAttribute(new AttributeMetaData("time", VALUE_TYPE_TIME));
        return metaData;
    }

    /**
     * Reads every regular file in the log directory and builds the example set.
     *
     * <p>Files that cannot be read are skipped with a warning; lines that cannot be parsed are
     * counted and reported once per file.
     *
     * @return the example set containing one row per accepted hit
     * @throws OperatorException if a parameter is missing, the format file cannot be loaded or
     *         the log directory cannot be listed
     */
    public ExampleSet createExampleSet() throws OperatorException {
        RegularExpressionMatcher browserMatcher = createOptionalMatcher(PARAMETER_BROWSER_MATCHER);
        RegularExpressionMatcher osMatcher = createOptionalMatcher(PARAMETER_OS_MATCHER);
        RegularExpressionMatcher languageMatcher = createOptionalMatcher(PARAMETER_LANGUAGE_MATCHER);
        List<LogEntryFilter> filters = createFilters();
        boolean dnsLookup = getParameterAsBoolean(PARAMETER_DNS_LOOKUP);
        LogEntryFormat format = loadLogEntryFormat(getParameterAsFile(PARAMETER_CONFIG_FILE));

        HitTableBuilder table = new HitTableBuilder(browserMatcher, osMatcher, languageMatcher, dnsLookup,
                getParameterAsInt(PARAMETER_SESSION_TIMEOUT));
        for (File logFile : listLogFiles()) {
            importLogFile(logFile, format, filters, table);
        }
        return table.build();
    }

    /** Builds a matcher from the given list parameter, or returns null when it is not set. */
    private RegularExpressionMatcher createOptionalMatcher(String parameterKey) throws OperatorException {
        if (!isParameterSet(parameterKey)) {
            return null;
        }
        return new RegularExpressionMatcher(getParameterList(parameterKey));
    }

    /** Creates the filters selected by the robot, file type and HTTP status parameters. */
    private List<LogEntryFilter> createFilters() throws OperatorException {
        List<LogEntryFilter> filters = new LinkedList<>();
        if (isParameterSet(PARAMETER_ROBOT_FILTER)) {
            // NOTE(review): this parameter is declared as a plain string in getParameterTypes()
            // but read as a list here - confirm which form RegularExpressionMatcher expects.
            final RegularExpressionMatcher robotMatcher =
                    new RegularExpressionMatcher(getParameterList(PARAMETER_ROBOT_FILTER));
            filters.add(new LogEntryFilter() {
                @Override
                public boolean accept(Hit hit) {
                    return !robotMatcher.isSubstringMatch(hit.getUserAgent());
                }
            });
        }
        if (isParameterSet(PARAMETER_FILETYPE_FILTER)) {
            try {
                final RegularExpressionMatcher fileTypeMatcher = new RegularExpressionMatcher(
                        new StringReader(getParameterAsString(PARAMETER_FILETYPE_FILTER)), false);
                filters.add(new LogEntryFilter() {
                    @Override
                    public boolean accept(Hit hit) {
                        return !fileTypeMatcher.isSubstringMatch(hit.getRequestURI().getPath());
                    }
                });
            } catch (IOException e) {
                // Stay best-effort (the import continues without this filter), but make the
                // dropped filter visible instead of discarding the failure silently.
                LogService.getRoot().log(Level.WARNING, "Could not read parameter '" + PARAMETER_FILETYPE_FILTER
                        + "'. The file type filter is not applied.", e);
            }
        }
        if (getParameterAsBoolean(PARAMETER_ONLY_HTTP_200)) {
            filters.add(new LogEntryFilter() {
                @Override
                public boolean accept(Hit hit) {
                    return hit.getStatus() == 200;
                }
            });
        }
        return filters;
    }

    /** Loads the log entry format from the format file, mapping every failure to a user error. */
    private LogEntryFormat loadLogEntryFormat(File configFile) throws OperatorException {
        try {
            return new LogEntryFormat(configFile, ".gz");
        } catch (IOException e) {
            throw new UserError(this, FILE_ERROR_CODE, new Object[] {configFile.getAbsolutePath(), e});
        } catch (JDOMException e) {
            throw new UserError(this, FILE_ERROR_CODE, new Object[] {configFile.getAbsolutePath(), e});
        } catch (WeblogException e) {
            throw new UserError(this, FILE_ERROR_CODE, new Object[] {configFile.getAbsolutePath(), e});
        }
    }

    /** Lists the regular files directly inside the configured log directory. */
    private File[] listLogFiles() throws OperatorException {
        File logDirectory = getParameterAsFile(PARAMETER_LOG_DIR);
        File[] logFiles = logDirectory.listFiles(new FileFilter() {
            @Override
            public boolean accept(File candidate) {
                return candidate.isFile();
            }
        });
        if (logFiles == null) {
            // File.listFiles returns null when the path is not a directory or cannot be listed;
            // report that to the user instead of failing with a NullPointerException.
            throw new UserError(this, FILE_ERROR_CODE,
                    new Object[] {logDirectory.getAbsolutePath(), "not a readable directory"});
        }
        return logFiles;
    }

    /**
     * Parses one log file line by line and appends every accepted hit to the table.
     * Blank lines and lines starting with '#' are skipped. Read failures are logged and the
     * file is abandoned; they never abort the whole import.
     */
    private void importLogFile(File logFile, LogEntryFormat format, List<LogEntryFilter> filters,
            HitTableBuilder table) {
        int entryCount = 0;
        int unparsableCount = 0;
        try (BufferedReader reader = new BufferedReader(new FileReader(logFile))) {
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                String entry = line.trim();
                if (entry.isEmpty() || entry.startsWith("#")) {
                    continue;
                }
                entryCount++;
                try {
                    Hit hit = (Hit) format.createEntry(entry, Hit.class);
                    boolean rejected = false;
                    for (LogEntryFilter filter : filters) {
                        if (!filter.accept(hit)) {
                            rejected = true;
                            break;
                        }
                    }
                    if (!rejected) {
                        table.add(hit);
                    }
                } catch (WeblogException e) {
                    // A malformed line only costs that line; the total is reported below.
                    unparsableCount++;
                }
            }
        } catch (IOException e) {
            getProcess().getLog().logWarning(logFile.getAbsolutePath() + ": Could not read this file. Ignoring it");
        } catch (RuntimeException e) {
            LogService.getRoot().log(Level.WARNING,
                    logFile.getAbsolutePath() + ": Unexpected error while processing this file. Ignoring the rest of it", e);
        }
        if (unparsableCount > 0) {
            // Reported once per file rather than once per line read.
            LogService.getRoot().log(Level.INFO, logFile.getAbsolutePath() + ": Could not read " + unparsableCount
                    + " lines out of " + entryCount);
        }
    }

    /** Stores the string in the row as the index assigned by the attribute's mapping. */
    private static void setNominal(DoubleArrayDataRow row, Attribute attribute, String value) {
        row.set(attribute, attribute.getMapping().mapString(value));
    }

    /**
     * Stores the matcher's result for the user agent: missing when no matcher is configured,
     * "other" when the matcher yields no match.
     */
    private static void setMatchedCategory(DoubleArrayDataRow row, Attribute attribute,
            RegularExpressionMatcher matcher, String userAgent) {
        if (matcher == null) {
            row.set(attribute, Double.NaN);
            return;
        }
        String category = matcher.getMatch(userAgent);
        setNominal(row, attribute, category != null ? category : UNMATCHED_CATEGORY);
    }

    /**
     * Resolves the host name for an address, remembering results in the given cache.
     *
     * @param address the host name or IP address taken from the log entry
     * @param cache   lookup results keyed by address; updated by this call
     * @return the resolved host name, or {@code address} itself when it cannot be resolved
     */
    private String reverseDNSLookUp(String address, Map<String, String> cache) {
        String cached = cache.get(address);
        if (cached != null) {
            return cached;
        }
        String resolved;
        try {
            resolved = InetAddress.getByName(address).getHostName();
        } catch (UnknownHostException e) {
            // Cache the address itself so that repeated hits neither retry the lookup nor
            // receive a placeholder string in place of the host.
            resolved = address;
        }
        cache.put(address, resolved);
        return resolved;
    }

    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();

        ParameterTypeFile configFile = new ParameterTypeFile(PARAMETER_CONFIG_FILE, "The configuration file for the logfile's format.", "xml", false);
        configFile.setExpert(false);
        types.add(configFile);

        ParameterTypeDirectory logDirectory = new ParameterTypeDirectory(PARAMETER_LOG_DIR, "The directory containing the log files.", false);
        logDirectory.setExpert(false);
        types.add(logDirectory);

        types.add(new ParameterTypeBoolean(PARAMETER_DNS_LOOKUP, "Indicates if a reverse dns lookup should be performed on the client ip.", false));
        // NOTE(review): declared as a string, yet createExampleSet() reads it with getParameterList().
        types.add(new ParameterTypeString(PARAMETER_ROBOT_FILTER, "Regular expression specifying which user agents are ignored."));
        types.add(new ParameterTypeString(PARAMETER_FILETYPE_FILTER, "Regular expression specifying which files to filter out. Any matching resource will be skipped.", true));
        types.add(new ParameterTypeBoolean(PARAMETER_ONLY_HTTP_200, "Consider only entries with HTTP Response code 200", false));
        types.add(new ParameterTypeList(PARAMETER_BROWSER_MATCHER, "This list specifies regular expressions to match browser types. First field is the browser name and the second the regular expression matched to it.", new ParameterTypeString("browser", "The browser identifier", false), new ParameterTypeRegexp(PARAMETER_BROWSER_EXPRESSION, "matches_languages", false)));
        types.add(new ParameterTypeList(PARAMETER_OS_MATCHER, "This list specifies regular expressions to match operating system types. First field ist the operating system name and the second specifies the regular expression matched to it.", new ParameterTypeString(PARAMETER_OS, "The operating system identifier", false), new ParameterTypeRegexp(PARAMETER_OS_EXPRESSION, "matches os types", false)));
        types.add(new ParameterTypeList(PARAMETER_LANGUAGE_MATCHER, "This list specifies regular expressions to match languages. First field is the name and the second the regular expression matched to it.", new ParameterTypeString("language", "The language identifier", false), new ParameterTypeRegexp(PARAMETER_LANGUAGE_EXPRESSION, "matches_languages", false)));
        types.add(new ParameterTypeInt(PARAMETER_SESSION_TIMEOUT, "Time in miliseconds between two requests from the same source, such that the second request can be assumed to be a new session.", 0, Integer.MAX_VALUE, 400000));
        return types;
    }

    /**
     * Accumulates the rows of the example set and keeps the state shared across all log files:
     * the session bookkeeping and the DNS lookup cache.
     */
    private final class HitTableBuilder {
        private final Attribute sessionAttribute = AttributeFactory.createAttribute("session", VALUE_TYPE_TEXTUAL);
        private final Attribute ipAttribute = AttributeFactory.createAttribute("ip", VALUE_TYPE_TEXTUAL);
        private final Attribute agentAttribute = AttributeFactory.createAttribute("agent", VALUE_TYPE_TEXTUAL);
        private final Attribute uriAttribute = AttributeFactory.createAttribute("uri", VALUE_TYPE_TEXTUAL);
        private final Attribute refererAttribute = AttributeFactory.createAttribute(RequestHeaderField.REFERER, VALUE_TYPE_TEXTUAL);
        private final Attribute osAttribute = AttributeFactory.createAttribute("os_name", VALUE_TYPE_TEXTUAL);
        private final Attribute browserAttribute = AttributeFactory.createAttribute("browser", VALUE_TYPE_TEXTUAL);
        private final Attribute languageAttribute = AttributeFactory.createAttribute("language", VALUE_TYPE_TEXTUAL);
        private final Attribute timeAttribute = AttributeFactory.createAttribute("time", VALUE_TYPE_TIME);

        private final List<Attribute> attributes = new LinkedList<>();
        private final ExampleSetBuilder builder;

        private final RegularExpressionMatcher browserMatcher;
        private final RegularExpressionMatcher osMatcher;
        private final RegularExpressionMatcher languageMatcher;
        private final boolean dnsLookup;
        private final int sessionTimeout;

        private final Map<String, String> dnsCache = new HashMap<>();
        /** Session number currently assigned to each "hostname:userAgent" key. */
        private final Map<String, Integer> sessionIds = new HashMap<>();
        /** Most recent hit seen for each "hostname:userAgent" key. */
        private final Map<String, Hit> lastHits = new HashMap<>();
        private int sessionCount;

        HitTableBuilder(RegularExpressionMatcher browserMatcher, RegularExpressionMatcher osMatcher,
                RegularExpressionMatcher languageMatcher, boolean dnsLookup, int sessionTimeout) {
            this.browserMatcher = browserMatcher;
            this.osMatcher = osMatcher;
            this.languageMatcher = languageMatcher;
            this.dnsLookup = dnsLookup;
            this.sessionTimeout = sessionTimeout;

            attributes.add(sessionAttribute);
            attributes.add(ipAttribute);
            attributes.add(agentAttribute);
            attributes.add(uriAttribute);
            attributes.add(refererAttribute);
            attributes.add(osAttribute);
            attributes.add(browserAttribute);
            attributes.add(languageAttribute);
            attributes.add(timeAttribute);
            builder = ExampleSets.from(attributes);
        }

        /** Appends one row describing the given hit. */
        void add(Hit hit) {
            DoubleArrayDataRow row = new DoubleArrayDataRow(new double[attributes.size()]);
            int sessionId = sessionIdFor(hit);

            String host = hit.getHostname();
            if (dnsLookup) {
                host = reverseDNSLookUp(host, dnsCache);
            }
            setNominal(row, ipAttribute, host);
            setNominal(row, agentAttribute, hit.getUserAgent());
            setNominal(row, uriAttribute, hit.getRequestURI().toString());
            if (hit.getRefererURI() != null) {
                setNominal(row, refererAttribute, hit.getRefererURI().toString());
            } else {
                row.set(refererAttribute, Double.NaN);
            }
            // Timestamp divided down from milliseconds and truncated to int, as before.
            row.set(timeAttribute, (int) (hit.getDate().getTime() / MILLIS_PER_MINUTE));

            // All three categories are derived from the user agent string.
            setMatchedCategory(row, browserAttribute, browserMatcher, hit.getUserAgent());
            setMatchedCategory(row, osAttribute, osMatcher, hit.getUserAgent());
            setMatchedCategory(row, languageAttribute, languageMatcher, hit.getUserAgent());

            setNominal(row, sessionAttribute, "s" + sessionId);
            builder.addDataRow(row);
        }

        /** Builds the example set from all rows added so far. */
        ExampleSet build() {
            return builder.build();
        }

        /**
         * Returns the session number for the hit. A new session starts when the key has not been
         * seen before or when more than the session timeout (milliseconds) lies between this hit
         * and the previous hit with the same key.
         */
        private int sessionIdFor(Hit hit) {
            String key = hit.getHostname() + ":" + hit.getUserAgent();
            Hit previous = lastHits.put(key, hit);
            boolean startsNewSession = previous == null
                    || hit.getDate().getTime() - previous.getDate().getTime() > sessionTimeout;
            if (startsNewSession) {
                sessionCount++;
                sessionIds.put(key, Integer.valueOf(sessionCount));
                return sessionCount;
            }
            return sessionIds.get(key).intValue();
        }
    }
}
