package edu.uci.ics.crawler4j.util;

import edu.uci.ics.crawler4j.url.WebURL;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.http.HttpHost;
import org.polliwog.Constants;

/* loaded from: input_file:edu/uci/ics/crawler4j/util/Net.class */
/**
 * Static helper for pulling URLs out of free-form text.
 */
public class Net {
    /** Common prefix of the {@code ftp://} and {@code ftps://} schemes accepted by the URL pattern. */
    private static final String FTP_SCHEME_PREFIX = "ftp";

    /** Compiled once at class initialization; a {@link Pattern} is immutable and safe to reuse. */
    private static final Pattern pattern = initializePattern();

    /**
     * Scans {@code str} for URL-like substrings and wraps each one in a {@link WebURL}.
     *
     * <p>Matches that do not carry an explicit scheme (for example {@code www.example.com})
     * get {@code Constants.HTTP} prepended before being stored. Matches that already start
     * with an http(s) or ftp(s) scheme are stored unchanged.
     *
     * @param str text to scan; may be {@code null}
     * @return a new, mutable set holding one {@link WebURL} per match added to it; empty
     *         (never {@code null}) when {@code str} is {@code null} or contains no match
     */
    public static Set<WebURL> extractUrls(String str) {
        Set<WebURL> extracted = new HashSet<WebURL>();
        if (str == null) {
            return extracted;
        }

        Matcher matcher = pattern.matcher(str);
        while (matcher.find()) {
            String url = matcher.group();
            if (!hasExplicitScheme(url)) {
                // NOTE(review): Constants.HTTP is presumably "http://" - confirm against
                // org.polliwog.Constants.
                url = Constants.HTTP + url;
            }
            WebURL webURL = new WebURL();
            webURL.setURL(url);
            extracted.add(webURL);
        }
        return extracted;
    }

    /**
     * Tells whether a matched URL already begins with one of the schemes the pattern accepts.
     *
     * <p>Previously only the "http" prefix was checked, so ftp:// and ftps:// matches had
     * {@code Constants.HTTP} prepended in front of their own scheme.
     */
    private static boolean hasExplicitScheme(String url) {
        return url.startsWith(HttpHost.DEFAULT_SCHEME_NAME) || url.startsWith(FTP_SCHEME_PREFIX);
    }

    /**
     * Builds the URL-matching pattern. The expression is split into segments purely for
     * readability; the segments are concatenated into a single regex.
     *
     * <p>Two typos of the original expression are corrected here: the dot in {@code www.} is
     * now escaped (it used to match any character), and the percent-escape alternative of the
     * query-key segments is {@code %[a-f\d]{2}} instead of {@code %[a-f\d{2}]}, which was a
     * single-character class that also accepted the literal characters '{', '2' and '}'.
     */
    private static Pattern initializePattern() {
        return Pattern.compile(
                // scheme (http, https, ftp, ftps), "~/", "/" or a leading "www."
                "\\b(((ht|f)tp(s?)\\:\\/\\/|~\\/|\\/)|www\\.)"
                // optional user:password@
                + "(\\w+:\\w+@)?"
                // host name ending in a listed TLD or any two lowercase letters
                + "(([-\\w]+\\.)+(com|org|net|gov|mil|biz|info|mobi|name|aero|jobs|museum|travel|[a-z]{2}))"
                // optional port
                + "(:[\\d]{1,5})?"
                // optional path, or a bare "?" / "#"
                + "(((\\/([-\\w~!$+|.,=]|%[a-f\\d]{2})+)+|\\/)+|\\?|#)?"
                // optional query string: first key[=value] ...
                + "((\\?([-\\w~!$+|.,*:]|%[a-f\\d]{2})+=?([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)"
                // ... followed by any number of &key[=value] pairs
                + "(&(?:[-\\w~!$+|.,*:]|%[a-f\\d]{2})+=?([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)*)*"
                // optional fragment
                + "(#([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)?\\b");
    }
}
