package com.rapidminer.operator.preprocessing.ie.features.tools;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;

/* loaded from: input_file:com/rapidminer/operator/preprocessing/ie/features/tools/WikipediaResourceMgr.class */
/**
 * Lazily created singleton that loads three text resources into memory and answers
 * "which categories belong to this link text?" queries via {@link #getWikiCats(String)}.
 *
 * <p>Resources read by the constructor (all decoded with {@link #encoding}):
 * <ul>
 *   <li>{@code articleIdFile}: one {@code title<TAB>id} pair per line;</li>
 *   <li>{@code linksFile}: one {@code targetTitle|linkText} pair per line;</li>
 *   <li>{@code articleIdWikiCatFile}: one {@code id<TAB>category[<TAB>...]} entry per line.</li>
 * </ul>
 *
 * <p>Loading is best-effort: a missing or unreadable file is reported on stderr and leaves the
 * corresponding map (partially) empty; a malformed line is reported on stdout and skipped.
 *
 * <p>Only {@link #getInstance()} is synchronized; the maps themselves are plain
 * {@link HashMap}s that are filled once in the constructor.
 */
public class WikipediaResourceMgr {
    private static WikipediaResourceMgr instance = null;

    /** Upper-cased article title or link text -> ids of the articles it may refer to. */
    HashMap<String, ArrayList<Integer>> link2ArtIdMap = new HashMap<>();

    /** Article id -> categories listed for that article. */
    HashMap<Integer, ArrayList<String>> artID2WikiCatsMap = new HashMap<>();

    /** Charset name used to decode all resource files. */
    public String encoding = "UTF-8";

    // NOTE(review): machine-specific absolute path; on any other host the loads fail and the
    // maps stay empty. Presumably this should come from configuration - confirm with callers.
    String wikiResDir = "/home/jungerma/Competitions/Evalita/data";
    String linksFile = this.wikiResDir + "/pages.txt.linkTextOccurrence.txt";
    String articleIdFile = this.wikiResDir + "/pagesHeadID.txt";
    String articleIdWikiCatFile = this.wikiResDir + "/fileB.txt";

    /** Loads all resources eagerly; never throws on missing or malformed files. */
    private WikipediaResourceMgr() {
        fillLink2ArticleIDMap(this.linksFile, this.articleIdFile);
        fillartID2WikiCatsMap(this.articleIdWikiCatFile);
    }

    /**
     * Returns the shared instance, creating it (and loading the resource files) on first use.
     *
     * @return the singleton instance, never {@code null}
     */
    public static synchronized WikipediaResourceMgr getInstance() {
        if (instance == null) {
            instance = new WikipediaResourceMgr();
        }
        return instance;
    }

    /**
     * Collects the categories of every article that the given link text may refer to.
     *
     * @param str link text or article title; matched case-insensitively by upper-casing
     * @return the union of the categories of all matching articles (possibly empty), or
     *         {@code null} if {@code str} is {@code null} or no article is known for it
     */
    public HashSet<String> getWikiCats(String str) {
        if (str == null) {
            return null;
        }
        ArrayList<Integer> articleIds = getArtIDsForLink(str.toUpperCase());
        if (articleIds == null || articleIds.isEmpty()) {
            return null;
        }
        HashSet<String> categories = new HashSet<>();
        for (Integer articleId : articleIds) {
            // Look the categories up once per article instead of twice.
            ArrayList<String> articleCategories = getWikiCatsForArtID(articleId.intValue());
            if (articleCategories != null) {
                categories.addAll(articleCategories);
            }
        }
        return categories;
    }

    /** Returns the categories stored for the article id, or {@code null} if there are none. */
    private ArrayList<String> getWikiCatsForArtID(int i) {
        return this.artID2WikiCatsMap.get(Integer.valueOf(i));
    }

    /** Returns the article ids stored for the (already upper-cased) key, or {@code null}. */
    private ArrayList<Integer> getArtIDsForLink(String str) {
        return this.link2ArtIdMap.get(str);
    }

    /**
     * Fills {@link #artID2WikiCatsMap} from a file of {@code id<TAB>category[<TAB>...]} lines.
     *
     * <p>Only the second column is used as the category. When a line has no further tab the
     * remainder of the line is taken as the category (previously such a line raised an
     * exception that silently aborted the whole load).
     *
     * @param str path of the article-id/category file
     */
    private void fillartID2WikiCatsMap(String str) {
        try (BufferedReader reader = openReader(str)) {
            String rawLine;
            while ((rawLine = reader.readLine()) != null) {
                String line = rawLine.trim();
                if (line.length() < 2) {
                    continue;
                }
                int firstTab = line.indexOf('\t');
                Integer articleId = firstTab > -1 ? parseId(line.substring(0, firstTab)) : null;
                if (articleId == null) {
                    // No tab at all, or the id column is not a number: skip just this line.
                    System.out.println("buggy line:  " + rawLine);
                    continue;
                }
                int secondTab = line.indexOf('\t', firstTab + 1);
                String category = secondTab > -1
                        ? line.substring(firstTab + 1, secondTab)
                        : line.substring(firstTab + 1);
                addToMultiMap(this.artID2WikiCatsMap, articleId, category);
            }
        } catch (IOException e) {
            reportLoadFailure(str, e);
        }
    }

    /**
     * Fills {@link #link2ArtIdMap}: first every article title maps to its own id, then every
     * {@code targetTitle|linkText} line adds the target's id under the upper-cased link text.
     * Prints a summary of total, anchor and unresolved links to stdout when done.
     *
     * @param str  path of the link-text file
     * @param str2 path of the article-id file
     */
    private void fillLink2ArticleIDMap(String str, String str2) {
        HashMap<String, Integer> titleToId = readArticleIdFile(str2);
        for (Map.Entry<String, Integer> entry : titleToId.entrySet()) {
            addToMultiMap(this.link2ArtIdMap, entry.getKey(), entry.getValue());
        }
        int unresolvedLinks = 0;
        int anchorLinks = 0;
        int totalLinks = 0;
        try (BufferedReader reader = openReader(str)) {
            String rawLine;
            while ((rawLine = reader.readLine()) != null) {
                String line = rawLine.trim();
                if (line.length() < 2) {
                    continue;
                }
                totalLinks++;
                int separator = line.indexOf('|');
                if (separator < 0) {
                    System.out.println("buggy line:  " + rawLine);
                    continue;
                }
                String linkText = line.substring(separator + 1).toUpperCase();
                String target = line.substring(0, separator).toUpperCase();
                if (target.length() < 2) {
                    continue;
                }
                // The title map is keyed by upper-cased titles, so the upper-cased target is
                // looked up directly.
                Integer articleId = titleToId.get(target);
                if (articleId != null) {
                    addToMultiMap(this.link2ArtIdMap, linkText, articleId);
                } else if (target.indexOf('#') > -1) {
                    // Targets containing '#' are counted as anchor links and not stored.
                    anchorLinks++;
                } else {
                    unresolvedLinks++;
                }
            }
        } catch (IOException e) {
            reportLoadFailure(str, e);
        }
        System.out.println("WikipediaResourceMgr:" + totalLinks + " total links; " + anchorLinks + " are anchorLinks - not used! " + unresolvedLinks + " links point to nowhere or could not be resolved due to ???????");
    }

    /**
     * Reads a file of {@code title<TAB>id} lines.
     *
     * @param str path of the article-id file
     * @return map from upper-cased title to article id; empty if the file cannot be read
     */
    private HashMap<String, Integer> readArticleIdFile(String str) {
        HashMap<String, Integer> titleToId = new HashMap<>();
        try (BufferedReader reader = openReader(str)) {
            String rawLine;
            while ((rawLine = reader.readLine()) != null) {
                String line = rawLine.trim();
                if (line.length() < 2) {
                    continue;
                }
                int tab = line.indexOf('\t');
                Integer articleId = tab > -1 ? parseId(line.substring(tab + 1)) : null;
                if (articleId == null) {
                    // No tab at all, or the id column is not a number: skip just this line.
                    System.out.println("buggy line:  " + rawLine);
                    continue;
                }
                titleToId.put(line.substring(0, tab).toUpperCase(), articleId);
            }
        } catch (IOException e) {
            reportLoadFailure(str, e);
        }
        return titleToId;
    }

    /** Opens the file for line-wise reading with {@link #encoding}; the caller must close it. */
    private BufferedReader openReader(String path) throws IOException {
        FileInputStream in = new FileInputStream(path);
        try {
            return new BufferedReader(new InputStreamReader(in, this.encoding));
        } catch (IOException | RuntimeException e) {
            // Do not leak the file handle when the charset name is rejected.
            in.close();
            throw e;
        }
    }

    /** Parses a decimal article id; returns {@code null} instead of throwing on bad input. */
    private static Integer parseId(String text) {
        try {
            return Integer.valueOf(text.trim());
        } catch (NumberFormatException e) {
            return null;
        }
    }

    /** Appends {@code value} to the list stored under {@code key}, creating the list if needed. */
    private static <K, V> void addToMultiMap(HashMap<K, ArrayList<V>> map, K key, V value) {
        ArrayList<V> values = map.get(key);
        if (values == null) {
            values = new ArrayList<>();
            map.put(key, values);
        }
        values.add(value);
    }

    /** Reports a failed resource load on stderr without interrupting construction. */
    private static void reportLoadFailure(String path, IOException e) {
        System.err.println("WikipediaResourceMgr: could not read " + path);
        e.printStackTrace();
    }
}
