public void calculatePageRank(double cVal) {
    try {
      c = cVal;
      linkAnalysis = new LinkAnalysis();
      numDocs = linkAnalysis.getNumDocs();
      resetFactor = (1 - c) * ((1.0) / numDocs);
      zFactor = c * ((1.0) / numDocs);
      int index = 0;
      BufferedReader br = new BufferedReader(new FileReader(citationsFile));
      String s = "";
      while ((s = br.readLine()) != null) {
        String[] words = s.split("->"); // split the
        // src->dest1,dest2,dest3 string
        int src = Integer.parseInt(words[0]);
        docIdIndex.put(src, index);
        // System.out.println(" Src : " + src + "  index :  " + index);
        invertedDocIdIndex.put(index, src);
        int numLinks = linkAnalysis.getLinks(src).length;
        nodeLinks.put(src, numLinks);
        index++;
      }
      br.close();

      long startTIme = System.nanoTime();
      initializePageRank();
      boolean isStable = false;
      double[] tempVal = new double[numDocs];
      int numIteration = 0;
      while (true) {
        numIteration++;
        for (int docId : docIdIndex.keySet()) {
          double sum = 0.0;
          int[] citations = linkAnalysis.getCitations(docId);
          if (citations.length == 0) {
            for (int i : docIdIndex.keySet()) {
              int numLinks = nodeLinks.get(i);
              // int numLinks = linkAnalysis.getLinks(i).length;
              if (numLinks == 0) {
                sum += ((zFactor + resetFactor) * pageRankValues[docIdIndex.get(i)]);
              } else {
                sum += (resetFactor * pageRankValues[docIdIndex.get(i)]);
              }
            }
          } else {
            HashSet<Integer> citationsList = new HashSet<Integer>();
            for (int citation : citations) {
              citationsList.add(citation);
            }

            for (int docNum : docIdIndex.keySet()) {
              int numLinks = nodeLinks.get(docNum);
              if (!citationsList.contains(docNum)) {
                if (numLinks == 0) {
                  sum += ((zFactor + resetFactor) * pageRankValues[docIdIndex.get(docNum)]);
                } else {
                  sum += (resetFactor * pageRankValues[docIdIndex.get(docNum)]);
                }
              } else {
                double val = c * ((1.0) / numLinks) + resetFactor;
                sum += (val * pageRankValues[docIdIndex.get(docNum)]);
              }
            }
          }
          tempVal[docIdIndex.get(docId)] = pageRankValues[docIdIndex.get(docId)];
          pageRankValues[docIdIndex.get(docId)] = sum;
        }
        normalizePageRankValues();
        isStable = verifyStability(tempVal);
        System.out.println("Iteration No : " + numIteration);
        if (isStable) {
          System.out.println("Stability Reached ");
          break;
        }
      }
      divByMaxVal();
      mapValues();
      System.out.println("Time taken to compute Page Rank " + (System.nanoTime() - startTIme));
    } catch (Exception e) {
      e.printStackTrace();
    }
  }
示例#2
0
  public static void main(String[] args) throws Exception {
    // TODO Auto-generated method stub
    while (true) {
      Search_TF_IDF obj = new Search_TF_IDF();
      // Getting the TF_IDF results
      HashMap<Integer, Double> TF_IDF = (HashMap<Integer, Double>) obj.TF_IDF();
      IndexReader r = IndexReader.open(FSDirectory.open(new File("index")));
      Iterator<Entry<Integer, Double>> m = TF_IDF.entrySet().iterator();
      ArrayList<Integer> topk = new ArrayList<Integer>();
      int n = 0;
      // storing the top 10 results in an arraylist
      while (m.hasNext()) {
        n++;
        Entry<Integer, Double> entry = m.next();
        topk.add(entry.getKey());
        if (n == 10) break;
      }
      LinkAnalysis.numDocs = 25054;
      LinkAnalysis l = new LinkAnalysis();
      // Storing the root set in a hashmap
      HashMap<Integer, ArrayList<Integer>> adjMatrix = new HashMap<Integer, ArrayList<Integer>>();
      for (int z : topk) {

        adjMatrix.put(z, new ArrayList<Integer>());
      }
      // computing the base set and storing in the hashmap
      for (int z : topk) {
        int[] links = l.getLinks(z);
        int[] citations = l.getCitations(z);
        for (int x : links) {
          if (adjMatrix.containsKey(x) != true) adjMatrix.put(x, new ArrayList<Integer>());
        }
        for (int x : citations) {
          if (adjMatrix.containsKey(x) != true) adjMatrix.put(x, new ArrayList<Integer>());
        }
      }
      // Computing the adjacency matrix
      Iterator<Entry<Integer, ArrayList<Integer>>> y = adjMatrix.entrySet().iterator();
      while (y.hasNext()) {
        Entry<Integer, ArrayList<Integer>> entry = y.next();
        int[] links = l.getLinks(entry.getKey());
        Iterator<Entry<Integer, ArrayList<Integer>>> s = adjMatrix.entrySet().iterator();
        while (s.hasNext()) {

          Entry<Integer, ArrayList<Integer>> entry1 = s.next();

          int q = 0;
          for (int w : links) {
            if (entry1.getKey() == w) {
              adjMatrix.get(entry.getKey()).add(1);
              q++;
            }
          }
          if (q == 0) adjMatrix.get(entry.getKey()).add(0);
        }
      }
      // Constructing transpose of adjacency matrix
      String timestampAMT =
          new java.text.SimpleDateFormat("MM/dd/yyyy h:mm:ss:SSSSSSS a").format(new Date());
      HashMap<Integer, ArrayList<Integer>> tradjMatrix = new HashMap<Integer, ArrayList<Integer>>();
      Iterator<Entry<Integer, ArrayList<Integer>>> e = adjMatrix.entrySet().iterator();
      while (e.hasNext()) {

        Entry<Integer, ArrayList<Integer>> entry = e.next();
        tradjMatrix.put(entry.getKey(), new ArrayList<Integer>());
      }
      Iterator<Entry<Integer, ArrayList<Integer>>> f = adjMatrix.entrySet().iterator();
      while (f.hasNext()) {
        Entry<Integer, ArrayList<Integer>> entry = f.next();
        ArrayList<Integer> alist = entry.getValue();
        int z = 0;
        Iterator<Entry<Integer, ArrayList<Integer>>> fl = adjMatrix.entrySet().iterator();
        while (fl.hasNext()) {
          Entry<Integer, ArrayList<Integer>> entry1 = fl.next();
          tradjMatrix.get(entry1.getKey()).add(alist.get(z));
          z++;
        }
      }
      // Initializing an arbitrary vectors for authorities and hubs
      int rows = adjMatrix.size();
      double value = 1 / (double) rows;
      double threshold = 0.000000002;
      double[] auth = new double[rows];
      double[] hub = new double[rows];
      for (int a = 0; a < rows; a++) {
        auth[a] = value;
        hub[a] = value;
      }
      double largesterror = 100;
      int iteration = 1;
      while (largesterror > threshold) {
        // declaring temporary auth and hub vectors and initializing them
        double[] tempauth = new double[rows];
        double[] temphub = new double[rows];

        for (int c = 0; c < rows; c++) {
          tempauth[c] = auth[c];
          temphub[c] = hub[c];
        }
        // Multiplying transpose of adjacency matrix and hub
        int count2 = 0;

        Iterator<Entry<Integer, ArrayList<Integer>>> fr = tradjMatrix.entrySet().iterator();
        while (fr.hasNext()) {
          double sum1 = 0;
          Entry<Integer, ArrayList<Integer>> entry1 = fr.next();
          ArrayList<Integer> blist = entry1.getValue();
          for (int count1 = 0; count1 < rows; count1++) {
            sum1 += (blist.get(count1)) * (temphub[count1]);
          }
          auth[count2] = sum1;
          count2++;
        }

        int count3 = 0;
        // Multiplying adjacency matrix and authority
        Iterator<Entry<Integer, ArrayList<Integer>>> fr1 = adjMatrix.entrySet().iterator();
        while (fr1.hasNext()) {
          double sum2 = 0;
          Entry<Integer, ArrayList<Integer>> entry2 = fr1.next();
          ArrayList<Integer> clist = entry2.getValue();
          for (int count4 = 0; count4 < rows; count4++) {
            sum2 += (clist.get(count4)) * (auth[count4]);
          }
          hub[count3] = sum2;
          count3++;
        }

        // Normalizing authority and hub vectors
        String timestampAHN =
            new java.text.SimpleDateFormat("MM/dd/yyyy h:mm:ss:SSSSSSS a").format(new Date());

        double sum3 = 0, sum4 = 0;
        for (int count = 0; count < rows; count++) {
          sum3 += Math.pow(auth[count], 2);
          sum4 += Math.pow(hub[count], 2);
        }
        double sum3sqroot = Math.sqrt(sum3);
        double sum4sqroot = Math.sqrt(sum4);
        for (int count = 0; count < rows; count++) {
          auth[count] = (auth[count] / sum3sqroot);
          hub[count] = (hub[count] / sum4sqroot);
        }

        // Computing largest errors
        double authLerror = 0;
        double hubLerror = 0;
        for (int count = 0; count < rows; count++) {
          double tempdiff = Math.abs(auth[count] - tempauth[count]);

          if (tempdiff > authLerror) authLerror = tempdiff;
        }
        for (int count = 0; count < rows; count++) {
          double tempdiff = Math.abs(hub[count] - temphub[count]);
          if (tempdiff > hubLerror) hubLerror = tempdiff;
        }
        if (hubLerror >= authLerror) largesterror = hubLerror;
        else largesterror = authLerror;
        iteration++;
      }
      // storing the auth and hub vectors in hashmaps
      HashMap<Integer, Double> authlist = new HashMap<Integer, Double>();
      HashMap<Integer, Double> hublist = new HashMap<Integer, Double>();
      int ll = 0;
      Iterator<Entry<Integer, ArrayList<Integer>>> fz = adjMatrix.entrySet().iterator();
      while (fz.hasNext()) {
        Entry<Integer, ArrayList<Integer>> entry = fz.next();
        authlist.put(entry.getKey(), auth[ll]);
        hublist.put(entry.getKey(), hub[ll]);
        ll++;
      }
      // sorting auth and hub vectors
      Map<Integer, Double> sortedauthlist = Search_TF_IDF.sortByValues(authlist);
      Map<Integer, Double> sortedhublist = Search_TF_IDF.sortByValues(hublist);
      System.out.println("Documents in decreasing order of Authority Values:");
      Iterator<Entry<Integer, Double>> fx = sortedauthlist.entrySet().iterator();
      int c = 0;

      while (fx.hasNext()) {
        Entry<Integer, Double> entry = fx.next();
        String d_url =
            r.document(entry.getKey()).getFieldable("path").stringValue().replace("%%", "/");
        System.out.println("[" + entry.getKey() + "] " + d_url);

        c++;
        if (c == 10) break;
      }
      System.out.println("Documents in decreasing order of Hub Values:");
      Iterator<Entry<Integer, Double>> fw = sortedhublist.entrySet().iterator();

      while (fw.hasNext()) {
        c++;
        if (c == 21) break;
        Entry<Integer, Double> entry = fw.next();
        String d_url =
            r.document(entry.getKey()).getFieldable("path").stringValue().replace("%%", "/");
        System.out.println("[" + entry.getKey() + "] " + d_url);
      }
    }
  }