Пример #1
0
  /**
   * Command-line entry point: builds a base set of pages for a query, iterates hub/authority
   * scores over it, and writes the scores to a file.
   *
   * <p>Exactly seven arguments are expected, in this order:
   *
   * <ol>
   *   <li>{@code args[0]} - h, the maximum number of lines written to the output file
   *   <li>{@code args[1]} - either {@code -k} or {@code -converge}
   *   <li>{@code args[2]} - the iteration count (for {@code -k}) or the threshold passed to
   *       {@code Page.converged} (for {@code -converge})
   *   <li>{@code args[3]} - the query; it is lower-cased and split on whitespace
   *   <li>{@code args[4]} - net file name, passed to {@code loadPageMap}
   *   <li>{@code args[5]} - inverted index file name, passed to {@code loadMap}
   *   <li>{@code args[6]} - output file name
   * </ol>
   *
   * @param args the command-line arguments described above
   * @throws NumberFormatException if {@code args[0]} or {@code args[2]} is not a valid number
   */
  public static void main(String[] args) {

    // Step 1. parse the command line
    if (args.length != 7) {
      System.out.println("Missing argument(s), see help below");
      commandWrong();
      // Stop even if commandWrong() returns; indexing args below would otherwise fail.
      return;
    }

    int h = Integer.parseInt(args[0]);
    String opt = args[1];

    // Exactly one of k / converge is non-null after parsing; it selects the loop mode in step 5.
    Integer k = null;
    Double converge = null;
    if (opt.equals("-k")) {
      k = Integer.valueOf(args[2]);
    } else if (opt.equals("-converge")) {
      converge = Double.valueOf(args[2]);
    } else {
      System.out.println("-k or -converge wrong");
      commandWrong();
      // Neither mode was selected, so there is nothing meaningful to run.
      return;
    }

    String query = args[3];
    String netFName = args[4];
    String indexFName = args[5];
    String outputFName = args[6];

    // Step 2. load inverted index, and net file
    loadMap(indexFName);
    System.out.println("map loaded from inverted index");

    loadPageMap(netFName);
    System.out.println("page map loaded from net file");

    // Step 3. get seed set by searching the inverted index with the query words.
    // Splitting on runs of whitespace avoids empty "words" when the query has repeated spaces.
    String[] queryWords = query.toLowerCase().trim().split("\\s+");
    List<Integer> seedSet = getSeedSet(queryWords, map);
    System.out.println("seed got, len: " + seedSet.size());

    // Step 4. get base set
    // basePageSet holds page IDs; the matching Page is looked up in page_map when needed.
    HashSet<Integer> basePageSet = new HashSet<Integer>();
    // IDs of every page that links to, or is linked from, a seed page.
    HashSet<Integer> fiftySet = new HashSet<Integer>();
    // Largest page ID seen among seeds and their neighbours; bounds the output scan in step 6.
    int maxID = 0;

    for (Integer pageID : seedSet) {
      Page seedPage = page_map.get(pageID);
      // 4.1 add the seed page itself
      basePageSet.add(seedPage.getPageID());
      if (pageID > maxID) {
        maxID = pageID;
      }
      // 4.2 pages pointing to the seed page
      for (Page pointingToCur : seedPage.getInEdgeList()) {
        fiftySet.add(pointingToCur.getPageID());
        if (pointingToCur.getPageID() > maxID) {
          maxID = pointingToCur.getPageID();
        }
      }
      // 4.3 pages the seed page points to
      for (Page pointedFromCur : seedPage.getOutEdgeList()) {
        fiftySet.add(pointedFromCur.getPageID());
        if (pointedFromCur.getPageID() > maxID) {
          maxID = pointedFromCur.getPageID();
        }
      }
    }

    // Add neighbour pages whose ID lies in [0, 50) to the base set.
    // NOTE(review): the original comment reads "the fifty with smaller pageID", which may have
    // meant the 50 smallest neighbour IDs rather than IDs below 50 - confirm the intended rule.
    for (int id = 0; id < 50; id++) {
      if (fiftySet.contains(id)) {
        basePageSet.add(id);
      }
    }

    System.out.println("Got the base page set, len: " + basePageSet.size());

    // Step 5. start looping
    if (k != null) {
      // 5.A fixed number of iterations, no convergence check
      int iterations = k;
      for (int i = 0; i < iterations; i++) {
        runScoreIteration(basePageSet, null);
      }
    } else {
      // 5.B repeat until every page in the base set reports convergence
      while (!runScoreIteration(basePageSet, converge)) {
        // keep iterating
      }
    }

    // Step 6. write at most h lines, in ascending page ID order
    System.out.println("maxID: " + maxID);
    try {
      BufferedWriter out = new BufferedWriter(new FileWriter(outputFName));
      try {
        int remaining = h;
        // Scan IDs in increasing order; the bound is inclusive so the page with the largest
        // ID is not skipped.
        for (int id = 0; id <= maxID; id++) {
          Integer candidate = Integer.valueOf(id);
          if (!basePageSet.contains(candidate)) {
            continue;
          }
          if (remaining == 0) {
            break;
          }
          remaining--;
          Page page = page_map.get(candidate);

          String line =
              String.format(
                  "%s, %s, %s\n", page.getPageID(), page.getAuthScore(), page.getHubScore());

          out.write(line);
        }
      } finally {
        // Always release the file handle; a failure while closing is reported by the catch below.
        out.close();
      }
    } catch (Exception e) { // Catch exception if any
      System.err.println("Error: " + e.getMessage());
    }
  }

  /**
   * Runs one score-update pass over the base set.
   *
   * <p>For each page, the new authority score is the sum of the hub scores of its in-edge pages
   * and the new hub score is the sum of the authority scores of its out-edge pages. The sums of
   * the squared new scores are then handed to {@code updateNewAuth} / {@code updateNewHub} (the
   * normalization step), after which {@code updateAuth} / {@code updateHub} are called on each
   * page.
   *
   * @param basePageSet IDs of the pages to update; each ID is looked up in {@code page_map}
   * @param converge threshold passed to {@code Page.converged}, or {@code null} to skip the
   *     convergence check entirely
   * @return {@code true} if the check was skipped or no page reported non-convergence;
   *     {@code false} if at least one page has not converged yet
   */
  private static boolean runScoreIteration(HashSet<Integer> basePageSet, Double converge) {
    double sumAuth = 0;
    double sumHub = 0;

    // First pass: compute the new scores from the current ones.
    for (Integer pageID : basePageSet) {
      Page page = page_map.get(pageID);

      double newAuthScore = 0;
      for (Page in : page.getInEdgeList()) {
        newAuthScore += in.getHubScore();
      }
      page.setNewAuthScore(newAuthScore);
      sumAuth += Math.pow(newAuthScore, 2);

      double newHubScore = 0;
      for (Page out : page.getOutEdgeList()) {
        newHubScore += out.getAuthScore();
      }
      page.setNewHubScore(newHubScore);
      sumHub += Math.pow(newHubScore, 2);
    }

    // Second pass: normalize, optionally test convergence, then update the current scores.
    boolean totallyConverged = true;
    for (Integer pageID : basePageSet) {
      Page page = page_map.get(pageID);
      page.updateNewAuth(sumAuth);
      page.updateNewHub(sumHub);

      // The check must run before updateAuth/updateHub, as in the original statement order.
      if (converge != null && !page.converged(converge)) {
        // One page that has not converged means another iteration is needed.
        System.out.println(
            String.format(
                "A: %s / %s \nH: %s / %s",
                page.getAuthScore(),
                page.getNewAuthScore(),
                page.getHubScore(),
                page.getNewHubScore()));
        totallyConverged = false;
      }

      page.updateAuth();
      page.updateHub();
    }
    return totallyConverged;
  }