/* Returns List of Wikipedia page-ids of pages the string 'anchor' points to in Wikipedia */
 /**
  * Looks up the document whose {@code anchor} field equals the given text and collects the
  * {@code page_id} of every entry in its {@code pages} array.
  *
  * <p>The {@code requestStart()}/{@code requestDone()} pair is always closed, even when the query
  * throws. A missing document, a missing {@code pages} field, or a {@code pages} value that
  * cannot be parsed as JSON all yield an empty list.
  *
  * @param anchor anchor text to look up
  * @return page ids in the order they appear in the stored {@code pages} array; never
  *     {@code null}
  */
 public List<Long> getPages(String anchor) {
   List<Long> pageIds = new ArrayList<Long>();
   BasicDBObject query = new BasicDBObject("anchor", anchor);
   BasicDBObject fields = new BasicDBObject("pages", true).append("_id", false);

   DBObject doc;
   db.requestStart();
   try {
     doc = table.findOne(query, fields);
   } finally {
     // Always pair requestStart() with requestDone(), even if the query fails.
     db.requestDone();
   }

   Object rawPages = (doc == null) ? null : doc.get("pages");
   if (rawPages == null) {
     return pageIds;
   }

   JSONArray pages;
   try {
     pages = (JSONArray) new JSONParser().parse(rawPages.toString());
   } catch (ParseException e) {
     // Best effort: unreadable page data is treated as "no pages".
     return pageIds;
   }

   for (int i = 0; i < pages.size(); i++) {
     JSONObject entry = (JSONObject) pages.get(i);
     // Go through Number so an id parsed as a non-Long numeric type still converts.
     Number pageId = (Number) entry.get("page_id");
     pageIds.add(pageId.longValue());
   }
   return pageIds;
 } // End getPages()
 /* Returns map of Wikipedia page-ids to number of inlinks to those pages. Page ids are pages the string 'anchor' points to in Wikipedia */
 /**
  * Looks up the document whose {@code anchor} field equals the given text and sums the
  * {@code page_freq} of the entries in its {@code pages} array, keyed by {@code page_id}.
  *
  * <p>If the same {@code page_id} appears in several entries, their frequencies are added
  * together. A missing document or a missing {@code pages} field yields an empty map. A
  * {@code pages} value that cannot be parsed as JSON is reported on stderr and also yields an
  * empty map, instead of failing with a {@code NullPointerException}.
  *
  * @param anchor anchor text to look up
  * @return map from page id to summed {@code page_freq}; never {@code null}
  */
 public Map<Long, Integer> getPagesMap(String anchor) {
   Map<Long, Integer> freqByPage = new HashMap<Long, Integer>();
   BasicDBObject query = new BasicDBObject("anchor", anchor);
   BasicDBObject fields =
       new BasicDBObject("page_id", true)
           .append("pages", true)
           .append("page_freq", true)
           .append("anchor_freq", true)
           .append("_id", false);

   DBObject doc;
   db.requestStart();
   try {
     doc = table.findOne(query, fields);
   } finally {
     // Always pair requestStart() with requestDone(), even if the query fails.
     db.requestDone();
   }

   Object rawPages = (doc == null) ? null : doc.get("pages");
   if (rawPages == null) {
     return freqByPage;
   }

   JSONArray pages;
   try {
     pages = (JSONArray) new JSONParser().parse(rawPages.toString());
   } catch (ParseException e) {
     // Keep the existing diagnostic, but stop here: there is nothing to iterate over.
     e.printStackTrace();
     return freqByPage;
   }

   for (int i = 0; i < pages.size(); i++) {
     JSONObject entry = (JSONObject) pages.get(i);
     // Go through Number so values parsed as a non-Long numeric type still convert.
     Long pageId = ((Number) entry.get("page_id")).longValue();
     int freq = ((Number) entry.get("page_freq")).intValue();
     Integer soFar = freqByPage.get(pageId);
     freqByPage.put(pageId, soFar == null ? freq : soFar + freq);
   }
   return freqByPage;
 } // End getPagesMap()
 // Example #3
 // 0
  /**
   * Inserts a batch of documents into the target collection, opening the connection on first use.
   *
   * <p>The first non-empty batch (detected by {@code _docCount} being zero before this batch is
   * added) resolves {@code _targetns}, read as {@code "database.collection"}, creates a
   * {@code MongoClient} for {@code _tgtURI}, stores the collection in {@code _tgt} and starts a
   * request on the database. Every non-empty batch adds its size to {@code _docCount}.
   *
   * <p>NOTE(review): nothing here makes a concurrent caller wait until {@code _tgt} has been
   * assigned by the initialising call; confirm how callers are threaded.
   *
   * @param docs documents to insert; {@code null} or empty batches are ignored
   * @throws UnknownHostException declared for the {@code MongoClient} construction
   */
  public static void docWrite(List<DBObject> docs) throws UnknownHostException {
    if (docs == null || docs.isEmpty()) {
      return;
    }

    if (_docCount.getAndAdd(docs.size()) == 0) {
      // Split on the first dot only: everything after it is the collection name, which may
      // itself contain dots (e.g. "mydb.fs.files" -> database "mydb", collection "fs.files").
      String[] params = _targetns.split("\\.", 2);
      DB db = new MongoClient(_tgtURI).getDB(params[0]);
      _tgt = db.getCollection(params[1]);

      db.requestEnsureConnection();
      db.requestStart();
    }

    _tgt.insert(docs.toArray(new DBObject[0]));
  }
 /* Returns number of times 'anchor' occurs in Wikipedia, but is NOT a hyperlink */
 /**
  * Reads the {@code total_freq} field of the document whose {@code anchor} field equals the
  * given text.
  *
  * <p>The {@code requestStart()}/{@code requestDone()} pair is always closed, even when the query
  * throws. The stored value is converted through {@link Number}, so it does not have to be
  * stored as an {@code Integer}.
  *
  * @param anchor anchor text to look up
  * @return the stored {@code total_freq} as an {@code int}, or 0 when no document matches or
  *     the field is absent
  */
 public int getTotalFreq(String anchor) {
   BasicDBObject query = new BasicDBObject("anchor", anchor);
   BasicDBObject fields = new BasicDBObject("total_freq", true).append("_id", false);

   DBObject doc;
   db.requestStart();
   try {
     doc = table.findOne(query, fields);
   } finally {
     // Always pair requestStart() with requestDone(), even if the query fails.
     db.requestDone();
   }

   Object stored = (doc == null) ? null : doc.get("total_freq");
   if (stored == null) {
     return 0;
   }
   // A direct (int) cast would throw ClassCastException for a Long or Double value.
   return ((Number) stored).intValue();
 } // End getTotalFreq()
 /* Returns two member integer array.
  * member 0 = total number of inlinks for the string anchor.
  * member 1 = number of inlinks to given PageId from the String anchor.*/
 /**
  * Looks up the document whose {@code anchor} field equals the given text and reports two
  * counts from it.
  *
  * <p>Index 0 of the result holds the document's {@code anchor_freq}. Index 1 holds the sum of
  * {@code page_freq} over the entries of its {@code pages} array whose {@code page_id} equals
  * {@code PageId}. Both are 0 when no document matches; each is 0 when its source field is
  * absent, and index 1 is 0 when {@code pages} cannot be parsed as JSON.
  *
  * <p>The {@code requestStart()}/{@code requestDone()} pair is always closed, even when the query
  * throws.
  *
  * @param anchor anchor text to look up
  * @param PageId page id whose entries should be counted
  * @return a two-element array: {@code [anchor_freq, summed page_freq for PageId]}
  */
 public int[] getPageCountInPages(String anchor, long PageId) {
   int pageCount = 0;
   int totalCount = 0;

   BasicDBObject query = new BasicDBObject("anchor", anchor);
   BasicDBObject fields =
       new BasicDBObject("pages", true)
           .append("anchor_freq", true)
           .append("total_freq", true)
           .append("_id", false);

   DBObject doc;
   db.requestStart();
   try {
     doc = table.findOne(query, fields);
   } finally {
     // Always pair requestStart() with requestDone(), even if the query fails.
     db.requestDone();
   }

   if (doc != null) {
     Object rawPages = doc.get("pages");
     if (rawPages != null) {
       JSONArray pages;
       try {
         pages = (JSONArray) new JSONParser().parse(rawPages.toString());
       } catch (ParseException e) {
         // Best effort: unreadable page data contributes nothing to the per-page count.
         pages = new JSONArray();
       }
       for (int i = 0; i < pages.size(); i++) {
         JSONObject entry = (JSONObject) pages.get(i);
         // Go through Number so values parsed as a non-Long numeric type still convert.
         if (PageId == ((Number) entry.get("page_id")).longValue()) {
           pageCount += ((Number) entry.get("page_freq")).intValue();
         }
       }
     }

     Object anchorFreq = doc.get("anchor_freq");
     if (anchorFreq != null) {
       // A direct (int) cast would throw ClassCastException for a Long or Double value.
       totalCount = ((Number) anchorFreq).intValue();
     }
   }

   return new int[] {totalCount, pageCount};
 } // End getPageCountInPages()