/* Returns List of Wikipedia page-ids of pages the string 'anchor' points to in Wikipedia */ public List<Long> getPages(String anchor) { db.requestStart(); List<Long> PageCollection = new ArrayList<Long>(); BasicDBObject query = new BasicDBObject(); query.put("anchor", anchor); BasicDBObject fields = new BasicDBObject("pages", true).append("_id", false); DBObject obj = table.findOne(query, fields); // System.out.println("num of results = "+curs.count()); if (obj != null) { JSONParser jp = new JSONParser(); JSONArray jarr = null; try { jarr = (JSONArray) jp.parse(obj.get("pages").toString()); } catch (ParseException e) { jarr = new JSONArray(); } // System.out.println("Link Freq = "+o.get("anchPageFreq").toString()); for (int i = 0; i < jarr.size(); i++) { JSONObject objects = (JSONObject) jarr.get(i); PageCollection.add((long) (objects.get("page_id"))); } } db.requestDone(); return PageCollection; } // End getPages()
/* Returns map of Wikipedia page-ids to number of inlinks to those pages. Page ids are pages the string 'anchor' points to in Wikipedia */ public Map<Long, Integer> getPagesMap(String anchor) { db.requestStart(); Map<Long, Integer> PageCollection = new HashMap<Long, Integer>(); BasicDBObject query = new BasicDBObject(); query.put("anchor", anchor); BasicDBObject fields = new BasicDBObject("page_id", true) .append("pages", true) .append("page_freq", true) .append("anchor_freq", true) .append("_id", false); DBObject ans = table.findOne(query, fields); // System.out.println("num of results = "+curs.count()); db.requestDone(); if (ans != null) { JSONParser jp = new JSONParser(); JSONArray jo = null; try { // System.out.println(ans.get("pages")); jo = (JSONArray) jp.parse(ans.get("pages").toString()); } catch (ParseException e) { e.printStackTrace(); } // System.out.println("Link Freq = "+o.get("anchPageFreq").toString()); for (int i = 0; i < jo.size(); i++) { JSONObject object = (JSONObject) jo.get(i); Long pId = (long) (object.get("page_id")); Long pValue0 = (long) object.get("page_freq"); int pValue = pValue0.intValue(); if (PageCollection.containsKey(pId)) { pValue = PageCollection.get(pId) + pValue; } PageCollection.put(pId, pValue); } } return PageCollection; } // End getPagesMap()
/**
 * Inserts a batch of documents into the target collection.
 *
 * <p>An empty batch is ignored. Otherwise the batch size is added to {@code _docCount};
 * when the counter was still zero beforehand, the target collection is resolved first:
 * {@code _targetns} is split on '.' into database name and collection name, a new
 * {@code MongoClient} is opened on {@code _tgtURI}, the collection is stored in
 * {@code _tgt}, and a request is started on that database.
 *
 * <p>NOTE(review): if this is called from several threads, a caller that observes a
 * non-zero counter may reach the insert before the first caller has assigned
 * {@code _tgt} - confirm how callers synchronise.
 *
 * @param docs documents to insert
 * @throws UnknownHostException declared for the {@code MongoClient} construction
 */
public static void docWrite(List<DBObject> docs) throws UnknownHostException {
    final int batchSize = docs.size();
    if (batchSize <= 0) {
        return;
    }

    // The counter is bumped before the check, so only the very first non-empty batch
    // sees the previous value 0 and performs the one-time setup.
    final boolean firstBatch = _docCount.getAndAdd(batchSize) == 0;
    if (firstBatch) {
        String[] nsParts = _targetns.split("\\.");
        DB targetDb = new MongoClient(_tgtURI).getDB(nsParts[0]);
        _tgt = targetDb.getCollection(nsParts[1]);
        targetDb.requestEnsureConnection();
        targetDb.requestStart();
    }

    DBObject[] batch = docs.toArray(new DBObject[0]);
    _tgt.insert(batch);
}
/* Returns number of times 'anchor' occurs in Wikipedia, but is NOT a hyperlink */ public int getTotalFreq(String anchor) { db.requestStart(); int totalFreq = 0; BasicDBObject query = new BasicDBObject(); // create an empty query query.put("anchor", anchor); BasicDBObject fields = new BasicDBObject("total_freq", true).append("_id", false); DBObject obj = table.findOne(query, fields); // System.out.println("num of results = "+curs.count()); if (obj != null) { // System.out.println("Freq = "+o.get("totalFreq").toString()); totalFreq = (int) obj.get("total_freq"); } db.requestDone(); return totalFreq; } // End getTotalFreq()
/* Returns two member integer array. * member 0 = total number of inlinks for the string anchor. * member 1 = number of inlinks to given PageId from the String anchor.*/ public int[] getPageCountInPages(String anchor, long PageId) { db.requestStart(); int[] PageCountResults = new int[2]; ; int pageCount = 0; int totalCount = 0; BasicDBObject query = new BasicDBObject(); query.put("anchor", anchor); BasicDBObject fields = new BasicDBObject("pages", true) .append("anchor_freq", true) .append("total_freq", true) .append("_id", false); DBObject obj = table.findOne(query, fields); // System.out.println("Pages Total = "+curs.count()); db.requestDone(); if (obj != null) { // System.out.println("Obj = "+o.get("pageId").toString()); JSONParser jp = new JSONParser(); JSONArray jarr = null; try { jarr = (JSONArray) jp.parse(obj.get("pages").toString()); } catch (ParseException e) { jarr = new JSONArray(); } // System.out.println("Link Freq = "+o.get("anchPageFreq").toString()); for (int i = 0; i < jarr.size(); i++) { JSONObject jo = (JSONObject) jarr.get(i); if (PageId == (long) jo.get("page_id")) { Long pageCount0 = (long) jo.get("page_freq"); // ++pageCount; pageCount += pageCount0.intValue(); } } totalCount += (int) obj.get("anchor_freq"); } PageCountResults[1] = pageCount; // System.out.println("Pages matching = "+pageCount); PageCountResults[0] = totalCount; return PageCountResults; } // End getPageCountInPages()