public static void viewTagInfo(String inurl, String tag) throws Exception { // pr = new PrintStream(new FileOutputStream("/semplest/lluis/keywordExp/urldata.txt")); pr = System.out; long start = System.currentTimeMillis(); pr.println(inurl + "****************************************************************"); printList(cleanUrlText(TextUtils.HTMLText(inurl, tag))); String urls = TextUtils.HTMLLinkString(inurl, inurl); String[] url = urls.split("\\s+"); Set<String> urlMap = new HashSet<String>(url.length); urlMap.add(inurl); for (String ur : url) { if (!urlMap.contains(ur)) { pr.println(ur + "***************************************************************"); try { printList(cleanUrlText(TextUtils.HTMLText(ur, tag))); } catch (Exception e) { System.out.println("Error with url :" + ur); e.printStackTrace(); logger.error("Problem", e); } urlMap.add(ur); } } pr.println("Time elapsed" + (start - System.currentTimeMillis())); }
public static void sortAndPrintMap(Map<String, Double> map) throws FileNotFoundException { // pr = new PrintStream(new FileOutputStream("/semplest/lluis/keywordExp/wordmap.txt")); pr = System.out; ValueComparator bvc = new ValueComparator(map); TreeMap<String, Double> sorted = new TreeMap<String, Double>(bvc); sorted.putAll(map); for (String key : sorted.keySet()) { pr.println(key + " : " + map.get(key)); } System.out.println("Number of words : " + map.size()); }
public static void recordData(String inurl) throws Exception { // pr = new PrintStream(new FileOutputStream("http://en.wikipedia.org/wiki/HAProxy")); pr = System.out; long start = System.currentTimeMillis(); pr.println(inurl + "****************************************************************"); printList(cleanUrlText(TextUtils.HTMLText(inurl))); String urls = TextUtils.HTMLLinkString(inurl, inurl); String[] url = urls.split("\\s+"); HashSet<String> urlMap = new HashSet<String>(url.length); urlMap.add(inurl); for (String ur : url) { /* * if(!urlMap.contains(ur)){ pr.println(ur+"***************************************************************"); try{ * printList(cleanUrlText(TextUtils.HTMLText(ur))); }catch(Exception e){ System.out.println("Error with url :"+ ur); e.printStackTrace(); } * urlMap.add(ur); } */ } pr.println("Time elapsed" + (start - System.currentTimeMillis())); pr.close(); }
/**
 * Writes each element of the list to the shared {@code pr} stream, one element per
 * line, in iteration order.
 *
 * @param in the lines to print
 */
public static void printList(List<String> in) {
    for (String line : in) {
        pr.println(line);
    }
}