Exemplo n.º 1
0
 /**
  * Prints the cleaned text found under {@code tag} for {@code inurl} and for
  * every distinct link reported by {@code TextUtils.HTMLLinkString}.
  *
  * <p>Output goes to the shared {@code pr} stream, which this method points at
  * {@code System.out}. A failure while processing one linked page is reported
  * and logged, and processing continues with the next link.
  *
  * @param inurl the page to start from; it is never revisited as a link
  * @param tag the tag name forwarded to {@code TextUtils.HTMLText}
  * @throws Exception if the start page or its link list cannot be processed
  */
 public static void viewTagInfo(String inurl, String tag) throws Exception {
   pr = System.out;
   long start = System.currentTimeMillis();
   pr.println(inurl + "****************************************************************");
   printList(cleanUrlText(TextUtils.HTMLText(inurl, tag)));
   String urls = TextUtils.HTMLLinkString(inurl, inurl);
   String[] url = urls.split("\\s+");
   Set<String> urlMap = new HashSet<String>(url.length);
   urlMap.add(inurl);
   for (String ur : url) {
     // split("\\s+") yields an empty token for leading whitespace or an empty
     // link string; there is nothing to fetch for it.
     if (ur.isEmpty()) {
       continue;
     }
     // add() returns false for an already-seen link, so each URL is handled once.
     if (!urlMap.add(ur)) {
       continue;
     }
     pr.println(ur + "***************************************************************");
     try {
       printList(cleanUrlText(TextUtils.HTMLText(ur, tag)));
     } catch (Exception e) {
       // Best effort: one bad link must not abort the remaining ones.
       System.out.println("Error with url :" + ur);
       e.printStackTrace();
       logger.error("Problem", e);
     }
   }
   // Elapsed time is "now - start"; the reverse order printed a negative value.
   pr.println("Time elapsed" + (System.currentTimeMillis() - start));
 }
Exemplo n.º 2
0
 /**
  * Prints the cleaned text of {@code inurl} followed by the elapsed time.
  *
  * <p>Output goes to the shared {@code pr} stream, which this method points at
  * {@code System.out}. Printing the text of linked pages is currently disabled.
  *
  * @param inurl the page whose text is printed
  * @throws Exception if the page or its link list cannot be processed
  */
 public static void recordData(String inurl) throws Exception {
   pr = System.out;
   long start = System.currentTimeMillis();
   pr.println(inurl + "****************************************************************");
   printList(cleanUrlText(TextUtils.HTMLText(inurl)));
   // The per-link crawl is disabled, so the link list is not used here.
   // NOTE(review): the call is kept because its side effects and failure
   // behavior are not visible from this method - confirm before removing it.
   TextUtils.HTMLLinkString(inurl, inurl);
   // Elapsed time is "now - start"; the reverse order printed a negative value.
   pr.println("Time elapsed" + (System.currentTimeMillis() - start));
   // Never close System.out: that silences standard output for the rest of the
   // JVM. Only a stream other than System.out is closed.
   if (pr == System.out) {
     pr.flush();
   } else {
     pr.close();
   }
 }