Beispiel #1
0
 /**
  * Supplies the target page URLs; intended as an override of {@code getTargetPageURLs}.
  *
  * <p>The result contains a single hard-coded InfoQ news article address, parsed with
  * {@code URL.valueOf}.
  *
  * @return a newly created, mutable set holding the one target URL
  */
 public Set<URL> getTargetPageURLs() {
   final String articleAddress =
       "http://www.infoq.com/news/2012/12/twemproxy;jsessionid=1652D82C3359CBAB67DA00B26BE7784B";
   Set<URL> targets = new HashSet<URL>();
   targets.add(URL.valueOf(articleAddress));
   return targets;
 }
Beispiel #2
0
 /**
  * Overrides the filter hook to list URLs that should be removed as unnecessary.
  *
  * <p>The result contains a single hard-coded InfoQ news page address, parsed with
  * {@code URL.valueOf}.
  *
  * @return a newly created, mutable collection holding the one URL to filter
  */
 @Override
 public Collection<URL> getUrlsToFilter() {
   // The literal must not carry surrounding whitespace: a space is not a legal URI
   // character, and a stray trailing blank would keep this entry from matching the real page.
   final String unwantedPage =
       "http://www.infoq.com/news/2012/11/Panel-WinRT-Answers;jsessionid=91AB81A159E85692E6F1199644E2053C";
   Set<URL> filterSet = new HashSet<URL>();
   filterSet.add(URL.valueOf(unwantedPage));
   return filterSet;
 }
Beispiel #3
0
 /**
  * Builds the paging (list page) URLs; intended as an override of {@code getListPageURLs}.
  *
  * <p>Each URL is the InfoQ {@code newsidx} address followed by an offset. The offsets run
  * from 10 to 100 in steps of 10. Every generated address is also printed to standard output
  * before it is parsed with {@code URL.valueOf}.
  *
  * @return a newly created, mutable set holding the generated paging URLs
  * @see AbstractWebCrawler#getListPageURLs()
  */
 public Set<URL> getListPageURLs() {
   final String pagingPrefix = "http://www.infoq.com/infoq.action?newsidx=";
   final int step = 10;
   final int lastOffset = step * 10;
   Set<URL> pages = new HashSet<URL>();
   // Walk the offsets directly (10, 20, ..., 100) instead of multiplying a page counter.
   for (int offset = step; offset <= lastOffset; offset += step) {
     String pagingUrl = pagingPrefix + offset;
     System.out.println(pagingUrl);
     pages.add(URL.valueOf(pagingUrl));
   }
   return pages;
 }