@Override public boolean shouldVisit(WebURL url) { // Don't crawl non-HTML pages String href = url.getURL().toLowerCase(); // Turns http://www.ics.uci.edu/SomePage.PHP -> // http://www.ics.uci.edu/somepage.php if (FILTERS.matcher(href).matches()) // filter using file extension return false; // Only crawl within the domain of the seed URL String currentUrlDomain = URLHelper.getDomain(url.getURL()); String seedUrlDomain = URLHelper.getDomain(this.params.getSeedUrl()); if (currentUrlDomain == null || !currentUrlDomain.endsWith(seedUrlDomain)) return false; // Don't crawl the same pages too many times (avoid infinite loops) if (!stats.intendToVisit(url.getURL())) return false; return true; }