private void requestNextLinks(LevelLink levelLink) throws InterruptedException { int currentLevel = levelLink.getLevel() + 1; String ch = ""; for (int i = 0; i < currentLevel; i++) { ch = ch + "--"; } if (currentLevel >= maxDeep) { return; } randomPause(); Elements links = getLinks(levelLink.getLink()); if (links == null) { return; } LOG.log( Level.DEBUG, " " + ch + " " + levelLink.getLink() + " ||| Thread: " + Thread.currentThread().getName() + " ||| level: " + currentLevel + " newLinks=" + links.size()); // get all ref on a page for (Element element : links) { URL url = null; try { url = new URL(element.attr("abs:href")); } catch (MalformedURLException e) { LOG.log(Level.INFO, "Exception: ", e); } String newURI = Utils.toFullForm(url, false); if (newURI == null) { continue; } putLink(new LevelLink(newURI, currentLevel)); } LevelLink nextLevelLink = readNextLink(); if (nextLevelLink != null) { requestNextLinks(nextLevelLink); } }
/**
 * Offers {@code levelLink} to {@code queue} unless its link is already
 * contained in {@code fullLinks}. A {@code null} argument is ignored.
 *
 * <p>The offer waits at most 10 milliseconds and its result is not checked.
 *
 * @param levelLink the link to enqueue, may be {@code null}
 * @throws InterruptedException if interrupted while waiting on the queue
 */
public synchronized void putLink(LevelLink levelLink) throws InterruptedException {
    if (levelLink != null && !fullLinks.contains(levelLink.getLink())) {
        queue.offer(levelLink, 10, TimeUnit.MILLISECONDS);
    }
}
/**
 * Offers the link of {@code resultLink} to {@code resultLinks} and records it
 * in {@code fullLinks}, unless {@code fullLinks} already contains it.
 * A {@code null} argument is ignored, matching {@code putLink}.
 *
 * @param resultLink the link to publish, may be {@code null}
 * @throws InterruptedException if interrupted while waiting on the queue
 */
private synchronized void putResultLink(LevelLink resultLink) throws InterruptedException {
    if (resultLink == null) {
        return;
    }
    String link = resultLink.getLink();
    if (fullLinks.contains(link)) {
        return;
    }
    // NOTE(review): the result of offer is not checked, so the link is added to
    // fullLinks even if resultLinks did not accept it within 10 ms — confirm
    // this best-effort behaviour is intended.
    resultLinks.offer(link, 10, TimeUnit.MILLISECONDS);
    fullLinks.add(link);
}