// thread safe method that pulls links from DB for local spidering public synchronized Link getLink() throws SpiderDataException { ArrayList<Link> rc; if (links.size() == 0) links = DBLink.getInstance().getLinksAndDelete(conf.getLinkRetrieveCount()); if (links.isEmpty()) return null; else return links.remove(0); }
// adds an array of links to newlinks hash. wont flush till entire // array is added public synchronized void addLinks(Collection<Link> links) throws SpiderDataException { // iterate through links and add each one thats for a unique domain for (Link l : links) if (!newLinks.containsKey(l.getDomainName())) newLinks.put(l.getDomainName(), l); // check if newLinks queue is full if (newLinks.size() > conf.getNewLinkQueueSize()) { flush(); } }
// thread safe method that returns if there are more links public synchronized boolean isEmpty() { if (links.size() == 0) { try { links = DBLink.getInstance().getLinksAndDelete(conf.getLinkRetrieveCount()); } catch (SpiderDataException e) { Logger.log(1, e.getClazz(), e.getMeth(), e.getMessage()); } // if still empty then there are no more links if (links.size() == 0) return true; } return false; }