Пример #1
0
  // Hands out the next link for local spidering. Synchronized on this
  // instance, so only one thread at a time can refill or consume the list.
  //
  // When the local list is empty it is first replaced with whatever
  // DBLink.getLinksAndDelete(...) returns for conf.getLinkRetrieveCount().
  // Returns the first link of the list (removing it), or null when the list
  // is still empty after the refill attempt.
  // Throws SpiderDataException if the refill from DBLink fails.
  public synchronized Link getLink() throws SpiderDataException {
    // Refill only when nothing is buffered locally.
    if (links.isEmpty()) {
      links = DBLink.getInstance().getLinksAndDelete(conf.getLinkRetrieveCount());
    }

    // null signals "no link available" to the caller.
    return links.isEmpty() ? null : links.remove(0);
  }
Пример #2
0
  // Adds the given links to newLinks, keyed by domain name. A link is stored
  // only if newLinks has no entry for its domain yet, so the first link seen
  // for a domain is the one that is kept. The queue-size check (and flush)
  // happens once, after the whole collection has been processed.
  public synchronized void addLinks(Collection<Link> links) throws SpiderDataException {
    for (Link candidate : links) {
      // Domain already queued: skip this link.
      if (newLinks.containsKey(candidate.getDomainName())) {
        continue;
      }
      newLinks.put(candidate.getDomainName(), candidate);
    }

    // Flush once the queue has grown past the configured size.
    if (newLinks.size() > conf.getNewLinkQueueSize()) {
      flush();
    }
  }
Пример #3
0
  // Reports whether there are no more links to hand out. Synchronized on this
  // instance. Note that this is not a pure query: when the local list is
  // empty it first tries to refill it via DBLink.getLinksAndDelete(...).
  public synchronized boolean isEmpty() {
    // Links are still buffered locally, so no refill is needed.
    if (links.size() != 0) {
      return false;
    }

    try {
      links = DBLink.getInstance().getLinksAndDelete(conf.getLinkRetrieveCount());
    } catch (SpiderDataException refillFailure) {
      // A failed refill is logged and not rethrown; the list is left as it was.
      Logger.log(1, refillFailure.getClazz(), refillFailure.getMeth(), refillFailure.getMessage());
    }

    // Still empty after the refill attempt means there are no more links.
    return links.size() == 0;
  }