@Override
  public String push(final Request entry, CrawlProfile profile, final RobotsTxt robots)
      throws IOException, SpaceExceededException {
    assert entry != null;
    final byte[] urlHash = entry.url().hash();
    synchronized (this) {
      // reject duplicates: the url is already queued somewhere in the index
      if (this.has(urlHash)) return "double occurrence in urlFileIndex";

      // count this url against the per-domain page budget of the profile (if any)
      if (profile != null) {
        final int pageLimit = profile.domMaxPages();
        final boolean limited = pageLimit > 0 && pageLimit != Integer.MAX_VALUE;
        if (limited) {
          profile.domInc(entry.url().getHost());
        }
      }

      // store the request on the stack that matches its crawl depth
      final Index stack = getStack(entry.depth());
      final int sizeBefore = stack.size();
      stack.put(entry.toRow());
      // sanity: the stack must have grown and must now contain the hash
      assert sizeBefore < stack.size()
          : "hash = " + ASCII.String(urlHash) + ", s = " + sizeBefore + ", size = " + stack.size();
      assert stack.has(urlHash) : "hash = " + ASCII.String(urlHash);
    }
    return null;
  }
 @Override
 public int removeAllByProfileHandle(final String profileHandle, final long timeout)
     throws IOException, SpaceExceededException {
   // Removes every queued request that belongs to the given crawl profile.
   // Returns the number of removed entries. A timeout <= 0 or Long.MAX_VALUE
   // means "no deadline"; otherwise scanning stops when the deadline passes
   // (entries collected so far are still removed).
   final long terminate =
       (timeout > 0 && timeout != Long.MAX_VALUE)
           ? System.currentTimeMillis() + timeout
           : Long.MAX_VALUE;
   int count = 0;
   synchronized (this) {
     for (Index depthStack : this.depthStacks.values()) {
       // first collect the hashes, then remove: avoids mutating the index while iterating it
       final HandleSet urlHashes =
           new RowHandleSet(Word.commonHashLength, Base64Order.enhancedCoder, 100);
       final Iterator<Row.Entry> i = depthStack.rows();
       Row.Entry rowEntry;
       Request crawlEntry;
       while (i.hasNext() && (System.currentTimeMillis() < terminate)) {
         rowEntry = i.next();
         crawlEntry = new Request(rowEntry);
         // guard against requests without a profile handle (would NPE otherwise)
         final String entryHandle = crawlEntry.profileHandle();
         if (entryHandle != null && entryHandle.equals(profileHandle)) {
           urlHashes.put(crawlEntry.url().hash());
         }
       }
       for (final byte[] urlhash : urlHashes) {
         depthStack.remove(urlhash);
         count++;
       }
     }
   }
   return count;
 }
  @Override
  public Request pop(boolean delay, CrawlSwitchboard cs, RobotsTxt robots) throws IOException {
    // Returns a crawl entry from the lowest-depth stack and, when delay is set,
    // enforces the minimum per-host delta time (crawl-delay) before returning.
    // Returns null when all stacks are empty.
    long sleeptime = 0;
    Request crawlEntry = null;
    CrawlProfile profileEntry = null;
    synchronized (this) {
      mainloop:
      while (true) {
        Index depthStack = getLowestStack();
        if (depthStack == null) return null; // nothing queued at all
        Row.Entry rowEntry = null;
        while (depthStack.size() > 0) {
          rowEntry = depthStack.removeOne();
          if (rowEntry != null) break;
        }
        if (rowEntry == null) continue mainloop;
        crawlEntry = new Request(rowEntry);

        // check blacklist (again) because the user may have created blacklist entries after the
        // queue has been filled
        if (Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, crawlEntry.url())) {
          if (log.isFine()) log.fine("URL '" + crawlEntry.url() + "' is in blacklist.");
          continue mainloop;
        }

        // at this point we must check if the crawlEntry has relevance because the crawl profile
        // still exists
        // if not: return null. A calling method must handle the null value and try again
        profileEntry = cs.get(UTF8.getBytes(crawlEntry.profileHandle()));
        if (profileEntry == null) {
          if (log.isFine()) log.fine("no profile entry for handle " + crawlEntry.profileHandle());
          continue mainloop;
        }

        // depending on the caching policy we need sleep time to avoid DoS-like situations
        sleeptime = Latency.getDomainSleepTime(robots, profileEntry, crawlEntry.url());
        break;
      }
    }
    if (crawlEntry == null) return null;
    // note: profileEntry is non-null whenever crawlEntry is non-null here; the
    // null checks below are kept as defensive belt-and-braces
    ClientIdentification.Agent agent =
        profileEntry == null
            ? ClientIdentification.yacyInternetCrawlerAgent
            : profileEntry.getAgent();
    long robotsTime = Latency.getRobotsTime(robots, crawlEntry.url(), agent);
    Latency.updateAfterSelection(crawlEntry.url(), profileEntry == null ? 0 : robotsTime);
    if (delay && sleeptime > 0) {
      // force a busy waiting here
      // in best case, this should never happen if the balancer works properly
      // this is only to protection against the worst case, where the crawler could
      // behave in a DoS-manner
      if (log.isInfo())
        log.info(
            "forcing crawl-delay of "
                + sleeptime
                + " milliseconds for "
                + crawlEntry.url().getHost()
                + ": "
                + Latency.waitingRemainingExplain(crawlEntry.url(), robots, agent));
      long loops = sleeptime / 1000;
      long rest = sleeptime % 1000;
      if (loops < 3) {
        rest = rest + 1000 * loops;
        loops = 0;
      }
      Thread.currentThread()
          .setName(
              "Balancer waiting for "
                  + crawlEntry.url().getHost()
                  + ": "
                  + sleeptime
                  + " milliseconds");
      synchronized (this) {
        // must be synchronized here to avoid 'takeover' moves from other threads which then idle
        // the same time which would not be enough
        boolean interrupted = false;
        if (rest > 0) {
          try {
            this.wait(rest);
          } catch (final InterruptedException e) {
            // do not swallow the interrupt: restore the flag so callers can see it,
            // and stop waiting instead of spinning through the remaining loops
            Thread.currentThread().interrupt();
            interrupted = true;
          }
        }
        for (int i = 0; i < loops && !interrupted; i++) {
          if (log.isInfo())
            log.info(
                "waiting for "
                    + crawlEntry.url().getHost()
                    + ": "
                    + (loops - i)
                    + " seconds remaining...");
          try {
            this.wait(1000);
          } catch (final InterruptedException e) {
            Thread.currentThread().interrupt(); // preserve interrupt status
            interrupted = true;
          }
        }
      }
      Latency.updateAfterSelection(crawlEntry.url(), robotsTime);
    }
    return crawlEntry;
  }