/**
  * Removes from every depth stack all entries whose profile handle equals the given one.
  *
  * <p>Each depth stack is first scanned and the URL hashes of matching entries are collected;
  * the collected hashes are removed only after the scan, so a stack is never modified while its
  * rows are being iterated. When the deadline passes, scanning stops, but the hashes collected up
  * to that point are still removed. The whole operation runs while holding the monitor of this
  * object.
  *
  * @param profileHandle the profile handle to match against each entry's profile handle
  * @param timeout maximum scan time in milliseconds; a value {@code <= 0}, {@code
  *     Long.MAX_VALUE}, or any value so large that the deadline would overflow disables the limit
  * @return the number of URL hashes for which a removal was issued
  * @throws IOException propagated from the underlying depth stack operations
  * @throws SpaceExceededException if the temporary set of URL hashes cannot take another entry
  */
 @Override
 public int removeAllByProfileHandle(final String profileHandle, final long timeout)
     throws IOException, SpaceExceededException {
   // Compute the deadline without letting "now + timeout" wrap around to a negative value:
   // for a positive timeout the sum is smaller than "now" exactly when it overflowed.
   final long now = System.currentTimeMillis();
   final long deadline = now + timeout;
   final long terminate = (timeout > 0 && deadline >= now) ? deadline : Long.MAX_VALUE;

   int count = 0;
   synchronized (this) {
     for (final Index depthStack : this.depthStacks.values()) {
       // first find the url hashes that shall be deleted from this stack
       final HandleSet urlHashes =
           new RowHandleSet(Word.commonHashLength, Base64Order.enhancedCoder, 100);
       final Iterator<Row.Entry> rows = depthStack.rows();
       while (System.currentTimeMillis() < terminate && rows.hasNext()) {
         final Request crawlEntry = new Request(rows.next());
         if (crawlEntry.profileHandle().equals(profileHandle)) {
           urlHashes.put(crawlEntry.url().hash());
         }
       }
       // then delete them; done after the scan so the iterator above is not invalidated
       for (final byte[] urlhash : urlHashes) {
         depthStack.remove(urlhash);
         count++;
       }
     }
   }
   return count;
 }
Beispiel #2
0
  /**
   * Counts the references stored for a given term. Calling this method too frequently may cause
   * strong IO load.
   *
   * <p>A cached count is returned when one exists. Otherwise the count read from the file-backed
   * array and the size of the container held in RAM are added up, the number of URLs queued for
   * delayed removal is subtracted (never going below zero), and the result is stored in the count
   * cache. A failure while reading the file count is logged and treated as a file count of zero.
   *
   * @param termHash hash of the term whose references are counted
   * @return the number of references for the term, never negative
   */
  @Override
  public int count(final byte[] termHash) {
    final Integer cached = this.countCache.get(termHash);
    if (cached != null) {
      return cached.intValue();
    }

    // part 1: references stored in the file-backed array
    int fileCount = 0;
    try {
      fileCount = this.array.count(termHash);
    } catch (final Throwable e) {
      ConcurrentLog.logException(e);
    }
    assert fileCount >= 0;

    // part 2: references still held in RAM
    final ReferenceContainer<ReferenceType> ramContainer = this.ram.get(termHash, null);
    assert ramContainer == null || ramContainer.size() >= 0;
    int total = fileCount;
    if (ramContainer != null) {
      total += ramContainer.size();
    }

    // part 3: leave out what is already queued for delayed removal
    synchronized (this.removeDelayedURLs) {
      final HandleSet pending = this.removeDelayedURLs.get(termHash);
      if (pending != null) {
        total -= pending.size();
      }
      total = Math.max(total, 0);
    }

    // remember the result; drop the whole cache first when memory is short
    if (MemoryControl.shortStatus()) {
      this.countCache.clear();
    }
    this.countCache.insert(termHash, total);
    return total;
  }
 /**
  * Special key listing for BufferedObjectIndex: returns only the keys of objects that are in the
  * buffer. The intended use case is to iterate over a first batch of elements and then delete all
  * of them from the index. To keep the IO load low, the buffer is topped up from the backend with
  * removeOne(), which is meant to need a minimum of read/write-head operations.
  *
  * <p>The given load value is not the number of removeOne() operations; it is the number of
  * records the buffer shall hold afterwards, so only the missing records are moved (and never
  * more than the backend contains). The load must not exceed the maximum number of buffer
  * entries. To leave room for put()-inserts while the result is being processed, a load of at
  * most half the maximum number of buffer entries is recommended.
  *
  * @param load number of records that shall be in the buffer when the keys are collected
  * @return a set with the keys of the elements in the buffer
  * @throws IOException if {@code load} exceeds the buffer size; may also be propagated from the
  *     backend or buffer operations
  */
 public HandleSet keysFromBuffer(final int load) throws IOException {
   if (load > this.buffersize) {
     throw new IOException("buffer load size exceeded");
   }
   synchronized (this.backend) {
     // move over only what is missing, limited by what the backend can deliver
     final int available = this.backend.size();
     final int lacking = load - this.buffer.size();
     final int transfer = Math.min(available, lacking);
     for (int moved = 0; moved < transfer; moved++) {
       try {
         this.buffer.put(this.backend.removeOne());
       } catch (final SpaceExceededException e) {
         // the buffer cannot take more records: log it and work with what is there
         ConcurrentLog.logException(e);
         break;
       }
     }

     // copy the buffer keys into a handle set of their own
     final HandleSet keys =
         new RowHandleSet(
             this.buffer.row().primaryKeyLength,
             this.buffer.row().objectOrder,
             this.buffer.size());
     for (final Iterator<byte[]> it = this.buffer.keys(); it.hasNext(); ) {
       try {
         keys.put(it.next());
       } catch (final SpaceExceededException e) {
         ConcurrentLog.logException(e);
         break;
       }
     }
     keys.optimize();
     return keys;
   }
 }
Beispiel #4
0
 /**
  * Queues one URL hash of a term for delayed removal.
  *
  * <p>The URL hash is added to the set of pending removals kept for the term in {@code
  * removeDelayedURLs}; a new set is created when the term has none yet. Lookup, insertion and
  * registration happen inside a single critical section, so concurrent callers for the same term
  * cannot overwrite each other's set and the set is never modified while another thread reads it
  * under the same lock.
  *
  * <p>If the pending set cannot take the hash, the reference is removed immediately instead. An
  * {@link IOException} raised by that immediate removal is ignored, because this method's
  * signature offers no way to report it.
  *
  * @param termHash hash of the term the reference belongs to
  * @param urlHashBytes hash of the URL whose reference shall be removed
  */
 @Override
 public void removeDelayed(final byte[] termHash, final byte[] urlHashBytes) {
   synchronized (this.removeDelayedURLs) {
     HandleSet pending = this.removeDelayedURLs.get(termHash);
     if (pending == null) {
       pending = new RowHandleSet(Word.commonHashLength, Word.commonHashOrder, 0);
     }
     try {
       pending.put(urlHashBytes);
       this.removeDelayedURLs.put(termHash, pending);
       return;
     } catch (final SpaceExceededException e) {
       // no room to queue the hash: fall through to the immediate removal below,
       // which is done outside the lock
     }
   }
   try {
     remove(termHash, urlHashBytes);
   } catch (final IOException ignored) {
     // best effort only: the failure cannot be propagated through this signature
   }
 }
Beispiel #5
0
  /**
   * Executes all removals that were queued for delayed processing.
   *
   * <p>First a snapshot of the term hashes that currently have pending removals is taken. Then,
   * for every term in the snapshot, the pending URL set is taken out of {@code removeDelayedURLs}
   * and handed to {@code remove}. Working from a snapshot avoids removing entries from the map
   * while its key set is being iterated. Finally the count cache is cleared.
   *
   * @throws IOException propagated from {@code remove}; in that case the count cache is not
   *     cleared by this call
   */
  @Override
  public void removeDelayed() throws IOException {
    // snapshot of the term hashes that have pending removals
    final HandleSet termHashes =
        new RowHandleSet(Word.commonHashLength, Word.commonHashOrder, 0);
    synchronized (this.removeDelayedURLs) {
      for (final byte[] termHash : this.removeDelayedURLs.keySet()) {
        try {
          termHashes.put(termHash);
        } catch (final SpaceExceededException ignored) {
          // this term does not fit into the snapshot; its entry simply stays in the map
        }
      }
    }

    synchronized (this.removeDelayedURLs) {
      for (final byte[] termHash : termHashes) {
        final HandleSet urlHashes = this.removeDelayedURLs.remove(termHash);
        if (urlHashes == null) {
          continue;
        }
        remove(termHash, urlHashes);
      }
    }
    this.countCache.clear();
  }