コード例 #1
0
  /**
   * Removes from a single queue every CrawlURI whose string form fully matches a regular
   * expression.
   *
   * <p>A cursor is positioned with a key-range search on {@code headKey} and then stepped forward.
   * The walk ends when the cursor stops reporting success, or at the first decoded CrawlURI whose
   * class key is not equal to {@code queue}. Entries whose data is zero-length are skipped without
   * being decoded.
   *
   * <p>{@code headKey} itself is handed to the cursor as the key argument on every step, so callers
   * should expect its contents to change (Berkeley DB cursors write the current key back into the
   * entry they are given).
   *
   * @param match regular expression; compiled once and applied with {@code matches()} to each
   *     CrawlURI's {@code toString()}
   * @param queue class key identifying the queue to purge
   * @param headKey key at which the scan starts
   * @return count of deleted items
   * @throws DatabaseException if a database operation fails
   */
  public long deleteMatchingFromQueue(String match, String queue, DatabaseEntry headKey)
      throws DatabaseException {
    final Pattern uriPattern = Pattern.compile(match);
    final DatabaseEntry data = new DatabaseEntry();
    long removed = 0;
    Cursor scan = null;
    try {
      scan = pendingUrisDB.openCursor(null, null);
      for (OperationStatus status = scan.getSearchKeyRange(headKey, data, null);
          status == OperationStatus.SUCCESS;
          status = scan.getNext(headKey, data, null)) {
        if (data.getData().length == 0) {
          // nothing to decode in this entry; step to the next one
          continue;
        }
        CrawlURI candidate = (CrawlURI) crawlUriBinding.entryToObject(data);
        boolean sameQueue = candidate.getClassKey().equals(queue);
        if (!sameQueue) {
          // rolled into next queue; finished with this queue
          break;
        }
        if (uriPattern.matcher(candidate.toString()).matches()) {
          scan.delete();
          removed++;
        }
      }
    } finally {
      if (scan != null) {
        scan.close();
      }
    }
    return removed;
  }
コード例 #2
0
 /**
  * Builds the database key that positions a CrawlURI within its queue.
  *
  * <p>Key layout:
  *
  * <ol>
  *   <li>the UTF-8 bytes of the class key (usu. host), so entries group by class key;
  *   <li>one zero byte as terminator;
  *   <li>8 bytes produced by {@code ArchiveUtils.longIntoByteArray} from a packed long: the
  *       scheduling directive shifted into the top byte (so 'immediate' and 'soon' items sort
  *       above regular), the precedence (capped at 127) in the next byte, and the low 48 bits of
  *       the ordinal serial number, so earlier-discovered URIs sort before later.
  * </ol>
  *
  * <p>NOTE: Dangers here are: (1) priorities or precedences over 2^7 (signed byte comparison) (2)
  * ordinals over 2^48, which are silently truncated by the 48-bit mask.
  *
  * <p>Package access and static for testing purposes.
  *
  * @param curi the CrawlURI to compute a key for
  * @return a DatabaseEntry key for the CrawlURI
  */
 static DatabaseEntry calculateInsertKey(CrawlURI curi) {
   final byte[] classKeyBytes = curi.getClassKey().getBytes(Charsets.UTF_8);
   final int prefixLength = classKeyBytes.length;

   // prefix + 1 terminator byte + 8 ordering bytes
   final byte[] keyData = new byte[prefixLength + 9];
   System.arraycopy(classKeyBytes, 0, keyData, 0, prefixLength);
   keyData[prefixLength] = 0;

   final long ordinalBits = curi.getOrdinal() & 0x0000FFFFFFFFFFFFL;
   final long directiveBits = ((long) curi.getSchedulingDirective()) << 56;
   final long cappedPrecedence = Math.min(curi.getPrecedence(), 127);
   final long precedenceBits = (cappedPrecedence & 0xFFL) << 48;
   final long packed = precedenceBits | directiveBits | ordinalBits;

   ArchiveUtils.longIntoByteArray(packed, keyData, prefixLength + 1);
   return new DatabaseEntry(keyData);
 }
コード例 #3
0
  /**
   * Lists pending URIs whose string form fully matches {@code pattern}, scanning forward from a
   * marker position and stopping once {@code maxMatches} hits have been collected.
   *
   * <p>The returned marker is the raw database key at which the scan stopped, encoded one byte per
   * character (ISO-8859-1) so that arbitrary key bytes survive the trip through a String. Pass it
   * back unchanged as {@code m} to continue the listing. The start position is located with an
   * exact key search, so a marker that no longer names an existing key yields an empty list and a
   * null marker.
   *
   * @param m marker from a previous call, or null to start with first entry
   * @param maxMatches maximum number of matches to return; values below 1 yield an empty list
   * @param pattern expression each CrawlURI's {@code toString()} must fully match
   * @param verbose if true each match is reported as {@code "[classKey] shortReportLine"},
   *     otherwise as the CrawlURI's {@code toString()}
   * @return composite data with item "list" (a String[] of matches starting from marker position)
   *     and item "marker" (where to resume, or null when the cursor reported no further entry)
   * @throws DatabaseException if a database operation fails
   * @throws IllegalStateException if the composite result cannot be assembled
   */
  public CompositeData getFrom(String m, int maxMatches, Pattern pattern, boolean verbose)
      throws DatabaseException {
    // Keys may hold arbitrary bytes; ISO-8859-1 maps every byte to exactly one char and back,
    // unlike the platform default charset, which can replace undecodable bytes.
    final java.nio.charset.Charset markerCharset = java.nio.charset.Charset.forName("ISO-8859-1");

    int matches = 0;
    // Presize modestly: maxMatches is caller-supplied and may be negative or very large.
    ArrayList<String> results = new ArrayList<String>(Math.max(0, Math.min(maxMatches, 1024)));

    DatabaseEntry key;
    if (m == null) {
      key = getFirstKey();
    } else {
      key = new DatabaseEntry(m.getBytes(markerCharset));
    }

    DatabaseEntry value = new DatabaseEntry();

    Cursor cursor = null;
    OperationStatus result = null;
    try {
      cursor = pendingUrisDB.openCursor(null, null);
      result = cursor.getSearchKey(key, value, null);

      while (matches < maxMatches && result == OperationStatus.SUCCESS) {
        if (value.getData().length > 0) {
          CrawlURI curi = (CrawlURI) crawlUriBinding.entryToObject(value);
          if (pattern.matcher(curi.toString()).matches()) {
            if (verbose) {
              results.add("[" + curi.getClassKey() + "] " + curi.shortReportLine());
            } else {
              results.add(curi.toString());
            }
            matches++;
          }
        }
        // Advance before re-testing the loop condition, so that on exit 'key' names the first
        // entry that has not been examined yet.
        result = cursor.getNext(key, value, null);
      }
    } finally {
      if (cursor != null) {
        cursor.close();
      }
    }

    // A non-SUCCESS status means there is nothing left to resume from.
    String nextMarker = null;
    if (result == OperationStatus.SUCCESS) {
      nextMarker = new String(key.getData(), markerCharset);
    }

    String[] arr = results.toArray(new String[results.size()]);
    String[] itemNames = {"list", "marker"};
    try {
      // CompositeDataSupport rejects a null CompositeType, so describe the two items explicitly.
      javax.management.openmbean.CompositeType resultType =
          new javax.management.openmbean.CompositeType(
              "UriListData",
              "Matching URIs plus the marker at which to resume",
              itemNames,
              new String[] {"matching URIs", "position at which to resume, if any"},
              new javax.management.openmbean.OpenType<?>[] {
                new javax.management.openmbean.ArrayType<String[]>(
                    1, javax.management.openmbean.SimpleType.STRING),
                javax.management.openmbean.SimpleType.STRING
              });
      return new CompositeDataSupport(resultType, itemNames, new Object[] {arr, nextMarker});
    } catch (OpenDataException e) {
      throw new IllegalStateException(e);
    }
  }