예제 #1
0
  /**
   * Closes every database handle, then the class catalog, then the environment.
   *
   * <p>Secondary databases are closed before the primary databases. The catalog and environment
   * closes are placed in {@code finally} blocks so that a failure while closing one handle does
   * not skip the remaining closes (in particular the environment). If more than one close fails,
   * the exception raised by the last failing close is the one that propagates.
   *
   * @throws DatabaseException if closing any of the handles fails
   */
  public void close() throws DatabaseException {
    try {
      // Secondary databases first, then the primary databases.
      supplierByCityDb.close();
      shipmentByPartDb.close();
      shipmentBySupplierDb.close();
      partDb.close();
      supplierDb.close();
      shipmentDb.close();
    } finally {
      try {
        // The catalog is closed after the data databases.
        javaCatalog.close();
      } finally {
        // The environment is always the last handle to be closed.
        env.close();
      }
    }
  }
예제 #2
0
  /**
   * Releases the fixtures after each test.
   *
   * <p>Closes, in order and skipping any that are {@code null}: {@code index1}, {@code index2},
   * {@code store1}, {@code store2}, {@code catalog} and {@code env}. Any exception raised while
   * closing is reported on standard output and otherwise ignored; the first failure ends the
   * close sequence. All fixture fields are then reset to {@code null}.
   */
  @After
  public void tearDown() {

    try {
      // Indices.
      if (index1 != null) {
        index1.close();
      }
      if (index2 != null) {
        index2.close();
      }

      // Stores.
      if (store1 != null) {
        store1.close();
      }
      if (store2 != null) {
        store2.close();
      }

      // Catalog, and finally the environment.
      if (catalog != null) {
        catalog.close();
      }
      if (env != null) {
        env.close();
      }
    } catch (Exception closeFailure) {
      // Best effort only: a failed close must not fail the test itself.
      System.out.println("Ignored exception during tearDown: " + closeFailure);
    } finally {
      /* Drop every reference so that GC can reclaim the fixtures. */
      index1 = null;
      index2 = null;
      indexMap1 = null;
      indexMap2 = null;

      store1 = null;
      store2 = null;
      storeMap1 = null;
      storeMap2 = null;

      factory = null;
      catalog = null;
      testEnv = null;
      env = null;
    }
  }
 /**
  * Closes the database, the class catalog and the environment, in that order.
  * Any handle that is {@code null} is skipped.
  *
  * @throws DatabaseException if closing one of the handles fails
  */
 public void close() throws DatabaseException {
   if (database != null) {
     database.close();
   }
   if (javaCatalog != null) {
     javaCatalog.close();
   }
   if (env != null) {
     env.close();
   }
 }
예제 #4
0
 /**
  * Closes the database, the class catalog and the environment, in that order.
  *
  * <p>Each handle is checked for {@code null} before it is closed, so calling this method when
  * a handle was never assigned skips that handle instead of throwing a
  * {@code NullPointerException}.
  *
  * @throws DatabaseException if closing one of the handles fails
  */
 public void close() throws DatabaseException {
   if (database != null) {
     database.close();
   }
   if (javaCatalog != null) {
     javaCatalog.close();
   }
   if (env != null) {
     env.close();
   }
 }
  /**
   * End-to-end scenario test for {@code AdaptiveRevisitHostQueue}.
   *
   * <p>Creates a fresh transactional environment in the "AR" directory under {@code getTmpDir()},
   * opens a "classes" catalog database in it, and builds a host queue for "bok.hi.is". It then
   * walks the queue through a fixed sequence of {@code add}, {@code peek}, {@code next} and
   * {@code update} calls, asserting the reported state, size, top URI and next-ready time after
   * each step. The queue is closed and reopened part-way through to check that its contents
   * survive, and the second half repeats the busy/ready checks with the last constructor
   * argument (called "valence" in the comments below) set to 2 instead of 1.
   *
   * <p>The steps are order- and time-dependent: the test waits on wall-clock time twice.
   *
   * @throws Exception if any environment, queue or URI operation fails
   */
  public void testHQ() throws Exception {
    // Set up a transactional environment that is created on demand.
    EnvironmentConfig envConfig = new EnvironmentConfig();
    envConfig.setTransactional(true);
    envConfig.setAllowCreate(true);
    // Start from an empty "AR" directory so earlier runs cannot influence this one.
    File envDir = new File(getTmpDir(), "AR");
    if (envDir.exists()) {
      FileUtils.deleteDir(envDir);
    }
    envDir.mkdirs();
    Environment env = new Environment(envDir, envConfig);
    // Open the class catalog database. Create it if it does not
    // already exist.
    DatabaseConfig dbConfig = new DatabaseConfig();
    dbConfig.setAllowCreate(true);
    StoredClassCatalog catalog =
        new StoredClassCatalog(env.openDatabase(null, "classes", dbConfig));
    // Last argument is 1; the comments below refer to it as the initial valence.
    AdaptiveRevisitHostQueue hq = new AdaptiveRevisitHostQueue("bok.hi.is", env, catalog, 1);

    // Make the CrawlUris
    CrawlURI[] curis = {null, null, null, null};

    UURI uuri = UURIFactory.getInstance("http://bok.hi.is/1.html");
    curis[0] = new CrawlURI(uuri);
    curis[0].setVia(null);

    uuri = UURIFactory.getInstance("http://bok.hi.is/2.html");
    curis[1] = new CrawlURI(uuri);
    curis[1].setVia(null);

    uuri = UURIFactory.getInstance("http://bok.hi.is/3.html");
    curis[2] = new CrawlURI(uuri);
    curis[2].setVia(null);

    uuri = UURIFactory.getInstance("http://bok.hi.is/4.html");
    curis[3] = new CrawlURI(uuri);
    curis[3].setVia(null);

    // A newly created queue must report itself as empty in every respect.
    assertTrue(
        "HQ should be empty initially", hq.getState() == AdaptiveRevisitHostQueue.HQSTATE_EMPTY);
    assertEquals("Incorrect nextReadyTime on Empty", Long.MAX_VALUE, hq.getNextReadyTime());
    assertEquals("Initial size of HQ should be 0", 0, hq.getSize());

    assertEquals("Peek should return null when 'ready queue' is empty", null, hq.peek());

    /*
     * Assign a time of next processing to the first three CrawlURIs (now,
     * now + 5 sec. and now + 20 sec.), then add only the first one and
     * ensure that it is the one reported by peek().
     */

    curis[0].putLong(A_TIME_OF_NEXT_PROCESSING, System.currentTimeMillis()); // now
    curis[1].putLong(A_TIME_OF_NEXT_PROCESSING, System.currentTimeMillis() + 5000); // in 5 sec
    curis[2].putLong(A_TIME_OF_NEXT_PROCESSING, System.currentTimeMillis() + 20000); // in 20 sec.

    hq.add(curis[0], false);
    assertEquals("First CrawlURI should be top", curis[0].toString(), hq.peek().toString());
    assertTrue(
        "HQ should no longer be empty", hq.getState() != AdaptiveRevisitHostQueue.HQSTATE_EMPTY);
    assertEquals("Size of HQ should now be 1", 1, hq.getSize());

    /*
     * Invoke next and ensure that the HQ is now busy (initial valence was
     * set to 1). Also check for proper errors for a busy HQ. Such as when
     * trying to reinvoke next().
     *
     */
    CrawlURI curi = hq.next(); // Should return curis[0], the only URI added so far
    assertEquals("next() did not return 'top' URI", curis[0].toString(), curi.toString());
    assertTrue(
        "HQ should now be busy, is " + hq.getStateByName(),
        hq.getState() == AdaptiveRevisitHostQueue.HQSTATE_BUSY);
    try {
      hq.next();
      assertTrue("next() should throw an IllegalStateException if HQ " + "not ready", false);
    } catch (IllegalStateException e) {
      // This is supposed to happen.
    }
    assertEquals("New top URI should be null", null, hq.peek());

    // Adding a second URI while the first is still outstanding makes it the new top.
    hq.add(curis[1], false);
    assertEquals("Second CrawlURI should be top", curis[1].toString(), hq.peek().toString());
    assertEquals("Size of HQ should now be 2", 2, hq.getSize());

    // Return it with next fetch time in the future.
    curi.putLong(
        A_TIME_OF_NEXT_PROCESSING,
        hq.peek().getLong(A_TIME_OF_NEXT_PROCESSING) + 100000); // 100 sec behind current top.
    hq.update(curi, false, 0);
    assertEquals(
        "Second CrawlURI should be still be top", curis[1].toString(), hq.peek().toString());
    assertEquals("Size of HQ should still be 2", 2, hq.getSize());

    // curis[2] was scheduled for now + 20 sec., so it must not displace curis[1].
    hq.add(curis[2], false);
    assertEquals("Second CrawlURI should still be top", curis[1].toString(), hq.peek().toString());
    assertEquals("Size of HQ should now be 3", 3, hq.getSize());

    /*
     * If there are no URIs ready, the queue should snooze, even though no
     * politeness demand has been made.
     * <p>
     * Confirms this and that it wakes up.
     */
    assertTrue(
        "HQ should be snoozed, is " + hq.getStateByName(),
        hq.getState() == AdaptiveRevisitHostQueue.HQSTATE_SNOOZED);
    // Wait past wakeup time
    // NOTE(review): the timeout is computed from wall-clock time; Object.wait treats 0 as
    // "wait indefinitely" and rejects negative values, so this relies on the result being > 0.
    synchronized (this) {
      wait(hq.getNextReadyTime() - System.currentTimeMillis() + 100);
    }
    assertTrue(
        "HQ should now be ready, is " + hq.getStateByName(),
        hq.getState() == AdaptiveRevisitHostQueue.HQSTATE_READY);

    /*
     * Re-adds a URI with a lower ready time which should promote it to the
     * top of the queue. Checks if this happens correctly.
     *
     * Then tests an add override which would demote it back, ensures that
     * this fails as it should (i.e. URIs time of next processing remains
     * unchanged).
     */
    curis[2].putLong(
        A_TIME_OF_NEXT_PROCESSING,
        curis[1].getLong(A_TIME_OF_NEXT_PROCESSING) - 1000); // 1 sec. prior to current top
    hq.add(curis[2], true);
    // NOTE(review): expected/actual are swapped here compared with the other size assertions
    // in this method (which pass the literal first); only the failure message is affected.
    assertEquals("Size of HQ should still be 3", hq.getSize(), 3);
    assertEquals("Third CrawlURI should be now be top", curis[2].toString(), hq.peek().toString());
    curis[2].putLong(
        A_TIME_OF_NEXT_PROCESSING,
        curis[1].getLong(A_TIME_OF_NEXT_PROCESSING) + 10000); // 10 sec. later
    hq.add(curis[2], true);
    // NOTE(review): expected/actual are swapped here as well.
    assertEquals("Size of HQ should still be 3", hq.getSize(), 3);
    assertEquals("Third CrawlURI should still top", curis[2].toString(), hq.peek().toString());

    /*
     * Invoke next and ensure that the HQ is now busy (initial valence was
     * set to 1). Also check for proper errors for a busy HQ. Such as when
     * trying to reinvoke next().
     *
     */
    curi = hq.next(); // Should return curis[2]
    assertEquals("next() did not return 'top' URI", curis[2].toString(), curi.toString());
    assertTrue(
        "HQ should now be busy, is " + hq.getStateByName(),
        hq.getState() == AdaptiveRevisitHostQueue.HQSTATE_BUSY);
    try {
      hq.next();
      assertTrue("next() should throw an IllegalStateException if HQ " + "not ready", false);
    } catch (IllegalStateException e) {
      // This is supposed to happen.
    }
    assertEquals("New top URI", curis[1].toString(), hq.peek().toString());

    /*
     * Add a URI while HQ is busy. Check if this succeeds normally.
     *
     */

    curis[3].putLong(
        A_TIME_OF_NEXT_PROCESSING,
        curis[1].getLong(A_TIME_OF_NEXT_PROCESSING)
            - 1); // 1 msec. ahead of current top (order [2] 3 1 0)
    hq.add(curis[3], false);
    assertEquals("Size of HQ should now be 4", 4, hq.getSize());

    /*
     * Invoke update, first with an invalid URI (not the one issued by
     * next() earlier), this should fail. Then with the correct one, this
     * should succeed. Then finally test update again with an invalid URI
     * (i.e. when the HQ has no outstanding URIs); that should fail too.
     *
     * At each step, proper checks are made of state and that methods give
     * appropriate errors.
     *
     * Updated URI is given low time of next processing to put it 'in front'
     */

    try {
      hq.update(curis[1], false, 0);
      assertTrue("update() should not accept URI", false);
    } catch (IllegalStateException e) {
      // This is supposed to happen
    }

    // We do not change the 'time of next processing' on update
    // so curis[2] should again be at top of queue.
    long timeOfPolitenessWakeUp = System.currentTimeMillis() + 2000;
    hq.update(curi, true, timeOfPolitenessWakeUp); // Wake in 2 sec.
    assertTrue(
        "HQ should be snoozed, is " + hq.getStateByName(),
        hq.getState() == AdaptiveRevisitHostQueue.HQSTATE_SNOOZED);

    try {
      hq.update(curis[2], false, 0);
      assertTrue("update() should not accept URI", false);
    } catch (IllegalStateException e) {
      // This is supposed to happen
    }
    assertEquals(
        "HQs time of next ready should reflect set wait time ",
        timeOfPolitenessWakeUp,
        hq.getNextReadyTime());

    /*
     * Check if the HQ wakes up from its 'snoozing'
     *
     */
    // Wait past wakeup time
    synchronized (this) {
      wait(hq.getNextReadyTime() - System.currentTimeMillis() + 100);
    }
    assertTrue(
        "HQ should now be ready, is " + hq.getStateByName(),
        hq.getState() == AdaptiveRevisitHostQueue.HQSTATE_READY);
    assertEquals(
        "HQs time of next ready should still be when it 'woken' " + "up.",
        timeOfPolitenessWakeUp,
        hq.getNextReadyTime());

    /*
     * Invoke next so that the HQ has a URI being processed. Then
     * close the HQ and reopen it to ensure that this happens normally, i.e.
     * state is recovered properly, including the restoration of the URI
     * being processed, back to the regular queue (where it should be
     * first).
     *
     * On recreating the HQ, set valence to 2.
     */
    curi = hq.next(); // Should return curis[2]
    assertEquals("next() did not return 'top' URI", curis[2].toString(), curi.toString());
    assertTrue(
        "HQ should now be busy, is " + hq.getStateByName(),
        hq.getState() == AdaptiveRevisitHostQueue.HQSTATE_BUSY);
    hq.close();

    // Reopen against the same environment and catalog, this time with valence 2.
    hq = new AdaptiveRevisitHostQueue("bok.hi.is", env, catalog, 2);

    assertEquals("Size of HQ after reopening should now be 4", 4, hq.getSize());
    assertTrue(
        "HQ should be ready on reopen, is " + hq.getStateByName(),
        hq.getState() == AdaptiveRevisitHostQueue.HQSTATE_READY);
    assertEquals(
        "CrawlURI 'in processing' before should be top", curi.toString(), hq.peek().toString());

    /* Check if valence higher than 1 is properly handled.
     *
     * Invoke next(), check if still ready and new top URI.
     */
    curi = hq.next(); // Should return curis[2]
    assertEquals("next() did not return 'top' URI", curis[2].toString(), curi.toString());
    assertTrue(
        "HQ should still be ready, is " + hq.getStateByName(),
        hq.getState() == AdaptiveRevisitHostQueue.HQSTATE_READY);

    /* Invoke next() again, check if now busy.
     */
    curi = hq.next(); // Should return curis[3]
    assertEquals("next() did not return 'top' URI", curis[3].toString(), curi.toString());
    assertTrue(
        "HQ should be busy, is " + hq.getStateByName(),
        hq.getState() == AdaptiveRevisitHostQueue.HQSTATE_BUSY);
    assertEquals("Size of HQ should still be 4", 4, hq.getSize());

    /* Update() second URI issued. Confirm HQ is now ready again. URI is
     * given same time of next processing to put it 'in front'. (no snooze)
     */
    hq.update(curi, false, 0);
    assertTrue(
        "HQ should now be ready, is " + hq.getStateByName(),
        hq.getState() == AdaptiveRevisitHostQueue.HQSTATE_READY);
    assertEquals("'updated' CrawlURI before should be top", curi.toString(), hq.peek().toString());

    /* Update() again, ensure proper state. URI is NOT placed at front of
     * queue and snooze time is given. But the HQ should not enter a
     * snoozed state because the 'other' slot is free.
     */

    hq.update(curis[2], true, System.currentTimeMillis() + 1000000); // 1000 sec. from now
    // NOTE(review): curis[3] is modified here after the update() call and is not passed to
    // the queue again within this method.
    curis[3].putLong(
        A_TIME_OF_NEXT_PROCESSING,
        curis[1].getLong(A_TIME_OF_NEXT_PROCESSING) + 1000); // 1 sec. behind of current top
    assertTrue(
        "HQ should still be ready, is " + hq.getStateByName(),
        hq.getState() == AdaptiveRevisitHostQueue.HQSTATE_READY);
    assertEquals("Top CrawlURI before should be unchanged", curi.toString(), hq.peek().toString());

    // TODO: Test sorting with scheduling directives.

    /*
     * Close the ARHostQueue and the Environment
     *
     * NOTE(review): this cleanup is not in a finally block, so it only runs
     * when every assertion above has passed.
     */
    hq.close();
    catalog.close();
    env.close();
    cleanUpOldFiles("AR");
  }