示例#1
0
  /**
   * Central factory method: creates the {@link URLFrontier} implementation selected by
   * {@code type}, calls {@code load()} on it and returns it.
   *
   * <p>Supported values:
   *
   * <ul>
   *   <li>{@code FAST} - a {@code FastFrontier}
   *   <li>{@code BOOST} - a {@code FastFrontierBoost}
   *   <li>{@code SIMPLE} - a {@code SimpleQueueFrontier}, the simplest (and probably fastest)
   *       implementation
   *   <li>{@code MERC_1} - a {@code MercatorCentralized}, the centralized URLFrontier described in
   *       the Mercator paper
   *   <li>{@code MERC_2} - a {@code MercatorDistributed}, an improved URLFrontier close to the one
   *       in the Mercator paper
   *   <li>{@code CORALI} - a {@code CoralliaFrontier}, the Corallia design, a new, highly scalable
   *       approach
   *   <li>{@code POLITE_SIMPLE} - a {@code PoliteSimpleQueue}
   * </ul>
   *
   * <p>Any other value falls back to a {@code SimpleQueueFrontier}.
   *
   * @param type the kind of frontier to build
   * @return the concrete URLFrontier implementation, after its {@code load()} call
   */
  public static URLFrontier get(FrontierType type) {

    // Read before dispatching; only the MERC_1 branch actually consumes this value.
    final int threadCount = Configuration.getInstance().getCrawlerThreadNumber();

    switch (type) {
      case FAST: {
        final FastFrontier fast = new FastFrontier();
        fast.load();
        return fast;
      }
      case BOOST: {
        final FastFrontierBoost boosted = new FastFrontierBoost();
        boosted.load();
        return boosted;
      }
      case MERC_1: {
        /*
         * TODO 10 is the number of priority levels.
         * It is specific to the Mercator frontier, so it probably does not belong in
         * Configuration. Where should it go?
         */
        final MercatorCentralized centralized = new MercatorCentralized(10, threadCount);
        centralized.load();
        return centralized;
      }
      case MERC_2: {
        final MercatorDistributed distributed = new MercatorDistributed();
        distributed.load();
        return distributed;
      }
      case CORALI: {
        final CoralliaFrontier corallia = new CoralliaFrontier();
        corallia.load();
        return corallia;
      }
      case POLITE_SIMPLE: {
        final PoliteSimpleQueue polite = new PoliteSimpleQueue();
        polite.load();
        return polite;
      }
      // SIMPLE and every unlisted value share the same simple queue implementation.
      case SIMPLE:
      default: {
        final SimpleQueueFrontier simple = new SimpleQueueFrontier();
        simple.load();
        return simple;
      }
    }
  }
示例#2
0
 /**
  * Sets the Berkeley DB root of the shared {@code Configuration} instance to {@code db/test}.
  * Annotated with {@code @BeforeClass}, so it is meant to run once before the tests of this
  * class.
  */
 @BeforeClass
 public static void setUpBeforeClass() {
   final Configuration testConfig = Configuration.getInstance();
   testConfig.setBerkeleyDBRoot("db/test");
 }