Ejemplo n.º 1
0
  /**
   * Initializes this component when it is opened.
   *
   * <p>Logs the task placement, wraps the map stored under {@code GUNGNIR_CONFIG} into a
   * {@link GungnirConfig}, registers the entries of {@code metricsMap} when metrics are enabled,
   * prepares the incoming operator, attaches a fresh {@link SpoutDispatcher} to every outgoing
   * partition operator and finally creates the snapshot timer.
   *
   * @param stormConf configuration map; the value under {@code GUNGNIR_CONFIG} is cast to
   *     {@code Map<String, Object>}
   * @param context topology context, kept in {@code topologyContext} and used for metric
   *     registration and task information
   * @param collector collector assigned to each {@link SpoutDispatcher} created here
   */
  @Override
  public void open(
      @SuppressWarnings("rawtypes") Map stormConf,
      TopologyContext context,
      SpoutOutputCollector collector) {
    String componentId = context.getThisComponentId();
    LOG.info(
        "open({}[{}]) TaskId: {}, ThisComponetTasks: {}, ThisWorkerTasks: {}",
        componentId,
        context.getThisTaskIndex(),
        context.getThisTaskId(),
        context.getComponentTasks(context.getThisComponentId()),
        context.getThisWorkerTasks());

    @SuppressWarnings("unchecked")
    Map<String, Object> gungnirSettings = (Map<String, Object>) stormConf.get(GUNGNIR_CONFIG);
    GungnirConfig config = GungnirConfig.wrap(gungnirSettings);

    topologyContext = context;

    boolean metricsEnabled = config.getBoolean(TOPOLOGY_METRICS_ENABLED);
    if (metricsEnabled && metricsMap != null) {
      for (Map.Entry<String, Metrics> metric : metricsMap.entrySet()) {
        // The interval is looked up per metric, exactly once for each registered entry.
        topologyContext.registerMetric(
            metric.getKey(), metric.getValue(), config.getInteger(TOPOLOGY_METRICS_INTERVAL_SECS));
      }
    }

    this.context.setComponent(this);
    incomingOperator.doPrepare(config, this.context);

    for (PartitionOperator operator : outgoingOperators) {
      // Each partition operator gets its own dispatcher bound to the shared collector.
      SpoutDispatcher target = new SpoutDispatcher();
      target.collector = collector;

      Dispatcher current = operator.getDispatcher();
      if (current == null) {
        operator.setDispatcher(target);
        continue;
      }
      if (current instanceof MultiDispatcher) {
        // Only the FilterDispatcher children are rewired; other children are left untouched.
        for (Dispatcher child : ((MultiDispatcher) current).getDispatchers()) {
          if (child instanceof FilterDispatcher) {
            ((FilterDispatcher) child).setDispatcher(target);
          }
        }
        continue;
      }
      if (current instanceof FilterDispatcher) {
        ((FilterDispatcher) current).setDispatcher(target);
      }
    }

    // TODO: parallelization
    // snapshotTimer = new SnapshotTimer(config.getInteger(COMPONENT_SNAPSHOT_QUEUE_SIZE),
    //     config.getInteger(COMPONENT_SNAPSHOT_PARALLELISM));
    String timerName = getName() + "_" + topologyContext.getThisTaskIndex();
    snapshotTimer = new SnapshotTimer(timerName);
  }
Ejemplo n.º 2
0
  /**
   * Stores the collector and, unless a local DRPC id is set, submits {@code Adder} tasks for the
   * configured DRPC servers to a newly created cached thread pool.
   *
   * <p>When this component has fewer tasks than there are DRPC servers, one {@code Adder} is
   * submitted per server. Otherwise a single {@code Adder} is submitted for the server at
   * position {@code taskIndex % servers.size()}.
   *
   * @param conf configuration map; read for {@code Config.DRPC_INVOCATIONS_PORT} and
   *     {@code Config.DRPC_SERVERS}
   * @param context topology context supplying this task's index and the component's task list
   * @param collector collector stored in {@code _collector}
   * @throws RuntimeException if the configured server list is {@code null} or empty
   */
  @Override
  public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
    _collector = collector;
    if (_local_drpc_id != null) {
      // A local DRPC id is set: no background work is started.
      return;
    }

    _backround = Executors.newCachedThreadPool();
    _futures = new LinkedList<Future<Void>>();

    final int taskCount = context.getComponentTasks(context.getThisComponentId()).size();
    final int taskIndex = context.getThisTaskIndex();
    final int invocationsPort = Utils.getInt(conf.get(Config.DRPC_INVOCATIONS_PORT));

    final List<String> drpcServers = (List<String>) conf.get(Config.DRPC_SERVERS);
    if (drpcServers == null || drpcServers.isEmpty()) {
      throw new RuntimeException("No DRPC servers configured for topology");
    }

    if (taskCount >= drpcServers.size()) {
      // Enough tasks to go around: this task handles exactly one server.
      String assignedServer = drpcServers.get(taskIndex % drpcServers.size());
      _futures.add(_backround.submit(new Adder(assignedServer, invocationsPort, conf)));
      return;
    }

    // Fewer tasks than servers: this task handles every server.
    for (String server : drpcServers) {
      _futures.add(_backround.submit(new Adder(server, invocationsPort, conf)));
    }
  }
  /**
   * Stores the collector, builds the user state backed by {@link TransactionalState}, creates the
   * partition connections and creates a {@code PartitionManager} for every partition assigned to
   * this task.
   *
   * <p>ZooKeeper servers and port come from {@code _spoutConfig} when set there; otherwise they
   * are read from {@code conf} under {@code Config.STORM_ZOOKEEPER_SERVERS} and
   * {@code Config.STORM_ZOOKEEPER_PORT}. Partitions are assigned round-robin: this task takes
   * partition numbers starting at its task index and stepping by the component's task count.
   *
   * @param conf configuration map; copied to build the state configuration
   * @param context topology context supplying this task's index and the component's task list
   * @param collector collector stored in {@code _collector}
   */
  @Override
  public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
    _collector = collector;
    Map stateConf = new HashMap(conf);

    List<String> servers = _spoutConfig.zkServers;
    if (servers == null) {
      servers = (List<String>) conf.get(Config.STORM_ZOOKEEPER_SERVERS);
    }

    Integer port = _spoutConfig.zkPort;
    if (port == null) {
      port = ((Number) conf.get(Config.STORM_ZOOKEEPER_PORT)).intValue();
    }

    String root = _spoutConfig.zkRoot;

    stateConf.put(Config.TRANSACTIONAL_ZOOKEEPER_SERVERS, servers);
    stateConf.put(Config.TRANSACTIONAL_ZOOKEEPER_PORT, port);
    stateConf.put(Config.TRANSACTIONAL_ZOOKEEPER_ROOT, root);

    Config componentConf = new Config();
    componentConf.registerSerialization(ZooMeta.class);

    // using TransactionalState like this is a hack
    _state = TransactionalState.newUserState(stateConf, _spoutConfig.id, componentConf);
    _partitions = new KafkaPartitionConnections(_spoutConfig);

    int partitionCount = _spoutConfig.partitionsPerHost * _spoutConfig.hosts.size();
    int taskCount = context.getComponentTasks(context.getThisComponentId()).size();

    // Walk this task's share of the partitions: taskIndex, taskIndex + taskCount, ...
    int partition = context.getThisTaskIndex();
    while (partition < partitionCount) {
      _managedPartitions.add(partition);
      _managers.put(partition, new PartitionManager(partition));
      partition += taskCount;
    }
  }
  /**
   * Prepares this bolt: copies the configuration, registers the fetcher metrics, and loads the
   * protocol factory, URL filters, metadata transfer and the fetch-queue settings.
   *
   * @param stormConf configuration map; copied into a new {@link Config} held in {@code conf}
   * @param context topology context used for the task index and metric registration
   * @param collector collector stored in {@code _collector}
   * @throws RuntimeException if the URL filters cannot be loaded (wraps the {@link IOException})
   */
  @SuppressWarnings({"rawtypes", "unchecked"})
  @Override
  public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
    _collector = collector;

    this.conf = new Config();
    this.conf.putAll(stormConf);
    checkConfiguration();

    this.taskIndex = context.getThisTaskIndex();

    long startMillis = System.currentTimeMillis();
    SimpleDateFormat timestampFormat =
        new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ENGLISH);
    LOG.info("[Fetcher #{}] : starting at {}", taskIndex, timestampFormat.format(startMillis));

    // The metrics below are emitted by Storm every n seconds to a special bolt via a system
    // stream; they can be read by registering a "MetricConsumer" in the topology.
    final int metricsIntervalSecs = 10;

    // "MultiCountMetric" counting the different events seen by this bolt.
    this.eventCounter =
        context.registerMetric("fetcher_counter", new MultiCountMetric(), metricsIntervalSecs);
    this.averagedMetrics =
        context.registerMetric(
            "fetcher_average", new MultiReducedMetric(new MeanReducer()), metricsIntervalSecs);
    this.perSecMetrics =
        context.registerMetric(
            "fetcher_average_persec",
            new MultiReducedMetric(new PerSecondReducer()),
            metricsIntervalSecs);

    protocolFactory = new ProtocolFactory(conf);

    String urlFiltersFile = ConfUtils.getString(conf, "urlfilters.config.file", "urlfilters.json");
    if (urlFiltersFile != null) {
      try {
        urlFilters = new URLFilters(conf, urlFiltersFile);
      } catch (IOException e) {
        LOG.error("Exception caught while loading the URLFilters");
        throw new RuntimeException("Exception caught while loading the URLFilters", e);
      }
    }

    metadataTransfer = MetadataTransfer.getInstance(stormConf);

    allowRedirs =
        ConfUtils.getBoolean(
            stormConf, com.digitalpebble.storm.crawler.Constants.AllowRedirParamName, true);
    sitemapsAutoDiscovery = ConfUtils.getBoolean(stormConf, "sitemap.discovery", false);

    queueMode = ConfUtils.getString(conf, "fetcher.queue.mode", QUEUE_MODE_HOST);
    // Fall back to the host mode when the configured mode is not one of the known ones.
    boolean knownMode =
        queueMode.equals(QUEUE_MODE_IP)
            || queueMode.equals(QUEUE_MODE_DOMAIN)
            || queueMode.equals(QUEUE_MODE_HOST);
    if (!knownMode) {
      LOG.error("Unknown partition mode : {} - forcing to byHost", queueMode);
      queueMode = QUEUE_MODE_HOST;
    }
    LOG.info("Using queue mode : {}", queueMode);

    // Both delays are read from the configuration and stored multiplied by 1000.
    this.crawlDelay = (long) (ConfUtils.getFloat(conf, "fetcher.server.delay", 1.0f) * 1000);
    this.maxCrawlDelay = ConfUtils.getInt(conf, "fetcher.max.crawl.delay", 30) * 1000L;
  }
 /**
  * Remembers the output collector and the index of this task.
  *
  * @param conf configuration map; not read by this method
  * @param context topology context supplying this task's index
  * @param collector collector stored in {@code this.collector}
  */
 @SuppressWarnings("rawtypes")
 @Override
 public void prepare(Map conf, TopologyContext context, OutputCollector collector) {
   this.collector = collector;

   final int thisTaskIndex = context.getThisTaskIndex();
   this.index = thisTaskIndex;
 }