@Override public void open( @SuppressWarnings("rawtypes") Map stormConf, TopologyContext context, SpoutOutputCollector collector) { LOG.info( "open({}[{}]) TaskId: {}, ThisComponetTasks: {}, ThisWorkerTasks: {}", context.getThisComponentId(), context.getThisTaskIndex(), context.getThisTaskId(), context.getComponentTasks(context.getThisComponentId()), context.getThisWorkerTasks()); @SuppressWarnings("unchecked") GungnirConfig config = GungnirConfig.wrap((Map<String, Object>) stormConf.get(GUNGNIR_CONFIG)); topologyContext = context; if (config.getBoolean(TOPOLOGY_METRICS_ENABLED) && metricsMap != null) { for (Map.Entry<String, Metrics> entry : metricsMap.entrySet()) { topologyContext.registerMetric( entry.getKey(), entry.getValue(), config.getInteger(TOPOLOGY_METRICS_INTERVAL_SECS)); } } this.context.setComponent(this); incomingOperator.doPrepare(config, this.context); for (PartitionOperator partitionOperator : outgoingOperators) { SpoutDispatcher spoutDispatcher = new SpoutDispatcher(); spoutDispatcher.collector = collector; Dispatcher dispatcher = partitionOperator.getDispatcher(); if (dispatcher == null) { partitionOperator.setDispatcher(spoutDispatcher); } else if (dispatcher instanceof MultiDispatcher) { for (Dispatcher d : ((MultiDispatcher) dispatcher).getDispatchers()) { if (d instanceof FilterDispatcher) { ((FilterDispatcher) d).setDispatcher(spoutDispatcher); } } } else if (dispatcher instanceof FilterDispatcher) { ((FilterDispatcher) dispatcher).setDispatcher(spoutDispatcher); } } // TODO: parallelization // snapshotTimer = new SnapshotTimer(config.getInteger(COMPONENT_SNAPSHOT_QUEUE_SIZE), // config.getInteger(COMPONENT_SNAPSHOT_PARALLELISM)); snapshotTimer = new SnapshotTimer(getName() + "_" + topologyContext.getThisTaskIndex()); }
/**
 * Stores the collector and, unless a local DRPC id is set, schedules the connections to
 * the configured DRPC servers on a cached background thread pool.
 *
 * <p>When this component has fewer tasks than there are DRPC servers, an {@code Adder} is
 * submitted for every server. Otherwise a single {@code Adder} is submitted for the server
 * at position {@code taskIndex % serverCount}.
 *
 * @param conf topology configuration; read for {@code Config.DRPC_INVOCATIONS_PORT} and
 *     {@code Config.DRPC_SERVERS}
 * @param context topology context used for the task count and this task's index
 * @param collector collector kept in {@code _collector}
 * @throws RuntimeException if no DRPC servers are configured
 */
@Override
public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
    _collector = collector;
    if (_local_drpc_id != null) {
        // A local DRPC id is set: nothing to schedule.
        return;
    }

    _backround = Executors.newCachedThreadPool();
    _futures = new LinkedList<Future<Void>>();

    int taskCount = context.getComponentTasks(context.getThisComponentId()).size();
    int taskIndex = context.getThisTaskIndex();
    int port = Utils.getInt(conf.get(Config.DRPC_INVOCATIONS_PORT));
    List<String> servers = (List<String>) conf.get(Config.DRPC_SERVERS);
    if (servers == null || servers.isEmpty()) {
        throw new RuntimeException("No DRPC servers configured for topology");
    }

    int serverCount = servers.size();
    if (taskCount >= serverCount) {
        // Enough tasks to go around: this task handles exactly one server.
        String assigned = servers.get(taskIndex % serverCount);
        _futures.add(_backround.submit(new Adder(assigned, port, conf)));
    } else {
        // Fewer tasks than servers: this task connects to all of them.
        for (String server : servers) {
            _futures.add(_backround.submit(new Adder(server, port, conf)));
        }
    }
}
@Override public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) { _collector = collector; Map stateConf = new HashMap(conf); List<String> zkServers = _spoutConfig.zkServers; if (zkServers == null) zkServers = (List<String>) conf.get(Config.STORM_ZOOKEEPER_SERVERS); Integer zkPort = _spoutConfig.zkPort; if (zkPort == null) zkPort = ((Number) conf.get(Config.STORM_ZOOKEEPER_PORT)).intValue(); String zkRoot = _spoutConfig.zkRoot; stateConf.put(Config.TRANSACTIONAL_ZOOKEEPER_SERVERS, zkServers); stateConf.put(Config.TRANSACTIONAL_ZOOKEEPER_PORT, zkPort); stateConf.put(Config.TRANSACTIONAL_ZOOKEEPER_ROOT, zkRoot); Config componentConf = new Config(); componentConf.registerSerialization(ZooMeta.class); // using TransactionalState like this is a hack _state = TransactionalState.newUserState(stateConf, _spoutConfig.id, componentConf); _partitions = new KafkaPartitionConnections(_spoutConfig); int totalPartitions = _spoutConfig.partitionsPerHost * _spoutConfig.hosts.size(); int numTasks = context.getComponentTasks(context.getThisComponentId()).size(); for (int p = context.getThisTaskIndex(); p < totalPartitions; p += numTasks) { _managedPartitions.add(p); _managers.put(p, new PartitionManager(p)); } }
@SuppressWarnings({"rawtypes", "unchecked"}) @Override public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) { _collector = collector; this.conf = new Config(); this.conf.putAll(stormConf); checkConfiguration(); this.taskIndex = context.getThisTaskIndex(); SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ENGLISH); long start = System.currentTimeMillis(); LOG.info("[Fetcher #{}] : starting at {}", taskIndex, sdf.format(start)); // Register a "MultiCountMetric" to count different events in this bolt // Storm will emit the counts every n seconds to a special bolt via a // system stream // The data can be accessed by registering a "MetricConsumer" in the // topology this.eventCounter = context.registerMetric("fetcher_counter", new MultiCountMetric(), 10); this.averagedMetrics = context.registerMetric("fetcher_average", new MultiReducedMetric(new MeanReducer()), 10); this.perSecMetrics = context.registerMetric( "fetcher_average_persec", new MultiReducedMetric(new PerSecondReducer()), 10); protocolFactory = new ProtocolFactory(conf); String urlconfigfile = ConfUtils.getString(conf, "urlfilters.config.file", "urlfilters.json"); if (urlconfigfile != null) try { urlFilters = new URLFilters(conf, urlconfigfile); } catch (IOException e) { LOG.error("Exception caught while loading the URLFilters"); throw new RuntimeException("Exception caught while loading the URLFilters", e); } metadataTransfer = MetadataTransfer.getInstance(stormConf); allowRedirs = ConfUtils.getBoolean( stormConf, com.digitalpebble.storm.crawler.Constants.AllowRedirParamName, true); sitemapsAutoDiscovery = ConfUtils.getBoolean(stormConf, "sitemap.discovery", false); queueMode = ConfUtils.getString(conf, "fetcher.queue.mode", QUEUE_MODE_HOST); // check that the mode is known if (!queueMode.equals(QUEUE_MODE_IP) && !queueMode.equals(QUEUE_MODE_DOMAIN) && !queueMode.equals(QUEUE_MODE_HOST)) { LOG.error("Unknown partition mode : {} - forcing to byHost", 
queueMode); queueMode = QUEUE_MODE_HOST; } LOG.info("Using queue mode : {}", queueMode); this.crawlDelay = (long) (ConfUtils.getFloat(conf, "fetcher.server.delay", 1.0f) * 1000); this.maxCrawlDelay = (long) ConfUtils.getInt(conf, "fetcher.max.crawl.delay", 30) * 1000; }
/**
 * Records this task's index and the output collector for later use.
 *
 * @param conf topology configuration; not read by this method
 * @param context topology context supplying this task's index
 * @param collector collector kept in the {@code collector} field
 */
@SuppressWarnings("rawtypes")
@Override
public void prepare(Map conf, TopologyContext context, OutputCollector collector) {
    int thisTaskIndex = context.getThisTaskIndex();
    this.index = thisTaskIndex;
    this.collector = collector;
}