/**
 * Builds a non-transactional Trident topology.
 *
 * @return the topology
 */
private static StormTopology buildSimpleTopology() {
    // 10 is the number of tuples per batch.
    FakeTweetSpout spout = new FakeTweetSpout(10);

    // Create a TridentTopology object; the computation topology is built on top of it.
    TridentTopology topology = new TridentTopology();

    /*
     * newStream() defines a new stream fed by the given spout.
     * .shuffle(): incoming tuples are distributed randomly across the downstream processing nodes.
     * .each(new Fields("text", "country"), new TridentUtility.TweetFilter("#FIFA")):
     *     filters tuples carrying "text" and "country" fields; tuples containing #FIFA pass,
     *     the rest are dropped.
     * .groupBy(new Fields("country")): groups the stream by "country".
     * .aggregate(new Fields("country"), new Count(), new Fields("count")):
     *     counts the tuples in each group; the emitted tuples carry a "count" field.
     * .each(new Fields("country", "count"), new TridentUtility.Print()):
     *     Print is a pass-through filter that logs each tuple. Printing only "count" would yield
     *     a bare number, so "country" is included as well to show which group each count
     *     belongs to.
     * .parallelismHint(2): sets the parallelism hint to 2; Trident applies it to the operations
     *     back to the previous repartitioning operation.
     */
    topology
        .newStream("faketweetspout", spout)
        .shuffle()
        .each(new Fields("text", "country"), new TridentUtility.TweetFilter("#FIFA"))
        .groupBy(new Fields("country"))
        .aggregate(new Fields("country"), new Count(), new Fields("count"))
        .each(new Fields("country", "count"), new TridentUtility.Print())
        .parallelismHint(2);

    return topology.build();
}
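// TridentUtility is not shown above; a minimal sketch of what its TweetFilter and Print
// members might look like (hypothetical implementations, both built on Trident's BaseFilter):
public class TridentUtility {

    // Keeps only tuples whose "text" field (the first input field) contains the keyword.
    public static class TweetFilter extends BaseFilter {
        private final String keyword;

        public TweetFilter(String keyword) {
            this.keyword = keyword;
        }

        @Override
        public boolean isKeep(TridentTuple tuple) {
            return tuple.getString(0).contains(keyword);
        }
    }

    // Pass-through filter that prints every tuple it sees.
    public static class Print extends BaseFilter {
        @Override
        public boolean isKeep(TridentTuple tuple) {
            System.out.println(tuple);
            return true;
        }
    }
}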
public static StormTopology buildTopology(LocalDRPC drpc, TransactionalTridentKafkaSpout spout)
        throws IOException {
    TridentTopology topology = new TridentTopology();
    TridentState count = topology
        .newStream("tweets", spout)
        .each(new Fields("str"), new ParseTweet(), new Fields("status", "content", "user"))
        .project(new Fields("content", "user", "status"))
        .each(new Fields("content"), new OnlyHashtags())
        .each(new Fields("status"), new OnlyGeo())
        .each(new Fields("status", "content"), new ExtractLocation(),
              new Fields("country", "contentName"))
        .groupBy(new Fields("country", "contentName"))
        .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"));
    topology
        .newDRPCStream("location_hashtag_count", drpc)
        .stateQuery(count, new TupleCollectionGet(), new Fields("country", "contentName"))
        .stateQuery(count, new Fields("country", "contentName"), new MapGet(), new Fields("count"))
        .groupBy(new Fields("country"))
        .aggregate(new Fields("contentName", "count"),
                   new FirstN.FirstNSortedAgg(3, "count", true),
                   new Fields("contentName", "count"));
    return topology.build();
}
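// Usage sketch: once the topology and the same LocalDRPC instance are running in a
// LocalCluster, the top-3 hashtags per country can be queried like this. The empty argument
// string is a placeholder, since TupleCollectionGet enumerates all stored keys rather than
// parsing the DRPC arguments.
String result = drpc.execute("location_hashtag_count", "");
System.out.println(result);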
public static StormTopology buildTopology() {
    TridentTopology topology = new TridentTopology();
    DiagnosisEventSpout spout = new DiagnosisEventSpout();
    Stream inputStream = topology.newStream("event", spout);
    inputStream
        // Filter for critical events.
        .each(new Fields("event"), new DiseaseFilter())
        // Locate the closest city.
        .each(new Fields("event"), new CityAssignment(), new Fields("city"))
        // Derive the hour segment.
        .each(new Fields("event", "city"), new HourAssignment(),
              new Fields("hour", "cityDiseaseHour"))
        // Group occurrences in the same city and hour.
        .groupBy(new Fields("cityDiseaseHour"))
        // Count occurrences and persist the results.
        .persistentAggregate(new OutbreakTrendFactory(), new Count(), new Fields("count"))
        .newValuesStream()
        // Detect an outbreak.
        .each(new Fields("cityDiseaseHour", "count"), new OutbreakDetector(), new Fields("alert"))
        // Dispatch the alert.
        .each(new Fields("alert"), new DispatchAlert(), new Fields());
    return topology.build();
}
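// OutbreakDetector is not defined in this snippet. A plausible minimal sketch, assuming it
// emits an alert once the persisted count for a city/disease/hour key crosses a fixed
// threshold (THRESHOLD here is a made-up value):
public class OutbreakDetector extends BaseFunction {
    private static final long THRESHOLD = 10000L;

    @Override
    public void execute(TridentTuple tuple, TridentCollector collector) {
        String key = tuple.getString(0); // cityDiseaseHour
        Long count = tuple.getLong(1);   // running count for that key
        if (count != null && count > THRESHOLD) {
            // Emitting only above the threshold makes this function double as a filter.
            collector.emit(new Values("Outbreak detected for " + key + ": " + count));
        }
    }
}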
@Test
public void testTridentTopology() throws Exception {
    Session session = cassandraCQLUnit.session;
    String[] stationIds = {"station-1", "station-2", "station-3"};
    for (int i = 1; i < 4; i++) {
        session.execute(
            "INSERT INTO weather.station(id, name) VALUES(?, ?)",
            stationIds[i - 1],
            "Foo-Station-" + new Random().nextInt());
    }
    ResultSet rows = cassandraCQLUnit.session.execute("SELECT * FROM weather.station");
    for (Row row : rows) {
        System.out.println("####### row = " + row);
    }
    WeatherBatchSpout weatherBatchSpout = new WeatherBatchSpout(
        new Fields("weather_station_id", "temperature", "event_time"), 3, stationIds);
    TridentTopology topology = new TridentTopology();
    Stream stream = topology.newStream("cassandra-trident-stream", weatherBatchSpout);
    CassandraStateFactory insertValuesStateFactory = getInsertTemperatureStateFactory();
    CassandraStateFactory selectWeatherStationStateFactory = getSelectWeatherStationStateFactory();
    TridentState selectState = topology.newStaticState(selectWeatherStationStateFactory);
    stream = stream.stateQuery(
        selectState, new Fields("weather_station_id"), new CassandraQuery(), new Fields("name"));
    stream = stream.each(new Fields("name"), new PrintFunction(), new Fields("name_x"));
    stream.partitionPersist(
        insertValuesStateFactory,
        new Fields("weather_station_id", "name", "event_time", "temperature"),
        new CassandraStateUpdater(),
        new Fields());
    StormTopology stormTopology = topology.build();
    LocalCluster cluster = new LocalCluster();
    cluster.submitTopology("wordCounter", getConfig(), stormTopology);
    Thread.sleep(30 * 1000);
    rows = cassandraCQLUnit.session.execute("SELECT * FROM weather.temperature");
    Assert.assertTrue(rows.iterator().hasNext()); // basic sanity check
    cluster.killTopology("wordCounter");
    cluster.shutdown();
}
public static void main(String[] args) throws Exception {
    if (args.length < 3) {
        throw new RuntimeException("requires 3 args: toponame, host, streamName [workerCnt]");
    }
    String topoName = args[0];
    String xaphost = args[1];
    String streamName = args[2];
    int workerCnt = 4;
    if (args.length > 3) {
        workerCnt = Integer.parseInt(args[3]);
    }
    log.info(String.format("executing wordcount with %s %s %s", topoName, xaphost, streamName));

    XAPConfig config = new XAPConfig();
    config.setBatchSize(1000);
    config.setStreamName(streamName);
    config.setXapHost(xaphost);
    config.setFields("sentence");
    config.setCollectStats(true);

    Config conf = new Config();
    // conf.setDebug(true);

    XAPTridentSpout spout = new XAPTridentSpout(config);
    TridentTopology topology = new TridentTopology();
    TridentState wordCounts = topology
        .newStream("spout1", spout)
        .each(new Fields("sentence"), new SplitLarge(6), new Fields("word"))
        .groupBy(new Fields("word"))
        .persistentAggregate(
            XAPState2.nonTransactional(
                String.format("jini://*/*/streamspace?locators=%s", xaphost), true),
            new Count(),
            new Fields("count"));

    // Note: the argument check above guarantees args.length >= 3, so this branch is always
    // taken and the LocalCluster fallback below is effectively dead code.
    if (args != null && args.length > 0) {
        conf.setNumWorkers(workerCnt);
        StormSubmitter.submitTopology(topoName, conf, topology.build());
    } else {
        conf.setMaxTaskParallelism(3);
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("word-count", conf, topology.build());
        Thread.sleep(10000);
        cluster.shutdown();
    }
}
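// SplitLarge is not defined here. From the call site, a plausible reading is a splitter that
// keeps only words of at least the given length; a sketch under that assumption (the
// filtering rule is a guess):
public class SplitLarge extends BaseFunction {
    private final int minLength;

    public SplitLarge(int minLength) {
        this.minLength = minLength;
    }

    @Override
    public void execute(TridentTuple tuple, TridentCollector collector) {
        // Emit one tuple per word, skipping words shorter than minLength.
        for (String word : tuple.getString(0).split(" ")) {
            if (word.length() >= minLength) {
                collector.emit(new Values(word));
            }
        }
    }
}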
@Override
protected StormTopology buildTopology(EventHubSpout eventHubSpout) {
    TridentTopology topology = new TridentTopology();
    OpaqueTridentEventHubSpout spout = new OpaqueTridentEventHubSpout(spoutConfig);
    TridentState state = topology
        .newStream("stream-" + spoutConfig.getTopologyName(), spout)
        .parallelismHint(spoutConfig.getPartitionCount())
        .aggregate(new Count(), new Fields("partial-count"))
        .persistentAggregate(
            new MemoryMapState.Factory(),
            new Fields("partial-count"),
            new Sum(),
            new Fields("count"));
    state.newValuesStream().each(new Fields("count"), new LoggingFilter("got count: ", 10000));
    return topology.build();
}
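// LoggingFilter is not shown here. A sketch of a rate-limited, pass-through logging filter,
// assuming the second constructor argument is a minimum interval in milliseconds between
// log lines (that interpretation is an assumption):
public class LoggingFilter extends BaseFilter {
    private final String prefix;
    private final long intervalMs;
    private long lastLogTime = 0;

    public LoggingFilter(String prefix, long intervalMs) {
        this.prefix = prefix;
        this.intervalMs = intervalMs;
    }

    @Override
    public boolean isKeep(TridentTuple tuple) {
        long now = System.currentTimeMillis();
        if (now - lastLogTime >= intervalMs) {
            System.out.println(prefix + tuple.getValue(0));
            lastLogTime = now;
        }
        return true; // never drops tuples; logging is a side effect
    }
}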
private TridentState addTridentState(TridentTopology tridentTopology) {
    return tridentTopology
        .newStream("spout1", createKafkaSpout())
        .parallelismHint(1)
        .each(new Fields("str"), new Split(), new Fields("word"))
        .groupBy(new Fields("word"))
        .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
        .parallelismHint(1);
}
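// For reference, Split (used here and in the DRPC stream below) is the standard
// word-splitting function from the Storm examples: it tokenizes the incoming sentence on
// spaces and emits one tuple per word.
public static class Split extends BaseFunction {
    @Override
    public void execute(TridentTuple tuple, TridentCollector collector) {
        String sentence = tuple.getString(0);
        for (String word : sentence.split(" ")) {
            collector.emit(new Values(word));
        }
    }
}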
private Stream addDRPCStream(TridentTopology tridentTopology, TridentState state, LocalDRPC drpc) {
    return tridentTopology
        .newDRPCStream("words", drpc)
        .each(new Fields("args"), new Split(), new Fields("word"))
        .groupBy(new Fields("word"))
        .stateQuery(state, new Fields("word"), new MapGet(), new Fields("count"))
        .each(new Fields("count"), new FilterNull())
        .project(new Fields("word", "count"));
}
public static StormTopology buildTopology(LocalDRPC drpc) {
    TridentTopology topology = new TridentTopology();
    TridentState urlToTweeters =
        topology.newStaticState(new StaticSingleKeyMapState.Factory(TWEETERS_DB));
    TridentState tweetersToFollowers =
        topology.newStaticState(new StaticSingleKeyMapState.Factory(FOLLOWERS_DB));
    topology
        .newDRPCStream("reach", drpc)
        .stateQuery(urlToTweeters, new Fields("args"), new MapGet(), new Fields("tweeters"))
        .each(new Fields("tweeters"), new ExpandList(), new Fields("tweeter"))
        .shuffle()
        .stateQuery(tweetersToFollowers, new Fields("tweeter"), new MapGet(), new Fields("followers"))
        .each(new Fields("followers"), new ExpandList(), new Fields("follower"))
        .groupBy(new Fields("follower"))
        .aggregate(new One(), new Fields("one"))
        .aggregate(new Fields("one"), new Sum(), new Fields("reach"));
    return topology.build();
}
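// The two helpers above follow the classic Trident "reach" example: ExpandList flattens a
// list-valued field into one tuple per element, and One collapses each group of followers
// to a single 1 so that a follower reached via several tweeters is counted only once.
// Sketches matching the storm-starter versions:
public static class ExpandList extends BaseFunction {
    @Override
    public void execute(TridentTuple tuple, TridentCollector collector) {
        List<?> l = (List<?>) tuple.getValue(0);
        if (l != null) {
            for (Object o : l) {
                collector.emit(new Values(o));
            }
        }
    }
}

public static class One implements CombinerAggregator<Integer> {
    @Override
    public Integer init(TridentTuple tuple) {
        return 1;
    }

    @Override
    public Integer combine(Integer val1, Integer val2) {
        return 1;
    }

    @Override
    public Integer zero() {
        return 1;
    }
}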
protected long benchmark(Options options, int batchSize, long duration, int nbUsers, int nbItems)
        throws InterruptedException {
    TridentTopology topology = new TridentTopology();
    RandomBinaryPreferencesSpout preferencesSpout =
        new RandomBinaryPreferencesSpout(batchSize, nbUsers, nbItems);
    Stream preferenceStream = topology.newStream("preferences", preferencesSpout);

    // Create the collaborative filtering topology.
    TridentCollaborativeFilteringBuilder builder = new TridentCollaborativeFilteringBuilder();
    builder.use(topology).with(options).process(preferenceStream).build();

    // Submit the topology and let it run for the requested duration.
    cluster.submitTopology(TOPOLOGY_NAME, new Config(), topology.build());
    Thread.sleep(duration * 1000);

    long completedBatches = preferencesSpout.getCompletedBatchCount();
    RandomBinaryPreferencesSpout.resetStaticCounts();
    return completedBatches;
}
@SuppressWarnings({"unchecked", "rawtypes"})
private void testCassandraState(TransactionType txType) throws Exception {
    FixedBatchSpout spout = new FixedBatchSpout(
        new Fields("sentence"), 3,
        new Values("the cow jumped over the moon"),
        new Values("the man went to the store and bought some candy"),
        new Values("four score and seven years ago"),
        new Values("how many apples can you eat"));
    spout.setCycle(false);

    TridentTopology topology = new TridentTopology();
    HashMap<String, Object> clientConfig = new HashMap<String, Object>();
    clientConfig.put(StormCassandraConstants.CASSANDRA_HOST, "localhost:9160");
    clientConfig.put(StormCassandraConstants.CASSANDRA_KEYSPACE, KEYSPACE);
    Config config = new Config();
    config.setMaxSpoutPending(25);
    config.put("cassandra.config", clientConfig);

    StateFactory cassandraStateFactory = null;
    Options options = null;
    switch (txType) {
        case TRANSACTIONAL:
            options = new Options<TransactionalValue>();
            options.columnFamily = "transactional";
            cassandraStateFactory = DefaultCassandraState.transactional(options);
            break;
        case OPAQUE:
            options = new Options<OpaqueValue>();
            options.columnFamily = "opaque";
            cassandraStateFactory = DefaultCassandraState.opaque(options);
            break;
        case NON_TRANSACTIONAL:
            options = new Options<Object>();
            options.columnFamily = "nontransactional";
            cassandraStateFactory = DefaultCassandraState.nonTransactional(options);
            break;
    }

    TridentState wordCounts = topology
        .newStream("spout1", spout)
        .each(new Fields("sentence"), new Split(), new Fields("word"))
        .groupBy(new Fields("word"))
        .persistentAggregate(cassandraStateFactory, new Count(), new Fields("count"))
        .parallelismHint(1);

    LocalDRPC client = new LocalDRPC();
    topology
        .newDRPCStream("words", client)
        .each(new Fields("args"), new Split(), new Fields("word"))
        .groupBy(new Fields("word"))
        .stateQuery(wordCounts, new Fields("word"), new MapGet(), new Fields("count"))
        .each(new Fields("count"), new FilterNull())
        .aggregate(new Fields("count"), new Sum(), new Fields("sum"));

    LocalCluster cluster = new LocalCluster();
    cluster.submitTopology("test", config, topology.build());
    Thread.sleep(5000);

    assertEquals("[[5]]", client.execute("words", "cat dog the man")); // 5
    assertEquals("[[0]]", client.execute("words", "cat")); // 0
    assertEquals("[[0]]", client.execute("words", "dog")); // 0
    assertEquals("[[4]]", client.execute("words", "the")); // 4
    assertEquals("[[1]]", client.execute("words", "man")); // 1

    cluster.shutdown();
    client.shutdown();
}
/**
 * Creates a Trident topology that consumes sentences from the Kafka "test" topic using a
 * {@link TransactionalTridentKafkaSpout}, computes the word count, and stores it in a
 * {@link MemoryMapState}. A DRPC stream is then created to query the word counts.
 *
 * @param drpc the local DRPC instance used to serve the "words" query stream
 * @return the built topology
 */
public StormTopology buildConsumerTopology(LocalDRPC drpc) {
    TridentTopology tridentTopology = new TridentTopology();
    addDRPCStream(tridentTopology, addTridentState(tridentTopology), drpc);
    return tridentTopology.build();
}
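// Usage sketch for the consumer topology above, assuming a LocalCluster test setup (the
// topology name "kafka-consumer" and the sleep interval are illustrative):
LocalDRPC drpc = new LocalDRPC();
LocalCluster cluster = new LocalCluster();
cluster.submitTopology("kafka-consumer", new Config(), buildConsumerTopology(drpc));

// Give the topology time to consume a few batches, then query the word counts.
Thread.sleep(10000);
System.out.println(drpc.execute("words", "the quick brown fox"));

cluster.killTopology("kafka-consumer");
cluster.shutdown();
drpc.shutdown();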