@Override
public void populateDAG(DAG dag, Configuration conf) {
  // Consume from Kafka, starting at the earliest available offset.
  KafkaSinglePortInputOperator kafkaInput =
      dag.addOperator("kafkaIn", new KafkaSinglePortInputOperator());
  kafkaInput.setInitialOffset(AbstractKafkaInputOperator.InitialOffset.EARLIEST.name());

  // Write every consumed record out as a line through the file sink.
  LineOutputOperator fileOutput = dag.addOperator("fileOut", new LineOutputOperator());

  dag.addStream("data", kafkaInput.outputPort, fileOutput.input);
}
/**
 * Wires up the Naive Bayes test pipeline: input vectorizer -> classifier -> console,
 * with both streams pinned to the configured locality.
 *
 * @param dag the DAG under construction
 * @param configuration platform configuration (not read here)
 */
@Override
public void populateDAG(DAG dag, Configuration configuration) {
  // DAG.addOperator is generic and already returns the concrete operator type,
  // so the explicit downcasts previously used here were redundant.
  NBTestInputOperator inp = dag.addOperator("vectorize", new NBTestInputOperator());
  ClassifierOperator classifier = dag.addOperator("classifier", new ClassifierOperator());
  ConsoleOutputOperator out = dag.addOperator("console", new ConsoleOutputOperator());

  dag.addStream("vectorize_classifier", inp.data, classifier.input).setLocality(this.locality);
  dag.addStream("classifier_console", classifier.output, out.input).setLocality(this.locality);
}
@Override
public void populateDAG(DAG dag, Configuration conf) {
  // Random CSV row source feeding the HBase sink.
  HBaseRowStringGenerator rowGenerator = dag.addOperator("rand", new HBaseRowStringGenerator());

  // CSV-mapping put operator targeting a local HBase/ZooKeeper instance.
  HBaseCsvMappingPutOperator hbaseSink =
      dag.addOperator("HBaseoper", new HBaseCsvMappingPutOperator());
  hbaseSink.getStore().setTableName("table1");
  hbaseSink.getStore().setZookeeperQuorum("127.0.0.1");
  hbaseSink.getStore().setZookeeperClientPort(2181);
  hbaseSink.setMappingString("colfam0.street,colfam0.city,colfam0.state,row");

  dag.addStream("hbasestream", rowGenerator.outputPort, hbaseSink.input).setLocality(locality);
}
/** * Test AbstractKafkaSinglePortInputOperator (i.e. an input adapter for Kafka, aka consumer). This * module receives data from an outside test generator through Kafka message bus and feed that * data into Malhar streaming platform. * * <p>[Generate message and send that to Kafka message bus] ==> [Receive that message through * Kafka input adapter(i.e. consumer) and send using emitTuples() interface on output port during * onMessage call] * * @throws Exception */ public void testKafkaInputOperator( int sleepTime, final int totalCount, KafkaConsumer consumer, boolean isValid) throws Exception { // initial the latch for this test latch = new CountDownLatch(1); // Start producer KafkaTestProducer p = new KafkaTestProducer(TEST_TOPIC); p.setSendCount(totalCount); new Thread(p).start(); // Create DAG for testing. LocalMode lma = LocalMode.newInstance(); DAG dag = lma.getDAG(); // Create KafkaSinglePortStringInputOperator KafkaSinglePortStringInputOperator node = dag.addOperator("Kafka message consumer", KafkaSinglePortStringInputOperator.class); consumer.setTopic(TEST_TOPIC); if (isValid) { Set<String> brokerSet = new HashSet<String>(); brokerSet.add("localhost:9092"); consumer.setBrokerSet(brokerSet); } node.setConsumer(consumer); // Create Test tuple collector CollectorModule<String> collector = dag.addOperator("TestMessageCollector", new CollectorModule<String>()); // Connect ports dag.addStream("Kafka message", node.outputPort, collector.inputPort) .setLocality(Locality.CONTAINER_LOCAL); // Create local cluster final LocalMode.Controller lc = lma.getController(); lc.setHeartbeatMonitoringEnabled(false); lc.runAsync(); // Wait 30s for consumer finish consuming all the messages Assert.assertTrue("TIMEOUT: 30s ", latch.await(30000, TimeUnit.MILLISECONDS)); // Check results Assert.assertEquals("Collections size", 1, collections.size()); Assert.assertEquals("Tuple count", totalCount, collections.get(collector.inputPort.id).size()); logger.debug( String.format( "Number 
of emitted tuples: %d", collections.get(collector.inputPort.id).size())); p.close(); lc.shutdown(); }
@Override public void populateDAG(DAG dag, Configuration conf) { // Sample DAG with 2 operators // Replace this code with the DAG you want to build RandomNumberGenerator randomGenerator = dag.addOperator("randomGenerator", RandomNumberGenerator.class); randomGenerator.setNumTuples(500); TupleReceiver tupleReceiver = dag.addOperator("console", new TupleReceiver()); dag.addStream("randomData", randomGenerator.out, tupleReceiver.input); }
@Override public void populateDAG(DAG dag, Configuration conf) { int maxValue = 1000; RandomEventGenerator rand = dag.addOperator("rand", new RandomEventGenerator()); rand.setMinvalue(0); rand.setMaxvalue(maxValue); rand.setTuplesBlast(200); CouchBaseOutputOperator couchbaseOutput = dag.addOperator("couchbaseOutput", new CouchBaseOutputOperator()); // couchbaseOutput.getStore().setBucket("default"); // couchbaseOutput.getStore().setPassword(""); dag.addStream("ss", rand.integer_data, couchbaseOutput.input).setLocality(locality); }
@Override public void populateDAG(DAG dag, Configuration conf) { // Sample DAG with 2 operators // Replace this code with the DAG you want to build SeedEventGenerator seedGen = dag.addOperator("seedGen", SeedEventGenerator.class); seedGen.setSeedstart(1); seedGen.setSeedend(10); seedGen.addKeyData("x", 0, 10); seedGen.addKeyData("y", 0, 100); ConsoleOutputOperator cons = dag.addOperator("console", new ConsoleOutputOperator()); cons.setStringFormat("hello: %s"); dag.addStream("seeddata", seedGen.val_list, cons.input).setLocality(Locality.CONTAINER_LOCAL); }
@Override
@SuppressWarnings("unchecked")
public void populateDAG(DAG dag, Configuration conf) {
  // Source emits two random words per window.
  RandomWordGenerator wordSource = new RandomWordGenerator();
  wordSource.setTuplesPerWindow(2);
  dag.addOperator("random", wordSource);

  // Configure the (test-fixture) file writer under the test directory,
  // capping file length only when a cap was requested.
  if (maxLength != null) {
    fsWriter.setMaxLength(maxLength);
  }
  fsWriter.setFilePath(testDir.getPath());
  dag.addOperator("fswriter", fsWriter);

  dag.addStream("fswriterstream", wordSource.output, fsWriter.input);
}
@Override public void populateDAG(DAG dag, Configuration conf) { // Create ActiveMQStringSinglePortOutputOperator SpillableTestInputOperator input = new SpillableTestInputOperator(); input.batchSize = 100; input.sleepBetweenBatch = 0; input = dag.addOperator("input", input); SpillableTestOperator testOperator = new SpillableTestOperator(); testOperator.store = createStore(conf); testOperator.shutdownCount = -1; testOperator = dag.addOperator("test", testOperator); // Connect ports dag.addStream("stream", input.output, testOperator.input) .setLocality(DAG.Locality.CONTAINER_LOCAL); }
@Override
public void populateDAG(DAG dag, Configuration conf) {
  final int maxValue = 30000;

  // Uniform random integers in [0, maxValue].
  RandomEventGenerator randomSource = dag.addOperator("rand", new RandomEventGenerator());
  randomSource.setMinvalue(0);
  randomSource.setMaxvalue(maxValue);

  // Pair consecutive values into {x, y} maps, round-robin over the two keys.
  RoundRobinHashMap<String, Object> pairer =
      dag.addOperator("rrhm", new RoundRobinHashMap<String, Object>());
  pairer.setKeys(new String[] {"x", "y"});

  // JavaScript Monte-Carlo step: count points inside the quarter circle of radius maxValue.
  JavaScriptOperator piEstimator = dag.addOperator("picalc", new JavaScriptOperator());
  piEstimator.setPassThru(false);
  piEstimator.put("i", 0);
  piEstimator.put("count", 0);
  piEstimator.addSetupScript(
      "function pi() { if (x*x+y*y <= "
          + maxValue * maxValue
          + ") { i++; } count++; return i / count * 4; }");
  piEstimator.setInvoke("pi");

  ConsoleOutputOperator consoleOut = dag.addOperator("console", new ConsoleOutputOperator());

  dag.addStream("rand_rrhm", randomSource.integer_data, pairer.data);
  dag.addStream("rrhm_calc", pairer.map, piEstimator.inBindings);
  dag.addStream("rand_console", piEstimator.result, consoleOut.input);
}
/** * Test KafkaOutputOperator (i.e. an output adapter for Kafka, aka producer). This module sends * data into an ActiveMQ message bus. * * <p>[Generate tuple] ==> [send tuple through Kafka output adapter(i.e. producer) into Kafka * message bus] ==> [receive data in outside Kaka listener (i.e consumer)] * * @throws Exception */ @Test @SuppressWarnings({"SleepWhileInLoop", "empty-statement"}) public void testKafkaOutputOperator() throws Exception { // Setup a message listener to receive the message KafkaConsumer listener = new KafkaConsumer("topic1"); new Thread(listener).start(); // Malhar module to send message // Create DAG for testing. LocalMode lma = LocalMode.newInstance(); DAG dag = lma.getDAG(); // Create ActiveMQStringSinglePortOutputOperator StringGeneratorInputOperator generator = dag.addOperator("TestStringGenerator", StringGeneratorInputOperator.class); KafkaStringSinglePortOutputOperator node = dag.addOperator("Kafka message producer", KafkaStringSinglePortOutputOperator.class); // Set configuration parameters for Kafka node.setTopic("topic1"); // Connect ports dag.addStream("Kafka message", generator.outputPort, node.inputPort) .setLocality(Locality.CONTAINER_LOCAL); // Create local cluster final LocalMode.Controller lc = lma.getController(); lc.runAsync(); Thread.sleep(2000); lc.shutdown(); // Check values send vs received Assert.assertEquals("Number of emitted tuples", tupleCount, listener.holdingBuffer.size()); logger.debug(String.format("Number of emitted tuples: %d", listener.holdingBuffer.size())); Assert.assertEquals( "First tuple", "testString 1", listener.getMessage(listener.holdingBuffer.peek())); listener.close(); }
@Override public void populateDAG(DAG dag, Configuration conf) { // Setup the operator to get the data from twitter sample stream injected into the system. TwitterSampleInput twitterFeed = new TwitterSampleInput(); twitterFeed = dag.addOperator("TweetSampler", twitterFeed); // Setup a node to count the unique Hashtags within a window. UniqueCounter<String> uniqueCounter = dag.addOperator("UniqueHashtagCounter", new UniqueCounter<String>()); // Get the aggregated Hashtag counts and count them over last 5 mins. WindowedTopCounter<String> topCounts = dag.addOperator("TopCounter", new WindowedTopCounter<String>()); topCounts.setTopCount(10); topCounts.setSlidingWindowWidth(600); topCounts.setDagWindowWidth(1); dag.addStream("TwittedHashtags", twitterFeed.hashtag, uniqueCounter.data).setLocality(locality); // Count unique Hashtags dag.addStream("UniqueHashtagCounts", uniqueCounter.count, topCounts.input); TwitterTopCounterApplication.consoleOutput( dag, "topHashtags", topCounts.output, SNAPSHOT_SCHEMA, "hashtag"); }
@SuppressWarnings("unchecked")
@Override
public void populateDAG(DAG dag, Configuration conf) {
  // Pipeline: key generator -> per-window unique value counter -> unifying appender -> verifier.
  KeyGen keyGenerator = dag.addOperator("KeyGenerator", new KeyGen());
  UniqueValueCount<Integer> valueCounter =
      dag.addOperator("ValueCounter", new UniqueValueCount<Integer>());
  IntegerUniqueValueCountAppender valueAppender =
      dag.addOperator("Unique", new IntegerUniqueValueCountAppender());
  VerifyTable verifier = dag.addOperator("VerifyTable", new VerifyTable());

  // Raw port handles so the streams below can be wired without generic-type friction.
  @SuppressWarnings("rawtypes")
  DefaultOutputPort counterOut = valueCounter.output;
  @SuppressWarnings("rawtypes")
  DefaultOutputPort appenderOut = valueAppender.output;

  dag.addStream("DataIn", keyGenerator.output, valueCounter.input);
  dag.addStream("UnifyWindows", counterOut, valueAppender.input);
  dag.addStream("ResultsOut", appenderOut, verifier.input);
}
@Override
public void populateDAG(DAG dag, Configuration conf) {
  dag.setAttribute(DAG.APPLICATION_NAME, "HDHTBenchmarkApplication");

  // High-throughput source: bursts of 1000 tuples, no sleep,
  // one application window per streaming window.
  Generator generator = dag.addOperator("Generator", new Generator());
  generator.setTupleBlast(1000);
  generator.setSleepms(0);
  dag.getOperatorMeta("Generator")
      .getAttributes()
      .put(Context.OperatorContext.APPLICATION_WINDOW_COUNT, 1);

  // HDHT store writing under WALBenchMarkDir, with bucket I/O stats aggregation.
  HDSOperator store = dag.addOperator("Store", new HDSOperator());
  TFileImpl.DTFileImpl fileImpl = new TFileImpl.DTFileImpl();
  fileImpl.setBasePath("WALBenchMarkDir");
  store.setFileStore(fileImpl);
  dag.getOperatorMeta("Store")
      .getAttributes()
      .put(Context.OperatorContext.COUNTERS_AGGREGATOR, new HDHTWriter.BucketIOStatAggregator());

  dag.addStream("s1", generator.out, store.input).setLocality(DAG.Locality.THREAD_LOCAL);
}
@Override public void populateDAG(DAG dag, Configuration conf) { String appName = conf.get("appName"); if (appName == null) { appName = "VisualDataDemo"; } dag.setAttribute(DAG.APPLICATION_NAME, appName); int maxValue = 30000; RandomEventGenerator rand = dag.addOperator("random", new RandomEventGenerator()); rand.setMinvalue(0); rand.setMaxvalue(maxValue); DemoValueGenerator demo = dag.addOperator("chartValue", new DemoValueGenerator()); demo.setRandomIncrement(5); demo.setRandomIncrement2(20); PiCalculateOperator calc = dag.addOperator("picalc", new PiCalculateOperator()); calc.setBase(maxValue * maxValue); dag.addStream("rand_calc", rand.integer_data, calc.input).setLocality(locality); WidgetOutputOperator woo = dag.addOperator("widget output operator", new WidgetOutputOperator()); WidgetOutputOperator wooa = dag.addOperator("widget output operator2", new WidgetOutputOperator()); // wire to simple input gadget dag.addStream( "ws_pi_data", calc.output, woo.simpleInput.setTopic("app." + appName + ".piValue")) .setLocality(locality); // wire to time series chart gadget dag.addStream( "ws_chart_data", demo.simpleOutput, woo.timeSeriesInput.setTopic("app." + appName + ".chartValue").setMin(0).setMax(100)) .setLocality(locality); // wire to another time series chart gadget dag.addStream( "ws_chart_data2", demo.simpleOutput2, wooa.timeSeriesInput.setTopic("app." + appName + ".chartValue2")) .setLocality(locality); // wire to percentage chart gadget dag.addStream( "ws_percentage_data", demo.percentageOutput, woo.percentageInput.setTopic("app." + appName + ".percentage")) .setLocality(locality); // wire to top N chart gadget dag.addStream( "ws_topn_data", demo.top10Output, woo.topNInput.setN(10).setTopic("app." + appName + ".topn")) .setLocality(locality); // wire to progress bar chart gadget dag.addStream( "ws_progress_data", demo.progressOutput, wooa.percentageInput.setTopic("app." 
+ appName + ".progress")) .setLocality(locality); // wire to piechart gadget dag.addStream( "ws_piechart_data", demo.pieChartOutput, wooa.pieChartInput.setTopic("app." + appName + ".piechart")) .setLocality(locality); }
@Override public void populateDAG(DAG dag, Configuration conf) { String lPhoneRange = conf.get(PHONE_RANGE_PROP, null); if (lPhoneRange != null) { String[] tokens = lPhoneRange.split("-"); if (tokens.length != 2) { throw new IllegalArgumentException("Invalid range: " + lPhoneRange); } this.phoneRange = Range.between(Integer.parseInt(tokens[0]), Integer.parseInt(tokens[1])); } LOG.debug("Phone range {}", this.phoneRange); RandomEventGenerator phones = dag.addOperator("Receiver", RandomEventGenerator.class); phones.setMinvalue(this.phoneRange.getMinimum()); phones.setMaxvalue(this.phoneRange.getMaximum()); PhoneMovementGenerator movementGen = dag.addOperator("LocationFinder", PhoneMovementGenerator.class); dag.setAttribute( movementGen, OperatorContext.COUNTERS_AGGREGATOR, new BasicCounters.LongAggregator<MutableLong>()); StatelessThroughputBasedPartitioner<PhoneMovementGenerator> partitioner = new StatelessThroughputBasedPartitioner<PhoneMovementGenerator>(); partitioner.setCooldownMillis(conf.getLong(COOL_DOWN_MILLIS, 45000)); partitioner.setMaximumEvents(conf.getLong(MAX_THROUGHPUT, 30000)); partitioner.setMinimumEvents(conf.getLong(MIN_THROUGHPUT, 10000)); dag.setAttribute( movementGen, OperatorContext.STATS_LISTENERS, Arrays.asList(new StatsListener[] {partitioner})); dag.setAttribute(movementGen, OperatorContext.PARTITIONER, partitioner); // generate seed numbers Random random = new Random(); int maxPhone = phoneRange.getMaximum() - phoneRange.getMinimum(); int phonesToDisplay = conf.getInt(TOTAL_SEED_NOS, 10); for (int i = phonesToDisplay; i-- > 0; ) { int phoneNo = phoneRange.getMinimum() + random.nextInt(maxPhone + 1); LOG.info("seed no: " + phoneNo); movementGen.phoneRegister.add(phoneNo); } // done generating data LOG.info("Finished generating seed data."); String gatewayAddress = dag.getValue(DAG.GATEWAY_CONNECT_ADDRESS); URI uri = URI.create("ws://" + gatewayAddress + "/pubsub"); PubSubWebSocketOutputOperator<Object> wsOut = 
dag.addOperator("LocationResults", new PubSubWebSocketOutputOperator<Object>()); wsOut.setUri(uri); PubSubWebSocketInputOperator<Map<String, String>> wsIn = dag.addOperator("QueryLocation", new PubSubWebSocketInputOperator<Map<String, String>>()); wsIn.setUri(uri); // default partitioning: first connected stream to movementGen will be partitioned dag.addStream("Phone-Data", phones.integer_data, movementGen.data); dag.addStream("Results", movementGen.locationQueryResult, wsOut.input); dag.addStream("Query", wsIn.outputPort, movementGen.phoneQuery); }
/**
 * Builds the Naive Bayes classification DAG. Exactly one of three modes must be enabled via
 * configuration flags: k-fold cross validation (isKFold), training only (isTrain), or
 * evaluation only (isEvaluate); otherwise the method logs an error and returns without
 * populating the DAG.
 *
 * @param dag the DAG under construction
 * @param conf configuration carrying the "dt.ml.classification.nb.*" properties
 */
public void populateDAG(DAG dag, Configuration conf) {
  /*
   * Config for k-fold cross validation of the Naive Bayes Model
   */
  // Numeric parameters are required: parseInt will throw if the property is absent/malformed.
  numFolds = Integer.parseInt(conf.get("dt.ml.classification.nb.numFolds"));
  numAttributes = Integer.parseInt(conf.get("dt.ml.classification.nb.numAttributes"));
  numClasses = Integer.parseInt(conf.get("dt.ml.classification.nb.numClasses"));
  inputDataFilePath = conf.get("dt.ml.classification.nb.inputDataFilePath");
  modelDir = conf.get("dt.ml.classification.nb.modelDir");
  modelFileName = conf.get("dt.ml.classification.nb.modelFileName");
  resultDir = conf.get("dt.ml.classification.nb.resultDir");
  resultFileName = conf.get("dt.ml.classification.nb.resultFileName");
  // Mode flags all default to false; at least one must be set.
  isKFold = conf.getBoolean("dt.ml.classification.nb.isKFold", false);
  isTrain = conf.getBoolean("dt.ml.classification.nb.isTrain", false);
  isEvaluate = conf.getBoolean("dt.ml.classification.nb.isEvaluate", false);
  System.out.println("NumFolds = " + numFolds);
  System.out.println("isKFold = " + isKFold);
  System.out.println("isTrain = " + isTrain);
  System.out.println("isEvaluate = " + isEvaluate);
  System.out.println("Input file path = " + inputDataFilePath);
  // Validate the mode selection before building anything.
  if (!isKFold && !isTrain && !isEvaluate) {
    System.out.println("Invalid Params. K-Fold evaluation, Training or Testing must be selected");
    return;
  }
  if (isKFold && numFolds <= 1) {
    System.out.println("Invalid Params. Number of folds should be > 1");
    return;
  }
  // Bundle all settings into a single config object shared by every operator below.
  NBConfig nbc =
      new NBConfig(
          isKFold, // K-Fold Validation
          isTrain, // Only Train
          isEvaluate, // Only Evaluate
          numFolds, // Number of folds
          numAttributes, // Number of attributes
          numClasses, // Number of classes
          inputDataFilePath, // Input Data File
          modelDir, // Model Dir
          modelFileName, // Model File Name Base
          resultDir, // Result Dir
          resultFileName // Result file name
          );
  // Example of a hard-coded configuration, kept for reference:
  // NBConfig nbc = new NBConfig(
  //     true, // K-Fold Validation
  //     true, // Only Train
  //     false, // Only Evaluate
  //     5, // Number of folds
  //     28, // Number of attributes
  //     2, // Number of classes
  //     "/input/HIGGS_TRAIN", // Input Data File
  //     "/pmmloutput", // Model Dir
  //     "PMML_HIGGS_CATEGORICAL.xml", // Model File Name Base
  //     "/testOutput", // Result Dir
  //     "output" // Result file name
  //     );
  /*
   * Define Operators
   */
  // File Input Operator
  NBLineReader opInput = dag.addOperator("File_Input", new NBLineReader(nbc));
  // Input Reader
  NBInputReader opNBInputReader = dag.addOperator("Parser", new NBInputReader(nbc));
  // NB Counter
  NBCounter opNBCounter = dag.addOperator("Counter", new NBCounter(nbc));
  // NB Aggregator
  NBModelAggregator<NBModelStorage> opNBAggregator =
      dag.addOperator("Model_Updater", new NBModelAggregator<NBModelStorage>(nbc));
  // NB Evaluator
  NBEvaluator opNBEvaluator = dag.addOperator("Evaluator", new NBEvaluator(nbc));
  // File Output Operator
  NBOutputPerWindowOperator opNBOutput =
      dag.addOperator("Model_Writer", new NBOutputPerWindowOperator(nbc));
  /*
   * Define Streams
   */
  // Reader -> parser (data + control), kept thread-local.
  dag.addStream("To Parser", opInput.lineOutputPort, opNBInputReader.input)
      .setLocality(Locality.THREAD_LOCAL);
  dag.addStream("Control_Parser", opInput.controlOut, opNBInputReader.controlIn)
      .setLocality(Locality.THREAD_LOCAL);
  // Training path: parser -> counter -> model aggregator -> model writer.
  dag.addStream("To Counter", opNBInputReader.outForTraining, opNBCounter.inTraining);
  dag.addStream("To Model Updater", opNBCounter.outTraining, opNBAggregator.inTraining);
  dag.addStream("To Model Writer", opNBAggregator.outTraining, opNBOutput.inMultiWriter);
  // Evaluation path: parser -> evaluator -> result writer.
  dag.addStream("To Evaluator", opNBInputReader.outForEvaluation, opNBEvaluator.inForEvaluation);
  dag.addStream("To Result Writer", opNBEvaluator.outToWriter, opNBOutput.inStringWriter);
  // Control signal chain through the pipeline.
  dag.addStream("Control_Counter", opNBInputReader.controlOut, opNBCounter.controlIn);
  dag.addStream("Control_Updater", opNBCounter.controlOut, opNBAggregator.controlIn);
  dag.addStream("Control_Writer", opNBAggregator.controlOut, opNBOutput.controlIn);
  // K-fold models flow from the aggregator back into the evaluator.
  dag.addStream(
      "To Evaluator (K-fold models)", opNBAggregator.outEvaluator, opNBEvaluator.inKFoldModels);
}
@Override protected StringGeneratorInputOperator addGenerateOperator(DAG dag) { return dag.addOperator("TestStringGenerator", StringGeneratorInputOperator.class); // StringGeneratorInputOperator generator = }
/**
 * Adds the Kinesis string output operator that is the subject of this test.
 *
 * @param dag the DAG to add the operator to
 * @return the newly added Kinesis producer operator
 */
@Override
protected KinesisStringOutputOperator addTestingOperator(DAG dag) {
  KinesisStringOutputOperator producer =
      dag.addOperator("KinesisMessageProducer", KinesisStringOutputOperator.class);
  return producer;
}