Example #1
  @Override
  public void populateDAG(DAG dag, Configuration conf) {
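    // Read from Kafka starting at the earliest offset and write each message out as a line.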
    KafkaSinglePortInputOperator in =
        dag.addOperator("kafkaIn", new KafkaSinglePortInputOperator());

    in.setInitialOffset(AbstractKafkaInputOperator.InitialOffset.EARLIEST.name());
    LineOutputOperator out = dag.addOperator("fileOut", new LineOutputOperator());

    dag.addStream("data", in.outputPort, out.input);
  }
Example #2
 @Override
 public void populateDAG(DAG dag, Configuration configuration) {
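   // Test pipeline: feed input tuples through the classifier and print the results to the console.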
   NBTestInputOperator inp = dag.addOperator("vectorize", new NBTestInputOperator());
   ClassifierOperator classifier = dag.addOperator("classifier", new ClassifierOperator());
   ConsoleOutputOperator out = dag.addOperator("console", new ConsoleOutputOperator());
   dag.addStream("vectorize_classifier", inp.data, classifier.input).setLocality(this.locality);
   dag.addStream("classifier_console", classifier.output, out.input).setLocality(this.locality);
 }
Example #3
  @Override
  public void populateDAG(DAG dag, Configuration conf) {
    HBaseRowStringGenerator row = dag.addOperator("rand", new HBaseRowStringGenerator());

    HBaseCsvMappingPutOperator csvMappingPutOperator =
        dag.addOperator("HBaseoper", new HBaseCsvMappingPutOperator());
    csvMappingPutOperator.getStore().setTableName("table1");
    csvMappingPutOperator.getStore().setZookeeperQuorum("127.0.0.1");
    csvMappingPutOperator.getStore().setZookeeperClientPort(2181);
    csvMappingPutOperator.setMappingString("colfam0.street,colfam0.city,colfam0.state,row");
    dag.addStream("hbasestream", row.outputPort, csvMappingPutOperator.input).setLocality(locality);
  }
Example #4
  /**
   * Test AbstractKafkaSinglePortInputOperator (i.e., an input adapter for Kafka, aka consumer).
   * This module receives data from an outside test generator through the Kafka message bus and
   * feeds that data into the Malhar streaming platform.
   *
   * <p>[Generate message and send that to Kafka message bus] ==> [Receive that message through the
   * Kafka input adapter (i.e., consumer) and send it via the emitTuples() interface on the output
   * port during the onMessage call]
   *
   * @throws Exception
   */
  public void testKafkaInputOperator(
      int sleepTime, final int totalCount, KafkaConsumer consumer, boolean isValid)
      throws Exception {
    // initialize the latch for this test
    latch = new CountDownLatch(1);

    // Start producer
    KafkaTestProducer p = new KafkaTestProducer(TEST_TOPIC);
    p.setSendCount(totalCount);
    new Thread(p).start();

    // Create DAG for testing.
    LocalMode lma = LocalMode.newInstance();
    DAG dag = lma.getDAG();

    // Create KafkaSinglePortStringInputOperator
    KafkaSinglePortStringInputOperator node =
        dag.addOperator("Kafka message consumer", KafkaSinglePortStringInputOperator.class);
    consumer.setTopic(TEST_TOPIC);
    if (isValid) {
      Set<String> brokerSet = new HashSet<String>();
      brokerSet.add("localhost:9092");
      consumer.setBrokerSet(brokerSet);
    }
    node.setConsumer(consumer);

    // Create Test tuple collector
    CollectorModule<String> collector =
        dag.addOperator("TestMessageCollector", new CollectorModule<String>());

    // Connect ports
    dag.addStream("Kafka message", node.outputPort, collector.inputPort)
        .setLocality(Locality.CONTAINER_LOCAL);

    // Create local cluster
    final LocalMode.Controller lc = lma.getController();
    lc.setHeartbeatMonitoringEnabled(false);

    lc.runAsync();

    // Wait up to 30s for the consumer to finish consuming all the messages
    Assert.assertTrue("TIMEOUT: 30s ", latch.await(30000, TimeUnit.MILLISECONDS));

    // Check results
    Assert.assertEquals("Collections size", 1, collections.size());
    Assert.assertEquals("Tuple count", totalCount, collections.get(collector.inputPort.id).size());
    logger.debug(
        String.format(
            "Number of emitted tuples: %d", collections.get(collector.inputPort.id).size()));

    p.close();
    lc.shutdown();
  }
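A note on the collector: CollectorModule comes from the test sources. A minimal sketch of such a collector, using the Apex BaseOperator/DefaultInputPort API, might look like the following (illustrative only; the real module also records tuples per port in the shared collections map and counts down the test latch):

  public static class CollectorModule<T> extends BaseOperator {
    // tuples received so far, kept for the test's assertions
    public final List<T> collectedTuples = new ArrayList<T>();

    public final transient DefaultInputPort<T> inputPort = new DefaultInputPort<T>() {
      @Override
      public void process(T tuple) {
        collectedTuples.add(tuple);
      }
    };
  }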
Example #5
  @Override
  public void populateDAG(DAG dag, Configuration conf) {
    // Sample DAG with 2 operators
    // Replace this code with the DAG you want to build

    RandomNumberGenerator randomGenerator =
        dag.addOperator("randomGenerator", RandomNumberGenerator.class);
    randomGenerator.setNumTuples(500);

    TupleReceiver tupleReceiver = dag.addOperator("console", new TupleReceiver());

    dag.addStream("randomData", randomGenerator.out, tupleReceiver.input);
  }
Example #6
  @Override
  public void populateDAG(DAG dag, Configuration conf) {
    int maxValue = 1000;

    RandomEventGenerator rand = dag.addOperator("rand", new RandomEventGenerator());
    rand.setMinvalue(0);
    rand.setMaxvalue(maxValue);
    rand.setTuplesBlast(200);
    CouchBaseOutputOperator couchbaseOutput =
        dag.addOperator("couchbaseOutput", new CouchBaseOutputOperator());
    // couchbaseOutput.getStore().setBucket("default");
    // couchbaseOutput.getStore().setPassword("");
    dag.addStream("ss", rand.integer_data, couchbaseOutput.input).setLocality(locality);
  }
Example #7
  @Override
  public void populateDAG(DAG dag, Configuration conf) {
    // Sample DAG with 2 operators
    // Replace this code with the DAG you want to build

    SeedEventGenerator seedGen = dag.addOperator("seedGen", SeedEventGenerator.class);
    seedGen.setSeedstart(1);
    seedGen.setSeedend(10);
    seedGen.addKeyData("x", 0, 10);
    seedGen.addKeyData("y", 0, 100);

    ConsoleOutputOperator cons = dag.addOperator("console", new ConsoleOutputOperator());
    cons.setStringFormat("hello: %s");

    dag.addStream("seeddata", seedGen.val_list, cons.input).setLocality(Locality.CONTAINER_LOCAL);
  }
Example #8
    @Override
    @SuppressWarnings("unchecked")
    public void populateDAG(DAG dag, Configuration conf) {
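      // Note: fsWriter, maxLength and testDir are fields of the enclosing test class (not shown here).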
      RandomWordGenerator randomWordGenerator = new RandomWordGenerator();
      randomWordGenerator.setTuplesPerWindow(2);

      dag.addOperator("random", randomWordGenerator);

      if (maxLength != null) {
        fsWriter.setMaxLength(maxLength);
      }

      fsWriter.setFilePath(testDir.getPath());
      dag.addOperator("fswriter", fsWriter);

      dag.addStream("fswriterstream", randomWordGenerator.output, fsWriter.input);
    }
Example #9
  @Override
  public void populateDAG(DAG dag, Configuration conf) {
    // Create the spillable test input operator
    SpillableTestInputOperator input = new SpillableTestInputOperator();
    input.batchSize = 100;
    input.sleepBetweenBatch = 0;
    input = dag.addOperator("input", input);

    SpillableTestOperator testOperator = new SpillableTestOperator();
    testOperator.store = createStore(conf);
    testOperator.shutdownCount = -1;
    testOperator = dag.addOperator("test", testOperator);

    // Connect ports
    dag.addStream("stream", input.output, testOperator.input)
        .setLocality(DAG.Locality.CONTAINER_LOCAL);
  }
Example #10
  @Override
  public void populateDAG(DAG dag, Configuration conf) {
    int maxValue = 30000;

    RandomEventGenerator rand = dag.addOperator("rand", new RandomEventGenerator());
    rand.setMinvalue(0);
    rand.setMaxvalue(maxValue);

    RoundRobinHashMap<String, Object> rrhm =
        dag.addOperator("rrhm", new RoundRobinHashMap<String, Object>());
    rrhm.setKeys(new String[] {"x", "y"});

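    // The JavaScript function estimates pi by Monte Carlo: the fraction of random (x, y) points
    // falling inside the quarter circle of radius maxValue, multiplied by 4.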
    JavaScriptOperator calc = dag.addOperator("picalc", new JavaScriptOperator());
    calc.setPassThru(false);
    calc.put("i", 0);
    calc.put("count", 0);
    calc.addSetupScript(
        "function pi() { if (x*x+y*y <= "
            + maxValue * maxValue
            + ") { i++; } count++; return i / count * 4; }");

    calc.setInvoke("pi");

    dag.addStream("rand_rrhm", rand.integer_data, rrhm.data);
    dag.addStream("rrhm_calc", rrhm.map, calc.inBindings);

    ConsoleOutputOperator console = dag.addOperator("console", new ConsoleOutputOperator());
    dag.addStream("rand_console", calc.result, console.input);
  }
Example #11
  /**
   * Test KafkaOutputOperator (i.e., an output adapter for Kafka, aka producer). This module sends
   * data into a Kafka message bus.
   *
   * <p>[Generate tuple] ==> [send tuple through the Kafka output adapter (i.e., producer) into the
   * Kafka message bus] ==> [receive data in an outside Kafka listener (i.e., consumer)]
   *
   * @throws Exception
   */
  @Test
  @SuppressWarnings({"SleepWhileInLoop", "empty-statement"})
  public void testKafkaOutputOperator() throws Exception {
    // Setup a message listener to receive the message
    KafkaConsumer listener = new KafkaConsumer("topic1");
    new Thread(listener).start();

    // Malhar module to send message
    // Create DAG for testing.
    LocalMode lma = LocalMode.newInstance();
    DAG dag = lma.getDAG();

    // Create the string generator and the Kafka output operator
    StringGeneratorInputOperator generator =
        dag.addOperator("TestStringGenerator", StringGeneratorInputOperator.class);
    KafkaStringSinglePortOutputOperator node =
        dag.addOperator("Kafka message producer", KafkaStringSinglePortOutputOperator.class);
    // Set configuration parameters for Kafka
    node.setTopic("topic1");

    // Connect ports
    dag.addStream("Kafka message", generator.outputPort, node.inputPort)
        .setLocality(Locality.CONTAINER_LOCAL);

    // Create local cluster
    final LocalMode.Controller lc = lma.getController();
    lc.runAsync();

    Thread.sleep(2000);
    lc.shutdown();

    // Check values sent vs. received
    Assert.assertEquals("Number of emitted tuples", tupleCount, listener.holdingBuffer.size());
    logger.debug(String.format("Number of emitted tuples: %d", listener.holdingBuffer.size()));
    Assert.assertEquals(
        "First tuple", "testString 1", listener.getMessage(listener.holdingBuffer.peek()));

    listener.close();
  }
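A note on the generator: StringGeneratorInputOperator also comes from the test sources. A minimal sketch of such an input operator, using the Apex InputOperator API, might look like this (illustrative; the real operator controls how many tuples are emitted per window):

  public static class StringGeneratorInputOperator extends BaseOperator implements InputOperator {
    public final transient DefaultOutputPort<String> outputPort = new DefaultOutputPort<String>();
    private int count;

    @Override
    public void emitTuples() {
      // emits "testString 1", "testString 2", ... matching the first-tuple assertion above
      outputPort.emit("testString " + (++count));
    }
  }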
Example #12
  @Override
  public void populateDAG(DAG dag, Configuration conf) {
    // Set up the operator to get data from the Twitter sample stream injected into the system.
    TwitterSampleInput twitterFeed = new TwitterSampleInput();
    twitterFeed = dag.addOperator("TweetSampler", twitterFeed);

    // Set up a node to count the unique hashtags within a window.
    UniqueCounter<String> uniqueCounter =
        dag.addOperator("UniqueHashtagCounter", new UniqueCounter<String>());

    // Get the aggregated hashtag counts and compute the top counts over the last 5 minutes.
    WindowedTopCounter<String> topCounts =
        dag.addOperator("TopCounter", new WindowedTopCounter<String>());
    topCounts.setTopCount(10);
    topCounts.setSlidingWindowWidth(600);
    topCounts.setDagWindowWidth(1);

    dag.addStream("TwittedHashtags", twitterFeed.hashtag, uniqueCounter.data).setLocality(locality);
    // Count unique Hashtags
    dag.addStream("UniqueHashtagCounts", uniqueCounter.count, topCounts.input);

    TwitterTopCounterApplication.consoleOutput(
        dag, "topHashtags", topCounts.output, SNAPSHOT_SCHEMA, "hashtag");
  }
Example #13
    @SuppressWarnings("unchecked")
    @Override
    public void populateDAG(DAG dag, Configuration conf) {
      KeyGen keyGen = dag.addOperator("KeyGenerator", new KeyGen());
      UniqueValueCount<Integer> valCount =
          dag.addOperator("ValueCounter", new UniqueValueCount<Integer>());
      IntegerUniqueValueCountAppender uniqueUnifier =
          dag.addOperator("Unique", new IntegerUniqueValueCountAppender());
      VerifyTable verifyTable = dag.addOperator("VerifyTable", new VerifyTable());

      @SuppressWarnings("rawtypes")
      DefaultOutputPort valOut = valCount.output;
      @SuppressWarnings("rawtypes")
      DefaultOutputPort uniqueOut = uniqueUnifier.output;
      dag.addStream("DataIn", keyGen.output, valCount.input);
      dag.addStream("UnifyWindows", valOut, uniqueUnifier.input);
      dag.addStream("ResultsOut", uniqueOut, verifyTable.input);
    }
Example #14
  @Override
  public void populateDAG(DAG dag, Configuration conf) {
    dag.setAttribute(DAG.APPLICATION_NAME, "HDHTBenchmarkApplication");
    Generator gen = dag.addOperator("Generator", new Generator());
    gen.setTupleBlast(1000);
    gen.setSleepms(0);
    dag.getOperatorMeta("Generator")
        .getAttributes()
        .put(Context.OperatorContext.APPLICATION_WINDOW_COUNT, 1);

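    // The store persists tuples through an HDHT writer backed by a DTFile under WALBenchMarkDir.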
    HDSOperator hdsOut = dag.addOperator("Store", new HDSOperator());
    TFileImpl.DTFileImpl hdsFile = new TFileImpl.DTFileImpl();
    hdsFile.setBasePath("WALBenchMarkDir");
    hdsOut.setFileStore(hdsFile);
    dag.getOperatorMeta("Store")
        .getAttributes()
        .put(Context.OperatorContext.COUNTERS_AGGREGATOR, new HDHTWriter.BucketIOStatAggregator());

    dag.addStream("s1", gen.out, hdsOut.input).setLocality(DAG.Locality.THREAD_LOCAL);
  }
Example #15
  @Override
  public void populateDAG(DAG dag, Configuration conf) {
    String appName = conf.get("appName");
    if (appName == null) {
      appName = "VisualDataDemo";
    }
    dag.setAttribute(DAG.APPLICATION_NAME, appName);
    int maxValue = 30000;

    RandomEventGenerator rand = dag.addOperator("random", new RandomEventGenerator());
    rand.setMinvalue(0);
    rand.setMaxvalue(maxValue);

    DemoValueGenerator demo = dag.addOperator("chartValue", new DemoValueGenerator());
    demo.setRandomIncrement(5);
    demo.setRandomIncrement2(20);

    PiCalculateOperator calc = dag.addOperator("picalc", new PiCalculateOperator());
    calc.setBase(maxValue * maxValue);
    dag.addStream("rand_calc", rand.integer_data, calc.input).setLocality(locality);

    WidgetOutputOperator woo =
        dag.addOperator("widget output operator", new WidgetOutputOperator());
    WidgetOutputOperator wooa =
        dag.addOperator("widget output operator2", new WidgetOutputOperator());

    // wire to simple input gadget
    dag.addStream(
            "ws_pi_data", calc.output, woo.simpleInput.setTopic("app." + appName + ".piValue"))
        .setLocality(locality);

    // wire to time series chart gadget
    dag.addStream(
            "ws_chart_data",
            demo.simpleOutput,
            woo.timeSeriesInput.setTopic("app." + appName + ".chartValue").setMin(0).setMax(100))
        .setLocality(locality);

    // wire to another time series chart gadget
    dag.addStream(
            "ws_chart_data2",
            demo.simpleOutput2,
            wooa.timeSeriesInput.setTopic("app." + appName + ".chartValue2"))
        .setLocality(locality);

    // wire to percentage chart gadget
    dag.addStream(
            "ws_percentage_data",
            demo.percentageOutput,
            woo.percentageInput.setTopic("app." + appName + ".percentage"))
        .setLocality(locality);

    // wire to top N chart gadget
    dag.addStream(
            "ws_topn_data",
            demo.top10Output,
            woo.topNInput.setN(10).setTopic("app." + appName + ".topn"))
        .setLocality(locality);

    // wire to progress bar chart gadget
    dag.addStream(
            "ws_progress_data",
            demo.progressOutput,
            wooa.percentageInput.setTopic("app." + appName + ".progress"))
        .setLocality(locality);

    // wire to piechart gadget
    dag.addStream(
            "ws_piechart_data",
            demo.pieChartOutput,
            wooa.pieChartInput.setTopic("app." + appName + ".piechart"))
        .setLocality(locality);
  }
Example #16
  @Override
  public void populateDAG(DAG dag, Configuration conf) {
    String lPhoneRange = conf.get(PHONE_RANGE_PROP, null);
    if (lPhoneRange != null) {
      String[] tokens = lPhoneRange.split("-");
      if (tokens.length != 2) {
        throw new IllegalArgumentException("Invalid range: " + lPhoneRange);
      }
      this.phoneRange = Range.between(Integer.parseInt(tokens[0]), Integer.parseInt(tokens[1]));
    }
    LOG.debug("Phone range {}", this.phoneRange);

    RandomEventGenerator phones = dag.addOperator("Receiver", RandomEventGenerator.class);
    phones.setMinvalue(this.phoneRange.getMinimum());
    phones.setMaxvalue(this.phoneRange.getMaximum());

    PhoneMovementGenerator movementGen =
        dag.addOperator("LocationFinder", PhoneMovementGenerator.class);
    dag.setAttribute(
        movementGen,
        OperatorContext.COUNTERS_AGGREGATOR,
        new BasicCounters.LongAggregator<MutableLong>());

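    // Repartition LocationFinder dynamically: scale when throughput leaves the [min, max] events
    // band, waiting at least cooldownMillis between adjustments.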
    StatelessThroughputBasedPartitioner<PhoneMovementGenerator> partitioner =
        new StatelessThroughputBasedPartitioner<PhoneMovementGenerator>();
    partitioner.setCooldownMillis(conf.getLong(COOL_DOWN_MILLIS, 45000));
    partitioner.setMaximumEvents(conf.getLong(MAX_THROUGHPUT, 30000));
    partitioner.setMinimumEvents(conf.getLong(MIN_THROUGHPUT, 10000));
    dag.setAttribute(
        movementGen,
        OperatorContext.STATS_LISTENERS,
        Arrays.asList(new StatsListener[] {partitioner}));
    dag.setAttribute(movementGen, OperatorContext.PARTITIONER, partitioner);

    // generate seed numbers
    Random random = new Random();
    int maxPhone = phoneRange.getMaximum() - phoneRange.getMinimum();
    int phonesToDisplay = conf.getInt(TOTAL_SEED_NOS, 10);
    for (int i = phonesToDisplay; i-- > 0; ) {
      int phoneNo = phoneRange.getMinimum() + random.nextInt(maxPhone + 1);
      LOG.info("seed no: " + phoneNo);
      movementGen.phoneRegister.add(phoneNo);
    }
    // done generating data
    LOG.info("Finished generating seed data.");

    String gatewayAddress = dag.getValue(DAG.GATEWAY_CONNECT_ADDRESS);
    URI uri = URI.create("ws://" + gatewayAddress + "/pubsub");
    PubSubWebSocketOutputOperator<Object> wsOut =
        dag.addOperator("LocationResults", new PubSubWebSocketOutputOperator<Object>());
    wsOut.setUri(uri);
    PubSubWebSocketInputOperator<Map<String, String>> wsIn =
        dag.addOperator("QueryLocation", new PubSubWebSocketInputOperator<Map<String, String>>());
    wsIn.setUri(uri);
    // default partitioning: first connected stream to movementGen will be partitioned
    dag.addStream("Phone-Data", phones.integer_data, movementGen.data);
    dag.addStream("Results", movementGen.locationQueryResult, wsOut.input);
    dag.addStream("Query", wsIn.outputPort, movementGen.phoneQuery);
  }
Example #17
  public void populateDAG(DAG dag, Configuration conf) {

    /*
     * Config for k-fold cross validation of the Naive Bayes Model
     */

    numFolds = Integer.parseInt(conf.get("dt.ml.classification.nb.numFolds"));
    numAttributes = Integer.parseInt(conf.get("dt.ml.classification.nb.numAttributes"));
    numClasses = Integer.parseInt(conf.get("dt.ml.classification.nb.numClasses"));
    inputDataFilePath = conf.get("dt.ml.classification.nb.inputDataFilePath");
    modelDir = conf.get("dt.ml.classification.nb.modelDir");
    modelFileName = conf.get("dt.ml.classification.nb.modelFileName");
    resultDir = conf.get("dt.ml.classification.nb.resultDir");
    resultFileName = conf.get("dt.ml.classification.nb.resultFileName");
    isKFold = conf.getBoolean("dt.ml.classification.nb.isKFold", false);
    isTrain = conf.getBoolean("dt.ml.classification.nb.isTrain", false);
    isEvaluate = conf.getBoolean("dt.ml.classification.nb.isEvaluate", false);

    System.out.println("NumFolds = " + numFolds);
    System.out.println("isKFold = " + isKFold);
    System.out.println("isTrain = " + isTrain);
    System.out.println("isEvaluate = " + isEvaluate);
    System.out.println("Input file path = " + inputDataFilePath);

    if (!isKFold && !isTrain && !isEvaluate) {
      System.out.println("Invalid Params. K-Fold evaluation, Training or Testing must be selected");
      return;
    }
    if (isKFold && numFolds <= 1) {
      System.out.println("Invalid Params. Number of folds should be > 1");
      return;
    }

    NBConfig nbc =
        new NBConfig(
            isKFold, // K-Fold Validation
            isTrain, // Only Train
            isEvaluate, // Only Evaluate
            numFolds, // Number of folds
            numAttributes, // Number of attributes
            numClasses, // Number of classes
            inputDataFilePath, // Input Data File
            modelDir, // Model Dir
            modelFileName, // Model File Name Base
            resultDir, // Result Dir
            resultFileName // Result file name
            );

    //    NBConfig nbc = new NBConfig(
    //        true,                          // K-Fold Validation
    //        true,                          // Only Train
    //        false,                         // Only Evaluate
    //        5,                             // Number of folds
    //        28,                            // Number of attributes
    //        2,                             // Number of classes
    //        "/input/HIGGS_TRAIN",          // Input Data File
    //        "/pmmloutput",                 // Model Dir
    //        "PMML_HIGGS_CATEGORICAL.xml",  // Model File Name Base
    //        "/testOutput",                 // Result Dir
    //        "output"                       // Result file name
    //        );

    /*
     * Define Operators
     */

    // File Input Operator
    NBLineReader opInput = dag.addOperator("File_Input", new NBLineReader(nbc));

    // Input Reader
    NBInputReader opNBInputReader = dag.addOperator("Parser", new NBInputReader(nbc));

    // NB Counter
    NBCounter opNBCounter = dag.addOperator("Counter", new NBCounter(nbc));

    // NB Aggregator
    NBModelAggregator<NBModelStorage> opNBAggregator =
        dag.addOperator("Model_Updater", new NBModelAggregator<NBModelStorage>(nbc));

    // NB Evaluator
    NBEvaluator opNBEvaluator = dag.addOperator("Evaluator", new NBEvaluator(nbc));

    // File Output Operator
    NBOutputPerWindowOperator opNBOutput =
        dag.addOperator("Model_Writer", new NBOutputPerWindowOperator(nbc));

    /*
     * Define Streams
     */
    dag.addStream("To Parser", opInput.lineOutputPort, opNBInputReader.input)
        .setLocality(Locality.THREAD_LOCAL);
    dag.addStream("Control_Parser", opInput.controlOut, opNBInputReader.controlIn)
        .setLocality(Locality.THREAD_LOCAL);

    dag.addStream("To Counter", opNBInputReader.outForTraining, opNBCounter.inTraining);
    dag.addStream("To Model Updater", opNBCounter.outTraining, opNBAggregator.inTraining);
    dag.addStream("To Model Writer", opNBAggregator.outTraining, opNBOutput.inMultiWriter);

    dag.addStream("To Evaluator", opNBInputReader.outForEvaluation, opNBEvaluator.inForEvaluation);
    dag.addStream("To Result Writer", opNBEvaluator.outToWriter, opNBOutput.inStringWriter);

    dag.addStream("Control_Counter", opNBInputReader.controlOut, opNBCounter.controlIn);
    dag.addStream("Control_Updater", opNBCounter.controlOut, opNBAggregator.controlIn);
    dag.addStream("Control_Writer", opNBAggregator.controlOut, opNBOutput.controlIn);

    dag.addStream(
        "To Evaluator (K-fold models)", opNBAggregator.outEvaluator, opNBEvaluator.inKFoldModels);
  }
Example #18
 @Override
 protected StringGeneratorInputOperator addGenerateOperator(DAG dag) {
   return dag.addOperator("TestStringGenerator", StringGeneratorInputOperator.class);
 }
 @Override
 protected KinesisStringOutputOperator addTestingOperator(DAG dag) {
   return dag.addOperator("KinesisMessageProducer", KinesisStringOutputOperator.class);
 }