public Generator(ParameterTool pt) {
   this.payload = new byte[pt.getInt("payload")];
   this.delay = pt.getInt("delay");
   this.withFt = pt.has("ft");
   this.latFreq = pt.getInt("latencyFreq");
   this.sleepFreq = pt.getInt("sleepFreq");
 }
  /**
   * Builds and runs the throughput topology: a {@code Generator} spout named "source0", followed
   * by {@code repartitions - 1} {@code RepartPassThroughBolt} stages and a final "sink" bolt,
   * each connected to its predecessor with a fields grouping on "id".
   *
   * <p>Parameters read here: {@code para}, {@code sourceParallelism}, {@code sinkParallelism},
   * {@code repartitions} (default 1), {@code name} (default "no_name") and the {@code local}
   * flag. Without {@code local} the topology is submitted through {@code StormSubmitter} with
   * {@code para} workers. With {@code local} it runs in a {@code LocalCluster} for five minutes,
   * after which the cluster is shut down.
   *
   * @param args command-line arguments parsed by {@code ParameterTool}
   * @throws Exception if topology submission fails or the local run is interrupted
   */
  public static void main(String[] args) throws Exception {
    ParameterTool pt = ParameterTool.fromArgs(args);

    int par = pt.getInt("para");

    TopologyBuilder builder = new TopologyBuilder();

    builder.setSpout("source0", new Generator(pt), pt.getInt("sourceParallelism"));

    // Both values are loop-invariant, so they are read once instead of on every iteration.
    // Note that the intermediate pass-through bolts use the sink parallelism.
    final int repartitions = pt.getInt("repartitions", 1);
    final int sinkParallelism = pt.getInt("sinkParallelism");

    int i = 0;
    for (; i < repartitions - 1; i++) {
      System.out.println("adding source" + i + " --> source" + (i + 1));
      builder
          .setBolt("source" + (i + 1), new RepartPassThroughBolt(pt), sinkParallelism)
          .fieldsGrouping("source" + i, new Fields("id"));
    }
    // After the loop, i is the index of the last stage; the sink subscribes to it.
    System.out.println("adding final source" + i + " --> sink");

    builder
        .setBolt("sink", new Sink(pt), sinkParallelism)
        .fieldsGrouping("source" + i, new Fields("id"));

    Config conf = new Config();
    conf.setDebug(false);

    if (!pt.has("local")) {
      conf.setNumWorkers(par);

      StormSubmitter.submitTopologyWithProgressBar(
          "throughput-" + pt.get("name", "no_name"), conf, builder.createTopology());
    } else {
      conf.setMaxTaskParallelism(par);

      // How long the in-process cluster is kept alive before being torn down.
      final long localRunMillis = 300000L;

      LocalCluster cluster = new LocalCluster();
      try {
        cluster.submitTopology("throughput", conf, builder.createTopology());
        Thread.sleep(localRunMillis);
      } finally {
        // Always shut the cluster down, even if submission fails or the sleep is interrupted.
        cluster.shutdown();
      }
    }
  }
Exemplo n.º 3
0
  /**
   * Runs the K-Means example as a bulk iteration and emits each point together with the id of
   * its final cluster.
   *
   * <p>The usage line is always printed. The iteration count comes from {@code --iterations}
   * (default 10). If {@code --output} is given, the result is written as CSV (newline between
   * records, space between fields) and the job is executed explicitly; otherwise the result is
   * printed to stdout.
   *
   * @param args command-line arguments parsed by {@code ParameterTool}
   * @throws Exception propagated from building or executing the job
   */
  public static void main(String[] args) throws Exception {

    // Parse the command line and show the supported options.
    final ParameterTool params = ParameterTool.fromArgs(args);
    System.out.println(
        "Usage: KMeans --points <path> --centroids <path> --output <path> --iterations <n>");

    // Obtain the execution environment and register the parameters globally
    // (this makes them available in the web interface).
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(params);

    // Input data: points and initial centroids, as produced by the helper methods.
    final DataSet<Point> points = getPointDataSet(params, env);
    final DataSet<Centroid> centroids = getCentroidDataSet(params, env);

    // Open the bulk iteration over the centroids.
    final int maxIterations = params.getInt("iterations", 10);
    final IterativeDataSet<Centroid> iteration = centroids.iterate(maxIterations);

    // Step 1: tag every point with its nearest centroid from the current iteration.
    final DataSet<Tuple2<Integer, Point>> assignedInLoop =
        points.map(new SelectNearestCenter()).withBroadcastSet(iteration, "centroids");

    // Step 2: append a count, accumulate per centroid id, and average into new centroids.
    final DataSet<Centroid> updatedCentroids =
        assignedInLoop
            .map(new CountAppender())
            .groupBy(0)
            .reduce(new CentroidAccumulator())
            .map(new CentroidAverager());

    // Close the loop: the updated centroids feed the next iteration.
    final DataSet<Centroid> finalCentroids = iteration.closeWith(updatedCentroids);

    // Assign every point to its final cluster.
    final DataSet<Tuple2<Integer, Point>> clusteredPoints =
        points.map(new SelectNearestCenter()).withBroadcastSet(finalCentroids, "centroids");

    // Emit the result.
    if (!params.has("output")) {
      System.out.println("Printing result to stdout. Use --output to specify output path.");
      clusteredPoints.print();
      return;
    }

    clusteredPoints.writeAsCsv(params.get("output"), "\n", " ");

    // File sinks are lazy, so execution has to be triggered explicitly.
    env.execute("KMeans Example");
  }
 public Sink(ParameterTool pt) throws UnknownHostException {
   this.pt = pt;
   this.withFT = pt.has("ft");
   this.logfreq = pt.getInt("logfreq");
   this.host = InetAddress.getLocalHost().getHostName();
 }