Ejemplo n.º 1
0
 /**
  * Supplies the ranks relation (Rank, URL, Avg-Visit-Duration) for the WebLogAnalysis example.
  *
  * <p>When the {@code ranks} parameter is present, its value is read as a CSV file whose fields
  * are separated by {@code |}. Otherwise a usage hint is printed to standard output and the data
  * set returned by {@code WebLogData.getRankDataSet} is used.
  *
  * @param env execution environment used to create the data set
  * @param params parsed program parameters; consulted for the {@code ranks} key
  * @return data set of (Integer, String, Integer) tuples
  */
 private static DataSet<Tuple3<Integer, String, Integer>> getRanksDataSet(
     ExecutionEnvironment env, ParameterTool params) {
   // No input file given: tell the user and fall back to the bundled data.
   if (!params.has("ranks")) {
     System.out.println("Executing WebLogAnalysis example with default ranks data set.");
     System.out.println("Use --ranks to specify file input.");
     return WebLogData.getRankDataSet(env);
   }

   final String ranksPath = params.get("ranks");
   return env.readCsvFile(ranksPath)
       .fieldDelimiter("|")
       .types(Integer.class, String.class, Integer.class);
 }
Ejemplo n.º 2
0
 /**
  * Supplies the documents relation (URL, Doc-Text) for the WebLogAnalysis example.
  *
  * <p>When the {@code documents} parameter is present, its value is read as a CSV file whose
  * fields are separated by {@code |}. Otherwise a usage hint is printed to standard output and
  * the data set returned by {@code WebLogData.getDocumentDataSet} is used.
  *
  * @param env execution environment used to create the data set
  * @param params parsed program parameters; consulted for the {@code documents} key
  * @return data set of (String, String) tuples
  */
 private static DataSet<Tuple2<String, String>> getDocumentsDataSet(
     ExecutionEnvironment env, ParameterTool params) {
   // No input file given: tell the user and fall back to the bundled data.
   if (!params.has("documents")) {
     System.out.println("Executing WebLogAnalysis example with default documents data set.");
     System.out.println("Use --documents to specify file input.");
     return WebLogData.getDocumentDataSet(env);
   }

   final String documentsPath = params.get("documents");
   return env.readCsvFile(documentsPath)
       .fieldDelimiter("|")
       .types(String.class, String.class);
 }
Ejemplo n.º 3
0
 /**
  * Supplies the point data set for the K-Means example.
  *
  * <p>When the {@code points} parameter is present, its value is read as a space-delimited CSV
  * file and each record is mapped onto the {@code x} and {@code y} fields of {@code Point}.
  * Otherwise a usage hint is printed to standard output and the data set returned by
  * {@code KMeansData.getDefaultPointDataSet} is used.
  *
  * @param params parsed program parameters; consulted for the {@code points} key
  * @param env execution environment used to create the data set
  * @return data set of points
  */
 private static DataSet<Point> getPointDataSet(ParameterTool params, ExecutionEnvironment env) {
   if (!params.has("points")) {
     System.out.println("Executing K-Means example with default point data set.");
     System.out.println("Use --points to specify file input.");
     return KMeansData.getDefaultPointDataSet(env);
   }

   // Read points from the user-supplied CSV file.
   final String pointsPath = params.get("points");
   return env.readCsvFile(pointsPath)
       .fieldDelimiter(" ")
       .pojoType(Point.class, "x", "y");
 }
  /**
   * Builds a test plan in which a 10-step bulk iteration over the "bigFile" input is joined, on
   * field 0 of both sides, with the "smallFile" input that is defined outside the iteration and
   * placed on the right side of the join.
   *
   * <p>The join always carries the repartition-hash ship strategy hint; a local strategy hint is
   * added only when {@code strategy} is non-empty. The iteration result is written to a
   * discarding output format.
   *
   * @param strategy value for {@code Optimizer.HINT_LOCAL_STRATEGY}, or the empty string to set no
   *     local strategy hint
   * @return the program plan created by the execution environment
   */
  private Plan getTestPlanRightStatic(String strategy) {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    environment.setParallelism(DEFAULT_PARALLELISM);

    // Iterated (left) input.
    final DataSet<Tuple3<Long, Long, Long>> big =
        environment
            .readCsvFile("file://bigFile")
            .types(Long.class, Long.class, Long.class)
            .name("bigFile");

    // Static (right) input, not part of the iteration.
    final DataSet<Tuple3<Long, Long, Long>> small =
        environment
            .readCsvFile("file://smallFile")
            .types(Long.class, Long.class, Long.class)
            .name("smallFile");

    final IterativeDataSet<Tuple3<Long, Long, Long>> loop = big.iterate(10);

    // Optimizer hints passed to the join: fixed ship strategy, optional local strategy.
    final Configuration hints = new Configuration();
    hints.setString(Optimizer.HINT_SHIP_STRATEGY, Optimizer.HINT_SHIP_STRATEGY_REPARTITION_HASH);
    if (!strategy.isEmpty()) {
      hints.setString(Optimizer.HINT_LOCAL_STRATEGY, strategy);
    }

    final DataSet<Tuple3<Long, Long, Long>> stepResult =
        loop.join(small)
            .where(0)
            .equalTo(0)
            .with(new DummyJoiner())
            .name("DummyJoiner")
            .withParameters(hints);

    final DataSet<Tuple3<Long, Long, Long>> result = loop.closeWith(stepResult);
    result.output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>());

    return environment.createProgramPlan();
  }
Ejemplo n.º 5
0
 /**
  * Supplies the centroid data set for the K-Means example.
  *
  * <p>When the {@code centroids} parameter is present, its value is read as a space-delimited
  * CSV file and each record is mapped onto the {@code id}, {@code x} and {@code y} fields of
  * {@code Centroid}. Otherwise a usage hint is printed to standard output and the data set
  * returned by {@code KMeansData.getDefaultCentroidDataSet} is used.
  *
  * @param params parsed program parameters; consulted for the {@code centroids} key
  * @param env execution environment used to create the data set
  * @return data set of centroids
  */
 private static DataSet<Centroid> getCentroidDataSet(
     ParameterTool params, ExecutionEnvironment env) {
   if (!params.has("centroids")) {
     System.out.println("Executing K-Means example with default centroid data set.");
     System.out.println("Use --centroids to specify file input.");
     return KMeansData.getDefaultCentroidDataSet(env);
   }

   // Read centroids from the user-supplied CSV file.
   final String centroidsPath = params.get("centroids");
   return env.readCsvFile(centroidsPath)
       .fieldDelimiter(" ")
       .pojoType(Centroid.class, "id", "x", "y");
 }
Ejemplo n.º 6
0
  /**
   * Supplies the edge data set.
   *
   * <p>When the {@code fileOutput} flag is set, edges are read from {@code edgeInputPath} as
   * tab-separated, newline-terminated records of two {@code Long} values (lines starting with
   * {@code #} are ignored as comments), and each pair becomes an edge from the first value to the
   * second with a {@code NullValue} edge value. Otherwise the data set returned by
   * {@code ConnectedComponentsDefaultData.getDefaultEdgeDataSet} is used.
   *
   * @param env execution environment used to create the data set
   * @return data set of edges with {@code Long} vertex ids and no edge value
   */
  @SuppressWarnings("serial")
  private static DataSet<Edge<Long, NullValue>> getEdgesDataSet(ExecutionEnvironment env) {
    if (!fileOutput) {
      return ConnectedComponentsDefaultData.getDefaultEdgeDataSet(env);
    }

    // Converts a (source, target) id pair into an edge that carries no value.
    final MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>> toEdge =
        new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() {
          @Override
          public Edge<Long, NullValue> map(Tuple2<Long, Long> idPair) throws Exception {
            return new Edge<>(idPair.f0, idPair.f1, NullValue.getInstance());
          }
        };

    return env.readCsvFile(edgeInputPath)
        .ignoreComments("#")
        .fieldDelimiter("\t")
        .lineDelimiter("\n")
        .types(Long.class, Long.class)
        .map(toEdge);
  }