@Override
  public void execute(
      JavaSparkContext ctx, SQLContext sqlContext, WorkflowContext workflowContext, DataFrame df) {

    // Announce which node is running so the workflow log shows progress.
    workflowContext.out("Executing NodePrintFirstNRows : " + id);

    // Emit the first n rows of the frame; take(n) pulls at most n rows to the driver.
    for (Row currentRow : df.take(n)) {
      workflowContext.out(currentRow.toString());
    }

    // Continue the workflow chain via the parent implementation.
    super.execute(ctx, sqlContext, workflowContext, df);
  }
Example #2
0
    @Override
    public ArrayList<String> call(JobContext jc) {
      // Load the sample tweet data set and register it as a temp table for SQL access.
      final String inputFile = "src/test/resources/testweet.json";
      SQLContext sqlCtx = jc.sqlctx();
      DataFrame tweets = sqlCtx.jsonFile(inputFile);
      tweets.registerTempTable("tweets");

      // Query ten tweets ordered by retweet count.
      // NOTE(review): order is ascending — if "top tweets" means most retweeted,
      // this likely wants DESC; confirm against the intended behavior.
      DataFrame topTweets =
          sqlCtx.sql("SELECT text, retweetCount FROM tweets ORDER BY retweetCount LIMIT 10");

      // collect() materializes the full result on the driver; render each row as text.
      ArrayList<String> results = new ArrayList<>();
      for (Row row : topTweets.collect()) {
        results.add(row.toString());
      }
      return results;
    }