Example #1
0
  /**
   * Creates a reduce operator for the reduce-all case, i.e. a reduce that is not tied to a
   * grouping (in contrast to the reduce-per-group case).
   *
   * @param input the data set passed to the superclass constructor; its type ({@code
   *     input.getType()}) is passed along as the second superclass argument
   * @param function the reduce function stored by this operator; must not be {@code null}
   * @throws NullPointerException if {@code function} is {@code null}
   */
  public ReduceOperator(DataSet<IN> input, ReduceFunction<IN> function) {
    super(input, input.getType());

    if (function == null) {
      throw new NullPointerException("Reduce function must not be null.");
    }

    // This constructor covers the ungrouped variant, so no grouper is recorded.
    this.grouper = null;
    this.function = function;
  }
  /**
   * Runs the word count program: reads a text file, tokenizes it with {@code Tokenizer}, sums
   * field 1 per distinct value of field 0, and writes the result as text.
   *
   * @param args {@code args[0]} is the input path, {@code args[1]} is the result path; if fewer
   *     than two arguments are given, a usage message is printed to stderr and the method returns
   * @throws Exception if any of the invoked operations throws
   */
  public static void main(String[] args) throws Exception {
    if (args.length < 2) {
      System.err.println("Usage: WordCount <input path> <result path>");
      return;
    }

    final String inputPath = args[0];
    final String resultPath = args[1];

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read lines -> (word, count) tuples -> group on field 0 -> sum field 1
    DataSet<Tuple2<String, Integer>> counts =
        env.readTextFile(inputPath)
            .flatMap(new Tokenizer())
            .groupBy(0)
            .aggregate(Aggregations.SUM, 1);

    counts.writeAsText(resultPath);
    env.execute("Word Count");
  }
Example #3
0
  /**
   * Runs the TPCH Query 3 example program.
   *
   * <p>Steps, in order:
   *
   * <ol>
   *   <li>parse the program arguments via {@code parseParameters}; return if it reports failure,
   *   <li>load the line item, order and customer data sets,
   *   <li>keep customers of market segment "AUTOMOBILE", orders dated before 1995-03-12 and line
   *       items shipped after 1995-03-12,
   *   <li>join customers with orders, then join that result with the line items, computing the
   *       revenue as {@code extendedprice * (1 - discount)},
   *   <li>group on fields 0, 2 and 3, sum field 1, and write the result as CSV to {@code
   *       outputPath}.
   * </ol>
   *
   * @param args program arguments, interpreted by {@code parseParameters}
   * @throws Exception if any of the invoked operations throws
   */
  public static void main(String[] args) throws Exception {

    if (!parseParameters(args)) {
      return;
    }

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // get input data
    DataSet<Lineitem> li = getLineitemDataSet(env);
    DataSet<Order> or = getOrdersDataSet(env);
    DataSet<Customer> cust = getCustomerDataSet(env);

    // Cutoff date 1995-03-12 at midnight, shared by the order and line item filters.
    // Calendar months are 0-based, so the constant is used instead of a literal, and clear()
    // drops the current time of day that Calendar.getInstance() would otherwise carry into the
    // comparison against dates parsed from "yyyy-MM-dd" (which are at midnight).
    final Calendar cutoffCalendar = Calendar.getInstance();
    cutoffCalendar.clear();
    cutoffCalendar.set(1995, Calendar.MARCH, 12);
    final Date cutoffDate = cutoffCalendar.getTime();

    // Filter market segment "AUTOMOBILE"
    cust =
        cust.filter(
            new FilterFunction<Customer>() {
              @Override
              public boolean filter(Customer value) {
                return value.getMktsegment().equals("AUTOMOBILE");
              }
            });

    // Filter all Orders with o_orderdate < 1995-03-12
    or =
        or.filter(
            new FilterFunction<Order>() {
              private final DateFormat format = new SimpleDateFormat("yyyy-MM-dd");

              @Override
              public boolean filter(Order value) throws ParseException {
                Date orderDate = format.parse(value.getOrderdate());
                return orderDate.before(cutoffDate);
              }
            });

    // Filter all Lineitems with l_shipdate > 1995-03-12
    li =
        li.filter(
            new FilterFunction<Lineitem>() {
              private final DateFormat format = new SimpleDateFormat("yyyy-MM-dd");

              @Override
              public boolean filter(Lineitem value) throws ParseException {
                Date shipDate = format.parse(value.getShipdate());
                return shipDate.after(cutoffDate);
              }
            });

    // Join customers with orders and package them into a ShippingPriorityItem
    DataSet<ShippingPriorityItem> customerWithOrders =
        cust.join(or)
            .where(0)
            .equalTo(0)
            .with(
                new JoinFunction<Customer, Order, ShippingPriorityItem>() {
                  @Override
                  public ShippingPriorityItem join(Customer first, Order second) {
                    // The first two arguments are placeholders (0 and 0.0); the next join
                    // overwrites them through setL_Orderkey and setRevenue.
                    return new ShippingPriorityItem(
                        0,
                        0.0,
                        second.getOrderdate(),
                        second.getShippriority(),
                        second.getOrderkey());
                  }
                });

    // Join the last join result with Lineitems
    DataSet<ShippingPriorityItem> joined =
        customerWithOrders
            .join(li)
            .where(4)
            .equalTo(0)
            .with(
                new JoinFunction<ShippingPriorityItem, Lineitem, ShippingPriorityItem>() {
                  @Override
                  public ShippingPriorityItem join(ShippingPriorityItem first, Lineitem second) {
                    first.setL_Orderkey(second.getOrderkey());
                    first.setRevenue(second.getExtendedprice() * (1 - second.getDiscount()));
                    return first;
                  }
                });

    // Group by l_orderkey, o_orderdate and o_shippriority and compute revenue sum
    joined = joined.groupBy(0, 2, 3).aggregate(Aggregations.SUM, 1);

    // emit result
    joined.writeAsCsv(outputPath, "\n", "|");

    // execute program
    env.execute("TPCH Query 3 Example");
  }