/**
 * Creates a reduce operator for the reduce-all case, as opposed to the reduce-per-group case.
 * No grouper is attached to an operator built through this constructor.
 *
 * @param input the data set to reduce; its type is handed to the superclass constructor
 * @param function the reduce function to apply; must not be {@code null}
 * @throws NullPointerException if {@code function} is {@code null}
 */
public ReduceOperator(DataSet<IN> input, ReduceFunction<IN> function) {
    super(input, input.getType());

    if (null == function) {
        throw new NullPointerException("Reduce function must not be null.");
    }

    // reduce-all: there is no grouping
    this.grouper = null;
    this.function = function;
}
/**
 * Runs the word count program: reads the text file named by the first argument, tokenizes it
 * with {@code Tokenizer}, groups the resulting tuples on field 0, sums field 1, and writes the
 * result as text to the path named by the second argument.
 *
 * @param args {@code <input path> <result path>}; with fewer than two arguments a usage
 *     message is printed to standard error and the method returns without running anything
 * @throws Exception if the program execution fails
 */
public static void main(String[] args) throws Exception {
    if (2 > args.length) {
        System.err.println("Usage: WordCount <input path> <result path>");
        return;
    }

    final String sourcePath = args[0];
    final String sinkPath = args[1];

    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

    // read lines -> (word, count) tuples -> per-word sum of the count field
    DataSet<Tuple2<String, Integer>> counts =
            environment
                    .readTextFile(sourcePath)
                    .flatMap(new Tokenizer())
                    .groupBy(0)
                    .aggregate(Aggregations.SUM, 1);

    counts.writeAsText(sinkPath);

    environment.execute("Word Count");
}
public static void main(String[] args) throws Exception { if (!parseParameters(args)) { return; } final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // get input data DataSet<Lineitem> li = getLineitemDataSet(env); DataSet<Order> or = getOrdersDataSet(env); DataSet<Customer> cust = getCustomerDataSet(env); // Filter market segment "AUTOMOBILE" cust = cust.filter( new FilterFunction<Customer>() { @Override public boolean filter(Customer value) { return value.getMktsegment().equals("AUTOMOBILE"); } }); // Filter all Orders with o_orderdate < 12.03.1995 or = or.filter( new FilterFunction<Order>() { private DateFormat format = new SimpleDateFormat("yyyy-MM-dd"); private Date date; { Calendar cal = Calendar.getInstance(); cal.set(1995, 3, 12); date = cal.getTime(); } @Override public boolean filter(Order value) throws ParseException { Date orderDate = format.parse(value.getOrderdate()); return orderDate.before(date); } }); // Filter all Lineitems with l_shipdate > 12.03.1995 li = li.filter( new FilterFunction<Lineitem>() { private DateFormat format = new SimpleDateFormat("yyyy-MM-dd"); private Date date; { Calendar cal = Calendar.getInstance(); cal.set(1995, 3, 12); date = cal.getTime(); } @Override public boolean filter(Lineitem value) throws ParseException { Date shipDate = format.parse(value.getShipdate()); return shipDate.after(date); } }); // Join customers with orders and package them into a ShippingPriorityItem DataSet<ShippingPriorityItem> customerWithOrders = cust.join(or) .where(0) .equalTo(0) .with( new JoinFunction<Customer, Order, ShippingPriorityItem>() { @Override public ShippingPriorityItem join(Customer first, Order second) { return new ShippingPriorityItem( 0, 0.0, second.getOrderdate(), second.getShippriority(), second.getOrderkey()); } }); // Join the last join result with Lineitems DataSet<ShippingPriorityItem> joined = customerWithOrders .join(li) .where(4) .equalTo(0) .with( new JoinFunction<ShippingPriorityItem, 
Lineitem, ShippingPriorityItem>() { @Override public ShippingPriorityItem join(ShippingPriorityItem first, Lineitem second) { first.setL_Orderkey(second.getOrderkey()); first.setRevenue(second.getExtendedprice() * (1 - second.getDiscount())); return first; } }); // Group by l_orderkey, o_orderdate and o_shippriority and compute revenue sum joined = joined.groupBy(0, 2, 3).aggregate(Aggregations.SUM, 1); // emit result joined.writeAsCsv(outputPath, "\n", "|"); // execute program env.execute("TPCH Query 3 Example"); }