/**
 * Compares the component id currently stored for a record against all candidate component ids
 * and emits the record with the smallest candidate id, but only if that id is strictly smaller
 * than the stored one.
 *
 * <p>The component id is read from (and written to) field 1 as a {@code LongValue}.
 *
 * @param candidates records carrying candidate component ids in field 1
 * @param current the record holding the currently assigned id; must contain at least one element
 * @param out collector receiving the updated record, if any
 * @throws Exception if {@code current} is empty
 */
@Override
public void coGroup(
        Iterator<Record> candidates, Iterator<Record> current, Collector<Record> out)
        throws Exception {
    if (!current.hasNext()) {
        throw new Exception("Error: Id not encountered before.");
    }

    final Record existing = current.next();
    final long existingId = existing.getField(1, LongValue.class).getValue();

    // Fold all candidates down to the smallest id; stays at Long.MAX_VALUE without candidates.
    long smallestCandidate = Long.MAX_VALUE;
    while (candidates.hasNext()) {
        final long candidateId = candidates.next().getField(1, LongValue.class).getValue();
        smallestCandidate = Math.min(smallestCandidate, candidateId);
    }

    // Nothing to emit unless a candidate strictly improves on the stored id.
    if (smallestCandidate >= existingId) {
        return;
    }

    newComponentId.setValue(smallestCandidate);
    existing.setField(1, newComponentId);
    out.collect(existing);
}
/** * Computes a minimum aggregation on the distance of a data point to cluster centers. * * <p>Output Format: 0: centerID 1: pointVector 2: constant(1) (to enable combinable average * computation in the following reducer) */ @Override public void map(Record dataPointRecord, Collector<Record> out) { Point p = dataPointRecord.getField(1, Point.class); double nearestDistance = Double.MAX_VALUE; int centerId = -1; // check all cluster centers for (PointWithId center : centers) { // compute distance double distance = p.euclideanDistance(center.point); // update nearest cluster if necessary if (distance < nearestDistance) { nearestDistance = distance; centerId = center.id; } } // emit a new record with the center id and the data point. add a one to ease the // implementation of the average function with a combiner result.setField(0, new IntValue(centerId)); result.setField(1, p); result.setField(2, one); out.collect(result); }
/**
 * Forwards records unchanged until the invocation counter reaches 10, then fails.
 *
 * @param record the record to forward
 * @param out collector receiving the unchanged record
 * @throws ExpectedTestException once the incremented counter is at least 10
 */
@Override
public void map(Record record, Collector<Record> out) throws Exception {
    // Count this invocation first; the check below sees the already-incremented value.
    this.cnt++;
    if (this.cnt >= 10) {
        throw new ExpectedTestException();
    }

    out.collect(record);
}
/**
 * Builds a {@code Point} from the three coordinates stored in fields 1, 2 and 3 and writes it
 * back into field 1 of the same record before emitting it.
 *
 * @param record record holding the coordinates as {@code DoubleValue} in fields 1 to 3
 * @param out collector receiving the modified record
 */
@Override
public void map(Record record, Collector<Record> out) throws Exception {
    final double coordX = record.getField(1, DoubleValue.class).getValue();
    final double coordY = record.getField(2, DoubleValue.class).getValue();
    final double coordZ = record.getField(3, DoubleValue.class).getValue();

    final Point point = new Point(coordX, coordY, coordZ);
    record.setField(1, point);

    out.collect(record);
}
@Override public void join(Record rec1, Record rec2, Collector<Record> out) throws Exception { // rec1 has matching start, rec2 matching end // Therefore, rec2's end node and rec1's start node are identical // First half of new path will be rec2, second half will be rec1 // Get from-node and to-node of new path final StringValue fromNode = rec2.getField(0, StringValue.class); final StringValue toNode = rec1.getField(1, StringValue.class); // Check whether from-node = to-node to prevent circles! if (fromNode.equals(toNode)) { return; } // Create new path outputRecord.setField(0, fromNode); outputRecord.setField(1, toNode); // Compute length of new path length.setValue( rec1.getField(2, IntValue.class).getValue() + rec2.getField(2, IntValue.class).getValue()); outputRecord.setField(2, length); // compute hop count int hops = rec1.getField(3, IntValue.class).getValue() + 1 + rec2.getField(3, IntValue.class).getValue(); hopCnt.setValue(hops); outputRecord.setField(3, hopCnt); // Concatenate hops lists and insert matching node StringBuilder sb = new StringBuilder(); // first path sb.append(rec2.getField(4, StringValue.class).getValue()); sb.append(" "); // common node sb.append(rec1.getField(0, StringValue.class).getValue()); // second path sb.append(" "); sb.append(rec1.getField(4, StringValue.class).getValue()); hopList.setValue(sb.toString().trim()); outputRecord.setField(4, hopList); out.collect(outputRecord); }
/**
 * Filter "lineitem".
 *
 * <p>Output Schema: Key: orderkey Value: (partkey, suppkey, quantity, price)
 *
 * <p>The price is computed from the string attributes at positions 5 and 6 of the input tuple
 * as {@code attribute5 * (1 - attribute6)} and appended after the projection.
 *
 * @param record record holding the lineitem tuple in field 1
 * @param out collector receiving the record with the projected tuple
 */
@Override
public void map(Record record, Collector<Record> out) throws Exception {
    final Tuple lineItem = record.getField(1, Tuple.class);

    // price = extendedprice * (1 - discount)
    final float extendedPrice = Float.parseFloat(lineItem.getStringValueAt(5));
    final float discount = Float.parseFloat(lineItem.getStringValueAt(6));
    final float price = extendedPrice * (1 - discount);

    // Bit mask selecting the attributes at positions 1, 2 and 4; positions 0 and 3 are dropped.
    final int keptAttributes = (1 << 1) | (1 << 2) | (1 << 4);
    lineItem.project(keptAttributes);

    lineItem.addAttribute(String.valueOf(price));
    record.setField(1, lineItem);
    out.collect(record);
}
/** * Splits the document into terms and emits a PactRecord (docId, term, tf) for each term of the * document. * * <p>Each input document has the format "docId, document contents". */ @Override public void map(Record record, Collector<Record> collector) { String document = record.getField(0, StringValue.class).toString(); // split document into a , separated list String data[] = document.split(","); int docID = Integer.parseInt(data[0]); // String docID = data[0]; document = data[1]; document = document.replaceAll("\\W", " ").toLowerCase(); StringTokenizer tokenizer = new StringTokenizer(document); HashSet<String> stopWords = Util.STOP_WORDS; Map<String, Integer> map = new HashMap<String, Integer>(); // to identify the frequency of each word in the document int co = 1; while (tokenizer.hasMoreElements()) { String word = tokenizer.nextToken(); if (stopWords.contains(word.toString())) { continue; } if (map.containsKey(word)) { // if the word added previously increment the count by one co++; map.put(word, co); } else { // add a new word to the map co = 1; map.put(word, co); } } Iterator iterator = map.entrySet().iterator(); while (iterator.hasNext()) { Map.Entry pairs = (Map.Entry) iterator.next(); String word = pairs.getKey().toString(); int occur = Integer.parseInt(pairs.getValue().toString()); result.setField(0, new IntValue(docID)); result.setField(1, new StringValue(word)); result.setField(2, new IntValue(occur)); collector.collect(result); } }
@Override public void map(Record record, Collector<Record> out) throws Exception { for (Record model : this.models) { // compute dot product between model and pair long product = 0; for (int i = 1; i <= NUM_FEATURES; i++) { product += model.getField(i, this.lft).getValue() * record.getField(i, this.rgt).getValue(); } this.prd.setValue(product); // construct result this.result.copyFrom(model, new int[] {0}, new int[] {0}); this.result.copyFrom(record, new int[] {0}, new int[] {1}); this.result.setField(2, this.prd); // emit result out.collect(this.result); } }
/**
 * Pre-aggregates a group of points by emitting the single record that
 * {@code sumPointsAndCount} produces for them.
 *
 * @param points the records of one group
 * @param out collector receiving the pre-aggregated record
 */
@Override
public void combine(Iterator<Record> points, Collector<Record> out) {
    final Record partialAggregate = sumPointsAndCount(points);
    out.collect(partialAggregate);
}
/**
 * Computes the new position (coordinate vector) of a cluster center.
 *
 * <p>The points of the group are first aggregated with {@code sumPointsAndCount}; the point in
 * field 1 of that aggregate is then divided by the count in field 2 and written back to
 * field 1.
 *
 * @param points the records of one group
 * @param out collector receiving the record with the averaged position
 */
@Override
public void reduce(Iterator<Record> points, Collector<Record> out) {
    final Record aggregate = sumPointsAndCount(points);

    final Point summedCoordinates = aggregate.getField(1, Point.class);
    final int pointCount = aggregate.getField(2, IntValue.class).getValue();
    aggregate.setField(1, summedCoordinates.div(pointCount));

    out.collect(aggregate);
}
@Override public void coGroup( Iterator<Record> inputRecords, Iterator<Record> concatRecords, Collector<Record> out) { // init minimum length and minimum path Record pathRec = null; StringValue path = null; if (inputRecords.hasNext()) { // path is in input paths pathRec = inputRecords.next(); } else { // path must be in concat paths pathRec = concatRecords.next(); } // get from node (common for all paths) StringValue fromNode = pathRec.getField(0, StringValue.class); // get to node (common for all paths) StringValue toNode = pathRec.getField(1, StringValue.class); // get length of path minLength.setValue(pathRec.getField(2, IntValue.class).getValue()); // store path and hop count path = new StringValue(pathRec.getField(4, StringValue.class)); shortestPaths.add(path); hopCnts.put(path, new IntValue(pathRec.getField(3, IntValue.class).getValue())); // find shortest path of all input paths while (inputRecords.hasNext()) { pathRec = inputRecords.next(); IntValue length = pathRec.getField(2, IntValue.class); if (length.getValue() == minLength.getValue()) { // path has also minimum length add to list path = new StringValue(pathRec.getField(4, StringValue.class)); if (shortestPaths.add(path)) { hopCnts.put(path, new IntValue(pathRec.getField(3, IntValue.class).getValue())); } } else if (length.getValue() < minLength.getValue()) { // path has minimum length minLength.setValue(length.getValue()); // clear lists hopCnts.clear(); shortestPaths.clear(); // get path and add path and hop count path = new StringValue(pathRec.getField(4, StringValue.class)); shortestPaths.add(path); hopCnts.put(path, new IntValue(pathRec.getField(3, IntValue.class).getValue())); } } // find shortest path of all input and concatenated paths while (concatRecords.hasNext()) { pathRec = concatRecords.next(); IntValue length = pathRec.getField(2, IntValue.class); if (length.getValue() == minLength.getValue()) { // path has also minimum length add to list path = new StringValue(pathRec.getField(4, 
StringValue.class)); if (shortestPaths.add(path)) { hopCnts.put(path, new IntValue(pathRec.getField(3, IntValue.class).getValue())); } } else if (length.getValue() < minLength.getValue()) { // path has minimum length minLength.setValue(length.getValue()); // clear lists hopCnts.clear(); shortestPaths.clear(); // get path and add path and hop count path = new StringValue(pathRec.getField(4, StringValue.class)); shortestPaths.add(path); hopCnts.put(path, new IntValue(pathRec.getField(3, IntValue.class).getValue())); } } outputRecord.setField(0, fromNode); outputRecord.setField(1, toNode); outputRecord.setField(2, minLength); // emit all shortest paths for (StringValue shortestPath : shortestPaths) { outputRecord.setField(3, hopCnts.get(shortestPath)); outputRecord.setField(4, shortestPath); out.collect(outputRecord); } hopCnts.clear(); shortestPaths.clear(); }
/**
 * Identity mapper: passes every record to the collector unchanged.
 *
 * @param record the record to forward
 * @param out collector receiving the record
 */
@Override
public void map(final Record record, final Collector<Record> out) throws Exception {
    out.collect(record);
}