@Override public Record readRecord(Record target, byte[] bytes, int offset, int numBytes) { String lineStr = new String(bytes, offset, numBytes); // replace reduce whitespaces and trim lineStr = lineStr.replaceAll("\\s+", " ").trim(); // build whitespace tokenizer StringTokenizer st = new StringTokenizer(lineStr, " "); // line must have at least three elements if (st.countTokens() < 3) { return null; } String rdfSubj = st.nextToken(); String rdfPred = st.nextToken(); String rdfObj = st.nextToken(); // we only want foaf:knows predicates if (!rdfPred.equals("<http://xmlns.com/foaf/0.1/knows>")) { return null; } // build node pair from subject and object fromNode.setValue(rdfSubj); toNode.setValue(rdfObj); target.setField(0, fromNode); target.setField(1, toNode); target.setField(2, pathLength); target.setField(3, hopCnt); target.setField(4, hopList); return target; }
/** * Computes a minimum aggregation on the distance of a data point to cluster centers. * * <p>Output Format: 0: centerID 1: pointVector 2: constant(1) (to enable combinable average * computation in the following reducer) */ @Override public void map(Record dataPointRecord, Collector<Record> out) { Point p = dataPointRecord.getField(1, Point.class); double nearestDistance = Double.MAX_VALUE; int centerId = -1; // check all cluster centers for (PointWithId center : centers) { // compute distance double distance = p.euclideanDistance(center.point); // update nearest cluster if necessary if (distance < nearestDistance) { nearestDistance = distance; centerId = center.id; } } // emit a new record with the center id and the data point. add a one to ease the // implementation of the average function with a combiner result.setField(0, new IntValue(centerId)); result.setField(1, p); result.setField(2, one); out.collect(result); }
@Override public void join(Record rec1, Record rec2, Collector<Record> out) throws Exception { // rec1 has matching start, rec2 matching end // Therefore, rec2's end node and rec1's start node are identical // First half of new path will be rec2, second half will be rec1 // Get from-node and to-node of new path final StringValue fromNode = rec2.getField(0, StringValue.class); final StringValue toNode = rec1.getField(1, StringValue.class); // Check whether from-node = to-node to prevent circles! if (fromNode.equals(toNode)) { return; } // Create new path outputRecord.setField(0, fromNode); outputRecord.setField(1, toNode); // Compute length of new path length.setValue( rec1.getField(2, IntValue.class).getValue() + rec2.getField(2, IntValue.class).getValue()); outputRecord.setField(2, length); // compute hop count int hops = rec1.getField(3, IntValue.class).getValue() + 1 + rec2.getField(3, IntValue.class).getValue(); hopCnt.setValue(hops); outputRecord.setField(3, hopCnt); // Concatenate hops lists and insert matching node StringBuilder sb = new StringBuilder(); // first path sb.append(rec2.getField(4, StringValue.class).getValue()); sb.append(" "); // common node sb.append(rec1.getField(0, StringValue.class).getValue()); // second path sb.append(" "); sb.append(rec1.getField(4, StringValue.class).getValue()); hopList.setValue(sb.toString().trim()); outputRecord.setField(4, hopList); out.collect(outputRecord); }
/**
 * Reads one line into the reusable string value and stores it as the only field of
 * {@code target} at position {@code pos}.
 *
 * <p>In ASCII mode the bytes are copied directly; otherwise they are run through
 * {@code decoder}, re-wrapping the buffer only when {@code bytes} is a different array than
 * the one wrapped previously.
 *
 * @param target record that is cleared and then receives the line
 * @param bytes buffer holding the line
 * @param offset index of the first byte of the line
 * @param numBytes number of bytes belonging to the line
 * @return {@code true} on success, {@code false} if the decoder rejected the bytes (a warning
 *     is logged and {@code target} is left untouched)
 */
public boolean readRecord(Record target, byte[] bytes, int offset, int numBytes) {
    final StringValue line = this.theString;

    if (!this.ascii) {
        ByteBuffer wrapper = this.byteWrapper;
        if (wrapper.array() != bytes) {
            // A new backing array was handed in; wrap it and keep the wrapper for reuse.
            wrapper = ByteBuffer.wrap(bytes, 0, bytes.length);
            this.byteWrapper = wrapper;
        }
        // Restrict the view to exactly the bytes of this line.
        wrapper.limit(offset + numBytes);
        wrapper.position(offset);

        try {
            line.setValue(this.decoder.decode(wrapper));
        } catch (CharacterCodingException e) {
            final byte[] offending = Arrays.copyOfRange(bytes, offset, offset + numBytes);
            LOG.warn("Line could not be encoded: " + Arrays.toString(offending), e);
            return false;
        }
    } else {
        line.setValueAscii(bytes, offset, numBytes);
    }

    target.clear();
    target.setField(this.pos, line);
    return true;
}
private final Record sumPointsAndCount(Iterator<Record> dataPoints) { Record next = null; p.clear(); int count = 0; // compute coordinate vector sum and count while (dataPoints.hasNext()) { next = dataPoints.next(); p.add(next.getField(1, Point.class)); count += next.getField(2, IntValue.class).getValue(); } next.setField(1, p); next.setField(2, new IntValue(count)); return next; }
/**
 * Compares the component id currently stored for an entry (field 1 of the first
 * {@code current} record) with the smallest id among the candidates and emits the entry with
 * the smaller id only if the candidates improve on it.
 *
 * @param candidates records whose field 1 holds a proposed component id
 * @param current the existing entry; only its first record is read
 * @param out receives the updated entry, if any
 * @throws Exception if {@code current} is empty
 */
@Override
public void coGroup(
        Iterator<Record> candidates, Iterator<Record> current, Collector<Record> out)
        throws Exception {
    if (!current.hasNext()) {
        throw new Exception("Error: Id not encountered before.");
    }

    final Record entry = current.next();
    final long knownId = entry.getField(1, LongValue.class).getValue();

    // Fold all candidates down to the smallest proposed id.
    long smallestCandidate = Long.MAX_VALUE;
    while (candidates.hasNext()) {
        final long proposed = candidates.next().getField(1, LongValue.class).getValue();
        smallestCandidate = Math.min(smallestCandidate, proposed);
    }

    // Nothing to report unless the candidates strictly improve the known id.
    if (smallestCandidate >= knownId) {
        return;
    }

    newComponentId.setValue(smallestCandidate);
    entry.setField(1, newComponentId);
    out.collect(entry);
}
/**
 * Replaces field 1 of the record with a {@code Point} built from the three double values
 * found in fields 1, 2 and 3, then forwards the record.
 */
@Override
public void map(Record record, Collector<Record> out) throws Exception {
    // All three coordinates are read before field 1 is overwritten.
    final Point coordinates =
            new Point(
                    record.getField(1, DoubleValue.class).getValue(),
                    record.getField(2, DoubleValue.class).getValue(),
                    record.getField(3, DoubleValue.class).getValue());

    record.setField(1, coordinates);
    out.collect(record);
}
/**
 * Pre-aggregates a group by keeping only the smallest component id (field 1).
 *
 * @param records the group; must yield at least one record, otherwise the final field update
 *     dereferences {@code null}
 * @return the last record of the iterator with field 1 replaced by the minimum id
 */
@Override
public Record combineFirst(Iterator<Record> records) {
    long smallest = Long.MAX_VALUE;
    Record last = null;

    while (records.hasNext()) {
        last = records.next();
        final long id = last.getField(1, LongValue.class).getValue();
        if (id < smallest) {
            smallest = id;
        }
    }

    newComponentId.setValue(smallest);
    last.setField(1, newComponentId);
    return last;
}
@Override public Record readRecord(Record target, byte[] bytes, int offset, int numBytes) { String lineStr = new String(bytes, offset, numBytes); StringTokenizer st = new StringTokenizer(lineStr, "|"); // path must have exactly 5 tokens (fromNode, toNode, length, hopCnt, hopList) if (st.countTokens() != 5) { return null; } this.fromNode.setValue(st.nextToken()); this.toNode.setValue(st.nextToken()); this.length.setValue(Integer.parseInt(st.nextToken())); this.hopCnt.setValue(Integer.parseInt(st.nextToken())); this.hopList.setValue(st.nextToken()); target.setField(0, fromNode); target.setField(1, toNode); target.setField(2, length); target.setField(3, hopCnt); target.setField(4, hopList); return target; }
/**
 * Projects a "lineitem" tuple and appends the discounted price.
 *
 * <p>Output Schema: Key: orderkey Value: (partkey, suppkey, quantity, price)
 *
 * <p>The price is computed as {@code column 5 * (1 - column 6)}; according to the column list
 * below these are extendedprice and discount.
 */
@Override
public void map(Record record, Collector<Record> out) throws Exception {
    final Tuple lineitem = record.getField(1, Tuple.class);

    // price = extendedprice * (1 - discount), read before the projection drops the columns
    final float extendedPrice = Float.parseFloat(lineitem.getStringValueAt(5));
    final float discount = Float.parseFloat(lineitem.getStringValueAt(6));
    final float price = extendedPrice * (1 - discount);

    // Project (orderkey | partkey, suppkey, linenumber, quantity, extendedprice, discount,
    // tax, ...) to (partkey, suppkey, quantity): keep the columns at bits 1, 2 and 4.
    lineitem.project((1 << 1) | (1 << 2) | (1 << 4));
    lineitem.addAttribute(String.valueOf(price));

    record.setField(1, lineitem);
    out.collect(record);
}
/**
 * Computes the new position (coordinate vector) of a cluster center by dividing the summed
 * point vector (field 1) by the summed count (field 2) and emits the resulting record.
 */
@Override
public void reduce(Iterator<Record> points, Collector<Record> out) {
    final Record aggregate = sumPointsAndCount(points);

    final Point vectorSum = aggregate.getField(1, Point.class);
    final int pointCount = aggregate.getField(2, IntValue.class).getValue();

    aggregate.setField(1, vectorSum.div(pointCount));
    out.collect(aggregate);
}
/**
 * Fills the given record from a Hadoop key/value pair: the converted key is stored in field 0
 * and the converted value in field 1. The per-object conversion is delegated to the
 * single-argument {@code convert} overloads defined elsewhere.
 *
 * @param stratosphereRecord record that receives the two converted fields
 * @param hadoopKey key to convert into field 0
 * @param hadoopValue value to convert into field 1
 */
@Override public void convert(Record stratosphereRecord, K hadoopKey, V hadoopValue) { stratosphereRecord.setField(0, convert(hadoopKey)); stratosphereRecord.setField(1, convert(hadoopValue)); }
@Override public void coGroup( Iterator<Record> inputRecords, Iterator<Record> concatRecords, Collector<Record> out) { // init minimum length and minimum path Record pathRec = null; StringValue path = null; if (inputRecords.hasNext()) { // path is in input paths pathRec = inputRecords.next(); } else { // path must be in concat paths pathRec = concatRecords.next(); } // get from node (common for all paths) StringValue fromNode = pathRec.getField(0, StringValue.class); // get to node (common for all paths) StringValue toNode = pathRec.getField(1, StringValue.class); // get length of path minLength.setValue(pathRec.getField(2, IntValue.class).getValue()); // store path and hop count path = new StringValue(pathRec.getField(4, StringValue.class)); shortestPaths.add(path); hopCnts.put(path, new IntValue(pathRec.getField(3, IntValue.class).getValue())); // find shortest path of all input paths while (inputRecords.hasNext()) { pathRec = inputRecords.next(); IntValue length = pathRec.getField(2, IntValue.class); if (length.getValue() == minLength.getValue()) { // path has also minimum length add to list path = new StringValue(pathRec.getField(4, StringValue.class)); if (shortestPaths.add(path)) { hopCnts.put(path, new IntValue(pathRec.getField(3, IntValue.class).getValue())); } } else if (length.getValue() < minLength.getValue()) { // path has minimum length minLength.setValue(length.getValue()); // clear lists hopCnts.clear(); shortestPaths.clear(); // get path and add path and hop count path = new StringValue(pathRec.getField(4, StringValue.class)); shortestPaths.add(path); hopCnts.put(path, new IntValue(pathRec.getField(3, IntValue.class).getValue())); } } // find shortest path of all input and concatenated paths while (concatRecords.hasNext()) { pathRec = concatRecords.next(); IntValue length = pathRec.getField(2, IntValue.class); if (length.getValue() == minLength.getValue()) { // path has also minimum length add to list path = new StringValue(pathRec.getField(4, 
StringValue.class)); if (shortestPaths.add(path)) { hopCnts.put(path, new IntValue(pathRec.getField(3, IntValue.class).getValue())); } } else if (length.getValue() < minLength.getValue()) { // path has minimum length minLength.setValue(length.getValue()); // clear lists hopCnts.clear(); shortestPaths.clear(); // get path and add path and hop count path = new StringValue(pathRec.getField(4, StringValue.class)); shortestPaths.add(path); hopCnts.put(path, new IntValue(pathRec.getField(3, IntValue.class).getValue())); } } outputRecord.setField(0, fromNode); outputRecord.setField(1, toNode); outputRecord.setField(2, minLength); // emit all shortest paths for (StringValue shortestPath : shortestPaths) { outputRecord.setField(3, hopCnts.get(shortestPath)); outputRecord.setField(4, shortestPath); out.collect(outputRecord); } hopCnts.clear(); shortestPaths.clear(); }