/** * Computes a minimum aggregation on the distance of a data point to cluster centers. * * <p>Output Format: 0: centerID 1: pointVector 2: constant(1) (to enable combinable average * computation in the following reducer) */ @Override public void reduce(Iterator<PactRecord> pointsWithDistance, Collector<PactRecord> out) { double nearestDistance = Double.MAX_VALUE; int nearestClusterId = 0; // check all cluster centers while (pointsWithDistance.hasNext()) { PactRecord res = pointsWithDistance.next(); double distance = res.getField(3, PactDouble.class).getValue(); // compare distances if (distance < nearestDistance) { // if distance is smaller than smallest till now, update nearest cluster nearestDistance = distance; nearestClusterId = res.getField(2, PactInteger.class).getValue(); res.getFieldInto(1, this.position); } } // emit a new record with the center id and the data point. add a one to ease the // implementation of the average function with a combiner this.centerId.setValue(nearestClusterId); this.result.setField(0, this.centerId); this.result.setField(1, this.position); this.result.setField(2, this.one); out.collect(this.result); }
/**
 * Sets the reference record by copying each of its key fields into the corresponding
 * key holder of this instance.
 *
 * <p>The key fields are processed in the order given by {@code keyFields}; the field at
 * position {@code keyFields[i]} is read into {@code keyHolders[i]}.
 *
 * @param toCompare the record whose key fields are loaded into the key holders
 * @throws NullKeyFieldException if {@code getFieldInto} reports {@code false} for a key
 *         field (presumably because the field is null or missing -- confirm against
 *         {@code PactRecord}); the exception carries that field's position
 */
@Override
public void setReference(PactRecord toCompare) {
    final int keyCount = this.keyFields.length;
    int k = 0;
    while (k < keyCount) {
        final int fieldPosition = this.keyFields[k];
        final boolean loaded = toCompare.getFieldInto(fieldPosition, this.keyHolders[k]);
        if (!loaded) {
            throw new NullKeyFieldException(fieldPosition);
        }
        k++;
    }
}