예제 #1
0
  /**
   * Selects, among all incoming records, the one carrying the smallest distance and emits a
   * single record describing that choice.
   *
   * <p>Input fields read: 1 (copied into {@code this.position}), 2 (cluster id, read as a
   * {@code PactInteger}) and 3 (distance, read as a {@code PactDouble}).
   *
   * <p>Output fields written: 0 = id of the nearest center, 1 = the data point, 2 =
   * {@code this.one} (presumably a constant 1 so a downstream combiner can compute averages --
   * verify against the field's declaration).
   *
   * @param pointsWithDistance records to scan, one per candidate cluster center
   * @param out collector that receives the single result record
   */
  @Override
  public void reduce(Iterator<PactRecord> pointsWithDistance, Collector<PactRecord> out) {
    int bestCenter = 0;
    double bestDistance = Double.MAX_VALUE;

    // scan every candidate and remember the closest one seen so far
    while (pointsWithDistance.hasNext()) {
      final PactRecord candidate = pointsWithDistance.next();
      final double candidateDistance = candidate.getField(3, PactDouble.class).getValue();

      // skip anything that is not strictly closer (this also skips NaN distances)
      if (!(candidateDistance < bestDistance)) {
        continue;
      }

      bestDistance = candidateDistance;
      bestCenter = candidate.getField(2, PactInteger.class).getValue();
      // keep the point that belongs to the current best candidate
      candidate.getFieldInto(1, this.position);
    }

    // assemble the output in the reusable result record: (centerId, point, one)
    this.centerId.setValue(bestCenter);
    this.result.setField(0, this.centerId);
    this.result.setField(1, this.position);
    this.result.setField(2, this.one);
    out.collect(this.result);
  }
예제 #2
0
 /**
  * Loads the key fields of the given record into this object's key holders.
  *
  * <p>For every position {@code k}, the record field with index {@code keyFields[k]} is read
  * into {@code keyHolders[k]}.
  *
  * @param toCompare record whose key fields are read
  * @throws NullKeyFieldException if {@code getFieldInto} returns {@code false} for a key field;
  *     the exception is constructed with that field's index
  */
 @Override
 public void setReference(PactRecord toCompare) {
   for (int k = 0, n = this.keyFields.length; k < n; k++) {
     final boolean copied = toCompare.getFieldInto(this.keyFields[k], this.keyHolders[k]);
     if (copied) {
       continue;
     }
     // the record could not supply this key field
     throw new NullKeyFieldException(this.keyFields[k]);
   }
 }