/**
   * Computes a minimum aggregation on the distance of a data point to cluster centers.
   *
   * <p>Each incoming record is read as: field 1 = point vector, field 2 = cluster center id,
   * field 3 = distance of the point to that center. The record with the smallest distance wins;
   * on ties the record seen first is kept.
   *
   * <p>The first record of the group is always taken as a fallback candidate, so a group whose
   * distances are all NaN or infinite still yields its own point instead of the contents left in
   * the reused {@code position} holder by a previous invocation. An empty group emits nothing.
   *
   * <p>Output Format: 0: centerID 1: pointVector 2: constant(1) (to enable combinable average
   * computation in the following reducer)
   *
   * @param pointsWithDistance the candidate (point, center, distance) records of one group
   * @param out collector receiving the single result record
   */
  @Override
  public void reduce(Iterator<PactRecord> pointsWithDistance, Collector<PactRecord> out) {
    double nearestDistance = Double.POSITIVE_INFINITY;
    int nearestClusterId = 0;
    boolean haveCandidate = false;

    // check all cluster centers
    while (pointsWithDistance.hasNext()) {
      PactRecord res = pointsWithDistance.next();

      double distance = res.getField(3, PactDouble.class).getValue();

      // a comparison against NaN is always false, hence the explicit first-record fallback
      final boolean closer = distance < nearestDistance;
      if (closer || !haveCandidate) {
        if (closer) {
          // only a real improvement may tighten the bound; a NaN must not become the bound
          nearestDistance = distance;
        }
        nearestClusterId = res.getField(2, PactInteger.class).getValue();
        res.getFieldInto(1, this.position);
        haveCandidate = true;
      }
    }

    if (!haveCandidate) {
      // nothing was read, so the reused holders still carry data from an earlier call
      return;
    }

    // emit a new record with the center id and the data point. add a one to ease the
    // implementation of the average function with a combiner
    this.centerId.setValue(nearestClusterId);
    this.result.setField(0, this.centerId);
    this.result.setField(1, this.position);
    this.result.setField(2, this.one);

    out.collect(this.result);
  }
  /**
   * Finds the smallest count (field 2) among the incoming records and emits one record per
   * timestamp (field 0) that carries this smallest count.
   *
   * <p>Output Format: 0: value of input field 1 (taken from the record that established the
   * minimum) 1: timestamp 2: minimal count
   *
   * <p>A running minimum of {@code -1} marks "no record seen yet".
   *
   * <p>NOTE(review): the {@code PactString} instances returned by {@code getField} are stored
   * in {@code timestamps} as-is; presumably they are not overwritten when the next record is
   * read. Verify against the record/iterator implementation.
   */
  @Override
  public void reduce(Iterator<PactRecord> matches, Collector<PactRecord> records) throws Exception {
    PactInteger tagId = null;
    int lowest = -1;
    timestamps.clear();

    while (matches.hasNext()) {
      final PactRecord current = matches.next();
      final int occurrences = current.getField(2, PactInteger.class).getValue();

      // strictly above an already established minimum: not interesting
      if (lowest != -1 && occurrences > lowest) {
        continue;
      }

      // first record or new minimum: start collecting timestamps from scratch
      if (lowest == -1 || occurrences < lowest) {
        lowest = occurrences;
        tagId = current.getField(1, PactInteger.class);
        timestamps.clear();
      }

      timestamps.add(current.getField(0, PactString.class));
    }

    if (tagId == null) {
      return;
    }

    lowsCount.setValue(lowest);
    for (PactString ts : timestamps) {
      pr2.setField(0, tagId);
      pr2.setField(1, ts);
      pr2.setField(2, lowsCount);
      records.collect(pr2);
    }
  }
    /**
     * Passes through only those records of the rank relation whose rank (field 1) is strictly
     * greater than {@code RANKFILTER}. Matching records are forwarded unchanged.
     *
     * <p>Output Format: 0: URL 1: RANK 2: AVG_DURATION
     */
    @Override
    public void map(PactRecord record, Collector<PactRecord> out) throws Exception {
      final int rank = record.getField(1, PactInteger.class).getValue();
      final boolean aboveThreshold = rank > RANKFILTER;

      if (aboveThreshold) {
        out.collect(record);
      }
    }
 /*
  * Compares the key fields of the candidate against the stored reference keys, field by field.
  * Returns false at the first mismatch and throws a NullKeyFieldException if a key field of
  * the candidate is null.
  *
  * (non-Javadoc)
  * @see eu.stratosphere.pact.runtime.plugable.TypeAccessors#equalToReference(java.lang.Object)
  */
 @Override
 public boolean equalToReference(PactRecord candidate) {
   for (int pos = 0; pos < this.keyFields.length; pos++) {
     final int fieldIndex = this.keyFields[pos];
     final Key candidateKey = candidate.getField(fieldIndex, this.transientKeyHolders[pos]);

     if (candidateKey == null) {
       throw new NullKeyFieldException(fieldIndex);
     }
     if (!candidateKey.equals(this.keyHolders[pos])) {
       return false;
     }
   }
   return true;
 }
    /**
     * Keeps only those records of the visits relation whose visit year equals
     * {@code YEARFILTER}. The year is taken from the first four characters of the date string
     * in field 1 (format YYYY-MM-DD). For passing records field 1 is set to null before the
     * record is emitted.
     *
     * <p>Output Format: 0: URL
     */
    @Override
    public void map(PactRecord record, Collector<PactRecord> out) throws Exception {
      final PactString dateField = record.getField(1, PactString.class);
      final String yearDigits = dateField.getValue().substring(0, 4);

      if (Integer.parseInt(yearDigits) != YEARFILTER) {
        return;
      }

      record.setNull(1);
      out.collect(record);
    }
 /*
  * Writes the normalized form of the leading normalizable key fields into the target segment,
  * one after another starting at the given offset, until either all such keys are written or
  * the byte budget is used up. The last key written may be truncated to the remaining budget.
  * A NullPointerException raised while processing a key is reported as NullKeyFieldException
  * for that key's field.
  *
  * (non-Javadoc)
  * @see eu.stratosphere.pact.generic.types.TypeComparator#putNormalizedKey(java.lang.Object, byte[], int, int)
  */
 @Override
 public void putNormalizedKey(PactRecord record, MemorySegment target, int offset, int numBytes) {
   // declared outside the try block so the handler can name the offending key field
   int keyIdx = 0;
   try {
     while (keyIdx < this.numLeadingNormalizableKeys && numBytes > 0) {
       final int chunk = Math.min(this.normalizedKeyLengths[keyIdx], numBytes);
       final NormalizableKey key =
           (NormalizableKey)
               record.getField(this.keyFields[keyIdx], this.transientKeyHolders[keyIdx]);
       key.copyNormalizedKey(target, offset, chunk);

       offset += chunk;
       numBytes -= chunk;
       keyIdx++;
     }
   } catch (NullPointerException npex) {
     throw new NullKeyFieldException(this.keyFields[keyIdx]);
   }
 }
 /*
  * Combines the hash codes of all key fields of the record into one hash value. Each field's
  * hash is XOR-ed into the running value, which is then multiplied with a salt component
  * selected by the key's position. A null key field is reported as NullKeyFieldException, an
  * index problem as KeyFieldOutOfBoundsException.
  *
  * (non-Javadoc)
  * @see eu.stratosphere.pact.runtime.plugable.TypeAccessors#hash(java.lang.Object)
  */
 @Override
 public int hash(PactRecord object) {
   // declared outside the try block so the handlers can name the offending key field
   int keyIdx = 0;
   try {
     int hashValue = 0;
     while (keyIdx < this.keyFields.length) {
       final int fieldHash =
           object.getField(this.keyFields[keyIdx], this.transientKeyHolders[keyIdx]).hashCode();
       hashValue ^= fieldHash;
       // the mask keeps the salt index within 0..31
       hashValue *= HASH_SALT[keyIdx & 0x1F];
       keyIdx++;
     }
     return hashValue;
   } catch (NullPointerException npex) {
     throw new NullKeyFieldException(this.keyFields[keyIdx]);
   } catch (IndexOutOfBoundsException iobex) {
     throw new KeyFieldOutOfBoundsException(this.keyFields[keyIdx]);
   }
 }
    /**
     * Emits a document record only if its text (field 1) contains every entry of
     * {@code KEYWORDS}. Before emitting, field 1 is set to null so that only the URL remains.
     *
     * <p>Output Format: 0: URL
     */
    @Override
    public void map(PactRecord record, Collector<PactRecord> out) throws Exception {
      final String text = record.getField(1, PactString.class).toString();

      // drop the document as soon as one keyword is missing
      for (String keyword : KEYWORDS) {
        if (!text.contains(keyword)) {
          return;
        }
      }

      // all keywords present: project onto the URL and emit
      record.setNull(1);
      out.collect(record);
    }
  /**
   * Computes a minimum aggregation on the distance of a data point to cluster centers.
   *
   * <p>Reads the distance from field 3 of every incoming record and forwards a copy of the
   * record with the smallest distance; on ties the record seen first is kept.
   *
   * <p>The first record of the group is always taken as a fallback candidate, so a group whose
   * distances are all NaN or infinite still forwards one of its own records instead of the
   * contents left in the reused {@code nearest} record by a previous invocation. An empty
   * group emits nothing.
   *
   * @param pointsWithDistance the candidate records of one group
   * @param out collector receiving the nearest record
   */
  @Override
  public void combine(Iterator<PactRecord> pointsWithDistance, Collector<PactRecord> out) {
    double nearestDistance = Double.POSITIVE_INFINITY;
    boolean haveCandidate = false;

    // check all cluster centers
    while (pointsWithDistance.hasNext()) {
      PactRecord res = pointsWithDistance.next();
      double distance = res.getField(3, PactDouble.class).getValue();

      // a comparison against NaN is always false, hence the explicit first-record fallback
      final boolean closer = distance < nearestDistance;
      if (closer || !haveCandidate) {
        if (closer) {
          // only a real improvement may tighten the bound; a NaN must not become the bound
          nearestDistance = distance;
        }
        res.copyTo(this.nearest);
        haveCandidate = true;
      }
    }

    // emit nearest one, unless the reused record was never filled during this call
    if (haveCandidate) {
      out.collect(this.nearest);
    }
  }
  /*
   * Writes the configured fields of the record, separated by the field delimiter and
   * terminated by the record delimiter. A field that is null or lies beyond the record's
   * field count is written as an empty entry in lenient mode and causes a RuntimeException
   * otherwise.
   *
   * (non-Javadoc)
   * @see eu.stratosphere.pact.common.recordio.OutputFormat#writeRecord(eu.stratosphere.pact.common.type.PactRecord)
   */
  @Override
  public void writeRecord(PactRecord record) throws IOException {
    final int fieldsInRecord = record.getNumFields();

    for (int col = 0; col < this.numFields; col++) {
      final int srcPos = this.recordPositions[col];
      final boolean inRange = srcPos < fieldsInRecord;

      Value value = null;
      if (inRange) {
        value = record.getField(srcPos, this.classes[col]);
      }

      // strict mode rejects missing values before anything is written for this column
      if (value == null && !this.lenient) {
        if (inRange) {
          throw new RuntimeException(
              "Cannot serialize record with <null> value at position: " + srcPos);
        }
        throw new RuntimeException(
            "Cannot serialize record with out field at position: " + srcPos);
      }

      // every column but the first is preceded by the delimiter, even when it stays empty
      if (col != 0) {
        this.wrt.write(this.fieldDelimiter);
      }
      if (value != null) {
        this.wrt.write(value.toString());
      }
    }

    // add the record delimiter
    this.wrt.write(this.recordDelimiter);
  }