Ejemplo n.º 1
0
  /**
   * Picks, from all records handed in for one group, the one with the smallest distance and emits
   * its point together with the id of the corresponding cluster center.
   *
   * <p>Input fields read: 1: point (copied into {@code this.position}) 2: cluster center id 3:
   * distance
   *
   * <p>Output Format: 0: centerID 1: pointVector 2: constant(1) (to enable combinable average
   * computation in the following reducer)
   *
   * @param pointsWithDistance the records of one group, each carrying a distance in field 3
   * @param out collector that receives the single result record
   */
  @Override
  public void reduce(Iterator<PactRecord> pointsWithDistance, Collector<PactRecord> out) {
    int bestCenterId = 0;
    double bestDistance = Double.MAX_VALUE;

    // scan every candidate and remember the closest one seen so far
    while (pointsWithDistance.hasNext()) {
      final PactRecord candidate = pointsWithDistance.next();
      final double candidateDistance = candidate.getField(3, PactDouble.class).getValue();

      // only a strictly smaller distance replaces the current best
      if (!(candidateDistance < bestDistance)) {
        continue;
      }

      bestDistance = candidateDistance;
      bestCenterId = candidate.getField(2, PactInteger.class).getValue();
      candidate.getFieldInto(1, this.position);
    }

    // assemble the output in the reusable result record; the trailing constant lets the
    // downstream average be computed with a combiner
    this.centerId.setValue(bestCenterId);
    this.result.setField(0, this.centerId);
    this.result.setField(1, this.position);
    this.result.setField(2, this.one);

    out.collect(this.result);
  }
    /**
     * Passes on only those records whose date in field 1 starts with the year {@code YEARFILTER}.
     * Field 1 is set to null on every record that is emitted.
     *
     * <p>Output Format: 0: URL
     *
     * @param record the incoming record; field 1 is read as a date string
     * @param out collector that receives the records passing the filter
     */
    @Override
    public void map(PactRecord record, Collector<PactRecord> out) throws Exception {

      // The date has the format YYYY-MM-DD, so its first four characters are the year
      final String visitDate = record.getField(1, PactString.class).getValue();
      final String yearDigits = visitDate.substring(0, 4);

      if (Integer.parseInt(yearDigits) != YEARFILTER) {
        return;
      }

      // drop the date field before forwarding the record
      record.setNull(1);
      out.collect(record);
    }
    /**
     * Forwards a record unchanged when the integer in its field 1 is strictly greater than
     * {@code RANKFILTER}; all other records are dropped.
     *
     * <p>Output Format: 0: URL 1: RANK 2: AVG_DURATION
     *
     * @param record the incoming record; field 1 is read as the rank
     * @param out collector that receives the records passing the filter
     */
    @Override
    public void map(PactRecord record, Collector<PactRecord> out) throws Exception {
      final int rank = record.getField(1, PactInteger.class).getValue();

      if (rank > RANKFILTER) {
        out.collect(record);
      }
    }
  /**
   * Returns the record at the current read position without the caller having to track segment
   * boundaries.
   *
   * <p>Deserialization only happens while {@code readNext} is set; the flag is cleared afterwards,
   * so repeated calls return the same {@code currentReadRecord} instance until something outside
   * this method sets the flag again.
   *
   * @return the shared {@code currentReadRecord} instance
   */
  @Override
  public PactRecord peek() {
    if (readNext) {
      // presumably deserialize returns the number of bytes the record occupied — TODO confirm
      int bytesRead =
          currentReadRecord.deserialize(segments, CURRENT_READ_SEGMENT_INDEX, currentReadOffset);
      // advance the read position by bytesRead, crossing segment boundaries where necessary
      while (bytesRead > 0) {
        if (currentReadSegment.size() - currentReadOffset > bytesRead) {
          // the remaining bytes end strictly inside the current segment
          currentReadOffset += bytesRead;
          bytesRead = 0;
        } else {
          // the current segment is used up: account for its remainder and move to the next one
          bytesRead -= (currentReadSegment.size() - currentReadOffset);

          // Remove old read segment from list & release in memory manager
          MemorySegment unused = segments.remove(CURRENT_READ_SEGMENT_INDEX);
          memoryManager.release(unused);

          // Update reference to new read segment
          // NOTE(review): this branch is also taken when the record ends exactly at the segment
          // end, so it assumes a following segment is always present — confirm
          currentReadSegment = segments.get(CURRENT_READ_SEGMENT_INDEX);
          currentReadOffset = 0;
        }
      }
      readNext = false;
    }

    return currentReadRecord;
  }
Ejemplo n.º 5
0
 /**
  * Reads every key field of the given record into the matching entry of {@code keyHolders}.
  *
  * @param toCompare the record whose key fields are loaded
  * @throws NullKeyFieldException if {@code getFieldInto} reports failure for a key field
  * @see eu.stratosphere.pact.runtime.plugable.TypeAccessors#setReferenceForEquality(java.lang.Object)
  */
 @Override
 public void setReference(PactRecord toCompare) {
   int pos = 0;
   while (pos < this.keyFields.length) {
     final int fieldIndex = this.keyFields[pos];
     final boolean loaded = toCompare.getFieldInto(fieldIndex, this.keyHolders[pos]);
     if (!loaded) {
       throw new NullKeyFieldException(fieldIndex);
     }
     pos++;
   }
 }
  /**
   * Finds the smallest count among the records of one group and emits one record for every
   * timestamp at which that smallest count occurs.
   *
   * <p>Input fields read: 0: timestamp 1: hashtag id 2: count
   *
   * <p>Output Format: 0: hashtag id 1: timestamp 2: minimum count
   *
   * <p>Nothing is emitted when the group is empty or when the hashtag id of the minimum record is
   * null.
   *
   * @param matches the records of one group
   * @param records collector that receives one record per timestamp with the minimum count
   */
  @Override
  public void reduce(Iterator<PactRecord> matches, Collector<PactRecord> records) throws Exception {
    PactInteger hashtagID = null;
    // An explicit flag marks "no minimum yet". A sentinel value such as -1 would be mistaken for
    // "unset" whenever a record legitimately carries that count, and later records would then
    // overwrite the real minimum.
    boolean minimumKnown = false;
    int minValue = 0;
    timestamps.clear();

    while (matches.hasNext()) {
      final PactRecord pr = matches.next();
      final int count = pr.getField(2, PactInteger.class).getValue();
      if (!minimumKnown || count < minValue) {
        // new minimum: forget the timestamps collected for the previous one
        minimumKnown = true;
        minValue = count;
        hashtagID = pr.getField(1, PactInteger.class);
        timestamps.clear();
        timestamps.add(pr.getField(0, PactString.class));
      } else if (count == minValue) {
        // another timestamp sharing the current minimum
        timestamps.add(pr.getField(0, PactString.class));
      }
    }

    if (hashtagID != null) {
      lowsCount.setValue(minValue);
      for (PactString timestamp : timestamps) {
        pr2.setField(0, hashtagID);
        pr2.setField(1, timestamp);
        pr2.setField(2, lowsCount);
        records.collect(pr2);
      }
    }
  }
Ejemplo n.º 7
0
 /**
  * Compares the key fields of the candidate record against the keys held in {@code keyHolders}.
  *
  * @param candidate the record whose key fields are compared
  * @return {@code true} if every key field equals its counterpart in {@code keyHolders}, {@code
  *     false} at the first field that differs
  * @throws NullKeyFieldException if a key field of the candidate is null
  * @see eu.stratosphere.pact.runtime.plugable.TypeAccessors#equalToReference(java.lang.Object)
  */
 @Override
 public boolean equalToReference(PactRecord candidate) {
   for (int pos = 0; pos < this.keyFields.length; pos++) {
     final int fieldIndex = this.keyFields[pos];
     final Key candidateKey = candidate.getField(fieldIndex, this.transientKeyHolders[pos]);

     if (candidateKey == null) {
       throw new NullKeyFieldException(fieldIndex);
     }
     if (!candidateKey.equals(this.keyHolders[pos])) {
       return false;
     }
   }
   return true;
 }
    /**
     * Forwards a record only when the text in its field 1 contains every entry of {@code
     * KEYWORDS}. Field 1 is set to null on every record that is emitted.
     *
     * <p>Output Format: 0: URL
     *
     * @param record the incoming record; field 1 is read as the document text
     * @param out collector that receives the records passing the filter
     */
    @Override
    public void map(PactRecord record, Collector<PactRecord> out) throws Exception {
      final String text = record.getField(1, PactString.class).toString();

      // FILTER
      // A single missing keyword disqualifies the document
      for (String keyword : KEYWORDS) {
        if (!text.contains(keyword)) {
          return;
        }
      }

      // PROJECT
      // Drop the text field before forwarding the record
      record.setNull(1);
      out.collect(record);
    }
Ejemplo n.º 9
0
  /**
   * Pre-aggregates one group by keeping only the record with the smallest distance (field 3). The
   * winning record is copied into {@code this.nearest}, which is then emitted.
   *
   * @param pointsWithDistance the records of one group, each carrying a distance in field 3
   * @param out collector that receives the single surviving record
   */
  @Override
  public void combine(Iterator<PactRecord> pointsWithDistance, Collector<PactRecord> out) {
    double bestDistance = Double.MAX_VALUE;

    // scan every candidate and keep a copy of the closest one seen so far
    while (pointsWithDistance.hasNext()) {
      final PactRecord candidate = pointsWithDistance.next();
      final double candidateDistance = candidate.getField(3, PactDouble.class).getValue();

      // only a strictly smaller distance replaces the current best
      if (!(candidateDistance < bestDistance)) {
        continue;
      }

      bestDistance = candidateDistance;
      candidate.copyTo(this.nearest);
    }

    // forward the closest candidate
    out.collect(this.nearest);
  }
  /**
   * Counts how many times {@code iterationIter.next(tc)} succeeds and emits that count as field 0
   * of the result record.
   *
   * @param iterationIter the iterator that is drained
   */
  @Override
  public void runIteration(IterationIterator iterationIter) throws Exception {
    int elements = 0;

    // drain the iterator, counting every successful advance
    boolean hasMore = iterationIter.next(tc);
    while (hasMore) {
      elements++;
      hasMore = iterationIter.next(tc);
    }

    count.setValue(elements);
    result.setField(0, count);
    output.collect(result);
  }
  /**
   * Writes the configured fields of the record to {@code this.wrt}, separated by the field
   * delimiter and terminated by the record delimiter.
   *
   * <p>A field that lies beyond the record's field count, or whose value is null, is written as
   * an empty entry in lenient mode and causes a {@link RuntimeException} otherwise.
   *
   * @param record the record to write
   * @throws IOException propagated from the writes to {@code this.wrt}
   * @throws RuntimeException in non-lenient mode, if a configured field is absent or null
   * @see eu.stratosphere.pact.common.recordio.OutputFormat#writeRecord(eu.stratosphere.pact.common.type.PactRecord)
   */
  @Override
  public void writeRecord(PactRecord record) throws IOException {
    final int recordFieldCount = record.getNumFields();

    for (int i = 0; i < this.numFields; i++) {
      final int readPos = this.recordPositions[i];
      final boolean withinRecord = readPos < recordFieldCount;
      final Value value = withinRecord ? record.getField(readPos, this.classes[i]) : null;

      // strict mode rejects absent and null fields before anything is written for this position
      if (value == null && !this.lenient) {
        if (withinRecord) {
          throw new RuntimeException(
              "Cannot serialize record with <null> value at position: " + readPos);
        }
        throw new RuntimeException(
            "Cannot serialize record with out field at position: " + readPos);
      }

      // every position after the first is preceded by the field delimiter
      if (i != 0) {
        this.wrt.write(this.fieldDelimiter);
      }
      if (value != null) {
        this.wrt.write(value.toString());
      }
    }

    // add the record delimiter
    this.wrt.write(this.recordDelimiter);
  }
Ejemplo n.º 12
0
 /**
  * Extracts the key fields of the given record into freshly created key objects, one per key
  * field, each of the same class as the corresponding entry of {@code keyHolders}.
  *
  * @param record the record whose key fields are read
  * @return a new array holding the extracted keys
  * @throws RuntimeException wrapping any exception raised while creating or filling the keys
  */
 public final Key[] getKeysAsCopy(PactRecord record) {
   try {
     final Key[] copies = new Key[this.keyFields.length];

     // create one empty key per position, typed like the existing holder
     int pos = 0;
     while (pos < copies.length) {
       copies[pos] = this.keyHolders[pos].getClass().newInstance();
       pos++;
     }

     record.getFieldsInto(this.keyFields, copies);
     return copies;
   } catch (Exception cause) {
     // this should never happen, because the classes have been instantiated before. Report for
     // debugging.
     throw new RuntimeException(
         "Could not instantiate key classes when duplicating PactRecordComparator.", cause);
   }
 }
Ejemplo n.º 13
0
 /**
  * Writes the normalized form of the leading normalizable key fields into the target segment,
  * one after another, until either all such keys are written or {@code numBytes} is used up.
  * The last key written is truncated to the bytes that remain.
  *
  * @param record the record whose key fields are normalized
  * @param target the memory segment that receives the bytes
  * @param offset position in {@code target} at which writing starts
  * @param numBytes maximum number of bytes to write
  * @throws NullKeyFieldException if a {@link NullPointerException} occurs while processing a key
  * @see eu.stratosphere.pact.generic.types.TypeComparator#putNormalizedKey(java.lang.Object, byte[], int, int)
  */
 @Override
 public void putNormalizedKey(PactRecord record, MemorySegment target, int offset, int numBytes) {
   // declared outside the try so the catch block can report the failing key position
   int i = 0;
   try {
     while (i < this.numLeadingNormalizableKeys && numBytes > 0) {
       // never write more than the remaining byte budget
       final int len = Math.min(this.normalizedKeyLengths[i], numBytes);
       final NormalizableKey key =
           (NormalizableKey) record.getField(this.keyFields[i], this.transientKeyHolders[i]);
       key.copyNormalizedKey(target, offset, len);

       numBytes -= len;
       offset += len;
       i++;
     }
   } catch (NullPointerException npex) {
     throw new NullKeyFieldException(this.keyFields[i]);
   }
 }
Ejemplo n.º 14
0
 /**
  * Combines the hash codes of all key fields of the record into one value: each key's hash is
  * XOR-ed into the running code, which is then multiplied by an entry of {@code HASH_SALT}.
  *
  * @param object the record whose key fields are hashed
  * @return the combined hash code
  * @throws NullKeyFieldException if a {@link NullPointerException} occurs while hashing a key
  * @throws KeyFieldOutOfBoundsException if an {@link IndexOutOfBoundsException} occurs while
  *     hashing a key
  * @see eu.stratosphere.pact.runtime.plugable.TypeAccessors#hash(java.lang.Object)
  */
 @Override
 public int hash(PactRecord object) {
   // declared outside the try so the catch blocks can report the failing key position
   int i = 0;
   try {
     int code = 0;
     while (i < this.keyFields.length) {
       final int keyHash =
           object.getField(this.keyFields[i], this.transientKeyHolders[i]).hashCode();
       code ^= keyHash;
       // the mask 0x1F selects the salt entry from the low five bits of the key position
       code *= HASH_SALT[i & 0x1F];
       i++;
     }
     return code;
   } catch (NullPointerException npex) {
     throw new NullKeyFieldException(this.keyFields[i]);
   } catch (IndexOutOfBoundsException iobex) {
     throw new KeyFieldOutOfBoundsException(this.keyFields[i]);
   }
 }
  /**
   * Serializes the record through the output view of the current write segment and increments
   * the element count. Segments that show up in {@code newSegments} during serialization are
   * appended to {@code segments}, and the last one becomes the current write segment.
   *
   * @param rec the record to append
   * @return always {@code true}
   * @throws RuntimeException wrapping any exception raised during serialization
   */
  @Override
  public boolean offer(PactRecord rec) {
    try {
      rec.serialize(null, currentWriteSegment.outputView, allocatingIterator, newSegments);
    } catch (Exception cause) {
      throw new RuntimeException("Bad error during serialization", cause);
    }

    // take over any segments that were added while the record was written
    final boolean segmentsAdded = !newSegments.isEmpty();
    if (segmentsAdded) {
      segments.addAll(newSegments);
      final int lastIndex = segments.size() - 1;
      currentWriteSegment = segments.get(lastIndex);
      newSegments.clear();
    }

    count += 1;

    return true;
  }