/**
     * Replaces the component id of the current record when a candidate offers a smaller one.
     *
     * <p>The present id is read from field 1 of the first {@code current} record. The minimum of
     * field 1 over all {@code candidates} is computed, and the current record is emitted with
     * that minimum in field 1 only if it is strictly smaller than the present id. Otherwise
     * nothing is emitted.
     *
     * @param candidates records carrying candidate component ids in field 1
     * @param current the record holding the present component id in field 1
     * @param out collector receiving the updated record, if any
     * @throws Exception if {@code current} contains no record
     */
    @Override
    public void coGroup(
        Iterator<Record> candidates, Iterator<Record> current, Collector<Record> out)
        throws Exception {
      if (!current.hasNext()) {
        throw new Exception("Error: Id not encountered before.");
      }
      final Record presentRecord = current.next();
      final long presentId = presentRecord.getField(1, LongValue.class).getValue();

      // Fold all candidates into the smallest id seen; stays at MAX_VALUE without candidates.
      long smallestCandidate = Long.MAX_VALUE;
      while (candidates.hasNext()) {
        smallestCandidate =
            Math.min(
                smallestCandidate, candidates.next().getField(1, LongValue.class).getValue());
      }

      if (smallestCandidate >= presentId) {
        // No improvement: emit nothing.
        return;
      }

      newComponentId.setValue(smallestCandidate);
      presentRecord.setField(1, newComponentId);
      out.collect(presentRecord);
    }
Пример #2
0
    /**
     * Finds the cluster center closest to a data point and emits the assignment.
     *
     * <p>Output format: field 0 = id of the nearest center, field 1 = the point vector, field 2 =
     * the shared {@code one} value (a constant that lets the following reducer compute a
     * combinable average). If {@code centers} yields no element, the emitted center id is -1.
     *
     * @param dataPointRecord record whose field 1 holds the point
     * @param out collector receiving the (centerId, point, one) record
     */
    @Override
    public void map(Record dataPointRecord, Collector<Record> out) {
      final Point point = dataPointRecord.getField(1, Point.class);

      int closestCenterId = -1;
      double closestDistance = Double.MAX_VALUE;

      // Linear scan over every known center, remembering the best one so far.
      for (PointWithId candidate : centers) {
        final double candidateDistance = point.euclideanDistance(candidate.point);
        if (candidateDistance < closestDistance) {
          closestCenterId = candidate.id;
          closestDistance = candidateDistance;
        }
      }

      // The reused result record carries the assignment plus the constant used for averaging.
      result.setField(0, new IntValue(closestCenterId));
      result.setField(1, point);
      result.setField(2, one);
      out.collect(result);
    }
Пример #3
0
 /**
  * Forwards records unchanged while the invocation counter stays below 10 and fails deliberately
  * once it reaches 10.
  *
  * @param record the record to forward
  * @param out collector receiving the forwarded record
  * @throws ExpectedTestException as soon as the incremented counter is at least 10
  */
 @Override
 public void map(Record record, Collector<Record> out) throws Exception {
   // Count this call first; the limit check sees the already-incremented value.
   this.cnt += 1;
   if (this.cnt < 10) {
     out.collect(record);
     return;
   }
   throw new ExpectedTestException();
 }
Пример #4
0
    /**
     * Packs the three coordinates stored in fields 1, 2 and 3 into a single {@code Point} and
     * stores it in field 1 of the same record before emitting it.
     *
     * @param record record holding the x, y and z coordinates in fields 1 to 3
     * @param out collector receiving the modified record
     */
    @Override
    public void map(Record record, Collector<Record> out) throws Exception {
      // Read all coordinates before field 1 is overwritten with the point.
      final double[] coordinates = new double[3];
      for (int axis = 0; axis < coordinates.length; axis++) {
        coordinates[axis] = record.getField(axis + 1, DoubleValue.class).getValue();
      }

      final Point point = new Point(coordinates[0], coordinates[1], coordinates[2]);
      record.setField(1, point);
      out.collect(record);
    }
Пример #5
0
    /**
     * Concatenates two paths that meet in a common node into one longer path.
     *
     * <p>{@code rec2} forms the first half and {@code rec1} the second half of the new path. The
     * output record holds: field 0 = from-node of {@code rec2}, field 1 = to-node of
     * {@code rec1}, field 2 = sum of both lengths, field 3 = sum of both hop counts plus one,
     * field 4 = both hop lists joined by field 0 of {@code rec1}. Nothing is emitted when the
     * resulting from-node equals the to-node.
     *
     * @param rec1 path supplying the second half
     * @param rec2 path supplying the first half
     * @param out collector receiving the concatenated path
     */
    @Override
    public void join(Record rec1, Record rec2, Collector<Record> out) throws Exception {
      // End points of the combined path: it starts where rec2 starts and ends where rec1 ends.
      final StringValue pathStart = rec2.getField(0, StringValue.class);
      final StringValue pathEnd = rec1.getField(1, StringValue.class);

      // A path that returns to its own start would be a circle, so drop it.
      if (pathStart.equals(pathEnd)) {
        return;
      }

      outputRecord.setField(0, pathStart);
      outputRecord.setField(1, pathEnd);

      // Total length is the sum of both partial lengths.
      final int combinedLength =
          rec1.getField(2, IntValue.class).getValue() + rec2.getField(2, IntValue.class).getValue();
      length.setValue(combinedLength);
      outputRecord.setField(2, length);

      // The shared node becomes one additional hop between the two partial paths.
      hopCnt.setValue(
          rec1.getField(3, IntValue.class).getValue()
              + 1
              + rec2.getField(3, IntValue.class).getValue());
      outputRecord.setField(3, hopCnt);

      // Hop list layout: "<hops of rec2> <common node> <hops of rec1>", trimmed at both ends.
      final String joinedHops =
          rec2.getField(4, StringValue.class).getValue()
              + " "
              + rec1.getField(0, StringValue.class).getValue()
              + " "
              + rec1.getField(4, StringValue.class).getValue();
      hopList.setValue(joinedHops.trim());
      outputRecord.setField(4, hopList);

      out.collect(outputRecord);
    }
Пример #6
0
  /**
   * Filters "lineitem": derives the discounted price and projects the tuple to the needed columns.
   *
   * <p>Output Schema: Key: orderkey Value: (partkey, suppkey, quantity, price)
   *
   * @param record record whose field 1 holds the lineitem tuple
   * @param out collector receiving the record with the projected tuple
   */
  @Override
  public void map(Record record, Collector<Record> out) throws Exception {
    final Tuple lineItem = record.getField(1, Tuple.class);

    // price = extendedprice * (1 - discount); read both columns before the projection below.
    final float extendedPrice = Float.parseFloat(lineItem.getStringValueAt(5));
    final float discount = Float.parseFloat(lineItem.getStringValueAt(6));
    final float price = extendedPrice * (1 - discount);

    // Bit mask selecting columns 1, 2 and 4 (partkey, suppkey, quantity per the output schema).
    final int projectionMask = (1 << 1) | (1 << 2) | (1 << 4);
    lineItem.project(projectionMask);

    // The computed price is appended as the last attribute.
    lineItem.addAttribute(String.valueOf(price));

    record.setField(1, lineItem);
    out.collect(record);
  }
Пример #7
0
  /**
   * Splits the document into terms and emits one record (docId, term, tf) for each distinct term
   * of the document that is not a stop word.
   *
   * <p>Each input document has the format "docId, document contents". Only the first comma
   * separates the id from the contents, so commas inside the contents are kept as part of the
   * text. Non-word characters are replaced by blanks and the text is lower-cased before it is
   * tokenized on whitespace.
   *
   * @param record record whose field 0 holds the raw document line
   * @param collector collector receiving one (docId, term, tf) record per distinct term
   * @throws NumberFormatException if the part before the first comma is not an integer
   * @throws ArrayIndexOutOfBoundsException if the line contains no comma at all
   */
  @Override
  public void map(Record record, Collector<Record> collector) {
    String document = record.getField(0, StringValue.class).toString();

    // Limit the split to two parts: everything after the first comma is document text.
    String[] data = document.split(",", 2);
    int docID = Integer.parseInt(data[0]);
    String contents = data[1].replaceAll("\\W", " ").toLowerCase();

    StringTokenizer tokenizer = new StringTokenizer(contents);
    HashSet<String> stopWords = Util.STOP_WORDS;

    // Frequency of each non-stop-word term within this document.
    Map<String, Integer> termFrequencies = new HashMap<String, Integer>();
    while (tokenizer.hasMoreTokens()) {
      String word = tokenizer.nextToken();
      if (stopWords.contains(word)) {
        continue;
      }

      // Increment this word's own count; a counter shared across words would mix them up.
      Integer seenSoFar = termFrequencies.get(word);
      termFrequencies.put(word, seenSoFar == null ? 1 : seenSoFar + 1);
    }

    for (Map.Entry<String, Integer> entry : termFrequencies.entrySet()) {
      result.setField(0, new IntValue(docID));
      result.setField(1, new StringValue(entry.getKey()));
      result.setField(2, new IntValue(entry.getValue()));
      collector.collect(result);
    }
  }
    /**
     * Emits, for every model, the dot product between that model and the incoming record.
     *
     * <p>The product is accumulated over fields 1 to {@code NUM_FEATURES} of both records. Each
     * emitted record holds field 0 of the model in field 0, field 0 of the input record in field
     * 1 and the product in field 2.
     *
     * @param record the input record to score against all models
     * @param out collector receiving one result record per model
     */
    @Override
    public void map(Record record, Collector<Record> out) throws Exception {
      for (Record currentModel : this.models) {
        // Accumulate the feature-wise products; feature fields start at index 1.
        long dotProduct = 0;
        int feature = 1;
        while (feature <= NUM_FEATURES) {
          dotProduct +=
              currentModel.getField(feature, this.lft).getValue()
                  * record.getField(feature, this.rgt).getValue();
          feature++;
        }
        this.prd.setValue(dotProduct);

        // Assemble the output: model field 0 -> 0, record field 0 -> 1, product -> 2.
        final int[] firstField = new int[] {0};
        this.result.copyFrom(currentModel, firstField, new int[] {0});
        this.result.copyFrom(record, new int[] {0}, new int[] {1});
        this.result.setField(2, this.prd);

        out.collect(this.result);
      }
    }
Пример #9
0
 /**
  * Pre-aggregates a group of point records by handing them to {@code sumPointsAndCount} and
  * emitting the single record it returns.
  *
  * @param points the records of one group
  * @param out collector receiving the pre-aggregated record
  */
 @Override
 public void combine(Iterator<Record> points, Collector<Record> out) {
   final Record partialAggregate = sumPointsAndCount(points);
   out.collect(partialAggregate);
 }
Пример #10
0
 /**
  * Computes the new position (coordinate vector) of a cluster center.
  *
  * <p>The group is first summed by {@code sumPointsAndCount}; the point in field 1 of that record
  * is then divided by the count in field 2 and written back to field 1 before the record is
  * emitted.
  *
  * @param points the records assigned to one cluster center
  * @param out collector receiving the record with the averaged point
  */
 @Override
 public void reduce(Iterator<Record> points, Collector<Record> out) {
   final Record aggregate = sumPointsAndCount(points);

   final Point summedPoint = aggregate.getField(1, Point.class);
   final int pointCount = aggregate.getField(2, IntValue.class).getValue();
   aggregate.setField(1, summedPoint.div(pointCount));

   out.collect(aggregate);
 }
Пример #11
0
    /**
     * Selects the shortest paths among all input paths and concatenated paths of one group and
     * emits each of them.
     *
     * <p>Every record is read as: field 0 = from-node, field 1 = to-node, field 2 = length, field
     * 3 = hop count, field 4 = hop list. The from- and to-node are taken from the first record
     * and reused for every emitted record. One record is emitted per distinct hop list that has
     * the minimum length; field 2 carries that minimum length.
     *
     * <p>The first record comes from {@code inputRecords} if it has one, otherwise from
     * {@code concatRecords}, so at least one of the two iterators must be non-empty.
     *
     * @param inputRecords already known paths of the group
     * @param concatRecords newly concatenated paths of the group
     * @param out collector receiving one record per shortest path
     */
    @Override
    public void coGroup(
        Iterator<Record> inputRecords, Iterator<Record> concatRecords, Collector<Record> out) {

      // Seed the search with the first available record: input paths first, else concat paths.
      Record firstRec = inputRecords.hasNext() ? inputRecords.next() : concatRecords.next();

      // From- and to-node are read once, before any further record is pulled from the iterators.
      StringValue fromNode = firstRec.getField(0, StringValue.class);
      StringValue toNode = firstRec.getField(1, StringValue.class);
      startNewMinimum(firstRec);

      // Fold the remaining input paths, then all concatenated paths, into the current minimum.
      while (inputRecords.hasNext()) {
        considerPath(inputRecords.next());
      }
      while (concatRecords.hasNext()) {
        considerPath(concatRecords.next());
      }

      outputRecord.setField(0, fromNode);
      outputRecord.setField(1, toNode);
      outputRecord.setField(2, minLength);

      // emit all shortest paths
      for (StringValue shortestPath : shortestPaths) {
        outputRecord.setField(3, hopCnts.get(shortestPath));
        outputRecord.setField(4, shortestPath);
        out.collect(outputRecord);
      }

      hopCnts.clear();
      shortestPaths.clear();
    }

    /**
     * Updates the shortest-path bookkeeping with one more path: a path as long as the current
     * minimum is added (once per distinct hop list), a shorter one replaces everything collected
     * so far, and a longer one is ignored.
     */
    private void considerPath(Record pathRec) {
      int pathLength = pathRec.getField(2, IntValue.class).getValue();

      if (pathLength == minLength.getValue()) {
        // Copy the hop list so the stored value does not alias the record's field.
        StringValue path = new StringValue(pathRec.getField(4, StringValue.class));
        if (shortestPaths.add(path)) {
          hopCnts.put(path, new IntValue(pathRec.getField(3, IntValue.class).getValue()));
        }
      } else if (pathLength < minLength.getValue()) {
        startNewMinimum(pathRec);
      }
    }

    /**
     * Makes the given path the only shortest path: sets the minimum length to its length and
     * discards every path and hop count collected before.
     */
    private void startNewMinimum(Record pathRec) {
      minLength.setValue(pathRec.getField(2, IntValue.class).getValue());
      hopCnts.clear();
      shortestPaths.clear();

      StringValue path = new StringValue(pathRec.getField(4, StringValue.class));
      shortestPaths.add(path);
      hopCnts.put(path, new IntValue(pathRec.getField(3, IntValue.class).getValue()));
    }
Пример #12
0
 /**
  * Identity mapper: passes every incoming record to the collector without modifying it.
  *
  * @param record the record to forward
  * @param out collector receiving the unchanged record
  */
 @Override
 public void map(Record record, Collector<Record> out) throws Exception {
   // No transformation: the same record instance is emitted as-is.
   out.collect(record);
 }