/**
     * Parses one text line holding an RDF triple and fills the target record with a node pair.
     *
     * <p>The line is split on whitespace. Lines with fewer than three tokens, or whose second
     * token is not the foaf:knows predicate, are skipped.
     *
     * <p>Output Format: 0: subject node 1: object node 2: pathLength 3: hopCnt 4: hopList (fields
     * 2-4 are taken from this instance's members)
     *
     * @param target record to fill
     * @param bytes buffer containing the line
     * @param offset index of the first byte of the line
     * @param numBytes length of the line in bytes
     * @return the filled {@code target}, or {@code null} if the line is skipped
     */
    @Override
    public Record readRecord(Record target, byte[] bytes, int offset, int numBytes) {
      // collapse every run of whitespace into a single blank and strip both ends
      final String normalized = new String(bytes, offset, numBytes).replaceAll("\\s+", " ").trim();
      final StringTokenizer tokens = new StringTokenizer(normalized, " ");

      // a triple needs a subject, a predicate and an object
      if (tokens.countTokens() < 3) {
        return null;
      }

      final String subject = tokens.nextToken();
      final String predicate = tokens.nextToken();
      final String object = tokens.nextToken();

      // every predicate other than foaf:knows is dropped
      if (!"<http://xmlns.com/foaf/0.1/knows>".equals(predicate)) {
        return null;
      }

      // subject and object become the two end points of the edge
      fromNode.setValue(subject);
      toNode.setValue(object);

      target.setField(0, fromNode);
      target.setField(1, toNode);
      target.setField(2, pathLength);
      target.setField(3, hopCnt);
      target.setField(4, hopList);
      return target;
    }
    /**
     * Assigns a data point to its closest cluster center, i.e. the center with the smallest
     * euclidean distance to the point.
     *
     * <p>Output Format: 0: centerID 1: pointVector 2: constant(1) (the constant lets the following
     * reducer compute the average in a combinable way)
     *
     * @param dataPointRecord record carrying the point in field 1
     * @param out collector receiving the (centerID, point, 1) record
     */
    @Override
    public void map(Record dataPointRecord, Collector<Record> out) {
      final Point point = dataPointRecord.getField(1, Point.class);

      // -1 is emitted when no center is strictly closer than Double.MAX_VALUE
      int closestId = -1;
      double closestDistance = Double.MAX_VALUE;

      for (PointWithId candidate : centers) {
        final double candidateDistance = point.euclideanDistance(candidate.point);
        if (candidateDistance >= closestDistance) {
          // not strictly closer than the best center seen so far
          continue;
        }
        closestDistance = candidateDistance;
        closestId = candidate.id;
      }

      // the trailing "one" is the per-point count that a combiner can sum up
      result.setField(0, new IntValue(closestId));
      result.setField(1, point);
      result.setField(2, one);
      out.collect(result);
    }
    /**
     * Concatenates two paths into one longer path.
     *
     * <p>The new path starts at the from-node of {@code rec2} and ends at the to-node of {@code
     * rec1}; the from-node of {@code rec1} is inserted as the connecting hop. Nothing is emitted
     * when the new path would start and end at the same node.
     *
     * <p>Output Format: 0: from-node 1: to-node 2: summed length 3: summed hop count plus one 4:
     * hop list of rec2, connecting node, hop list of rec1 (blank separated, trimmed)
     *
     * @param rec1 path forming the second half of the new path
     * @param rec2 path forming the first half of the new path
     * @param out collector receiving the concatenated path
     */
    @Override
    public void join(Record rec1, Record rec2, Collector<Record> out) throws Exception {

      // end points of the combined path: it starts where rec2 starts and ends where rec1 ends
      final StringValue start = rec2.getField(0, StringValue.class);
      final StringValue end = rec1.getField(1, StringValue.class);

      // a path leading back to its own start would be a cycle, so drop it
      if (start.equals(end)) {
        return;
      }

      outputRecord.setField(0, start);
      outputRecord.setField(1, end);

      // the length of the new path is the sum of both partial lengths
      final int secondLength = rec1.getField(2, IntValue.class).getValue();
      final int firstLength = rec2.getField(2, IntValue.class).getValue();
      length.setValue(secondLength + firstLength);
      outputRecord.setField(2, length);

      // the connecting node counts as one additional hop
      final int secondHops = rec1.getField(3, IntValue.class).getValue();
      final int firstHops = rec2.getField(3, IntValue.class).getValue();
      hopCnt.setValue(secondHops + 1 + firstHops);
      outputRecord.setField(3, hopCnt);

      // hop list: hops of the first path, the connecting node, hops of the second path
      final String joinedHops =
          rec2.getField(4, StringValue.class).getValue()
              + " "
              + rec1.getField(0, StringValue.class).getValue()
              + " "
              + rec1.getField(4, StringValue.class).getValue();
      hopList.setValue(joinedHops.trim());
      outputRecord.setField(4, hopList);

      out.collect(outputRecord);
    }
Example #4
0
  /**
   * Reads one line into the reusable string value and stores it in the target record.
   *
   * <p>In ASCII mode the bytes are handed to the string value directly. Otherwise they are decoded
   * with this instance's decoder, re-wrapping the byte array only when it differs from the one
   * wrapped on the previous call.
   *
   * @param target record that is cleared and then receives the line at position {@code pos}
   * @param bytes buffer containing the line
   * @param offset index of the first byte of the line
   * @param numBytes length of the line in bytes
   * @return {@code true} if the record was filled, {@code false} if decoding the bytes failed
   */
  public boolean readRecord(Record target, byte[] bytes, int offset, int numBytes) {
    final StringValue line = this.theString;

    if (!this.ascii) {
      // reuse the cached wrapper as long as the caller keeps handing in the same array
      ByteBuffer wrapped = this.byteWrapper;
      if (wrapped.array() != bytes) {
        wrapped = ByteBuffer.wrap(bytes, 0, bytes.length);
        this.byteWrapper = wrapped;
      }
      // the limit has to be set before the position
      wrapped.limit(offset + numBytes);
      wrapped.position(offset);

      try {
        final CharBuffer decoded = this.decoder.decode(wrapped);
        line.setValue(decoded);
      } catch (CharacterCodingException e) {
        // log exactly the bytes of the offending line and skip it
        final byte[] offending = Arrays.copyOfRange(bytes, offset, offset + numBytes);
        LOG.warn("Line could not be encoded: " + Arrays.toString(offending), e);
        return false;
      }
    } else {
      line.setValueAscii(bytes, offset, numBytes);
    }

    target.clear();
    target.setField(this.pos, line);
    return true;
  }
    /**
     * Sums the coordinate vectors (field 1) and the counts (field 2) of all given records.
     *
     * <p>The vector sum is accumulated in the shared point {@code p}. The result is written into
     * the last record delivered by the iterator, and that record is returned.
     *
     * @param dataPoints records to aggregate; must deliver at least one record
     * @return the last record of the iterator with field 1 set to the vector sum and field 2 set
     *     to the total count
     * @throws IllegalArgumentException if {@code dataPoints} delivers no record
     */
    private final Record sumPointsAndCount(Iterator<Record> dataPoints) {
      // without this guard an empty group would surface as a bare NullPointerException below
      if (!dataPoints.hasNext()) {
        throw new IllegalArgumentException("Cannot sum up an empty group of data points.");
      }

      Record next = null;
      p.clear();
      int count = 0;

      // compute coordinate vector sum and count
      while (dataPoints.hasNext()) {
        next = dataPoints.next();
        p.add(next.getField(1, Point.class));
        count += next.getField(2, IntValue.class).getValue();
      }

      // the last record is reused as the carrier of the aggregate
      next.setField(1, p);
      next.setField(2, new IntValue(count));
      return next;
    }
    /**
     * Replaces the component id of a vertex if one of the candidates offers a smaller id.
     *
     * <p>Only the first record of {@code current} is considered. A record is emitted only when
     * the smallest candidate id (field 1) is strictly smaller than the current id (field 1).
     *
     * @param candidates records carrying candidate component ids in field 1
     * @param current records carrying the current component id in field 1
     * @param out collector receiving the updated record, if any
     * @throws Exception if {@code current} is empty
     */
    @Override
    public void coGroup(
        Iterator<Record> candidates, Iterator<Record> current, Collector<Record> out)
        throws Exception {
      if (!current.hasNext()) {
        throw new Exception("Error: Id not encountered before.");
      }
      final Record vertex = current.next();
      final long currentComponentId = vertex.getField(1, LongValue.class).getValue();

      // smallest id among all candidates; stays at Long.MAX_VALUE when there are none
      long smallestCandidate = Long.MAX_VALUE;
      while (candidates.hasNext()) {
        final long candidate = candidates.next().getField(1, LongValue.class).getValue();
        smallestCandidate = Math.min(smallestCandidate, candidate);
      }

      // nothing to emit unless the id actually improves
      if (smallestCandidate >= currentComponentId) {
        return;
      }

      newComponentId.setValue(smallestCandidate);
      vertex.setField(1, newComponentId);
      out.collect(vertex);
    }
    /**
     * Packs the three double values in fields 1, 2 and 3 into a single {@code Point}, stores it in
     * field 1 and forwards the record. Fields 2 and 3 are left as they are.
     *
     * @param record record holding the coordinates in fields 1-3
     * @param out collector receiving the modified record
     */
    @Override
    public void map(Record record, Collector<Record> out) throws Exception {
      final Point location =
          new Point(
              record.getField(1, DoubleValue.class).getValue(),
              record.getField(2, DoubleValue.class).getValue(),
              record.getField(3, DoubleValue.class).getValue());

      record.setField(1, location);
      out.collect(record);
    }
    /**
     * Pre-aggregates a group of records to the minimum component id found in field 1.
     *
     * <p>The minimum is written into field 1 of the last record delivered by the iterator, and
     * that record is returned.
     *
     * @param records records to combine; must deliver at least one record
     * @return the last record of the iterator with field 1 set to the minimum id
     * @throws IllegalArgumentException if {@code records} delivers no record
     */
    @Override
    public Record combineFirst(Iterator<Record> records) {
      // without this guard an empty group would surface as a bare NullPointerException below
      if (!records.hasNext()) {
        throw new IllegalArgumentException("Cannot combine an empty group of records.");
      }

      Record next = null;
      long min = Long.MAX_VALUE;
      while (records.hasNext()) {
        next = records.next();
        min = Math.min(min, next.getField(1, LongValue.class).getValue());
      }

      // the last record is reused as the carrier of the minimum
      newComponentId.setValue(min);
      next.setField(1, newComponentId);
      return next;
    }
    /**
     * Parses one '|'-separated line describing a path and fills the target record with it.
     *
     * <p>Output Format: 0: fromNode 1: toNode 2: length 3: hopCnt 4: hopList
     *
     * @param target record to fill
     * @param bytes buffer containing the line
     * @param offset index of the first byte of the line
     * @param numBytes length of the line in bytes
     * @return the filled {@code target}, or {@code null} if the line does not consist of exactly
     *     five tokens
     * @throws NumberFormatException if the length or hop count token is not an integer
     */
    @Override
    public Record readRecord(Record target, byte[] bytes, int offset, int numBytes) {
      final StringTokenizer tokens = new StringTokenizer(new String(bytes, offset, numBytes), "|");

      // anything but exactly five tokens is not a valid path line
      if (tokens.countTokens() != 5) {
        return null;
      }

      // tokens arrive in the order fromNode, toNode, length, hopCnt, hopList
      final String from = tokens.nextToken();
      this.fromNode.setValue(from);
      final String to = tokens.nextToken();
      this.toNode.setValue(to);
      final int pathLength = Integer.parseInt(tokens.nextToken());
      this.length.setValue(pathLength);
      final int hops = Integer.parseInt(tokens.nextToken());
      this.hopCnt.setValue(hops);
      final String hopNodes = tokens.nextToken();
      this.hopList.setValue(hopNodes);

      target.setField(0, this.fromNode);
      target.setField(1, this.toNode);
      target.setField(2, this.length);
      target.setField(3, this.hopCnt);
      target.setField(4, this.hopList);
      return target;
    }
Example #10
0
  /**
   * Projects a "lineitem" tuple and appends the discounted price.
   *
   * <p>Output Schema: Key: orderkey Value: (partkey, suppkey, quantity, price)
   *
   * @param record record carrying the lineitem tuple in field 1
   * @param out collector receiving the record with the rewritten tuple
   */
  @Override
  public void map(Record record, Collector<Record> out) throws Exception {
    final Tuple lineitem = record.getField(1, Tuple.class);

    /* price = extendedprice * (1 - discount); both are read before the projection */
    final float extendedPrice = Float.parseFloat(lineitem.getStringValueAt(5));
    final float discount = Float.parseFloat(lineitem.getStringValueAt(6));
    final float price = extendedPrice * (1 - discount);

    /* Bits 1, 2 and 4 are set: of (orderkey | partkey, suppkey, linenumber, quantity, ...) this keeps (partkey, suppkey, quantity) */
    final int keptAttributes = (1 << 1) | (1 << 2) | (1 << 4);
    lineitem.project(keptAttributes);
    lineitem.addAttribute(Float.toString(price));

    record.setField(1, lineitem);
    out.collect(record);
  }
 /**
  * Computes the new position (coordinate vector) of a cluster center by dividing the summed
  * coordinate vector of its points by the number of points.
  *
  * @param points records of all points assigned to the center
  * @param out collector receiving the record that carries the new position in field 1
  */
 @Override
 public void reduce(Iterator<Record> points, Collector<Record> out) {
   final Record aggregate = sumPointsAndCount(points);

   // field 1 holds the vector sum, field 2 the number of summed points
   final Point coordinateSum = aggregate.getField(1, Point.class);
   final int pointCount = aggregate.getField(2, IntValue.class).getValue();

   aggregate.setField(1, coordinateSum.div(pointCount));
   out.collect(aggregate);
 }
 /**
  * Fills the given record from a Hadoop key/value pair.
  *
  * <p>The converted key is stored in field 0 and the converted value in field 1. The per-object
  * conversion is delegated to the single-argument {@code convert} overloads, which are defined
  * outside this block.
  *
  * @param stratosphereRecord record that receives the converted key and value
  * @param hadoopKey key to convert and store in field 0
  * @param hadoopValue value to convert and store in field 1
  */
 @Override
 public void convert(Record stratosphereRecord, K hadoopKey, V hadoopValue) {
   // field 0: converted key
   stratosphereRecord.setField(0, convert(hadoopKey));
   // field 1: converted value
   stratosphereRecord.setField(1, convert(hadoopValue));
 }
    /**
     * Selects all paths of minimum length among the given input paths and concatenated paths.
     *
     * <p>From-node and to-node are read from the first record only. Every distinct hop list of
     * minimum length is emitted as its own record.
     *
     * <p>Output Format: 0: from-node 1: to-node 2: minimum length 3: hop count 4: hop list
     *
     * @param inputRecords already known paths
     * @param concatRecords newly concatenated paths
     * @param out collector receiving one record per shortest path
     */
    @Override
    public void coGroup(
        Iterator<Record> inputRecords, Iterator<Record> concatRecords, Collector<Record> out) {

      // the first available path seeds the search; known input paths are consulted first
      final Record seed = inputRecords.hasNext() ? inputRecords.next() : concatRecords.next();

      // from-node and to-node are read once, from the seed
      final StringValue source = seed.getField(0, StringValue.class);
      final StringValue destination = seed.getField(1, StringValue.class);

      // the seed is the shortest path known so far
      minLength.setValue(seed.getField(2, IntValue.class).getValue());
      final StringValue seedPath = new StringValue(seed.getField(4, StringValue.class));
      shortestPaths.add(seedPath);
      hopCnts.put(seedPath, new IntValue(seed.getField(3, IntValue.class).getValue()));

      // scan the remaining input paths first, then the concatenated paths
      while (inputRecords.hasNext()) {
        keepIfShortest(inputRecords.next());
      }
      while (concatRecords.hasNext()) {
        keepIfShortest(concatRecords.next());
      }

      outputRecord.setField(0, source);
      outputRecord.setField(1, destination);
      outputRecord.setField(2, minLength);

      // one output record per distinct shortest hop list
      for (StringValue hops : shortestPaths) {
        outputRecord.setField(3, hopCnts.get(hops));
        outputRecord.setField(4, hops);
        out.collect(outputRecord);
      }

      // reset the shared collections for the next group
      hopCnts.clear();
      shortestPaths.clear();
    }

    /**
     * Folds one path into the running set of shortest paths: a longer path is ignored, a strictly
     * shorter path replaces everything collected so far, and a path of equal length is added
     * unless its hop list is already present.
     */
    private void keepIfShortest(Record pathRec) {
      final int candidateLength = pathRec.getField(2, IntValue.class).getValue();
      final int shortestSoFar = minLength.getValue();

      if (candidateLength > shortestSoFar) {
        return;
      }

      if (candidateLength < shortestSoFar) {
        // new minimum: everything collected so far is obsolete
        minLength.setValue(candidateLength);
        hopCnts.clear();
        shortestPaths.clear();
      }

      // copy the hop list before storing it
      final StringValue hops = new StringValue(pathRec.getField(4, StringValue.class));
      if (shortestPaths.add(hops)) {
        hopCnts.put(hops, new IntValue(pathRec.getField(3, IntValue.class).getValue()));
      }
    }