コード例 #1
0
 /**
  * Reads one bag literal from {@code in}: skips input up to the opening <code>{</code>, then
  * repeatedly delegates to {@code consumeTuple} and collects every non-null tuple until the
  * closing <code>}</code> is reached.
  *
  * <p>The schema is validated before any input is consumed, so a bad schema does not leave the
  * stream half-read.
  *
  * @param in stream positioned at or before the opening brace of the bag
  * @param fieldSchema schema of the bag field; its nested schema must hold exactly one field,
  *     which is passed to {@code consumeTuple} for every element
  * @return a new bag holding the tuples that were read (possibly empty)
  * @throws IOException if the schema is missing or does not hold exactly one field, or if the
  *     stream ends before the bag is closed
  */
 private DataBag consumeBag(PushbackInputStream in, ResourceFieldSchema fieldSchema)
     throws IOException {
   // A missing nested schema used to surface as a NullPointerException; report it the same way
   // as a missing field schema instead.
   if (fieldSchema == null || fieldSchema.getSchema() == null) {
     throw new IOException("Schema is null");
   }
   ResourceFieldSchema[] fss = fieldSchema.getSchema().getFields();
   if (fss == null || fss.length != 1) {
     throw new IOException("Only tuple is allowed inside bag schema");
   }
   ResourceFieldSchema fs = fss[0];

   // Skip everything in front of the opening brace.
   int buf;
   while ((buf = in.read()) != '{') {
     if (buf == -1) {
       throw new IOException("Unexpect end of bag");
     }
   }

   DataBag db = DefaultBagFactory.getInstance().newDefaultBag();
   while (true) {
     Tuple t = consumeTuple(in, fs);
     // consumeTuple may hand back null (presumably for an empty bag - confirm in consumeTuple);
     // such results are not added.
     if (t != null) {
       db.add(t);
     }
     // Advance to the next separator: ',' starts another tuple, '}' closes the bag.
     while ((buf = in.read()) != '}' && buf != ',') {
       if (buf == -1) {
         throw new IOException("Unexpect end of bag");
       }
     }
     if (buf == '}') {
       break;
     }
   }
   return db;
 }
コード例 #2
0
ファイル: TOBAG.java プロジェクト: nfouka/hadoop_single_node
  /**
   * Wraps the input tuple in a new bag.
   *
   * <p>When {@code fieldType} is {@code DataType.MAP}, the bag holds a single one-field tuple
   * whose value is the result of {@code createMap(input)}; otherwise the input tuple itself is
   * added to the bag.
   *
   * @param input the tuple to wrap; may be null or empty
   * @return a new bag; empty when {@code input} is null or has no fields
   * @throws IOException declared by the overridden method
   * @throws RuntimeException if anything fails while the bag is built; the original exception is
   *     attached as the cause
   */
  @Override
  public DataBag exec(Tuple input) throws IOException {
    try {
      DataBag bag = DefaultBagFactory.getInstance().newDefaultBag();

      if (input == null || input.size() == 0) {
        return bag; // an empty bag
      }

      if (this.fieldType == DataType.MAP) {
        Tuple t = DefaultTupleFactory.getInstance().newTuple(1);
        t.set(0, createMap(input));
        bag.add(t);
      } else {
        bag.add(input);
      }

      return bag;
    } catch (Exception e) {
      // Keep the RuntimeException type callers already see, but chain the cause so the real
      // failure is not lost, and describe what this method actually does.
      throw new RuntimeException(
          "Error while creating bag in " + this.getClass().getSimpleName(), e);
    }
  }
コード例 #3
0
ファイル: TupleMax.java プロジェクト: hiromasah/charsiu
  /**
   * Selects the tuples of the first bag whose paired value in the second bag is the maximum.
   *
   * <p>Field 0 of the input is the target bag and field 1 is the bag of comparable values. The
   * two bags are walked in parallel; the first field of each value tuple is converted with
   * {@code DataType.toDouble}. Every target tuple whose value equals the largest value seen is
   * returned. Values that convert to null are skipped. If the bags differ in length, only the
   * pairs that exist in both are considered.
   *
   * @param aInput tuple holding the target bag and the comparable-value bag
   * @return a new bag with the target tuples paired with the maximum value; an empty bag when
   *     the input is null, has fewer than two fields, or either bag is null or empty
   * @throws IOException if a field cannot be read or converted
   */
  @Override
  public DataBag exec(Tuple aInput) throws IOException {
    // invalid input: both bags are required
    if (aInput == null || aInput.size() < 2) {
      return DefaultBagFactory.getInstance().newDefaultBag();
    }

    // processing target
    DataBag tTargetBag = DataType.toBag(aInput.get(0));
    DataBag tComparableValueBag = DataType.toBag(aInput.get(1));
    // NOTE(review): toBag presumably returns null for a null field - guard so that case yields
    // an empty bag rather than a NullPointerException.
    if (tTargetBag == null
        || tComparableValueBag == null
        || tTargetBag.size() == 0
        || tComparableValueBag.size() == 0) {
      return DefaultBagFactory.getInstance().newDefaultBag();
    }

    Iterator<Tuple> tTargetBagIterator = tTargetBag.iterator();
    Iterator<Tuple> tComparableValueBagIterator = tComparableValueBag.iterator();

    ArrayList<Tuple> tProtoBag = new ArrayList<Tuple>();

    Double tMaxValue = Double.NEGATIVE_INFINITY;

    // Both iterators are checked: advancing the target iterator unguarded would throw
    // NoSuchElementException when the target bag is the shorter one.
    while (tTargetBagIterator.hasNext() && tComparableValueBagIterator.hasNext()) {
      Tuple tCurrentTargetTuple = tTargetBagIterator.next();
      Double tCurrentValue = DataType.toDouble(tComparableValueBagIterator.next().get(0));
      if (tCurrentValue == null) {
        continue;
      }
      if (tMaxValue.equals(tCurrentValue)) {
        // same as the current maximum: keep this tuple as well
        tProtoBag.add(tCurrentTargetTuple);
      } else if (tMaxValue < tCurrentValue) {
        // new maximum: discard the tuples collected so far and start over with this one
        tMaxValue = tCurrentValue;
        tProtoBag.clear();
        tProtoBag.add(tCurrentTargetTuple);
      }
    }

    return DefaultBagFactory.getInstance().newDefaultBag(tProtoBag);
  }
コード例 #4
0
  /**
   * Builds a bag of one-field tuples, one per string returned by
   * {@code MetaUtils.generateAllNeighborsWithinDistance} for the given sequence and distance.
   *
   * @param input tuple whose field 0 is cast to {@code String} (the sequence) and whose field 1
   *     is cast to {@code Integer} (the distance)
   * @return the bag of neighbours, or {@code null} when {@code input} is null or empty, or when
   *     any exception is raised while processing (the message is printed to standard error)
   * @throws IOException declared by the signature; failures inside the body are caught and
   *     reported as a {@code null} result
   */
  public DataBag exec(Tuple input) throws IOException {
    final DataBag result = DefaultBagFactory.getInstance().newDefaultBag();

    final boolean hasFields = input != null && input.size() != 0;
    if (!hasFields) {
      return null;
    }

    try {
      final String sequence = (String) input.get(0);
      final int maxDistance = (Integer) input.get(1);

      final Set<String> neighbourSet =
          MetaUtils.generateAllNeighborsWithinDistance(sequence, maxDistance);
      for (String neighbour : neighbourSet) {
        // each neighbour goes into its own single-field tuple
        final Tuple wrapper = DefaultTupleFactory.getInstance().newTuple(1);
        wrapper.set(0, neighbour);
        result.add(wrapper);
      }
      return result;
    } catch (Exception e) {
      // best effort: report the problem and signal "no result" to the caller
      System.err.println("HammingDistance: failed to process input; error - " + e.getMessage());
      return null;
    }
  }