/**
 * Reads a brace-delimited bag from the stream and returns its tuples as a {@link DataBag}.
 *
 * <p>Input is skipped up to and including the first <code>'{'</code>. Elements are then read
 * one at a time via {@code consumeTuple}; after each element the stream is advanced to the
 * next <code>','</code> (another element follows) or <code>'}'</code> (end of bag).
 *
 * @param in          stream positioned at or before the opening brace of the bag
 * @param fieldSchema schema of the bag field; its nested schema must hold exactly one field,
 *                    which is passed to {@code consumeTuple} for every element
 * @return a new default bag holding every non-null tuple that was read
 * @throws IOException if {@code fieldSchema} is null, the nested schema does not have exactly
 *                     one field, or the stream ends before the bag is closed
 */
private DataBag consumeBag(PushbackInputStream in, ResourceFieldSchema fieldSchema) throws IOException {
    if (fieldSchema == null) {
        throw new IOException("Schema is null");
    }
    final ResourceFieldSchema[] innerFields = fieldSchema.getSchema().getFields();

    // Discard everything up to and including the opening brace.
    int ch = in.read();
    while (ch != '{') {
        if (ch == -1) {
            throw new IOException("Unexpect end of bag");
        }
        ch = in.read();
    }

    // The arity check deliberately happens only after the opening brace was consumed.
    if (innerFields.length != 1) {
        throw new IOException("Only tuple is allowed inside bag schema");
    }
    final ResourceFieldSchema elementSchema = innerFields[0];
    final DataBag result = DefaultBagFactory.getInstance().newDefaultBag();

    do {
        final Tuple element = consumeTuple(in, elementSchema);
        if (element != null) {
            result.add(element);
        }
        // Scan forward to the separator or the closing brace.
        for (ch = in.read(); ch != '}' && ch != ','; ch = in.read()) {
            if (ch == -1) {
                throw new IOException("Unexpect end of bag");
            }
        }
    } while (ch != '}');

    return result;
}
@Override public DataBag exec(Tuple input) throws IOException { try { DataBag bag = DefaultBagFactory.getInstance().newDefaultBag(); if (input == null || input.size() == 0) { return bag; // an empty bag } if (this.fieldType == DataType.MAP) { Tuple t = DefaultTupleFactory.getInstance().newTuple(1); t.set(0, createMap(input)); bag.add(t); } else { bag.add(input); } return bag; } catch (Exception e) { throw new RuntimeException( "Error while computing size in " + this.getClass().getSimpleName()); } }
@Override public DataBag exec(Tuple aInput) throws IOException { // invalid value | 無効値 if (aInput == null) return DefaultBagFactory.getInstance().newDefaultBag(); // processing target | 処理対象 DataBag tTargetBag = DataType.toBag(aInput.get(0)); DataBag tComparableValueBag = DataType.toBag(aInput.get(1)); if (tTargetBag.size() == 0 || tComparableValueBag.size() == 0) return DefaultBagFactory.getInstance().newDefaultBag(); Iterator<Tuple> tTargetBagIterator = tTargetBag.iterator(); Iterator<Tuple> tComparableValueBagIterator = tComparableValueBag.iterator(); ArrayList<Tuple> tProtoBag = new ArrayList<Tuple>(); Double tMaxValue = Double.NEGATIVE_INFINITY; while (tComparableValueBagIterator.hasNext()) { Tuple tCurrentTargetTuple = tTargetBagIterator.next(); Double tCurrentValue = DataType.toDouble(tComparableValueBagIterator.next().get(0)); if (tCurrentValue == null) continue; // add a tuple to tProtoBag if the same as existing MaxValue // 現 MaxValue と同じなら、タプルを tProtoBag に追加 if (tMaxValue.equals(tCurrentValue)) { tProtoBag.add(tCurrentTargetTuple); } // clear tProtoBag if bigger than existing MaxValue and add a tuple to tProtoBag // 現 MaxValue より大きいなら、tProtoBag をクリアし、タプルを tProtoBag に追加 else if (tMaxValue < tCurrentValue) { tMaxValue = tCurrentValue; tProtoBag.clear(); tProtoBag.add(tCurrentTargetTuple); } } return DefaultBagFactory.getInstance().newDefaultBag(tProtoBag); }
public DataBag exec(Tuple input) throws IOException { DataBag output = DefaultBagFactory.getInstance().newDefaultBag(); if (input == null || input.size() == 0) return null; try { String seq = ((String) input.get(0)); // byte[] ba = ((DataByteArray) input.get(0)).get(); int distance = (Integer) input.get(1); // int seqLength = SequenceString.numBases(ba); // String seq = SequenceString.byteArrayToSequence(ba); Set<String> neighbors = MetaUtils.generateAllNeighborsWithinDistance(seq, distance); for (String n : neighbors) { Tuple t = DefaultTupleFactory.getInstance().newTuple(1); t.set(0, n); output.add(t); } } catch (Exception e) { System.err.println("HammingDistance: failed to process input; error - " + e.getMessage()); return null; } return output; }