@Override public Tuple next() throws IOException, InterruptedException { Tuple tuple = dataBlock.next(); if (tuple == null) return null; for (int i = 0; i < numColumns; i++) { if (dictionaries[i] == null) { // this column is not decoded decodedTuple.set(i, tuple.get(i)); } else { // this column is decoded if (tuple.get(i) == null) { decodedTuple.set(i, null); } else { int code = ((Number) tuple.get(i)).intValue(); String val = dictionaries[i].getValueForCode(code); if (val == null) { if (replaceUnknownCodes == null) { throw new RuntimeException("code '" + code + "' is missing encoding in dictionary."); } else { val = replaceUnknownCodes; } } decodedTuple.set(i, val); } } } return decodedTuple; }
@Override public Block next() throws IOException, InterruptedException { Tuple metaDataTuple = matchingMetaBlock.next(); if (metaDataTuple == null) return null; // Done System.out.println("Collate Vector: metadata tuple = " + metaDataTuple.toString()); return generateVectorBlock(metaDataTuple); }
/**
 * Reads the next tuple from {@code block} and copies its fields into
 * {@code outputTuple} at the positions given by {@code columnCopyMap}.
 *
 * <p>Input field {@code i} is written to output position {@code columnCopyMap[i]}.
 * The same {@code outputTuple} instance is returned on every call.
 *
 * @return the populated output tuple, or {@code null} when {@code block} is exhausted
 */
@Override
public Tuple next() throws IOException, InterruptedException {
    final Tuple source = block.next();
    if (source == null) {
        return null;
    }

    int inputIndex = 0;
    for (int outputIndex : columnCopyMap) {
        outputTuple.set(outputIndex, source.get(inputIndex));
        inputIndex++;
    }
    return outputTuple;
}
/**
 * Binds this operator to its input block and prepares the placeholder bag.
 *
 * <p>The first block in {@code input} becomes the input block, and {@code init} is
 * called with the operator JSON and that block's schema. Afterwards {@code nullBag}
 * is set to a new default bag holding one tuple created with size 0.
 *
 * @param input        input blocks; the first value in iteration order is used
 * @param operatorJson operator configuration passed to {@code init}
 * @param props        not read by this method
 */
@Override
public void setInput(Map<String, Block> input, JsonNode operatorJson, BlockProperties props)
        throws IOException, InterruptedException {
    this.inputBlock = input.values().iterator().next();
    init(operatorJson, this.inputBlock.getProperties().getSchema());

    // A bag containing a single zero-field tuple.
    this.nullBag = BagFactory.getInstance().newDefaultBag();
    this.nullBag.add(TupleFactory.getInstance().newTuple(0));
}
@Override public void setInput(Map<String, Block> input, JsonNode json, BlockProperties props) throws IOException, InterruptedException { // Get the dictionary Map<String, CodeDictionary> dictionaryMap = null; if (json.has("path")) { // load the dictionary from file String dictionaryName = json.get("path").getTextValue(); String dictionaryPath = FileCache.get(dictionaryName); dictionaryPath = dictionaryPath + "/part-r-00000.avro"; dictionaryMap = GenerateDictionary.loadDictionary(dictionaryPath, false, null); } else { // this is inline dictionary JsonNode dictionary = json.get("dictionary"); Iterator<String> nameIterator = dictionary.getFieldNames(); dictionaryMap = new HashMap<String, CodeDictionary>(); while (nameIterator.hasNext()) { String name = nameIterator.next(); ArrayNode values = (ArrayNode) dictionary.get(name); CodeDictionary codeDictionary = new CodeDictionary(); for (JsonNode value : values) { codeDictionary.addKey(value.getTextValue()); } dictionaryMap.put(name, codeDictionary); } } dataBlock = input.values().iterator().next(); BlockSchema inputSchema = dataBlock.getProperties().getSchema(); numColumns = inputSchema.getNumColumns(); decodedTuple = TupleFactory.getInstance().newTuple(numColumns); // create dictionary array dictionaries = new CodeDictionary[numColumns]; for (int i = 0; i < numColumns; i++) { String colName = inputSchema.getName(i); if (dictionaryMap.containsKey(colName)) { dictionaries[i] = dictionaryMap.get(colName); } else { dictionaries[i] = null; } } if (json.has("replaceUnknownCodes")) { replaceUnknownCodes = JsonUtils.getText(json, "replaceUnknownCodes"); } }
public Tuple flattenBagNext() throws IOException, InterruptedException { Tuple t; // Rui. to avoid currentTupleNext being called twice. if (outTuple == null || (t = currentTupleNext()) == null) { Tuple inTuple = inputBlock.next(); if (inTuple == null) return null; // Rui. initCurrentTuple(inTuple); return this.outTuple; } return t; }
@Override public void setInput(Configuration conf, Map<String, Block> input, JsonNode json) throws IOException, InterruptedException { // #1. input block inputBlock = (RubixMemoryBlock) input.get(JsonUtils.getText(json, "inputBlock")); // #2. lookup column String lookupColumn = json.get("lookupColumn").getTextValue(); BlockSchema inputSchema = inputBlock.getProperties().getSchema(); coord2offsets = BlockUtils.generateColumnIndex(inputBlock, lookupColumn); // #3. meta data relation name metaRelationName = new String(JsonUtils.getText(json, "metaRelationName")); matchingMetaBlock = (Block) input.get(metaRelationName); BlockSchema metaBlockSchema = matchingMetaBlock.getProperties().getSchema(); // #4. find indexes for coordinate column names in meta relation's schema String[] coordinateColumns = JsonUtils.asArray(json.get("coordinateColumns")); coordinateColumnIndexes = new int[coordinateColumns.length]; int idx = 0; for (String s : JsonUtils.asArray(json.get("coordinateColumns"))) coordinateColumnIndexes[idx++] = metaBlockSchema.getIndex(s); // #5. find index of identifier column in meta relation's schema identifierColumnName = new String(JsonUtils.getText(json, "identifierColumn")); identifierColumnIndex = metaBlockSchema.getIndex(identifierColumnName); // #6. combine columns ArrayNode combineColumns = (ArrayNode) json.get("combineColumns"); // setup info for sort operator /* * jsonForSort = JsonUtils.cloneNode(json); ((ObjectNode) * jsonForSort).put("sortBy", combineColumns); sortedBlock = new * TupleOperatorBlock(sortOp); */ // setup info for combiner operator jsonForCombine = JsonUtils.createObjectNode(); ((ObjectNode) jsonForCombine).put("pivotBy", combineColumns); ((ObjectNode) jsonForCombine).put("schema", inputSchema.toJson()); combinedBlock = new TupleOperatorBlock(combineOp, null); // setup info for generate operator jsonForGenerate = JsonUtils.createObjectNode(); }
@Override public void setInput(Map<String, Block> input, JsonNode json, BlockProperties props) throws IOException, InterruptedException { block = input.values().iterator().next(); BlockSchema inSchema = block.getProperties().getSchema(); BlockSchema outSchema = props.getSchema(); outputTuple = TupleFactory.getInstance().newTuple(outSchema.getNumColumns()); // columnCopyMap is an array that maps input column (at index i) // to output column (value at index i) columnCopyMap = new int[inSchema.getNumColumns()]; for (int i = 0; i < inSchema.getNumColumns(); i++) { columnCopyMap[i] = outSchema.getIndex(inSchema.getName(i)); } // set the tag int tag = json.get("tag").getIntValue(); // the left mapper puts the number of columns as the tag if (tag != 0) tag = inSchema.getNumColumns(); outputTuple.set(outSchema.getIndex("___tag"), tag); }