@Override public void setInput(Map<String, Block> input, JsonNode json, BlockProperties props) throws IOException, InterruptedException { // Get the dictionary Map<String, CodeDictionary> dictionaryMap = null; if (json.has("path")) { // load the dictionary from file String dictionaryName = json.get("path").getTextValue(); String dictionaryPath = FileCache.get(dictionaryName); dictionaryPath = dictionaryPath + "/part-r-00000.avro"; dictionaryMap = GenerateDictionary.loadDictionary(dictionaryPath, false, null); } else { // this is inline dictionary JsonNode dictionary = json.get("dictionary"); Iterator<String> nameIterator = dictionary.getFieldNames(); dictionaryMap = new HashMap<String, CodeDictionary>(); while (nameIterator.hasNext()) { String name = nameIterator.next(); ArrayNode values = (ArrayNode) dictionary.get(name); CodeDictionary codeDictionary = new CodeDictionary(); for (JsonNode value : values) { codeDictionary.addKey(value.getTextValue()); } dictionaryMap.put(name, codeDictionary); } } dataBlock = input.values().iterator().next(); BlockSchema inputSchema = dataBlock.getProperties().getSchema(); numColumns = inputSchema.getNumColumns(); decodedTuple = TupleFactory.getInstance().newTuple(numColumns); // create dictionary array dictionaries = new CodeDictionary[numColumns]; for (int i = 0; i < numColumns; i++) { String colName = inputSchema.getName(i); if (dictionaryMap.containsKey(colName)) { dictionaries[i] = dictionaryMap.get(colName); } else { dictionaries[i] = null; } } if (json.has("replaceUnknownCodes")) { replaceUnknownCodes = JsonUtils.getText(json, "replaceUnknownCodes"); } }
@Override public PostCondition getPostCondition(Map<String, PostCondition> preConditions, JsonNode json) throws PreconditionException { String inputBlockName = JsonUtils.getText(json, "input"); PostCondition inputCondition = preConditions.get(inputBlockName); BlockSchema inputSchema = inputCondition.getSchema(); Map<String, CodeDictionary> dictionaryMap = new HashMap<String, CodeDictionary>(); if (json.has("columns")) { String[] columns = JsonUtils.asArray(json, "columns"); for (String column : columns) dictionaryMap.put(column, new CodeDictionary()); } else { JsonNode dictionary = json.get("dictionary"); // this is inline dictionary Iterator<String> nameIterator = dictionary.getFieldNames(); while (nameIterator.hasNext()) { String name = nameIterator.next(); ArrayNode values = (ArrayNode) dictionary.get(name); CodeDictionary codeDictionary = new CodeDictionary(); for (JsonNode value : values) { codeDictionary.addKey(value.getTextValue()); } dictionaryMap.put(name, codeDictionary); } } int numColumns = inputSchema.getNumColumns(); ColumnType[] columnTypes = new ColumnType[numColumns]; for (int i = 0; i < columnTypes.length; i++) { ColumnType type; final String name = inputSchema.getName(i); if (dictionaryMap.containsKey(name)) { // this column is decoded. Transform schema type = new ColumnType(name, DataType.STRING); } else { // this column is not decoded. Reuse schema type = inputSchema.getColumnType(i); } columnTypes[i] = type; } BlockSchema schema = new BlockSchema(columnTypes); return new PostCondition( schema, inputCondition.getPartitionKeys(), inputCondition.getSortKeys()); }
/**
 * Returns the value stored at the given position.
 *
 * @param index position in the code list
 * @return {@code null} when the stored code is 0, otherwise the dictionary value
 *         for the stored code
 */
@Override
public Object get(int index) {
    int storedCode = codeList.getShort(index);
    // code 0 stands for a null value and is never looked up in the dictionary
    return storedCode == 0 ? null : dictionary.getValueForCode(storedCode);
}
@Override public void add(Object value) { if (value == null) codeList.addShort((short) 0); // 0 is the code for null value // addKey() will first check if the key exists already int code = dictionary.addKey((String) value); codeList.addShort((short) code); size++; }