@Override
  public void setInput(Map<String, Block> input, JsonNode json, BlockProperties props)
      throws IOException, InterruptedException {
    // Get the dictionary
    Map<String, CodeDictionary> dictionaryMap = null;
    if (json.has("path")) {
      // load the dictionary from file
      String dictionaryName = json.get("path").getTextValue();
      String dictionaryPath = FileCache.get(dictionaryName);
      dictionaryPath = dictionaryPath + "/part-r-00000.avro";
      dictionaryMap = GenerateDictionary.loadDictionary(dictionaryPath, false, null);
    } else {
      // this is inline dictionary
      JsonNode dictionary = json.get("dictionary");

      Iterator<String> nameIterator = dictionary.getFieldNames();
      dictionaryMap = new HashMap<String, CodeDictionary>();
      while (nameIterator.hasNext()) {
        String name = nameIterator.next();
        ArrayNode values = (ArrayNode) dictionary.get(name);
        CodeDictionary codeDictionary = new CodeDictionary();
        for (JsonNode value : values) {
          codeDictionary.addKey(value.getTextValue());
        }
        dictionaryMap.put(name, codeDictionary);
      }
    }

    dataBlock = input.values().iterator().next();
    BlockSchema inputSchema = dataBlock.getProperties().getSchema();
    numColumns = inputSchema.getNumColumns();

    decodedTuple = TupleFactory.getInstance().newTuple(numColumns);

    // create dictionary array
    dictionaries = new CodeDictionary[numColumns];

    for (int i = 0; i < numColumns; i++) {
      String colName = inputSchema.getName(i);

      if (dictionaryMap.containsKey(colName)) {
        dictionaries[i] = dictionaryMap.get(colName);
      } else {
        dictionaries[i] = null;
      }
    }

    if (json.has("replaceUnknownCodes")) {
      replaceUnknownCodes = JsonUtils.getText(json, "replaceUnknownCodes");
    }
  }
  @Override
  public PostCondition getPostCondition(Map<String, PostCondition> preConditions, JsonNode json)
      throws PreconditionException {
    String inputBlockName = JsonUtils.getText(json, "input");
    PostCondition inputCondition = preConditions.get(inputBlockName);
    BlockSchema inputSchema = inputCondition.getSchema();

    Map<String, CodeDictionary> dictionaryMap = new HashMap<String, CodeDictionary>();
    if (json.has("columns")) {
      String[] columns = JsonUtils.asArray(json, "columns");
      for (String column : columns) dictionaryMap.put(column, new CodeDictionary());
    } else {
      JsonNode dictionary = json.get("dictionary");
      // this is inline dictionary
      Iterator<String> nameIterator = dictionary.getFieldNames();
      while (nameIterator.hasNext()) {
        String name = nameIterator.next();
        ArrayNode values = (ArrayNode) dictionary.get(name);
        CodeDictionary codeDictionary = new CodeDictionary();
        for (JsonNode value : values) {
          codeDictionary.addKey(value.getTextValue());
        }
        dictionaryMap.put(name, codeDictionary);
      }
    }

    int numColumns = inputSchema.getNumColumns();

    ColumnType[] columnTypes = new ColumnType[numColumns];

    for (int i = 0; i < columnTypes.length; i++) {
      ColumnType type;
      final String name = inputSchema.getName(i);

      if (dictionaryMap.containsKey(name)) {
        // this column is decoded. Transform schema
        type = new ColumnType(name, DataType.STRING);
      } else {
        // this column is not decoded. Reuse schema
        type = inputSchema.getColumnType(i);
      }

      columnTypes[i] = type;
    }

    BlockSchema schema = new BlockSchema(columnTypes);

    return new PostCondition(
        schema, inputCondition.getPartitionKeys(), inputCondition.getSortKeys());
  }
Exemplo n.º 3
0
  @Override
  public Object get(int index) {
    int code = codeList.getShort(index);
    if (code == 0) return null;

    return dictionary.getValueForCode(code);
  }
Exemplo n.º 4
0
  @Override
  public void add(Object value) {
    if (value == null) codeList.addShort((short) 0); // 0 is the code for null value

    // addKey() will first check if the key exists already
    int code = dictionary.addKey((String) value);
    codeList.addShort((short) code);

    size++;
  }