/**
   * Reads the next tuple from {@code dataBlock} and copies it into the shared
   * {@code decodedTuple}, translating dictionary-encoded columns on the way.
   *
   * <p>A column whose entry in {@code dictionaries} is {@code null} is copied unchanged, and a
   * {@code null} field stays {@code null}. Every other field is treated as a {@link Number},
   * narrowed to an {@code int} code and replaced by the dictionary value for that code. When the
   * dictionary has no value for the code, {@code replaceUnknownCodes} is used instead, if it is
   * set.
   *
   * @return the same {@code decodedTuple} instance on every call, or {@code null} once
   *     {@code dataBlock.next()} returns {@code null}
   * @throws RuntimeException if a code has no dictionary value and {@code replaceUnknownCodes} is
   *     {@code null}
   */
  @Override
  public Tuple next() throws IOException, InterruptedException {
    final Tuple encoded = dataBlock.next();
    if (encoded == null) {
      return null;
    }

    for (int col = 0; col < numColumns; col++) {
      final Object raw = encoded.get(col);

      // Pass-through cases: the column has no dictionary, or the field itself is null.
      if (dictionaries[col] == null || raw == null) {
        decodedTuple.set(col, raw);
        continue;
      }

      final int code = ((Number) raw).intValue();
      String decoded = dictionaries[col].getValueForCode(code);

      if (decoded == null) {
        // Unknown code: fail unless a replacement value was configured.
        if (replaceUnknownCodes == null) {
          throw new RuntimeException("code '" + code + "' is missing encoding in dictionary.");
        }
        decoded = replaceUnknownCodes;
      }

      decodedTuple.set(col, decoded);
    }

    return decodedTuple;
  }
  /**
   * Pulls the next tuple from {@code matchingMetaBlock}, prints it to standard output and returns
   * the block that {@code generateVectorBlock} builds for it.
   *
   * @return the generated block, or {@code null} once {@code matchingMetaBlock.next()} returns
   *     {@code null}
   */
  @Override
  public Block next() throws IOException, InterruptedException {
    final Tuple meta = matchingMetaBlock.next();

    if (meta != null) {
      System.out.println("Collate Vector: metadata tuple = " + meta.toString());
      return generateVectorBlock(meta);
    }

    // No more metadata tuples: done.
    return null;
  }
Example #3
0
  /**
   * Reads the next tuple from {@code block} and copies its fields into the shared
   * {@code outputTuple}: input field {@code i} is written to position {@code columnCopyMap[i]}.
   *
   * @return the same {@code outputTuple} instance on every call, or {@code null} once
   *     {@code block.next()} returns {@code null}
   */
  @Override
  public Tuple next() throws IOException, InterruptedException {
    final Tuple source = block.next();
    if (source == null) {
      return null;
    }

    int inputIndex = 0;
    for (int outputIndex : columnCopyMap) {
      outputTuple.set(outputIndex, source.get(inputIndex));
      inputIndex++;
    }

    return outputTuple;
  }
Example #4
0
  /**
   * Binds this operator to its input block, runs {@code init} with the operator JSON and the
   * input block's schema, and initializes {@code nullBag}.
   *
   * @param input map of input blocks; only the first block yielded by the value iterator is used
   * @param operatorJson operator configuration, forwarded to {@code init}
   * @param props not read by this method
   */
  @Override
  public void setInput(Map<String, Block> input, JsonNode operatorJson, BlockProperties props)
      throws IOException, InterruptedException {
    // Throws NoSuchElementException if the input map is empty.
    inputBlock = input.values().iterator().next();

    init(operatorJson, inputBlock.getProperties().getSchema());

    nullBag = BagFactory.getInstance().newDefaultBag();

    // nullBag ends up holding exactly one tuple, created with newTuple(0).
    nullBag.add(TupleFactory.getInstance().newTuple(0));
  }
  /**
   * Loads the code dictionaries and binds them to the columns of the input block.
   *
   * <p>The dictionaries come from one of two places:
   * <ul>
   *   <li>{@code "path"}: the name is resolved through {@code FileCache} and the file
   *       {@code part-r-00000.avro} under that location is loaded with
   *       {@code GenerateDictionary.loadDictionary};</li>
   *   <li>{@code "dictionary"}: an inline object whose field names are looked up against column
   *       names and whose values are arrays of keys, added to a fresh {@code CodeDictionary} in
   *       array order.</li>
   * </ul>
   *
   * <p>Columns of the input schema with no dictionary under their name get a {@code null} entry
   * in {@code dictionaries}. If {@code "replaceUnknownCodes"} is present, its text is stored in
   * {@code replaceUnknownCodes}; otherwise that field is left untouched.
   *
   * @param input map of input blocks; only the first block yielded by the value iterator is used
   * @param json operator configuration
   * @param props not read by this method
   * @throws IllegalArgumentException if {@code json} has neither {@code "path"} nor
   *     {@code "dictionary"}
   */
  @Override
  public void setInput(Map<String, Block> input, JsonNode json, BlockProperties props)
      throws IOException, InterruptedException {
    // Get the dictionary
    Map<String, CodeDictionary> dictionaryMap;
    if (json.has("path")) {
      // load the dictionary from file
      String dictionaryName = json.get("path").getTextValue();
      String dictionaryPath = FileCache.get(dictionaryName) + "/part-r-00000.avro";
      dictionaryMap = GenerateDictionary.loadDictionary(dictionaryPath, false, null);
    } else {
      // this is inline dictionary
      JsonNode dictionary = json.get("dictionary");
      if (dictionary == null) {
        // Fail with a clear message instead of a NullPointerException further down.
        throw new IllegalArgumentException(
            "Expected either 'path' or 'dictionary' in operator json: " + json);
      }

      dictionaryMap = new HashMap<String, CodeDictionary>();
      Iterator<String> nameIterator = dictionary.getFieldNames();
      while (nameIterator.hasNext()) {
        String name = nameIterator.next();
        ArrayNode values = (ArrayNode) dictionary.get(name);
        CodeDictionary codeDictionary = new CodeDictionary();
        for (JsonNode value : values) {
          codeDictionary.addKey(value.getTextValue());
        }
        dictionaryMap.put(name, codeDictionary);
      }
    }

    dataBlock = input.values().iterator().next();
    BlockSchema inputSchema = dataBlock.getProperties().getSchema();
    numColumns = inputSchema.getNumColumns();

    decodedTuple = TupleFactory.getInstance().newTuple(numColumns);

    // create dictionary array; columns without a dictionary keep a null entry
    dictionaries = new CodeDictionary[numColumns];
    for (int i = 0; i < numColumns; i++) {
      dictionaries[i] = dictionaryMap.get(inputSchema.getName(i));
    }

    if (json.has("replaceUnknownCodes")) {
      replaceUnknownCodes = JsonUtils.getText(json, "replaceUnknownCodes");
    }
  }
Example #6
0
  /**
   * Returns the next flattened output tuple.
   *
   * <p>If {@code outTuple} already exists, {@code currentTupleNext()} is tried first, and it is
   * called at most once per invocation. If {@code outTuple} is {@code null}, or
   * {@code currentTupleNext()} returns {@code null}, the next tuple is read from
   * {@code inputBlock}, passed to {@code initCurrentTuple}, and {@code outTuple} is returned.
   *
   * @return the next output tuple, or {@code null} once {@code inputBlock.next()} returns
   *     {@code null}
   */
  public Tuple flattenBagNext() throws IOException, InterruptedException {
    if (outTuple != null) {
      final Tuple pending = currentTupleNext();
      if (pending != null) {
        return pending;
      }
    }

    // Nothing left for the current input tuple (or none started yet): advance the input.
    final Tuple inTuple = inputBlock.next();
    if (inTuple == null) {
      return null;
    }

    initCurrentTuple(inTuple);
    return this.outTuple;
  }
  /**
   * Wires up the input block, the meta-data block and the JSON used by the downstream combine and
   * generate operators.
   *
   * <p>Keys read from {@code json}: {@code "inputBlock"}, {@code "lookupColumn"},
   * {@code "metaRelationName"}, {@code "coordinateColumns"}, {@code "identifierColumn"} and
   * {@code "combineColumns"}.
   *
   * @param conf not read by this method
   * @param input map of block name to block; must contain the blocks named by
   *     {@code "inputBlock"} and {@code "metaRelationName"}
   * @param json operator configuration
   */
  @Override
  public void setInput(Configuration conf, Map<String, Block> input, JsonNode json)
      throws IOException, InterruptedException {
    // #1. input block
    inputBlock = (RubixMemoryBlock) input.get(JsonUtils.getText(json, "inputBlock"));

    // #2. lookup column
    String lookupColumn = json.get("lookupColumn").getTextValue();
    BlockSchema inputSchema = inputBlock.getProperties().getSchema();

    coord2offsets = BlockUtils.generateColumnIndex(inputBlock, lookupColumn);

    // #3. meta data relation name
    // Strings are immutable, so the text is stored directly rather than copied.
    metaRelationName = JsonUtils.getText(json, "metaRelationName");
    matchingMetaBlock = input.get(metaRelationName);
    BlockSchema metaBlockSchema = matchingMetaBlock.getProperties().getSchema();

    // #4. find indexes for coordinate column names in meta relation's schema
    // The column list is parsed once and reused for both sizing and lookup.
    String[] coordinateColumns = JsonUtils.asArray(json.get("coordinateColumns"));
    coordinateColumnIndexes = new int[coordinateColumns.length];
    for (int i = 0; i < coordinateColumns.length; i++) {
      coordinateColumnIndexes[i] = metaBlockSchema.getIndex(coordinateColumns[i]);
    }

    // #5. find index of identifier column in meta relation's schema
    identifierColumnName = JsonUtils.getText(json, "identifierColumn");
    identifierColumnIndex = metaBlockSchema.getIndex(identifierColumnName);

    // #6. combine columns
    ArrayNode combineColumns = (ArrayNode) json.get("combineColumns");

    // setup info for sort operator
    /*
     * jsonForSort = JsonUtils.cloneNode(json); ((ObjectNode)
     * jsonForSort).put("sortBy", combineColumns); sortedBlock = new
     * TupleOperatorBlock(sortOp);
     */

    // setup info for combiner operator
    jsonForCombine = JsonUtils.createObjectNode();
    ObjectNode combineJson = (ObjectNode) jsonForCombine;
    combineJson.put("pivotBy", combineColumns);
    combineJson.put("schema", inputSchema.toJson());
    combinedBlock = new TupleOperatorBlock(combineOp, null);

    // setup info for generate operator
    jsonForGenerate = JsonUtils.createObjectNode();
  }
Example #8
0
  /**
   * Binds the input block, allocates the shared {@code outputTuple} sized to the output schema,
   * builds {@code columnCopyMap} and writes the tag into the {@code "___tag"} output column.
   *
   * <p>{@code columnCopyMap[i]} holds the output-schema index of the column that has the same
   * name as input column {@code i}. The tag is {@code 0} when the JSON {@code "tag"} value is
   * {@code 0}; otherwise it is the number of input columns.
   *
   * @param input map of input blocks; only the first block yielded by the value iterator is used
   * @param json operator configuration; must contain an integer {@code "tag"}
   * @param props supplies the output schema
   */
  @Override
  public void setInput(Map<String, Block> input, JsonNode json, BlockProperties props)
      throws IOException, InterruptedException {
    block = input.values().iterator().next();
    final BlockSchema sourceSchema = block.getProperties().getSchema();
    final BlockSchema targetSchema = props.getSchema();

    outputTuple = TupleFactory.getInstance().newTuple(targetSchema.getNumColumns());

    // Map each input column position to the position of the same-named output column.
    final int sourceWidth = sourceSchema.getNumColumns();
    columnCopyMap = new int[sourceWidth];
    int position = 0;
    while (position < sourceWidth) {
      columnCopyMap[position] = targetSchema.getIndex(sourceSchema.getName(position));
      position++;
    }

    // A non-zero configured tag is replaced by the number of input columns
    // (the left mapper puts the number of columns as the tag).
    final boolean nonZeroTag = json.get("tag").getIntValue() != 0;
    final int tag = nonZeroTag ? sourceWidth : 0;

    outputTuple.set(targetSchema.getIndex("___tag"), tag);
  }