/**
 * Builds the schema of the decoded output and prepares the per-column decoding state.
 *
 * <p>Side effects: assigns the {@code numColumns}, {@code dictionaries} and {@code decodedTuple}
 * fields, all sized to the column count of {@code originalSchema}.
 *
 * @param dictionaryMap dictionaries keyed by column name; a column whose name is a key of this
 *     map is treated as decoded
 * @param originalSchema schema of the input block
 * @return a schema with the same column names as {@code originalSchema}, where decoded columns
 *     are typed {@code DataType.STRING} and every other column keeps its original type
 */
private BlockSchema generateSchema(
      Map<String, CodeDictionary> dictionaryMap, BlockSchema originalSchema) {
    numColumns = originalSchema.getNumColumns();

    // Per-column state: dictionaries[c] stays null for columns that are not decoded.
    dictionaries = new CodeDictionary[numColumns];
    decodedTuple = TupleFactory.getInstance().newTuple(numColumns);

    ColumnType[] decodedTypes = new ColumnType[numColumns];

    for (int col = 0; col < decodedTypes.length; col++) {
      ColumnType column = new ColumnType();
      column.setName(originalSchema.getName(col));

      // Read the name back from the column type, as it is the key used for the lookup.
      String columnName = column.getName();
      CodeDictionary dictionary = null;

      if (!dictionaryMap.containsKey(columnName)) {
        // Not a decoded column: the type passes through unchanged.
        column.setType(originalSchema.getType(col));
      } else {
        // Decoded column: the output carries strings instead of codes.
        column.setType(DataType.STRING);
        dictionary = dictionaryMap.get(columnName);
      }

      dictionaries[col] = dictionary;
      decodedTypes[col] = column;
    }

    return new BlockSchema(decodedTypes);
  }
// Example #2
// 0
  /**
   * Computes the output schema obtained by flattening the configured columns of the input.
   *
   * <p>Columns whose name is not in {@code flattenColumnNameSet} are copied to the output as-is.
   * Each flattened column is replaced by a list of column types: the list already registered in
   * {@code inputColumnIndexToOutputTypes} for that column index when it is non-empty, otherwise
   * the column types of the nested schema (which are then registered in
   * {@code inputColumnIndexToOutputTypes} as a side effect).
   *
   * <p>If the nested schema consists of exactly one column of type {@code DataType.TUPLE}, the
   * schema of that tuple is used as the nested schema.
   *
   * @param inputSchema schema of the input block
   * @return the schema of the flattened output
   * @throws RuntimeException if a flattened column has no usable nested schema, or if the
   *     registered output types do not match the number of nested input fields
   */
  private BlockSchema generateOutSchema(BlockSchema inputSchema) {
    List<ColumnType> outputColumnTypes = new ArrayList<ColumnType>();

    for (ColumnType ct : inputSchema.getColumnTypes()) {
      String colName = ct.getName();

      if (!flattenColumnNameSet.contains(colName)) {
        outputColumnTypes.add(ct);
        continue;
      }

      // Only flattened columns need their index, so look it up after the filter.
      int colIndex = inputSchema.getIndex(colName);

      // Resolve the nested schema null-safely: a missing schema must reach the descriptive
      // errors below rather than fail here with a bare NullPointerException.
      BlockSchema inputNestedColumnSchema = ct.getColumnSchema();
      ColumnType[] ctypes =
          inputNestedColumnSchema == null ? null : inputNestedColumnSchema.getColumnTypes();

      // A nested schema holding a single TUPLE column is unwrapped to that tuple's schema.
      if (ctypes != null && ctypes.length == 1 && ctypes[0].getType() == DataType.TUPLE) {
        inputNestedColumnSchema = ctypes[0].getColumnSchema();
        ctypes = inputNestedColumnSchema == null ? null : inputNestedColumnSchema.getColumnTypes();
      }

      List<ColumnType> flattedOutputColumnTypes = inputColumnIndexToOutputTypes.get(colIndex);

      if (flattedOutputColumnTypes != null && !flattedOutputColumnTypes.isEmpty()) {
        // output schema published in json.
        // TODO: assert output schema in json matches nested input schema for the column

        if (ctypes == null) {
          throw new RuntimeException(
              "Invalid schema for columnn:  "
                  + colName
                  + " column schema: "
                  + inputNestedColumnSchema);
        }

        if (flattedOutputColumnTypes.size() != ctypes.length) {
          throw new RuntimeException(
              "Output column specification does not match number of input fields for " + colName);
        }
      } else {
        // output schema not published in json. Extract from nested input column schema

        if (ctypes == null) {
          throw new RuntimeException("Schema is unknown for column: " + colName);
        }

        // Copy into a fresh list so the registered types do not alias the schema's array.
        flattedOutputColumnTypes = new ArrayList<ColumnType>(Arrays.asList(ctypes));
        inputColumnIndexToOutputTypes.put(colIndex, flattedOutputColumnTypes);
      }

      outputColumnTypes.addAll(flattedOutputColumnTypes);
    }

    return new BlockSchema(outputColumnTypes.toArray(new ColumnType[0]));
  }