private BlockSchema generateSchema( Map<String, CodeDictionary> dictionaryMap, BlockSchema originalSchema) { numColumns = originalSchema.getNumColumns(); ColumnType[] columnTypes = new ColumnType[numColumns]; // create dictionary array dictionaries = new CodeDictionary[numColumns]; decodedTuple = TupleFactory.getInstance().newTuple(numColumns); for (int i = 0; i < columnTypes.length; i++) { ColumnType type = new ColumnType(); columnTypes[i] = type; type.setName(originalSchema.getName(i)); if (dictionaryMap.containsKey(type.getName())) { // this column is decoded. Transform the schema type.setType(DataType.STRING); dictionaries[i] = dictionaryMap.get(type.getName()); } else { // this column is not decoded. Keep the schema intact type.setType(originalSchema.getType(i)); dictionaries[i] = null; } } return new BlockSchema(columnTypes); }
private BlockSchema generateOutSchema(BlockSchema inputSchema) { List<ColumnType> outputColumnTypes = new ArrayList<ColumnType>(); for (ColumnType ct : inputSchema.getColumnTypes()) { String colName = ct.getName(); int colIndex = inputSchema.getIndex(colName); if (!flattenColumnNameSet.contains(colName)) { outputColumnTypes.add(ct); } else { BlockSchema inputNestedColumnSchema = ct.getColumnSchema(); ColumnType[] ctypes = inputNestedColumnSchema.getColumnTypes(); if (ctypes.length == 1 && ctypes[0].getType() == DataType.TUPLE) inputNestedColumnSchema = ctypes[0].getColumnSchema(); List<ColumnType> flattedOutputColumnTypes = inputColumnIndexToOutputTypes.get(colIndex); if (flattedOutputColumnTypes != null && !flattedOutputColumnTypes.isEmpty()) { // output schema published in json. // TODO: assert output schema in json matches nested input schema for the column if (inputNestedColumnSchema == null || inputNestedColumnSchema.getColumnTypes() == null) throw new RuntimeException( "Invalid schema for columnn: " + colName + " column schema: " + inputNestedColumnSchema); if (flattedOutputColumnTypes.size() != inputNestedColumnSchema.getColumnTypes().length) throw new RuntimeException( "Output column specification does not match number of input fields for " + colName); } else { // output schema not published in json. Extract from nested input column schema if (inputNestedColumnSchema == null) { throw new RuntimeException("Schema is unknown for column: " + colName); } else { List<ColumnType> subColTypes = Arrays.asList(inputNestedColumnSchema.getColumnTypes()); flattedOutputColumnTypes = new ArrayList<ColumnType>(); flattedOutputColumnTypes.addAll(subColTypes); } inputColumnIndexToOutputTypes.put(colIndex, flattedOutputColumnTypes); } outputColumnTypes.addAll(flattedOutputColumnTypes); } } return new BlockSchema(outputColumnTypes.toArray(new ColumnType[0])); }