/**
 * Builds the parquet {@link MessageType} for a thrift struct while applying a projection filter.
 *
 * <p>The struct is visited with a {@code ThriftSchemaConvertVisitor} created from {@code filter},
 * starting at an empty {@code FieldsPath} with a REPEATED root named {@code "ParquetSchema"}.
 * The fields of the resulting group become the fields of the returned message type.
 *
 * @param struct the thrift struct to convert
 * @param filter the projection filter handed to the visitor
 * @return a message type named after the root state, holding the converted group's fields
 * @throws ThriftProjectionException if the visitor's result for the root struct is not a "keep"
 */
public static MessageType convert(StructType struct, FieldProjectionFilter filter) {
  final State rootState = new State(new FieldsPath(), REPEATED, "ParquetSchema");
  final ThriftSchemaConvertVisitor visitor = new ThriftSchemaConvertVisitor(filter, true);
  final ConvertedField rootResult = struct.accept(visitor, rootState);

  if (rootResult.isKeep()) {
    // The root comes back as a group; only its fields are carried into the message type.
    return new MessageType(
        rootState.name, rootResult.asKeep().getType().asGroupType().getFields());
  }

  // Anything other than a "keep" at the root means the projection selected nothing usable.
  throw new ThriftProjectionException("No columns have been selected");
}
@Override public ConvertedField visit(StructType structType, State state) { // special care is taken when converting unions, // because we are actually both converting + projecting in // one pass, and unions need special handling when projecting. final boolean isUnion = isUnion(structType.getStructOrUnionType()); boolean hasSentinelUnionColumns = false; boolean hasNonSentinelUnionColumns = false; List<Type> convertedChildren = new ArrayList<Type>(); for (ThriftField child : structType.getChildren()) { State childState = new State(state.path.push(child), getRepetition(child), child.getName()); ConvertedField converted = child.getType().accept(this, childState); if (isUnion && !converted.isKeep()) { // user is not keeping this "kind" of union, but we still need // to keep at least one of the primitives of this union around. // in order to know what "kind" of union each record is. // TODO: in the future, we should just filter these records out instead // re-do the recursion, with a new projection filter that keeps only // the first primitive it encounters ConvertedField firstPrimitive = child .getType() .accept( new ThriftSchemaConvertVisitor(new KeepOnlyFirstPrimitiveFilter(), true), childState); convertedChildren.add(firstPrimitive.asKeep().getType().withId(child.getFieldId())); hasSentinelUnionColumns = true; } if (converted.isSentinelUnion()) { // child field is a sentinel union that we should drop if possible if (childState.repetition == REQUIRED) { // but this field is required, so we may still need it convertedChildren.add(converted.asSentinelUnion().getType().withId(child.getFieldId())); hasSentinelUnionColumns = true; } } else if (converted.isKeep()) { // user has selected this column, so we keep it. 
convertedChildren.add(converted.asKeep().getType().withId(child.getFieldId())); hasNonSentinelUnionColumns = true; } } if (!hasNonSentinelUnionColumns && hasSentinelUnionColumns) { // this is a union, and user has not requested any of the children // of this union. We should drop this union, if possible, but // we may not be able to, so tag this as a sentinel. return new SentinelUnion( state.path, new GroupType(state.repetition, state.name, convertedChildren)); } if (hasNonSentinelUnionColumns) { // user requested some of the fields of this struct, so we keep the struct return new Keep(state.path, new GroupType(state.repetition, state.name, convertedChildren)); } else { // user requested none of the fields of this struct, so we drop it return new Drop(state.path); } }