@Override public ResourceSchema getSchema(String location, Job job) throws IOException { if (!partitionKeysSet) { Set<String> keys = getPartitionColumns(location, job); if (!(keys == null || keys.size() == 0)) { // re-edit the pigSchema to contain the new partition keys. ResourceFieldSchema[] fields = pigSchema.getFields(); LOG.debug("Schema: " + Arrays.toString(fields)); ResourceFieldSchema[] newFields = Arrays.copyOf(fields, fields.length + keys.size()); int index = fields.length; for (String key : keys) { newFields[index++] = new ResourceFieldSchema(new FieldSchema(key, DataType.CHARARRAY)); } pigSchema.setFields(newFields); LOG.debug("Added partition fields: " + keys + " to loader schema"); LOG.debug("Schema is: " + Arrays.toString(newFields)); } partitionKeysSet = true; } return pigSchema; }
/**
 * Wraps a single Pig field schema inside a tuple-typed field.
 *
 * <p>The returned field has type {@code DataType.TUPLE}, is named
 * {@code PIG_TUPLE_WRAPPER}, and carries a nested schema whose only field is
 * {@code subFieldSchema}.
 *
 * @param subFieldSchema the field schema to place inside the tuple
 * @return a new tuple field schema containing {@code subFieldSchema}
 * @throws IOException declared by the signature; presumably propagated from
 *     the schema setters (confirm against the Pig API)
 */
public static ResourceFieldSchema wrapAsTuple(ResourceFieldSchema subFieldSchema)
        throws IOException {
    // Nested schema holding just the wrapped field.
    ResourceFieldSchema[] innerFields = new ResourceFieldSchema[] {subFieldSchema};
    ResourceSchema innerSchema = new ResourceSchema();
    innerSchema.setFields(innerFields);

    // The type is set before the schema, matching the original call order.
    ResourceFieldSchema wrapper = new ResourceFieldSchema();
    wrapper.setType(DataType.TUPLE);
    wrapper.setName(PIG_TUPLE_WRAPPER);
    wrapper.setSchema(innerSchema);
    return wrapper;
}