/**
 * Only reads the columns that were requested in the constructor.
 *
 * @param struct the ColumnarStruct holding the current row's columns
 * @param path the Path of the file currently being read
 * @return a Tuple containing the requested columns, including any partition-key values
 * @throws IOException if the columns or partition key values cannot be read
 */
private Tuple readColumnarTuple(ColumnarStruct struct, Path path) throws IOException {

    int[] columnIndexes = getRequiredColumns();

    // the partition keys, if any, will already be in the UDFContext here.
    String[] partitionKeys = getPartitionKeys(null, null);

    // only recompute the partition key/value map if the path has changed
    if (currentPath == null || !currentPath.equals(path)) {
        currentPathPartitionKeyMap = (partitionKeys == null) ? null
                : pathPartitionerHelper.getPathPartitionKeyValues(path.toString());
        currentPath = path;
    }

    // if partitionColumns is null, this value stops the loop below
    // from trying to add partition columns that do not exist
    int partitionColumnStartIndex = Integer.MAX_VALUE;

    if (!(partitionColumns == null || partitionColumns.size() == 0)) {
        // partition columns are always appended after the schema fields.
        partitionColumnStartIndex = pigSchema.getFields().length;
    }

    // create a tuple sized to the number of requested columns
    Tuple t = tupleFactory.newTuple(columnIndexes.length);

    // read in all columns
    for (int i = 0; i < columnIndexes.length; i++) {
        int columnIndex = columnIndexes[i];

        if (columnIndex < partitionColumnStartIndex) {
            // a regular data column: convert the Hive value to its Pig equivalent
            Object obj = struct.getField(columnIndex);
            Object pigType = HiveRCSchemaUtil.extractPigTypeFromHiveType(obj);

            t.set(i, pigType);
        } else {
            // read the partition columns;
            // only executed if partitionColumns is not null
            String key = partitionKeys[columnIndex - partitionColumnStartIndex];
            Object value = currentPathPartitionKeyMap.get(key);

            t.set(i, value);
        }
    }

    return t;
}
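/*
 * Illustration (hypothetical values, not from the original source): for an input
 * file such as /logs/daydate=2010-01-01/country=ZA/part-00000 with partition
 * columns [daydate, country], pathPartitionerHelper.getPathPartitionKeyValues(...)
 * is expected to return {daydate=2010-01-01, country=ZA}. readColumnarTuple then
 * appends those values after the regular schema fields, so a requested column
 * index equal to pigSchema.getFields().length maps to the "daydate" value.
 */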
/**
 * Reads the partition columns, first from the UDFContext and, if they have not
 * been stored there yet, from the PathPartitioner.
 *
 * @param location the input location; may be null when called on the backend
 * @param job the current Job; may be null when called on the backend
 * @return the set of partition column names, in order
 */
private Set<String> getPartitionColumns(String location, Job job) {

    if (partitionColumns == null) {
        // read the partition columns from the UDFContext first;
        // if not in the UDFContext, read them using the PathPartitioner.
        Properties properties = getUDFContext();
        if (properties == null)
            properties = new Properties();

        String partitionColumnStr = properties.getProperty(PathPartitionHelper.PARTITION_COLUMNS);

        if (partitionColumnStr == null && !(location == null || job == null)) {
            // the partition columns have not been written to the UDFContext yet.
            Set<String> partitionColumnSet;

            try {
                partitionColumnSet = pathPartitionerHelper.getPartitionKeys(location, job.getConfiguration());
            } catch (IOException e) {
                RuntimeException rte = new RuntimeException(e);
                rte.setStackTrace(e.getStackTrace());
                throw rte;
            }

            if (partitionColumnSet != null) {
                // store the partition columns as a comma-separated string
                StringBuilder buff = new StringBuilder();

                int i = 0;
                for (String column : partitionColumnSet) {
                    if (i++ != 0) {
                        buff.append(',');
                    }
                    buff.append(column);
                }

                String buffStr = buff.toString().trim();
                if (buffStr.length() > 0) {
                    properties.setProperty(PathPartitionHelper.PARTITION_COLUMNS, buffStr);
                }

                partitionColumns = partitionColumnSet;
            }
        } else if (partitionColumnStr != null) {
            // the partition columns have already been set in the UDFContext.
            String[] split = partitionColumnStr.split(",");

            partitionColumns = new LinkedHashSet<String>();
            if (split.length > 0) {
                for (String splitItem : split) {
                    partitionColumns.add(splitItem);
                }
            }
        }
    }

    return partitionColumns;
}
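/*
 * Note (illustrative, not part of the original source): with the hypothetical
 * partition columns above, the value stored in the UDFContext under
 * PathPartitionHelper.PARTITION_COLUMNS would be the comma-separated string
 * "daydate,country". On the backend, where location and job are null, that same
 * string is split on ',' and loaded into a LinkedHashSet so the partition column
 * order is preserved when partition values are appended to each tuple.
 */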