Example #1
  /**
   * Reads only the columns that were requested in the constructor, filling the
   * tuple with data columns from the row and, if applicable, partition-key
   * values derived from the file path.
   *
   * @param struct the current row as a lazily deserialized ColumnarStruct
   * @param path the file being read, used to resolve partition key values
   * @return a Tuple containing the requested columns, in request order
   * @throws IOException if reading a column fails
   */
  private Tuple readColumnarTuple(ColumnarStruct struct, Path path) throws IOException {

    int[] columnIndexes = getRequiredColumns();
    // the partition keys, if any, will already be in the UDFContext here.
    String[] partitionKeys = getPartitionKeys(null, null);
    // only recompute the path's partition key values when the path changes
    if (currentPath == null || !currentPath.equals(path)) {
      currentPathPartitionKeyMap =
          (partitionKeys == null)
              ? null
              : pathPartitionerHelper.getPathPartitionKeyValues(path.toString());
      currentPath = path;
    }

    // if partitionColumns is null, this sentinel keeps the loop below
    // from trying to add partition columns that do not exist
    int partitionColumnStartIndex = Integer.MAX_VALUE;

    if (partitionColumns != null && !partitionColumns.isEmpty()) {
      // partition columns are always appended after the schema fields.
      partitionColumnStartIndex = pigSchema.getFields().length;
    }

    // create a tuple sized to the number of requested columns
    Tuple t = tupleFactory.newTuple(columnIndexes.length);

    // read in all columns
    for (int i = 0; i < columnIndexes.length; i++) {
      int columnIndex = columnIndexes[i];

      if (columnIndex < partitionColumnStartIndex) {
        Object obj = struct.getField(columnIndex);
        Object pigType = HiveRCSchemaUtil.extractPigTypeFromHiveType(obj);

        t.set(i, pigType);

      } else {
        // read a partition column; this branch is only reached
        // when partition columns exist
        String key = partitionKeys[columnIndex - partitionColumnStartIndex];
        Object value = currentPathPartitionKeyMap.get(key);
        t.set(i, value);
      }
    }

    return t;
  }
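
The index arithmetic above is easiest to see with concrete values: data columns occupy indexes [0, fieldCount) and partition columns are appended after them, so a requested index either reads from the row struct or, after subtracting partitionColumnStartIndex, looks up a path-derived partition value. The standalone sketch below illustrates that routing; all column names and values are made up for illustration and none of it is part of the loader.

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class IndexLayoutSketch {
  public static void main(String[] args) {
    // illustrative stand-ins: data columns from the file schema,
    // partition keys and values derived from the file path
    List<String> dataColumns = Arrays.asList("user", "amount");
    List<String> partitionKeys = Arrays.asList("year", "month");
    Map<String, String> partitionValues = new HashMap<String, String>();
    partitionValues.put("year", "2011");
    partitionValues.put("month", "07");

    // partition columns are appended after the schema fields,
    // mirroring partitionColumnStartIndex in readColumnarTuple
    int partitionColumnStartIndex = dataColumns.size();

    int[] requested = {0, 2, 3}; // user, year, month
    for (int i = 0; i < requested.length; i++) {
      int columnIndex = requested[i];
      if (columnIndex < partitionColumnStartIndex) {
        System.out.println("tuple[" + i + "] <- row field " + columnIndex
            + " (" + dataColumns.get(columnIndex) + ")");
      } else {
        String key = partitionKeys.get(columnIndex - partitionColumnStartIndex);
        System.out.println("tuple[" + i + "] <- partition key " + key + " = "
            + partitionValues.get(key));
      }
    }
  }
}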
Example #2
  /**
   * Reads the partition column names, preferring the value cached in the
   * UDFContext and falling back to scanning the path structure.
   *
   * @param location the input location being loaded; may be null before it is known
   * @param job the current Job, used for its Configuration; may be null
   * @return the partition column names, or null if they cannot be determined yet
   */
  private Set<String> getPartitionColumns(String location, Job job) {

    if (partitionColumns == null) {
      // read the partition columns from the UDFContext first;
      // if they are not there, derive them using the PathPartitionHelper.

      Properties properties = getUDFContext();

      if (properties == null) properties = new Properties();

      String partitionColumnStr = properties.getProperty(PathPartitionHelper.PARTITION_COLUMNS);

      if (partitionColumnStr == null && location != null && job != null) {
        // not cached yet: derive the partition keys from the path structure.
        Set<String> partitionColumnSet;

        try {
          partitionColumnSet =
              pathPartitionerHelper.getPartitionKeys(location, job.getConfiguration());
        } catch (IOException e) {
          // rethrow as unchecked, keeping the original stack trace visible
          RuntimeException rte = new RuntimeException(e);
          rte.setStackTrace(e.getStackTrace());
          throw rte;
        }

        if (partitionColumnSet != null) {

          // cache the column names in the UDFContext as a comma-separated list
          StringBuilder buff = new StringBuilder();

          int i = 0;
          for (String column : partitionColumnSet) {
            if (i++ != 0) {
              buff.append(',');
            }
            buff.append(column);
          }

          String buffStr = buff.toString().trim();

          if (buffStr.length() > 0) {
            properties.setProperty(PathPartitionHelper.PARTITION_COLUMNS, buffStr);
          }

          partitionColumns = partitionColumnSet;
        }

      } else {
        // the partition columns have already been set in the UDFContext
        if (partitionColumnStr != null) {
          partitionColumns = new LinkedHashSet<String>();
          for (String column : partitionColumnStr.split(",")) {
            partitionColumns.add(column);
          }
        }
      }
    }

    return partitionColumns;
  }
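
The cache itself is nothing more than a comma-joined string stored under PathPartitionHelper.PARTITION_COLUMNS. The following round-trip sketch reproduces the write and read paths above with a plain Properties object standing in for the UDFContext and a made-up property key; both substitutions are assumptions for illustration only.

import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.Properties;
import java.util.Set;

public class PartitionColumnCacheSketch {
  // hypothetical stand-in for PathPartitionHelper.PARTITION_COLUMNS
  private static final String PARTITION_COLUMNS = "path.partition.columns";

  public static void main(String[] args) {
    Properties properties = new Properties();

    // write path: join the discovered column names with commas, preserving order
    Set<String> discovered = new LinkedHashSet<String>(Arrays.asList("year", "month", "day"));
    properties.setProperty(PARTITION_COLUMNS, String.join(",", discovered));

    // read path: split the cached string back into an ordered set,
    // mirroring the else branch of getPartitionColumns
    Set<String> restored = new LinkedHashSet<String>(
        Arrays.asList(properties.getProperty(PARTITION_COLUMNS).split(",")));

    System.out.println(restored); // prints [year, month, day]
  }
}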