示例#1
0
 @VisibleForTesting
 LazyHBaseRow(LazySimpleStructObjectInspector oi, ColumnMappings columnMappings) {
   super(oi);
   this.keyFactory = DefaultHBaseKeyFactory.forTest(null, columnMappings);
   this.valueFactories = null;
   this.columnsMapping = columnMappings.getColumnsMapping();
 }
  @Override
  public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {

    // obtain delegation tokens for the job
    if (UserGroupInformation.getCurrentUser().hasKerberosCredentials()) {
      TableMapReduceUtil.initCredentials(jobConf);
    }

    String hbaseTableName = jobConf.get(HBaseSerDe.HBASE_TABLE_NAME);
    setHTable(new HTable(HBaseConfiguration.create(jobConf), Bytes.toBytes(hbaseTableName)));
    String hbaseColumnsMapping = jobConf.get(HBaseSerDe.HBASE_COLUMNS_MAPPING);
    boolean doColumnRegexMatching =
        jobConf.getBoolean(HBaseSerDe.HBASE_COLUMNS_REGEX_MATCHING, true);

    if (hbaseColumnsMapping == null) {
      throw new IOException(HBaseSerDe.HBASE_COLUMNS_MAPPING + " required for HBase Table.");
    }

    ColumnMappings columnMappings = null;
    try {
      columnMappings = HBaseSerDe.parseColumnsMapping(hbaseColumnsMapping, doColumnRegexMatching);
    } catch (SerDeException e) {
      throw new IOException(e);
    }

    int iKey = columnMappings.getKeyIndex();
    ColumnMapping keyMapping = columnMappings.getKeyMapping();

    // Take filter pushdown into account while calculating splits; this
    // allows us to prune off regions immediately.  Note that although
    // the Javadoc for the superclass getSplits says that it returns one
    // split per region, the implementation actually takes the scan
    // definition into account and excludes regions which don't satisfy
    // the start/stop row conditions (HBASE-1829).
    Scan scan =
        createFilterScan(
            jobConf,
            iKey,
            HiveHBaseInputFormatUtil.getStorageFormatOfKey(
                keyMapping.mappingSpec,
                jobConf.get(HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE, "string")));

    // The list of families that have been added to the scan
    List<String> addedFamilies = new ArrayList<String>();

    // REVIEW:  are we supposed to be applying the getReadColumnIDs
    // same as in getRecordReader?
    for (ColumnMapping colMap : columnMappings) {
      if (colMap.hbaseRowKey) {
        continue;
      }

      if (colMap.qualifierName == null) {
        scan.addFamily(colMap.familyNameBytes);
        addedFamilies.add(colMap.familyName);
      } else {
        if (!addedFamilies.contains(colMap.familyName)) {
          // add the column only if the family has not already been added
          scan.addColumn(colMap.familyNameBytes, colMap.qualifierNameBytes);
        }
      }
    }
    setScan(scan);

    Job job = new Job(jobConf);
    JobContext jobContext = ShimLoader.getHadoopShims().newJobContext(job);
    Path[] tablePaths = FileInputFormat.getInputPaths(jobContext);

    List<org.apache.hadoop.mapreduce.InputSplit> splits = super.getSplits(jobContext);
    InputSplit[] results = new InputSplit[splits.size()];

    for (int i = 0; i < splits.size(); i++) {
      results[i] = new HBaseSplit((TableSplit) splits.get(i), tablePaths[0]);
    }

    return results;
  }