/** * Gets a set of input splits for a MapReduce job running over a Kiji table. One split is created * per region in the input Kiji table. * * @param configuration of the job using the splits. The configuration should specify the input * Kiji table being used, through the configuration variable {@link * KijiConfKeys#KIJI_INPUT_TABLE_URI}. * @param numSplits desired for the job. This framework hint is ignored by this method. * @return an array of input splits to be operated on in the MapReduce job. * @throws IOException if an I/O error occurs while communicating with HBase to determine the * regions in the Kiji table. */ @Override public InputSplit[] getSplits(JobConf configuration, int numSplits) throws IOException { final String uriString = Preconditions.checkNotNull(configuration.get(KijiConfKeys.KIJI_INPUT_TABLE_URI)); final KijiURI inputTableURI = KijiURI.newBuilder(uriString).build(); final Kiji kiji = Kiji.Factory.open(inputTableURI, configuration); try { final KijiTable table = kiji.openTable(inputTableURI.getTable()); try { final HTableInterface htable = HBaseKijiTable.downcast(table).getHTable(); final List<InputSplit> splits = Lists.newArrayList(); for (KijiRegion region : table.getRegions()) { final byte[] startKey = region.getStartKey(); // TODO(KIJIMR-65): For now pick the first available location (ie. region server), if any. final String location = region.getLocations().isEmpty() ? null : region.getLocations().iterator().next(); final TableSplit tableSplit = new TableSplit(htable.getTableName(), startKey, region.getEndKey(), location); splits.add(new KijiTableSplit(tableSplit)); } return splits.toArray(new InputSplit[0]); } finally { table.release(); } } finally { kiji.release(); } }