/** * Gets a set of input splits for a MapReduce job running over a Kiji table. One split is created * per region in the input Kiji table. * * @param configuration of the job using the splits. The configuration should specify the input * Kiji table being used, through the configuration variable {@link * KijiConfKeys#KIJI_INPUT_TABLE_URI}. * @param numSplits desired for the job. This framework hint is ignored by this method. * @return an array of input splits to be operated on in the MapReduce job. * @throws IOException if an I/O error occurs while communicating with HBase to determine the * regions in the Kiji table. */ @Override public InputSplit[] getSplits(JobConf configuration, int numSplits) throws IOException { final String uriString = Preconditions.checkNotNull(configuration.get(KijiConfKeys.KIJI_INPUT_TABLE_URI)); final KijiURI inputTableURI = KijiURI.newBuilder(uriString).build(); final Kiji kiji = Kiji.Factory.open(inputTableURI, configuration); try { final KijiTable table = kiji.openTable(inputTableURI.getTable()); try { final HTableInterface htable = HBaseKijiTable.downcast(table).getHTable(); final List<InputSplit> splits = Lists.newArrayList(); for (KijiRegion region : table.getRegions()) { final byte[] startKey = region.getStartKey(); // TODO(KIJIMR-65): For now pick the first available location (ie. region server), if any. final String location = region.getLocations().isEmpty() ? null : region.getLocations().iterator().next(); final TableSplit tableSplit = new TableSplit(htable.getTableName(), startKey, region.getEndKey(), location); splits.add(new KijiTableSplit(tableSplit)); } return splits.toArray(new InputSplit[0]); } finally { table.release(); } } finally { kiji.release(); } }
/** * Generates a split for a given table. * * @param tableURI URI of the Kiji table to split. * @param nsplits Number of splits. * @param conf Base Hadoop configuration used to open the Kiji instance. * @return a list of split start keys, as HFileKeyValue (with no value, just the keys). * @throws IOException on I/O error. */ private static List<HFileKeyValue> makeTableKeySplit( KijiURI tableURI, int nsplits, Configuration conf) throws IOException { final Kiji kiji = Kiji.Factory.open(tableURI, conf); try { final KijiTable table = kiji.openTable(tableURI.getTable()); try { if (NUM_SPLITS_AUTO == nsplits) { final List<HFileKeyValue> startKeys = Lists.newArrayList(); for (KijiRegion region : table.getRegions()) { startKeys.add(HFileKeyValue.createFromRowKey(region.getStartKey())); } return startKeys; } else { switch (KijiTableLayout.getEncoding(table.getLayout().getDesc().getKeysFormat())) { case RAW: { // The user has explicitly specified how many HFiles to create, but this is not // possible when row key hashing is disabled. throw new JobConfigurationException( String.format( "Table '%s' has row key hashing disabled, so the number of HFile splits must be" + "determined by the number of HRegions in the HTable. " + "Use an HFileMapReduceJobOutput constructor that enables auto splitting.", table.getName())); } case FORMATTED: case HASH: case HASH_PREFIX: { // Those cases are supported: break; } default: throw new RuntimeException( "Unhandled row key encoding: " + KijiTableLayout.getEncoding(table.getLayout().getDesc().getKeysFormat())); } return generateEvenStartKeys(nsplits); } } finally { ResourceUtils.releaseOrLog(table); } } finally { ResourceUtils.releaseOrLog(kiji); } }
/** * Initializes a new table-wide record writer. * * @param oformat KijiHFileOutputFormat this writer is built from. * @param context Context of the task. * @throws IOException on I/O error. */ public TableRecordWriter(KijiHFileOutputFormat oformat, TaskAttemptContext context) throws IOException { mContext = Preconditions.checkNotNull(context); mConf = mContext.getConfiguration(); mLatestTimestamp = mConf.getLong(CONF_LATEST_TIMESTAMP, System.currentTimeMillis()); mLatestTimestampBytes = toBytes(mLatestTimestamp); mOutputDir = oformat.getDefaultWorkFile(mContext, OUTPUT_EXTENSION); mFileSystem = mOutputDir.getFileSystem(mConf); mTableURI = KijiURI.newBuilder(mConf.get(KijiConfKeys.KIJI_OUTPUT_TABLE_URI)).build(); final Kiji kiji = Kiji.Factory.open(mTableURI, mConf); final KijiTable table = kiji.openTable(mTableURI.getTable()); mLayout = table.getLayout(); ResourceUtils.releaseOrLog(table); ResourceUtils.releaseOrLog(kiji); }
/** * Creates a new record reader that scans over a subset of rows from a Kiji table. The record * reader will scan over rows in the table specified in the provided input split, subject to row * limits specified in the data request serialized into the specified configuration. * * @param split for the MapReduce task that will use this record reader. The split specifies a * subset of rows from a Kiji table. * @param configuration for the MapReduce job using this record reader. The configuration should * specify the input Kiji table through the configuration variable {@link * KijiConfKeys#KIJI_INPUT_TABLE_URI} and a serialized {@link KijiDataRequest} through the * configuration variable {@link KijiConfKeys#KIJI_INPUT_DATA_REQUEST}. * @throws IOException if there is a problem constructing the record reader and opening the * resources it requires. */ public KijiTableRecordReader(InputSplit split, Configuration configuration) throws IOException { // Get data request from the job configuration. final String dataRequestB64 = configuration.get(KijiConfKeys.KIJI_INPUT_DATA_REQUEST); Preconditions.checkNotNull(dataRequestB64, "Missing data request in job configuration."); final byte[] dataRequestBytes = Base64.decodeBase64(Bytes.toBytes(dataRequestB64)); mDataRequest = (KijiDataRequest) SerializationUtils.deserialize(dataRequestBytes); // Open connections to Kiji. assert split instanceof KijiTableSplit; mSplit = (KijiTableSplit) split; final KijiURI inputURI = KijiURI.newBuilder(configuration.get(KijiConfKeys.KIJI_INPUT_TABLE_URI)).build(); final KijiScannerOptions scannerOptions = new KijiScannerOptions() .setStartRow(new HBaseEntityId(mSplit.getStartRow())) .setStopRow(new HBaseEntityId(mSplit.getEndRow())); mKiji = Kiji.Factory.open(inputURI, configuration); mTable = mKiji.openTable(inputURI.getTable()); mReader = mTable.openTableReader(); mScanner = mReader.getScanner(mDataRequest, scannerOptions); mIterator = mScanner.iterator(); }
/** * Run the entry addition system. Asks the user for values for all fields and then fills them in. * * @param args Command line arguments; this is expected to be empty. * @return Exit status code for the application; 0 indicates success. * @throws IOException If an error contacting Kiji occurs. * @throws InterruptedException If the process is interrupted while performing I/O. */ @Override public int run(String[] args) throws IOException, InterruptedException { final ConsolePrompt console = new ConsolePrompt(); // Interactively prompt the user for the record fields from the console. final String first = console.readLine("First name: "); final String last = console.readLine("Last name: "); final String email = console.readLine("Email address: "); final String telephone = console.readLine("Telephone: "); final Address addr = new Address(); addr.setAddr1(console.readLine("Address line 1: ")); // Optional: apartment. final String aptNumStr = console.readLine("Apartment: "); if (!aptNumStr.isEmpty()) { addr.setApt(aptNumStr); } // Optional: address line 2. addr.setAddr2(console.readLine("Address line 2: ")); if (addr.getAddr2().length() == 0) { addr.setAddr2(null); } addr.setCity(console.readLine("City: ")); addr.setState(console.readLine("State: ")); addr.setZip(Integer.valueOf(console.readLine("Zip: "))); Kiji kiji = null; KijiTable table = null; KijiTableWriter writer = null; try { // Load HBase configuration before connecting to Kiji. setConf(HBaseConfiguration.addHbaseResources(getConf())); // Connect to Kiji and open the table. kiji = Kiji.Factory.open( new KijiConfiguration(getConf(), KijiConfiguration.DEFAULT_INSTANCE_NAME)); table = kiji.openTable(TABLE_NAME); writer = table.openTableWriter(); // Create a row ID with the first and last name. final EntityId user = table.getEntityId(first + "," + last); // Write the record fields to appropriate table columns in the row. // The column names are specified as constants in the Fields.java class. final long timestamp = System.currentTimeMillis(); writer.put(user, Fields.INFO_FAMILY, Fields.FIRST_NAME, timestamp, first); writer.put(user, Fields.INFO_FAMILY, Fields.LAST_NAME, timestamp, last); writer.put(user, Fields.INFO_FAMILY, Fields.EMAIL, timestamp, email); writer.put(user, Fields.INFO_FAMILY, Fields.TELEPHONE, timestamp, telephone); writer.put(user, Fields.INFO_FAMILY, Fields.ADDRESS, timestamp, addr); } catch (KijiTableNotFoundException e) { System.out.println("Could not find Kiji table: " + TABLE_NAME); return 1; } finally { // Safely free up resources by closing in reverse order. IOUtils.closeQuietly(writer); IOUtils.closeQuietly(table); ReferenceCountableUtils.releaseQuietly(kiji); IOUtils.closeQuietly(console); } return 0; }