@Test public void testJSONBulkImporter() throws Exception { // Prepare input file: File inputFile = File.createTempFile("TestJSONImportInput", ".txt", getLocalTempDir()); TestingResources.writeTextFile( inputFile, TestingResources.get(BulkImporterTestUtils.JSON_IMPORT_DATA)); Configuration conf = getConf(); conf.set( DescribedInputTextBulkImporter.CONF_FILE, BulkImporterTestUtils.localResource(BulkImporterTestUtils.FOO_IMPORT_DESCRIPTOR)); // Run the bulk-import: final KijiMapReduceJob job = KijiBulkImportJobBuilder.create() .withConf(conf) .withBulkImporter(JSONBulkImporter.class) .withInput(MapReduceJobInputs.newTextMapReduceJobInput(new Path(inputFile.toString()))) .withOutput(new DirectKijiTableMapReduceJobOutput(mTable.getURI())) .build(); assertTrue(job.run()); final Counters counters = job.getHadoopJob().getCounters(); assertEquals( 3, counters.findCounter(JobHistoryCounters.BULKIMPORTER_RECORDS_PROCESSED).getValue()); assertEquals( 1, counters.findCounter(JobHistoryCounters.BULKIMPORTER_RECORDS_INCOMPLETE).getValue()); assertEquals( 0, counters.findCounter(JobHistoryCounters.BULKIMPORTER_RECORDS_REJECTED).getValue()); // Validate output: final KijiRowScanner scanner = mReader.getScanner(KijiDataRequest.create("info")); BulkImporterTestUtils.validateImportedRows(scanner, false); scanner.close(); }
/** * Creates a new record reader that scans over a subset of rows from a Kiji table. The record * reader will scan over rows in the table specified in the provided input split, subject to row * limits specified in the data request serialized into the specified configuration. * * @param split for the MapReduce task that will use this record reader. The split specifies a * subset of rows from a Kiji table. * @param configuration for the MapReduce job using this record reader. The configuration should * specify the input Kiji table through the configuration variable {@link * KijiConfKeys#KIJI_INPUT_TABLE_URI} and a serialized {@link KijiDataRequest} through the * configuration variable {@link KijiConfKeys#KIJI_INPUT_DATA_REQUEST}. * @throws IOException if there is a problem constructing the record reader and opening the * resources it requires. */ public KijiTableRecordReader(InputSplit split, Configuration configuration) throws IOException { // Get data request from the job configuration. final String dataRequestB64 = configuration.get(KijiConfKeys.KIJI_INPUT_DATA_REQUEST); Preconditions.checkNotNull(dataRequestB64, "Missing data request in job configuration."); final byte[] dataRequestBytes = Base64.decodeBase64(Bytes.toBytes(dataRequestB64)); mDataRequest = (KijiDataRequest) SerializationUtils.deserialize(dataRequestBytes); // Open connections to Kiji. assert split instanceof KijiTableSplit; mSplit = (KijiTableSplit) split; final KijiURI inputURI = KijiURI.newBuilder(configuration.get(KijiConfKeys.KIJI_INPUT_TABLE_URI)).build(); final KijiScannerOptions scannerOptions = new KijiScannerOptions() .setStartRow(new HBaseEntityId(mSplit.getStartRow())) .setStopRow(new HBaseEntityId(mSplit.getEndRow())); mKiji = Kiji.Factory.open(inputURI, configuration); mTable = mKiji.openTable(inputURI.getTable()); mReader = mTable.openTableReader(); mScanner = mReader.getScanner(mDataRequest, scannerOptions); mIterator = mScanner.iterator(); }