コード例 #1
0
  @Test
  public void testJSONBulkImporter() throws Exception {
    // Prepare input file:
    File inputFile = File.createTempFile("TestJSONImportInput", ".txt", getLocalTempDir());
    TestingResources.writeTextFile(
        inputFile, TestingResources.get(BulkImporterTestUtils.JSON_IMPORT_DATA));

    Configuration conf = getConf();
    conf.set(
        DescribedInputTextBulkImporter.CONF_FILE,
        BulkImporterTestUtils.localResource(BulkImporterTestUtils.FOO_IMPORT_DESCRIPTOR));

    // Run the bulk-import:
    final KijiMapReduceJob job =
        KijiBulkImportJobBuilder.create()
            .withConf(conf)
            .withBulkImporter(JSONBulkImporter.class)
            .withInput(MapReduceJobInputs.newTextMapReduceJobInput(new Path(inputFile.toString())))
            .withOutput(new DirectKijiTableMapReduceJobOutput(mTable.getURI()))
            .build();
    assertTrue(job.run());

    final Counters counters = job.getHadoopJob().getCounters();
    assertEquals(
        3, counters.findCounter(JobHistoryCounters.BULKIMPORTER_RECORDS_PROCESSED).getValue());
    assertEquals(
        1, counters.findCounter(JobHistoryCounters.BULKIMPORTER_RECORDS_INCOMPLETE).getValue());
    assertEquals(
        0, counters.findCounter(JobHistoryCounters.BULKIMPORTER_RECORDS_REJECTED).getValue());

    // Validate output:
    final KijiRowScanner scanner = mReader.getScanner(KijiDataRequest.create("info"));
    BulkImporterTestUtils.validateImportedRows(scanner, false);
    scanner.close();
  }
コード例 #2
0
    /**
     * Creates a new record reader that scans over a subset of rows from a Kiji table. The record
     * reader will scan over rows in the table specified in the provided input split, subject to row
     * limits specified in the data request serialized into the specified configuration.
     *
     * @param split for the MapReduce task that will use this record reader. The split specifies a
     *     subset of rows from a Kiji table.
     * @param configuration for the MapReduce job using this record reader. The configuration should
     *     specify the input Kiji table through the configuration variable {@link
     *     KijiConfKeys#KIJI_INPUT_TABLE_URI} and a serialized {@link KijiDataRequest} through the
     *     configuration variable {@link KijiConfKeys#KIJI_INPUT_DATA_REQUEST}.
     * @throws IOException if there is a problem constructing the record reader and opening the
     *     resources it requires.
     */
    public KijiTableRecordReader(InputSplit split, Configuration configuration) throws IOException {
      // Get data request from the job configuration.
      final String dataRequestB64 = configuration.get(KijiConfKeys.KIJI_INPUT_DATA_REQUEST);
      Preconditions.checkNotNull(dataRequestB64, "Missing data request in job configuration.");
      final byte[] dataRequestBytes = Base64.decodeBase64(Bytes.toBytes(dataRequestB64));
      mDataRequest = (KijiDataRequest) SerializationUtils.deserialize(dataRequestBytes);

      // Open connections to Kiji.
      assert split instanceof KijiTableSplit;
      mSplit = (KijiTableSplit) split;

      final KijiURI inputURI =
          KijiURI.newBuilder(configuration.get(KijiConfKeys.KIJI_INPUT_TABLE_URI)).build();
      final KijiScannerOptions scannerOptions =
          new KijiScannerOptions()
              .setStartRow(new HBaseEntityId(mSplit.getStartRow()))
              .setStopRow(new HBaseEntityId(mSplit.getEndRow()));
      mKiji = Kiji.Factory.open(inputURI, configuration);
      mTable = mKiji.openTable(inputURI.getTable());
      mReader = mTable.openTableReader();
      mScanner = mReader.getScanner(mDataRequest, scannerOptions);
      mIterator = mScanner.iterator();
    }