Example #1
  private <T> T populateFromRow(
      EntitySpec<T> spec, T entity, long startTime, long endTime, Object... entityIdComponents)
      throws IOException {

    // TODO: Use a pool of tables and/or table readers
    final KijiTable table = mKiji.openTable(spec.getTableName());
    try {
      final KijiTableReader reader = table.openTableReader();
      try {
        final KijiDataRequestBuilder builder = KijiDataRequest.builder();
        builder.withTimeRange(startTime, endTime);
        spec.populateColumnRequests(builder);
        final KijiDataRequest dataRequest = builder.build();
        final EntityId entityId = table.getEntityId(entityIdComponents);
        final KijiRowData row = reader.get(entityId, dataRequest);

        try {
          return spec.populateEntityFromRow(entity, row);
        } catch (IllegalAccessException iae) {
          throw new RuntimeException(iae);
        }

      } finally {
        reader.close();
      }
    } finally {
      table.release();
    }
  }
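
A caller supplies the EntitySpec, a fresh entity instance, a time range, and the components of the row's entity ID. A hypothetical invocation might look like the sketch below; mUserSpec, User, and the row key are illustrative assumptions, not names from the snippet above.

// Hypothetical usage sketch for populateFromRow (assumed names: mUserSpec, User, "user-123").
final User user = populateFromRow(
    mUserSpec,        // EntitySpec<User> describing the table and column mappings
    new User(),       // entity instance to fill from the row
    0L,               // minimum timestamp, inclusive
    Long.MAX_VALUE,   // maximum timestamp, exclusive
    "user-123");      // entity ID component(s) identifying the row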

Example #2
  @Test
  public void testBuilder() throws Exception {
    final KijiTableLayout layout =
        KijiTableLayout.newLayout(KijiTableLayouts.getLayout(KijiTableLayouts.SIMPLE));

    final Kiji kiji =
        new InstanceBuilder()
            .withTable("table", layout)
            .withRow("row1")
            .withFamily("family")
            .withQualifier("column")
            .withValue(1, "foo1")
            .withValue(2, "foo2")
            .withRow("row2")
            .withFamily("family")
            .withQualifier("column")
            .withValue(100, "foo3")
            .build();

    final KijiTable table = kiji.openTable("table");
    final KijiTableReader reader = table.openTableReader();

    // Verify the first row.
    final KijiDataRequest req = KijiDataRequest.create("family", "column");
    final KijiRowData row1 = reader.get(table.getEntityId("row1"), req);
    assertEquals("foo2", row1.getValue("family", "column", 2).toString());

    // Verify the second row.
    final KijiRowData row2 = reader.get(table.getEntityId("row2"), req);
    assertEquals("foo3", row2.getValue("family", "column", 100).toString());

    ResourceUtils.closeOrLog(reader);
    ResourceUtils.releaseOrLog(table);
    ResourceUtils.releaseOrLog(kiji);
  }
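
Note that KijiDataRequest.create requests only the most recent cell version by default, which is why the test asserts on "foo2" and "foo3". To read the older timestamp-1 cell of row1, a request with a larger maxVersions is needed; a minimal sketch, reusing the test's table and reader locals:

// Sketch: request both versions so the timestamp-1 cell becomes retrievable.
final KijiDataRequestBuilder builder = KijiDataRequest.builder();
builder.newColumnsDef().withMaxVersions(2).add("family", "column");
final KijiRowData bothVersions = reader.get(table.getEntityId("row1"), builder.build());
assertEquals("foo1", bothVersions.getValue("family", "column", 1).toString());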

Example #3
  @Test
  public void testJSONBulkImporter() throws Exception {
    // Prepare input file:
    File inputFile = File.createTempFile("TestJSONImportInput", ".txt", getLocalTempDir());
    TestingResources.writeTextFile(
        inputFile, TestingResources.get(BulkImporterTestUtils.JSON_IMPORT_DATA));

    Configuration conf = getConf();
    conf.set(
        DescribedInputTextBulkImporter.CONF_FILE,
        BulkImporterTestUtils.localResource(BulkImporterTestUtils.FOO_IMPORT_DESCRIPTOR));

    // Run the bulk-import:
    final KijiMapReduceJob job =
        KijiBulkImportJobBuilder.create()
            .withConf(conf)
            .withBulkImporter(JSONBulkImporter.class)
            .withInput(MapReduceJobInputs.newTextMapReduceJobInput(new Path(inputFile.toString())))
            .withOutput(new DirectKijiTableMapReduceJobOutput(mTable.getURI()))
            .build();
    assertTrue(job.run());

    final Counters counters = job.getHadoopJob().getCounters();
    assertEquals(
        3, counters.findCounter(JobHistoryCounters.BULKIMPORTER_RECORDS_PROCESSED).getValue());
    assertEquals(
        1, counters.findCounter(JobHistoryCounters.BULKIMPORTER_RECORDS_INCOMPLETE).getValue());
    assertEquals(
        0, counters.findCounter(JobHistoryCounters.BULKIMPORTER_RECORDS_REJECTED).getValue());

    // Validate output:
    final KijiRowScanner scanner = mReader.getScanner(KijiDataRequest.create("info"));
    BulkImporterTestUtils.validateImportedRows(scanner, false);
    scanner.close();
  }
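
If validateImportedRows throws an assertion error, the scanner above is never closed. A slightly more defensive version of the validation step, using only the calls already shown:

// Sketch: guarantee the scanner is closed even when validation fails.
final KijiRowScanner scanner = mReader.getScanner(KijiDataRequest.create("info"));
try {
  BulkImporterTestUtils.validateImportedRows(scanner, false);
} finally {
  scanner.close();
}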

Example #4
    /**
     * Creates a new record reader that scans over a subset of rows from a Kiji table. The record
     * reader will scan over rows in the table specified in the provided input split, subject to row
     * limits specified in the data request serialized into the specified configuration.
     *
     * @param split for the MapReduce task that will use this record reader. The split specifies a
     *     subset of rows from a Kiji table.
     * @param configuration for the MapReduce job using this record reader. The configuration should
     *     specify the input Kiji table through the configuration variable {@link
     *     KijiConfKeys#KIJI_INPUT_TABLE_URI} and a serialized {@link KijiDataRequest} through the
     *     configuration variable {@link KijiConfKeys#KIJI_INPUT_DATA_REQUEST}.
     * @throws IOException if there is a problem constructing the record reader and opening the
     *     resources it requires.
     */
    public KijiTableRecordReader(InputSplit split, Configuration configuration) throws IOException {
      // Get data request from the job configuration.
      final String dataRequestB64 = configuration.get(KijiConfKeys.KIJI_INPUT_DATA_REQUEST);
      Preconditions.checkNotNull(dataRequestB64, "Missing data request in job configuration.");
      final byte[] dataRequestBytes = Base64.decodeBase64(Bytes.toBytes(dataRequestB64));
      mDataRequest = (KijiDataRequest) SerializationUtils.deserialize(dataRequestBytes);

      // Open connections to Kiji.
      assert split instanceof KijiTableSplit;
      mSplit = (KijiTableSplit) split;

      final KijiURI inputURI =
          KijiURI.newBuilder(configuration.get(KijiConfKeys.KIJI_INPUT_TABLE_URI)).build();
      final KijiScannerOptions scannerOptions =
          new KijiScannerOptions()
              .setStartRow(new HBaseEntityId(mSplit.getStartRow()))
              .setStopRow(new HBaseEntityId(mSplit.getEndRow()));
      mKiji = Kiji.Factory.open(inputURI, configuration);
      mTable = mKiji.openTable(inputURI.getTable());
      mReader = mTable.openTableReader();
      mScanner = mReader.getScanner(mDataRequest, scannerOptions);
      mIterator = mScanner.iterator();
    }
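
The constructor expects KIJI_INPUT_DATA_REQUEST to hold a Base64-encoded, Java-serialized KijiDataRequest. A sketch of the matching encode step that job setup code might perform, mirroring the decode above; the setDataRequest helper name is an assumption:

// Sketch: serialize a KijiDataRequest into the configuration so the record
// reader above can decode it. setDataRequest is a hypothetical helper.
private static void setDataRequest(Configuration conf, KijiDataRequest dataRequest) {
  final byte[] serialized = SerializationUtils.serialize(dataRequest);
  conf.set(KijiConfKeys.KIJI_INPUT_DATA_REQUEST, Bytes.toString(Base64.encodeBase64(serialized)));
}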

Example #5
  @After
  public final void teardownTestBulkImporter() throws Exception {
    mReader.close();
    mTable.release();
  }
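
This teardown implies a matching @Before that opened mTable and mReader. A sketch of what that setup typically looks like; the table name and the getKiji() accessor are assumptions:

  @Before
  public final void setupTestBulkImporter() throws Exception {
    // Hypothetical setup mirroring the teardown above; "table" and getKiji()
    // are assumed, not taken from the original snippet.
    mTable = getKiji().openTable("table");
    mReader = mTable.openTableReader();
  }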