/**
   * Computes the input splits for a MapReduce job that reads from a Kiji table, producing exactly
   * one split for each region reported by the table.
   *
   * @param configuration of the job that will consume the splits. It must carry the URI of the
   *     input Kiji table under the key {@link KijiConfKeys#KIJI_INPUT_TABLE_URI}; a missing value
   *     is rejected by a null check.
   * @param numSplits hint from the framework about the desired number of splits. It is not used:
   *     the number of splits always equals the number of regions.
   * @return the input splits, one per region, in the order the table reports its regions.
   * @throws IOException if an I/O error occurs while opening the Kiji instance or table, or while
   *     listing the regions of the table.
   */
  @Override
  public InputSplit[] getSplits(JobConf configuration, int numSplits) throws IOException {
    final String tableUriText = configuration.get(KijiConfKeys.KIJI_INPUT_TABLE_URI);
    Preconditions.checkNotNull(tableUriText);
    final KijiURI tableUri = KijiURI.newBuilder(tableUriText).build();

    final Kiji kijiInstance = Kiji.Factory.open(tableUri, configuration);
    try {
      final KijiTable inputTable = kijiInstance.openTable(tableUri.getTable());
      try {
        final HTableInterface hbaseTable = HBaseKijiTable.downcast(inputTable).getHTable();
        final List<InputSplit> perRegionSplits = Lists.newArrayList();

        for (KijiRegion region : inputTable.getRegions()) {
          final byte[] firstKey = region.getStartKey();

          // TODO(KIJIMR-65): For now pick the first available location (ie. region server), if any.
          final String host;
          if (region.getLocations().isEmpty()) {
            // No known location: the split is created without a location hint.
            host = null;
          } else {
            host = region.getLocations().iterator().next();
          }

          perRegionSplits.add(
              new KijiTableSplit(
                  new TableSplit(hbaseTable.getTableName(), firstKey, region.getEndKey(), host)));
        }

        return perRegionSplits.toArray(new InputSplit[0]);
      } finally {
        // Release the table before the Kiji instance it was opened from.
        inputTable.release();
      }
    } finally {
      kijiInstance.release();
    }
  }
  /**
   * Generates the split start keys for a given table.
   *
   * <p>When {@code nsplits} equals {@code NUM_SPLITS_AUTO}, one start key is emitted per region of
   * the table, taken from the region start keys. Otherwise the row key encoding of the table layout
   * is inspected: RAW row keys are rejected, while FORMATTED, HASH and HASH_PREFIX row keys
   * delegate to {@code generateEvenStartKeys(nsplits)}.
   *
   * @param tableURI URI of the Kiji table to split.
   * @param nsplits Number of splits, or {@code NUM_SPLITS_AUTO} to use one split per region.
   * @param conf Base Hadoop configuration used to open the Kiji instance.
   * @return a list of split start keys, as HFileKeyValue (with no value, just the keys).
   * @throws IOException on I/O error.
   * @throws JobConfigurationException if an explicit number of splits is requested for a table
   *     whose row keys use the RAW encoding.
   */
  private static List<HFileKeyValue> makeTableKeySplit(
      KijiURI tableURI, int nsplits, Configuration conf) throws IOException {
    final Kiji kiji = Kiji.Factory.open(tableURI, conf);
    try {
      final KijiTable table = kiji.openTable(tableURI.getTable());
      try {
        if (NUM_SPLITS_AUTO == nsplits) {
          // Automatic mode: mirror the current region boundaries of the table.
          final List<HFileKeyValue> startKeys = Lists.newArrayList();
          for (KijiRegion region : table.getRegions()) {
            startKeys.add(HFileKeyValue.createFromRowKey(region.getStartKey()));
          }
          return startKeys;
        }

        // An explicit split count was requested: validate the row key encoding first.
        switch (KijiTableLayout.getEncoding(table.getLayout().getDesc().getKeysFormat())) {
          case RAW:
            {
              // The user has explicitly specified how many HFiles to create, but this is not
              // possible when row key hashing is disabled.
              throw new JobConfigurationException(
                  String.format(
                      "Table '%s' has row key hashing disabled, so the number of HFile splits "
                          + "must be determined by the number of HRegions in the HTable. "
                          + "Use an HFileMapReduceJobOutput constructor that enables auto splitting.",
                      table.getName()));
            }
          case FORMATTED:
          case HASH:
          case HASH_PREFIX:
            {
              // Those cases are supported:
              break;
            }
          default:
            throw new RuntimeException(
                "Unhandled row key encoding: "
                    + KijiTableLayout.getEncoding(table.getLayout().getDesc().getKeysFormat()));
        }
        return generateEvenStartKeys(nsplits);
      } finally {
        ResourceUtils.releaseOrLog(table);
      }
    } finally {
      ResourceUtils.releaseOrLog(kiji);
    }
  }
    /**
     * Initializes a new table-wide record writer.
     *
     * <p>The constructor reads the latest timestamp from the configuration (defaulting to the
     * current time), resolves the default work file and its file system, and briefly opens the
     * output Kiji table to capture its layout. The Kiji instance and table are released before
     * the constructor returns, including when opening the table or reading its layout fails.
     *
     * @param oformat KijiHFileOutputFormat this writer is built from.
     * @param context Context of the task. Must not be null.
     * @throws IOException on I/O error.
     */
    public TableRecordWriter(KijiHFileOutputFormat oformat, TaskAttemptContext context)
        throws IOException {
      mContext = Preconditions.checkNotNull(context);
      mConf = mContext.getConfiguration();
      mLatestTimestamp = mConf.getLong(CONF_LATEST_TIMESTAMP, System.currentTimeMillis());
      mLatestTimestampBytes = toBytes(mLatestTimestamp);

      mOutputDir = oformat.getDefaultWorkFile(mContext, OUTPUT_EXTENSION);
      mFileSystem = mOutputDir.getFileSystem(mConf);

      mTableURI = KijiURI.newBuilder(mConf.get(KijiConfKeys.KIJI_OUTPUT_TABLE_URI)).build();

      // Only the layout is needed from the table. Release both resources in reverse order of
      // acquisition, even if a step fails, so a failed construction does not leak them.
      final Kiji kiji = Kiji.Factory.open(mTableURI, mConf);
      try {
        final KijiTable table = kiji.openTable(mTableURI.getTable());
        try {
          mLayout = table.getLayout();
        } finally {
          ResourceUtils.releaseOrLog(table);
        }
      } finally {
        ResourceUtils.releaseOrLog(kiji);
      }
    }
    /**
     * Builds a record reader that scans the rows of a Kiji table covered by one input split. The
     * rows read are bounded by the start and end rows of the split, and the cells read are those
     * selected by the data request stored in the job configuration.
     *
     * @param split describing the range of rows to scan. It is cast to {@link KijiTableSplit}.
     * @param configuration of the MapReduce job. It must provide the input table URI under {@link
     *     KijiConfKeys#KIJI_INPUT_TABLE_URI} and a Base64-encoded, serialized {@link
     *     KijiDataRequest} under {@link KijiConfKeys#KIJI_INPUT_DATA_REQUEST}.
     * @throws IOException if the Kiji instance, table, reader or scanner cannot be opened.
     */
    public KijiTableRecordReader(InputSplit split, Configuration configuration) throws IOException {
      // Recover the data request: Base64 text -> raw bytes -> deserialized object.
      final String encodedRequest = configuration.get(KijiConfKeys.KIJI_INPUT_DATA_REQUEST);
      Preconditions.checkNotNull(encodedRequest, "Missing data request in job configuration.");
      final byte[] serializedRequest = Base64.decodeBase64(Bytes.toBytes(encodedRequest));
      mDataRequest = (KijiDataRequest) SerializationUtils.deserialize(serializedRequest);

      // The split must be a Kiji table split; it supplies the row range to scan.
      assert split instanceof KijiTableSplit;
      mSplit = (KijiTableSplit) split;

      final String tableUriText = configuration.get(KijiConfKeys.KIJI_INPUT_TABLE_URI);
      final KijiURI tableUri = KijiURI.newBuilder(tableUriText).build();

      // Restrict the scan to the rows between the split's start row and end row.
      final KijiScannerOptions rowRange = new KijiScannerOptions();
      rowRange
          .setStartRow(new HBaseEntityId(mSplit.getStartRow()))
          .setStopRow(new HBaseEntityId(mSplit.getEndRow()));

      // Open the chain of resources: instance -> table -> reader -> scanner -> iterator.
      // NOTE(review): nothing opened here is released if a later step throws; presumably the
      // caller cannot clean up a partially constructed reader — confirm and consider a guard.
      mKiji = Kiji.Factory.open(tableUri, configuration);
      mTable = mKiji.openTable(tableUri.getTable());
      mReader = mTable.openTableReader();
      mScanner = mReader.getScanner(mDataRequest, rowRange);
      mIterator = mScanner.iterator();
    }
// Example #5
// 0
  /**
   * Run the entry addition system. Asks the user for values for all fields and then writes them
   * to a single row of the Kiji table named by {@code TABLE_NAME}.
   *
   * <p>The row is keyed by the first and last name joined with a comma, and all fields are written
   * with the same timestamp. The apartment and second address line are optional: an empty answer
   * leaves the apartment unset and stores {@code null} as the second address line.
   *
   * <p>The console prompt and all Kiji resources are released on every exit path, including when
   * prompting itself fails.
   *
   * @param args Command line arguments; this is expected to be empty.
   * @return Exit status code for the application; 0 indicates success, 1 indicates that the Kiji
   *     table could not be found.
   * @throws IOException If an error contacting Kiji occurs.
   * @throws InterruptedException If the process is interrupted while performing I/O.
   * @throws NumberFormatException If the zip code entered is not a valid integer.
   */
  @Override
  public int run(String[] args) throws IOException, InterruptedException {
    final ConsolePrompt console = new ConsolePrompt();

    Kiji kiji = null;
    KijiTable table = null;
    KijiTableWriter writer = null;
    try {
      // Interactively prompt the user for the record fields from the console.
      // Prompting happens inside the try block so the console is closed even if it fails.
      final String first = console.readLine("First name: ");
      final String last = console.readLine("Last name: ");
      final String email = console.readLine("Email address: ");
      final String telephone = console.readLine("Telephone: ");

      final Address addr = new Address();
      addr.setAddr1(console.readLine("Address line 1: "));

      // Optional: apartment.
      final String aptNumStr = console.readLine("Apartment: ");
      if (!aptNumStr.isEmpty()) {
        addr.setApt(aptNumStr);
      }

      // Optional: address line 2. An empty answer is stored as null.
      final String addr2Str = console.readLine("Address line 2: ");
      addr.setAddr2(addr2Str.isEmpty() ? null : addr2Str);

      addr.setCity(console.readLine("City: "));
      addr.setState(console.readLine("State: "));
      addr.setZip(Integer.valueOf(console.readLine("Zip: ")));

      // Load HBase configuration before connecting to Kiji.
      setConf(HBaseConfiguration.addHbaseResources(getConf()));

      // Connect to Kiji and open the table.
      kiji =
          Kiji.Factory.open(
              new KijiConfiguration(getConf(), KijiConfiguration.DEFAULT_INSTANCE_NAME));
      table = kiji.openTable(TABLE_NAME);
      writer = table.openTableWriter();

      // Create a row ID with the first and last name.
      final EntityId user = table.getEntityId(first + "," + last);

      // Write the record fields to appropriate table columns in the row.
      // The column names are specified as constants in the Fields.java class.
      final long timestamp = System.currentTimeMillis();
      writer.put(user, Fields.INFO_FAMILY, Fields.FIRST_NAME, timestamp, first);
      writer.put(user, Fields.INFO_FAMILY, Fields.LAST_NAME, timestamp, last);
      writer.put(user, Fields.INFO_FAMILY, Fields.EMAIL, timestamp, email);
      writer.put(user, Fields.INFO_FAMILY, Fields.TELEPHONE, timestamp, telephone);
      writer.put(user, Fields.INFO_FAMILY, Fields.ADDRESS, timestamp, addr);
    } catch (KijiTableNotFoundException e) {
      System.out.println("Could not find Kiji table: " + TABLE_NAME);
      return 1;
    } finally {
      // Safely free up resources by closing in reverse order.
      IOUtils.closeQuietly(writer);
      IOUtils.closeQuietly(table);
      ReferenceCountableUtils.releaseQuietly(kiji);
      IOUtils.closeQuietly(console);
    }

    return 0;
  }