/**
 * Writes out a SequenceFile that can be read by TotalOrderPartitioner, containing the split
 * points in startKeys.
 *
 * <p>This method was copied from HFileOutputFormat in hbase-0.90.1-cdh3u0. I had to copy it
 * because it's private.
 *
 * @param conf The job configuration.
 * @param partitionsPath Output path for the SequenceFile.
 * @param startKeys The region start keys to use as the partitions.
 * @throws IOException If there is an error.
 */
private static void writePartitionFile(
    Configuration conf, Path partitionsPath, List<HFileKeyValue> startKeys)
    throws IOException {
  if (startKeys.isEmpty()) {
    throw new IllegalArgumentException("No regions passed");
  }

  // We're generating a list of split points, and we don't ever have keys < the first region
  // (which has an empty start key), so we need to remove it. Otherwise we would end up with
  // an empty reducer with index 0.
  TreeSet<HFileKeyValue> sorted = new TreeSet<HFileKeyValue>();
  sorted.addAll(startKeys);

  HFileKeyValue first = sorted.first();
  if (0 != first.getRowKey().length) {
    throw new IllegalArgumentException(
        "First region of table should have empty start row key. Instead has: "
        + Bytes.toStringBinary(first.getRowKey()));
  }
  sorted.remove(first);

  // Write the actual file.
  final SequenceFile.Writer writer = KijiMRPlatformBridge.get()
      .newSeqFileWriter(conf, partitionsPath, HFileKeyValue.class, NullWritable.class);
  try {
    for (HFileKeyValue startKey : sorted) {
      writer.append(startKey, NullWritable.get());
    }
  } finally {
    writer.close();
  }
}
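// A minimal sketch (not part of the original class) of how the partition file written above is
// typically consumed: TotalOrderPartitioner is pointed at the SequenceFile so that reducer i
// receives the key range [split[i-1], split[i]). The method name and job/path wiring here are
// illustrative assumptions.
private static void configureTotalOrderPartitioner(
    Job job, Path partitionsPath, List<HFileKeyValue> startKeys) throws IOException {
  writePartitionFile(job.getConfiguration(), partitionsPath, startKeys);
  // Hadoop's TotalOrderPartitioner reads the split points from this file at task setup.
  TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partitionsPath);
  job.setPartitionerClass(TotalOrderPartitioner.class);
}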
/**
 * Generates a list of start keys (one per region). Since we know that the row keys in Kiji are
 * byte strings of length 16, we can reliably split them evenly.
 *
 * @param numRegions The number of regions to generate start keys for.
 * @return A list of start keys with size equal to <code>numRegions</code>.
 */
private static List<HFileKeyValue> generateEvenStartKeys(int numRegions) {
  List<HFileKeyValue> startKeys = new ArrayList<HFileKeyValue>(numRegions);

  // The first key is a special case: it must be empty.
  startKeys.add(HFileKeyValue.createFromRowKey(HConstants.EMPTY_BYTE_ARRAY));

  if (numRegions > 1) {
    byte[][] splitKeys = KijiRowKeySplitter.get().getSplitKeys(numRegions);
    for (byte[] hbaseRowKey : splitKeys) {
      startKeys.add(HFileKeyValue.createFromRowKey(hbaseRowKey));
    }
  }
  return startKeys;
}
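// Hypothetical sketch of how even split points can be computed over a uniformly hashed
// keyspace, analogous to what KijiRowKeySplitter.getSplitKeys(numRegions) provides above.
// Assumption: row keys are uniformly distributed hashes, so dividing the range of the two
// most significant bytes into equal intervals yields balanced regions.
private static byte[][] computeEvenSplits(int numRegions) {
  final byte[][] splits = new byte[numRegions - 1][];
  for (int i = 1; i < numRegions; i++) {
    // Place split i at fraction i/numRegions of the 16-bit prefix space.
    final int boundary = (int) ((0x10000L * i) / numRegions);
    splits[i - 1] = new byte[] { (byte) (boundary >>> 8), (byte) boundary };
  }
  return splits;
}
// For example, computeEvenSplits(4) yields {0x40,0x00}, {0x80,0x00}, {0xC0,0x00}.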
/** {@inheritDoc} */
@Override
public void write(HFileKeyValue entry, NullWritable unused) throws IOException {
  final KeyValue kv = entry.getKeyValue();
  kv.updateLatestStamp(mLatestTimestampBytes);

  final long recordLength = kv.getLength();
  if (mCurrentHFileSize + recordLength >= mMaxFileSizeBytes) {
    // We can't fit this record in the current HFile without exceeding the max file size.
    if (Arrays.equals(mCurrentRow, kv.getRow())) {
      // But we're still adding data for a single row, so we can't close this HFile yet.
      LOG.debug("Reached max HFile size, but waiting to finish this row before closing.");
    } else {
      // Close it and open a new one.
      closeWriter(mWriter);
      mWriter = openNewWriter();
    }
  }

  mWriter.append(kv);
  mTimeRangeTracker.includeTimestamp(kv);
  mCurrentHFileSize += recordLength;

  // Remember the row so we know when we are transitioning.
  mCurrentRow = kv.getRow();
}
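// Minimal sketch (illustrative, not from the original class) of the timestamp fix-up applied
// by kv.updateLatestStamp(mLatestTimestampBytes) above: only cells written with
// HConstants.LATEST_TIMESTAMP are rewritten to the job's timestamp; explicitly timestamped
// cells pass through unchanged. Variable names here are assumptions.
final byte[] now = Bytes.toBytes(System.currentTimeMillis());
final KeyValue implicitKv = new KeyValue(
    Bytes.toBytes("row"), Bytes.toBytes("family"), Bytes.toBytes("qualifier"),
    HConstants.LATEST_TIMESTAMP, Bytes.toBytes("value"));
implicitKv.updateLatestStamp(now);  // timestamp becomes 'now'
final KeyValue explicitKv = new KeyValue(
    Bytes.toBytes("row"), Bytes.toBytes("family"), Bytes.toBytes("qualifier"),
    1234567890L, Bytes.toBytes("value"));
explicitKv.updateLatestStamp(now);  // timestamp remains 1234567890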
/**
 * Generates a split for a given table.
 *
 * @param tableURI URI of the Kiji table to split.
 * @param nsplits Number of splits.
 * @param conf Base Hadoop configuration used to open the Kiji instance.
 * @return a list of split start keys, as HFileKeyValue (with no value, just the keys).
 * @throws IOException on I/O error.
 */
private static List<HFileKeyValue> makeTableKeySplit(
    KijiURI tableURI, int nsplits, Configuration conf) throws IOException {
  final Kiji kiji = Kiji.Factory.open(tableURI, conf);
  try {
    final KijiTable table = kiji.openTable(tableURI.getTable());
    try {
      if (NUM_SPLITS_AUTO == nsplits) {
        final List<HFileKeyValue> startKeys = Lists.newArrayList();
        for (KijiRegion region : table.getRegions()) {
          startKeys.add(HFileKeyValue.createFromRowKey(region.getStartKey()));
        }
        return startKeys;

      } else {
        switch (KijiTableLayout.getEncoding(table.getLayout().getDesc().getKeysFormat())) {
          case RAW: {
            // The user has explicitly specified how many HFiles to create, but this is not
            // possible when row key hashing is disabled.
            throw new JobConfigurationException(String.format(
                "Table '%s' has row key hashing disabled, so the number of HFile splits must be "
                + "determined by the number of HRegions in the HTable. "
                + "Use an HFileMapReduceJobOutput constructor that enables auto splitting.",
                table.getName()));
          }
          case FORMATTED:
          case HASH:
          case HASH_PREFIX: {
            // These encodings are supported:
            break;
          }
          default:
            throw new RuntimeException("Unhandled row key encoding: "
                + KijiTableLayout.getEncoding(table.getLayout().getDesc().getKeysFormat()));
        }
        return generateEvenStartKeys(nsplits);
      }
    } finally {
      ResourceUtils.releaseOrLog(table);
    }
  } finally {
    ResourceUtils.releaseOrLog(kiji);
  }
}
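// Illustrative sketch (the URI and path are assumptions) tying the pieces together: compute
// one split per existing region of the table, then persist the splits for
// TotalOrderPartitioner via writePartitionFile().
final KijiURI tableURI = KijiURI.newBuilder("kiji://.env/default/mytable").build();
final List<HFileKeyValue> splits = makeTableKeySplit(tableURI, NUM_SPLITS_AUTO, conf);
writePartitionFile(conf, new Path("/tmp/partitions.seq"), splits);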
/** {@inheritDoc} */
@Override
public void write(HFileKeyValue entry, NullWritable unused) throws IOException {
  final ColumnId lgId = ColumnId.fromByteArray(entry.getFamily());
  getWriter(lgId).write(entry, unused);
}
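// Hypothetical sketch of the per-locality-group dispatch assumed by getWriter(lgId) above:
// one underlying record writer per locality group, created lazily and cached by ColumnId.
// The field name and the openWriterForLocalityGroup() factory are assumptions.
private final Map<ColumnId, RecordWriter<HFileKeyValue, NullWritable>> mWriters =
    new HashMap<ColumnId, RecordWriter<HFileKeyValue, NullWritable>>();

private RecordWriter<HFileKeyValue, NullWritable> getWriter(ColumnId lgId) throws IOException {
  RecordWriter<HFileKeyValue, NullWritable> writer = mWriters.get(lgId);
  if (writer == null) {
    writer = openWriterForLocalityGroup(lgId);  // hypothetical factory method
    mWriters.put(lgId, writer);
  }
  return writer;
}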