Beispiel #1
0
 /**
  * Writes the trailing sections of the Ism file: the encoded bloom filter, then the buffered
  * index, then a footer recording the offsets of those two sections and the number of keys
  * written.
  *
  * @throws IOException if an underlying write fails
  */
 private void finish() throws IOException {
   // The bloom filter starts at the current output position.
   final long bloomFilterOffset = out.getCount();
   ScalableBloomFilterCoder.of().encode(bloomFilterBuilder.build(), out, Context.NESTED);

   // The whole index buffer is copied to the output directly after the bloom filter.
   final long indexOffset = out.getCount();
   indexOut.writeTo(out, 0, indexOut.size());

   // The footer is written last and points back at the two sections emitted above.
   Footer footer = new Footer(indexOffset, bloomFilterOffset, numberOfKeysWritten);
   FooterCoder.of().encode(footer, out, Coder.Context.OUTER);
 }
Beispiel #2
0
 /**
  * Computes the length of the common prefix of the previous key and the given key and performs
  * a key order check: the key currently being inserted must be strictly greater than the
  * previous key when compared byte by byte as unsigned values, with a shorter key ordering
  * before any longer key it is a prefix of.
  *
  * @param prevKeyBytes the previously written key
  * @param currentKeyBytes the key being written now
  * @return the number of leading bytes shared by both keys
  * @throws IllegalArgumentException if the current key is not strictly greater than the
  *     previous key
  */
 private int commonPrefixLength(
     RandomAccessData prevKeyBytes, RandomAccessData currentKeyBytes) {
   byte[] prevKey = prevKeyBytes.array();
   byte[] currentKey = currentKeyBytes.array();
   // Only the first size() bytes of each array are compared.
   int minBytesLen = Math.min(prevKeyBytes.size(), currentKeyBytes.size());
   for (int i = 0; i < minBytesLen; i++) {
     // unsigned comparison
     int b1 = prevKey[i] & 0xFF;
     int b2 = currentKey[i] & 0xFF;
     if (b1 > b2) {
       throw new IllegalArgumentException(
           keyOrderViolationPrefix(prevKeyBytes, currentKeyBytes)
               + b1
               + " <= "
               + b2
               + " at position "
               + i
               + ".");
     }
     if (b1 != b2) {
       // First differing byte, and the current key is larger here: keys are in order.
       return i;
     }
   }
   // Every compared byte matched, so the current key is strictly greater only when it is
   // strictly longer. Equal lengths mean a duplicate key, which is rejected as well.
   if (prevKeyBytes.size() >= currentKeyBytes.size()) {
     throw new IllegalArgumentException(
         keyOrderViolationPrefix(prevKeyBytes, currentKeyBytes)
             + "length of previous key "
             + prevKeyBytes.size()
             + " < "
             + currentKeyBytes.size()
             + " to current key.");
   }
   return minBytesLen;
 }

 /**
  * Builds the shared leading part of the key-order error message, ending in {@code "Expected "}.
  * Only called when a violation is being reported, so the keys are not stringified on the
  * normal path.
  */
 private String keyOrderViolationPrefix(
     RandomAccessData prevKeyBytes, RandomAccessData currentKeyBytes) {
   return IsmSinkWriter.class.getSimpleName()
       + " expects keys to be written in strictly increasing order but was given "
       + prevKeyBytes
       + " as the previous key and "
       + currentKeyBytes
       + " as the current key. Expected ";
 }
Beispiel #3
0
    /**
     * Appends one key-value record to the output and, once more than {@code MAX_BLOCK_SIZE}
     * has been emitted since the last indexed position, appends an entry for this key to the
     * index buffer.
     *
     * <p>A record consists of a {@code KeyPrefix} (shared and unshared key byte counts relative
     * to the previously written key), the unshared key bytes, and the encoded value. The key is
     * also added to the bloom filter.
     *
     * @param windowedValue the element to write; only its {@code KV} value is used
     * @return the difference in {@code out.getCount()} across this call, i.e. the size of the
     *     record written to the main output
     * @throws IOException if encoding or an underlying write fails
     * @throws IllegalArgumentException if the key is not strictly greater than the previously
     *     written key
     */
    @Override
    public long add(WindowedValue<KV<K, V>> windowedValue) throws IOException {
      // The windowed portion of the value is ignored.
      KV<K, V> value = windowedValue.getValue();

      long currentPosition = out.getCount();
      // Marshal the key, compute the common prefix length
      keyCoder.encode(value.getKey(), currentKeyBytes.asOutputStream(), Context.OUTER);
      int keySize = currentKeyBytes.size();
      int sharedKeySize = commonPrefixLength(lastKeyBytes, currentKeyBytes);

      // Put key-value mapping record into block buffer
      int unsharedKeySize = keySize - sharedKeySize;
      KeyPrefix keyPrefix = new KeyPrefix(sharedKeySize, unsharedKeySize);
      KeyPrefixCoder.of().encode(keyPrefix, out, Context.NESTED);
      currentKeyBytes.writeTo(out, sharedKeySize, unsharedKeySize);
      valueCoder.encode(value.getValue(), out, Context.NESTED);

      // If we have emitted enough bytes to add another entry into the index.
      if (lastIndexedPosition + MAX_BLOCK_SIZE < out.getCount()) {
        // Index keys are prefix-compressed against the previous index key, not the previous
        // record key.
        int sharedIndexKeySize = commonPrefixLength(lastIndexKeyBytes, currentKeyBytes);
        int unsharedIndexKeySize = keySize - sharedIndexKeySize;
        KeyPrefix indexKeyPrefix = new KeyPrefix(sharedIndexKeySize, unsharedIndexKeySize);
        KeyPrefixCoder.of().encode(indexKeyPrefix, indexOut.asOutputStream(), Context.NESTED);
        currentKeyBytes.writeTo(
            indexOut.asOutputStream(), sharedIndexKeySize, unsharedIndexKeySize);
        // The index entry points at the start of the record written above.
        VarInt.encode(currentPosition, indexOut.asOutputStream());
        lastIndexKeyBytes.resetTo(0);
        currentKeyBytes.writeTo(lastIndexKeyBytes.asOutputStream(), 0, currentKeyBytes.size());
        // Advance the block start to the position just indexed. Without this the threshold
        // check above stays true for every later record and each one gets its own index entry.
        lastIndexedPosition = currentPosition;
      }

      // Update the bloom filter
      bloomFilterBuilder.put(currentKeyBytes.array(), 0, currentKeyBytes.size());

      // Swap the current key and the previous key, resetting the previous key to be re-used.
      RandomAccessData temp = lastKeyBytes;
      lastKeyBytes = currentKeyBytes;
      currentKeyBytes = temp;
      currentKeyBytes.resetTo(0);

      numberOfKeysWritten += 1;
      return out.getCount() - currentPosition;
    }