/**
 * Finalizes the Ism file by appending its trailing sections.
 *
 * <p>Writes, in order: the encoded bloom filter, the contents accumulated in {@code indexOut},
 * and a footer recording the offset at which the index starts, the offset at which the bloom
 * filter starts, and the number of keys written.
 *
 * @throws IOException if an underlying write fails
 */
private void finish() throws IOException {
  // Offset of the bloom filter section; must be captured before the filter is appended.
  long bloomFilterOffset = out.getCount();
  ScalableBloomFilterCoder.of().encode(bloomFilterBuilder.build(), out, Context.NESTED);

  // Offset of the index section; the index follows the bloom filter directly.
  long indexOffset = out.getCount();
  indexOut.writeTo(out, 0, indexOut.size());

  // The footer is the last thing written and is encoded in the outer context.
  Footer footer = new Footer(indexOffset, bloomFilterOffset, numberOfKeysWritten);
  FooterCoder.of().encode(footer, out, Coder.Context.OUTER);
}
/** * Compute the length of the common prefix of the previous key and the given key and perform a * key order check. We check that the currently being inserted key is strictly greater than the * previous key. */ private int commonPrefixLength( RandomAccessData prevKeyBytes, RandomAccessData currentKeyBytes) { byte[] prevKey = prevKeyBytes.array(); byte[] currentKey = currentKeyBytes.array(); int minBytesLen = Math.min(prevKeyBytes.size(), currentKeyBytes.size()); for (int i = 0; i < minBytesLen; i++) { // unsigned comparison int b1 = prevKey[i] & 0xFF; int b2 = currentKey[i] & 0xFF; if (b1 > b2) { throw new IllegalArgumentException( IsmSinkWriter.class.getSimpleName() + " expects keys to be written in strictly increasing order but was given " + prevKeyBytes + " as the previous key and " + currentKeyBytes + " as the current key. Expected " + b1 + " <= " + b2 + " at position " + i + "."); } if (b1 != b2) { return i; } } if (prevKeyBytes.size() >= currentKeyBytes.size()) { throw new IllegalArgumentException( IsmSinkWriter.class.getSimpleName() + " expects keys to be written in strictly increasing order but was given " + prevKeyBytes + " as the previous key and " + currentKeyBytes + " as the current key. Expected length of previous key " + prevKeyBytes.size() + " <= " + currentKeyBytes.size() + " to current key."); } return minBytesLen; }
@Override public long add(WindowedValue<KV<K, V>> windowedValue) throws IOException { // The windowed portion of the value is ignored. KV<K, V> value = windowedValue.getValue(); long currentPosition = out.getCount(); // Marshal the key, compute the common prefix length keyCoder.encode(value.getKey(), currentKeyBytes.asOutputStream(), Context.OUTER); int keySize = currentKeyBytes.size(); int sharedKeySize = commonPrefixLength(lastKeyBytes, currentKeyBytes); // Put key-value mapping record into block buffer int unsharedKeySize = keySize - sharedKeySize; KeyPrefix keyPrefix = new KeyPrefix(sharedKeySize, unsharedKeySize); KeyPrefixCoder.of().encode(keyPrefix, out, Context.NESTED); currentKeyBytes.writeTo(out, sharedKeySize, unsharedKeySize); valueCoder.encode(value.getValue(), out, Context.NESTED); // If we have emitted enough bytes to add another entry into the index. if (lastIndexedPosition + MAX_BLOCK_SIZE < out.getCount()) { int sharedIndexKeySize = commonPrefixLength(lastIndexKeyBytes, currentKeyBytes); int unsharedIndexKeySize = keySize - sharedIndexKeySize; KeyPrefix indexKeyPrefix = new KeyPrefix(sharedIndexKeySize, unsharedIndexKeySize); KeyPrefixCoder.of().encode(indexKeyPrefix, indexOut.asOutputStream(), Context.NESTED); currentKeyBytes.writeTo( indexOut.asOutputStream(), sharedIndexKeySize, unsharedIndexKeySize); VarInt.encode(currentPosition, indexOut.asOutputStream()); lastIndexKeyBytes.resetTo(0); currentKeyBytes.writeTo(lastIndexKeyBytes.asOutputStream(), 0, currentKeyBytes.size()); } // Update the bloom filter bloomFilterBuilder.put(currentKeyBytes.array(), 0, currentKeyBytes.size()); // Swap the current key and the previous key, resetting the previous key to be re-used. RandomAccessData temp = lastKeyBytes; lastKeyBytes = currentKeyBytes; currentKeyBytes = temp; currentKeyBytes.resetTo(0); numberOfKeysWritten += 1; return out.getCount() - currentPosition; }