/**
 * Checks every per-key estimate against the configured sample size.
 *
 * <p>For each pair, the key and the value are handed to {@code verifyEstimate} together with
 * {@code sampleSize}.
 *
 * @param estimatePerKey the (key, estimate) pairs to verify
 * @return always {@code null}
 */
@Override
public Void apply(Iterable<KV<Long, Long>> estimatePerKey) {
  for (KV<Long, Long> keyedEstimate : estimatePerKey) {
    Long key = keyedEstimate.getKey();
    Long estimate = keyedEstimate.getValue();
    verifyEstimate(key, sampleSize, estimate);
  }
  return null;
}
/**
 * Converts a keyed accumulator into a keyed output value.
 *
 * <p>Reads the current (key, accumulator) element, asks {@code combineFn} to extract the output
 * for that key and accumulator, and emits the result paired with the same key.
 *
 * @param c the context supplying the input element and receiving the output
 */
@Override
public void processElement(ProcessContext c) {
  final KV<K, AccumT> keyedAccumulator = c.element();
  final K elementKey = keyedAccumulator.getKey();
  final AccumT accumulator = keyedAccumulator.getValue();

  final OutputT extracted = this.combineFn.extractOutput(elementKey, accumulator);
  c.output(KV.of(elementKey, extracted));
}
@Override public long add(WindowedValue<KV<K, V>> windowedValue) throws IOException { // The windowed portion of the value is ignored. KV<K, V> value = windowedValue.getValue(); long currentPosition = out.getCount(); // Marshal the key, compute the common prefix length keyCoder.encode(value.getKey(), currentKeyBytes.asOutputStream(), Context.OUTER); int keySize = currentKeyBytes.size(); int sharedKeySize = commonPrefixLength(lastKeyBytes, currentKeyBytes); // Put key-value mapping record into block buffer int unsharedKeySize = keySize - sharedKeySize; KeyPrefix keyPrefix = new KeyPrefix(sharedKeySize, unsharedKeySize); KeyPrefixCoder.of().encode(keyPrefix, out, Context.NESTED); currentKeyBytes.writeTo(out, sharedKeySize, unsharedKeySize); valueCoder.encode(value.getValue(), out, Context.NESTED); // If we have emitted enough bytes to add another entry into the index. if (lastIndexedPosition + MAX_BLOCK_SIZE < out.getCount()) { int sharedIndexKeySize = commonPrefixLength(lastIndexKeyBytes, currentKeyBytes); int unsharedIndexKeySize = keySize - sharedIndexKeySize; KeyPrefix indexKeyPrefix = new KeyPrefix(sharedIndexKeySize, unsharedIndexKeySize); KeyPrefixCoder.of().encode(indexKeyPrefix, indexOut.asOutputStream(), Context.NESTED); currentKeyBytes.writeTo( indexOut.asOutputStream(), sharedIndexKeySize, unsharedIndexKeySize); VarInt.encode(currentPosition, indexOut.asOutputStream()); lastIndexKeyBytes.resetTo(0); currentKeyBytes.writeTo(lastIndexKeyBytes.asOutputStream(), 0, currentKeyBytes.size()); } // Update the bloom filter bloomFilterBuilder.put(currentKeyBytes.array(), 0, currentKeyBytes.size()); // Swap the current key and the previous key, resetting the previous key to be re-used. RandomAccessData temp = lastKeyBytes; lastKeyBytes = currentKeyBytes; currentKeyBytes = temp; currentKeyBytes.resetTo(0); numberOfKeysWritten += 1; return out.getCount() - currentPosition; }
/**
 * Re-emits the input element with its value wrapped in a {@code RawUnionValue}.
 *
 * <p>The key is passed through unchanged; the value is paired with {@code index} inside the
 * {@code RawUnionValue}.
 *
 * @param c the context supplying the input element and receiving the output
 */
@Override
public void processElement(ProcessContext c) {
  final KV<K, ?> input = c.element();
  final K inputKey = input.getKey();
  final RawUnionValue taggedValue = new RawUnionValue(index, input.getValue());
  c.output(KV.of(inputKey, taggedValue));
}