// Funtion checks if current page has enough space to fit the new serialized tuple // If not it flushes the current buffer and gets a new page // TODO: The code does not ensure that pages are optimally packed. // It only tries to fill up the current page as much as possbile, if its // exhausted it requests a new page. Instead it would be nice to load the next page // that could fit the new value. private void ensureSpace(byte[] value) { if (!byteMap.canFit(value)) { // Flush current buffer byteMap.flushBuffer(); // Get next page byteMap = MappedByteBufferMap.newMappedByteBufferMap(thresholdBytes, spillFile); // Create new bloomfilter bFilters.add(BloomFilter.create(Funnels.byteArrayFunnel(), 1000)); } }
/** * Spill a key First we discover if the key has been spilled before and load it into memory: #ref * get() if it was loaded before just replace the old value in the memory page if it was not * loaded before try to store it in the current page alternatively if not enough memory available, * request new page. */ @Override public byte[] put(ImmutableBytesPtr key, byte[] value) { // page in element and replace if present byte[] spilledValue = get(key); if (spilledValue == null) { // Key does not exist yet // Check that currentPage is not full yet and optionally spill to disk ensureSpace(value); addKeyToBloomFilter(byteMap.getCurIndex(), key); } byteMap.putPagedInElement(key, value); return value; }
/**
 * Creates a spill map on top of the given spill file, with one initial page mapped and one
 * initial bloom filter registered.
 *
 * @param file the spill file that pages are requested from
 * @param thresholdBytes the threshold passed along whenever a page is mapped
 * @throws IOException declared by this constructor; presumably raised when the first page
 *     cannot be mapped (the factory's own contract is not visible here)
 */
public SpillMap(SpillFile file, int thresholdBytes) throws IOException {
    this.spillFile = file;
    this.thresholdBytes = thresholdBytes;

    // Start out with a single bloom filter
    this.bFilters = Lists.newArrayList();
    this.bFilters.add(BloomFilter.create(Funnels.byteArrayFunnel(), 1000));

    // Map the first page
    this.byteMap = MappedByteBufferMap.newMappedByteBufferMap(thresholdBytes, file);
}
/** * Get a key from the spillable data structures This conducts a linear search through all active * pages in the current SpillFile Before doing an actual IO on the page, we check its associated * bloomFilter if the key is contained. False positives are possible but compensated for. */ @Override public byte[] get(Object key) { if (!(key instanceof ImmutableBytesPtr)) { // TODO ... work on type safety } ImmutableBytesPtr ikey = (ImmutableBytesPtr) key; byte[] value = null; int bucketIndex = 0; // Iterate over all pages for (int i = 0; i <= spillFile.getMaxPageId(); i++) { // run in loop in case of false positives in bloom filter bucketIndex = isKeyinPage(ikey, i); if (bucketIndex == -1) { // key not contained in current bloom filter continue; } if (bucketIndex != byteMap.getCurIndex()) { // key contained in page which is not in memory // page it in if (byteMap.getSize() > 0) { // ensure consistency and flush current memory page to disk byteMap.flushBuffer(); } // load page into memory byteMap = MappedByteBufferMap.newMappedByteBufferMap(bucketIndex, thresholdBytes, spillFile); byteMap.pageIn(); } // get KV from current queue value = byteMap.getPagedInElement(ikey); if (value != null) { return value; } } return value; }