/**
 * Returns the offset of the list record pointer for the key referenced by {@code ref},
 * creating the list record if it doesn't exist yet.
 *
 * @param ref The ref.
 * @return The offset to list record pointer; list record is created if it doesn't exist.
 */
private long createOrGetListRecord(long ref) {
  if (Ref.hasList(ref)) {
    // The list bit is set, so the list record already exists. The read point is positioned
    // on the list record pointer because the caller has just finished a key comparison.
    // LOG.info("Found list record at " + writeBuffers.getReadPoint());
    return writeBuffers.getReadPoint(); // Assumes we are here after key compare.
  }
  // No list yet: the ref points directly at the tail of the single (first) record.
  long firstTailOffset = Ref.getOffset(ref);
  // LOG.info("First tail offset to create list record is " + firstTailOffset);
  // Determine the length of storage for value and key lengths of the first record.
  writeBuffers.setReadPoint(firstTailOffset);
  writeBuffers.skipVLong(); // value length
  writeBuffers.skipVLong(); // key length
  int lengthsLength = (int) (writeBuffers.getReadPoint() - firstTailOffset);
  // Create the list record, copy first record value/key lengths there.
  writeBuffers.writeBytes(firstTailOffset, lengthsLength);
  long lrPtrOffset = writeBuffers.getWritePoint();
  // LOG.info("Creating list record: copying " + lengthsLength + ", lrPtrOffset " + lrPtrOffset);
  // Reserve 5 bytes for writeValueRecord to fill. There might be junk there so null them.
  writeBuffers.write(FIVE_ZEROES);
  // Link the list record to the first element: write a 5-byte relative pointer over the first
  // record's tail so it leads to the new list record.
  // NOTE(review): assumes writeFiveByteULong writes the second argument at the offset given by
  // the first — confirm against WriteBuffers.
  writeBuffers.writeFiveByteULong(firstTailOffset, lrPtrOffset - lengthsLength - firstTailOffset);
  return lrPtrOffset;
}
/**
 * Writes the debug dump of the table into logs. Not thread-safe.
 *
 * <p>Walks every assigned slot, re-reads each key out of the write buffers, re-queries the map
 * for that key to enumerate its values, and records the byte intervals touched so that gaps in
 * the buffers can be reported as suspicious.
 */
public void debugDumpTable() {
  StringBuilder dump = new StringBuilder(keysAssigned + " keys\n");
  // Maps start offset -> length for every byte interval known to be in use; used at the end
  // to detect unaccounted-for gaps in writeBuffers.
  TreeMap<Long, Integer> byteIntervals = new TreeMap<Long, Integer>();
  int examined = 0;
  for (int slot = 0; slot < refs.length; ++slot) {
    long ref = refs[slot];
    if (ref == 0) {
      continue; // Empty slot.
    }
    ++examined;
    long recOffset = getFirstRecordLengthsOffset(ref, null);
    long tailOffset = Ref.getOffset(ref);
    writeBuffers.setReadPoint(recOffset);
    // First record stores value length then key length as vlongs.
    int valueLength = (int) writeBuffers.readVLong(), keyLength = (int) writeBuffers.readVLong();
    long ptrOffset = writeBuffers.getReadPoint();
    if (Ref.hasList(ref)) {
      // Account for the lengths plus the 5-byte list record pointer.
      byteIntervals.put(recOffset, (int) (ptrOffset + 5 - recOffset));
    }
    // Key bytes sit immediately before the value bytes, which end at the tail.
    long keyOffset = tailOffset - valueLength - keyLength;
    byte[] key = new byte[keyLength];
    WriteBuffers.ByteSegmentRef fakeRef = new WriteBuffers.ByteSegmentRef(keyOffset, keyLength);
    // The 4 extra bytes before the key hold the full hash code.
    byteIntervals.put(keyOffset - 4, keyLength + 4);
    writeBuffers.populateValue(fakeRef);
    System.arraycopy(fakeRef.getBytes(), (int) fakeRef.getOffset(), key, 0, keyLength);
    dump.append(Utils.toStringBinary(key, 0, key.length))
        .append(" ref [")
        .append(dumpRef(ref))
        .append("]: ");
    // Re-query the map with the extracted key to enumerate all values stored for it.
    Result hashMapResult = new Result();
    getValueResult(key, 0, key.length, hashMapResult);
    List<WriteBuffers.ByteSegmentRef> results = new ArrayList<WriteBuffers.ByteSegmentRef>();
    WriteBuffers.ByteSegmentRef byteSegmentRef = hashMapResult.first();
    while (byteSegmentRef != null) {
      results.add(hashMapResult.byteSegmentRef);
      byteSegmentRef = hashMapResult.next();
    }
    dump.append(results.size()).append(" rows\n");
    for (int i = 0; i < results.size(); ++i) {
      WriteBuffers.ByteSegmentRef segment = results.get(i);
      byteIntervals.put(
          segment.getOffset(),
          segment.getLength() + ((i == 0) ? 1 : 0)); // state byte in the first record
    }
  }
  if (examined != keysAssigned) {
    dump.append("Found " + examined + " keys!\n");
  }
  // Report suspicious gaps in writeBuffers
  long currentOffset = 0;
  for (Map.Entry<Long, Integer> e : byteIntervals.entrySet()) {
    long start = e.getKey(), len = e.getValue();
    // Small gaps (<= 4 bytes) are expected; presumably alignment/hash-code slack — anything
    // larger is flagged.
    if (start - currentOffset > 4) {
      dump.append("Gap! [" + currentOffset + ", " + start + ")\n");
    }
    currentOffset = start + len;
  }
  LOG.info("Hashtable dump:\n " + dump.toString());
}
/** Same as {@link #isSameKey(long, int, long, int)} but for externally stored key. */ private boolean isSameKey( byte[] key, int offset, int length, long ref, int hashCode, WriteBuffers.Position readPos) { if (!compareHashBits(ref, hashCode)) { return false; // Hash bits don't match. } writeBuffers.setReadPoint(getFirstRecordLengthsOffset(ref, readPos), readPos); int valueLength = (int) writeBuffers.readVLong(readPos), keyLength = (int) writeBuffers.readVLong(readPos); long keyOffset = Ref.getOffset(ref) - (valueLength + keyLength); // See the comment in the other isSameKey if (offset == 0) { return writeBuffers.isEqual(key, length, keyOffset, keyLength); } else { return writeBuffers.isEqual(key, offset, length, keyOffset, keyLength); } }
/**
 * Verifies that the key matches a requisite key.
 *
 * @param cmpOffset The offset to the key to compare with.
 * @param cmpLength The length of the key to compare with.
 * @param ref The ref that can be used to retrieve the candidate key.
 * @param hashCode Hash code of the key to compare with; checked against hash bits in the ref.
 * @return true if the key referenced by ref is byte-equal to the key at cmpOffset; false
 *     otherwise.
 */
private boolean isSameKey(long cmpOffset, int cmpLength, long ref, int hashCode) {
  if (!compareHashBits(ref, hashCode)) {
    return false; // Hash bits in ref don't match.
  }
  writeBuffers.setReadPoint(getFirstRecordLengthsOffset(ref, null));
  // First record stores value length then key length as vlongs.
  int valueLength = (int) writeBuffers.readVLong(), keyLength = (int) writeBuffers.readVLong();
  if (keyLength != cmpLength) {
    return false;
  }
  // The stored key immediately precedes the value, which ends at the ref's tail offset.
  long keyOffset = Ref.getOffset(ref) - (valueLength + keyLength);
  // There's full hash code stored in front of the key. We could check that first. If keyLength
  // is <= 4 it obviously doesn't make sense, less bytes to check in a key. Then, if there's a
  // match, we check it in vain. But what is the proportion of matches? For writes it could be 0
  // if all keys are unique, for reads we hope it's really high. Then if there's a mismatch what
  // probability is there that key mismatches in <4 bytes (so just checking the key is faster)?
  // We assume the latter is pretty high, so we don't check for now.
  return writeBuffers.isEqual(cmpOffset, cmpLength, keyOffset, keyLength);
}
/**
 * Grows the ref array to the given capacity and relocates every assigned slot, recomputing
 * probe positions from the full hash code stored in front of each key. Not thread-safe.
 */
private void expandAndRehashImpl(long capacity) {
  long expandTime = System.currentTimeMillis();
  final long[] oldRefs = refs;
  validateCapacity(capacity);
  long[] newRefs = new long[(int) capacity];

  // We store some hash bits in ref; for every expansion, we need to add one bit to hash.
  // If we have enough bits, we'll do that; if we don't, we'll rehash.
  // LOG.info("Expanding the hashtable to " + capacity + " capacity");
  int newHashBitCount = hashBitCount + 1;

  // Relocate all assigned slots from the old hash table.
  int maxSteps = 0;
  for (int oldSlot = 0; oldSlot < oldRefs.length; ++oldSlot) {
    long oldRef = oldRefs[oldSlot];
    if (oldRef == 0) {
      continue; // Empty slot.
    }
    // TODO: we could actually store a bit flag in ref indicating whether this is a hash
    //       match or a probe, and in the former case use hash bits (for a first few resizes).
    // int hashCodeOrPart = oldSlot | Ref.getNthHashBit(oldRef, startingHashBitCount,
    // newHashBitCount);
    writeBuffers.setReadPoint(getFirstRecordLengthsOffset(oldRef, null));
    // Read the value and key length for the first record.
    // NOTE: relies on Java's left-to-right evaluation of the offset expression — the first
    // readVLong() returns the value length, the second the key length; the full 4-byte hash
    // code sits immediately before the key bytes.
    int hashCode =
        (int)
            writeBuffers.readNByteLong(
                Ref.getOffset(oldRef) - writeBuffers.readVLong() - writeBuffers.readVLong() - 4,
                4);
    int probeSteps = relocateKeyRef(newRefs, oldRef, hashCode);
    maxSteps = Math.max(probeSteps, maxSteps);
  }
  this.refs = newRefs;
  this.largestNumberOfSteps = maxSteps;
  this.hashBitCount = newHashBitCount;
  this.resizeThreshold = (int) (capacity * loadFactor);
  metricExpandsMs += (System.currentTimeMillis() - expandTime);
  ++metricExpands;
}