/** * Writes the value and value length for non-first record. * * @param kv Key-value writer. * @return The offset of the new record. */ private long writeValueAndLength(KvSource kv) throws SerDeException { long valueOffset = writeBuffers.getWritePoint(); kv.writeValue(writeBuffers); long tailOffset = writeBuffers.getWritePoint(); writeBuffers.writeVLong(tailOffset - valueOffset); // LOG.info("Writing value at " + valueOffset + " length " + (tailOffset - valueOffset)); return tailOffset; }
/** Writes the debug dump of the table into logs. Not thread-safe. */ public void debugDumpTable() { StringBuilder dump = new StringBuilder(keysAssigned + " keys\n"); TreeMap<Long, Integer> byteIntervals = new TreeMap<Long, Integer>(); int examined = 0; for (int slot = 0; slot < refs.length; ++slot) { long ref = refs[slot]; if (ref == 0) { continue; } ++examined; long recOffset = getFirstRecordLengthsOffset(ref, null); long tailOffset = Ref.getOffset(ref); writeBuffers.setReadPoint(recOffset); int valueLength = (int) writeBuffers.readVLong(), keyLength = (int) writeBuffers.readVLong(); long ptrOffset = writeBuffers.getReadPoint(); if (Ref.hasList(ref)) { byteIntervals.put(recOffset, (int) (ptrOffset + 5 - recOffset)); } long keyOffset = tailOffset - valueLength - keyLength; byte[] key = new byte[keyLength]; WriteBuffers.ByteSegmentRef fakeRef = new WriteBuffers.ByteSegmentRef(keyOffset, keyLength); byteIntervals.put(keyOffset - 4, keyLength + 4); writeBuffers.populateValue(fakeRef); System.arraycopy(fakeRef.getBytes(), (int) fakeRef.getOffset(), key, 0, keyLength); dump.append(Utils.toStringBinary(key, 0, key.length)) .append(" ref [") .append(dumpRef(ref)) .append("]: "); Result hashMapResult = new Result(); getValueResult(key, 0, key.length, hashMapResult); List<WriteBuffers.ByteSegmentRef> results = new ArrayList<WriteBuffers.ByteSegmentRef>(); WriteBuffers.ByteSegmentRef byteSegmentRef = hashMapResult.first(); while (byteSegmentRef != null) { results.add(hashMapResult.byteSegmentRef); byteSegmentRef = hashMapResult.next(); } dump.append(results.size()).append(" rows\n"); for (int i = 0; i < results.size(); ++i) { WriteBuffers.ByteSegmentRef segment = results.get(i); byteIntervals.put( segment.getOffset(), segment.getLength() + ((i == 0) ? 
1 : 0)); // state byte in the first record } } if (examined != keysAssigned) { dump.append("Found " + examined + " keys!\n"); } // Report suspicious gaps in writeBuffers long currentOffset = 0; for (Map.Entry<Long, Integer> e : byteIntervals.entrySet()) { long start = e.getKey(), len = e.getValue(); if (start - currentOffset > 4) { dump.append("Gap! [" + currentOffset + ", " + start + ")\n"); } currentOffset = start + len; } LOG.info("Hashtable dump:\n " + dump.toString()); }
/** * Adds a newly-written record to existing list. * * @param lrPtrOffset List record pointer offset. * @param tailOffset New record offset. */ private void addRecordToList(long lrPtrOffset, long tailOffset) { // Now, insert this record into the list. long prevHeadOffset = writeBuffers.readNByteLong(lrPtrOffset, 5); // LOG.info("Reading offset " + prevHeadOffset + " at " + lrPtrOffset); assert prevHeadOffset < tailOffset; // We replace an earlier element, must have lower offset. writeBuffers.writeFiveByteULong(lrPtrOffset, tailOffset); // LOG.info("Writing offset " + tailOffset + " at " + lrPtrOffset); writeBuffers.writeVLong(prevHeadOffset == 0 ? 0 : (tailOffset - prevHeadOffset)); }
/**
 * Locates the value-length and key-length vlongs of the first record referenced by a ref.
 *
 * @param ref Reference.
 * @param readPos Read position to use for the buffer read, or null to use the overload that
 *     takes no position.
 * @return The ref's offset when the ref has no list; otherwise the ref's offset plus the 5-byte
 *     relative offset stored at that location.
 */
private long getFirstRecordLengthsOffset(long ref, WriteBuffers.Position readPos) {
  final long refOffset = Ref.getOffset(ref);
  if (!Ref.hasList(ref)) {
    return refOffset;
  }
  final long relative;
  if (readPos == null) {
    relative = writeBuffers.readNByteLong(refOffset, 5);
  } else {
    relative = writeBuffers.readNByteLong(refOffset, 5, readPos);
  }
  return refOffset + relative;
}
/** * @param ref The ref. * @return The offset to list record pointer; list record is created if it doesn't exist. */ private long createOrGetListRecord(long ref) { if (Ref.hasList(ref)) { // LOG.info("Found list record at " + writeBuffers.getReadPoint()); return writeBuffers.getReadPoint(); // Assumes we are here after key compare. } long firstTailOffset = Ref.getOffset(ref); // LOG.info("First tail offset to create list record is " + firstTailOffset); // Determine the length of storage for value and key lengths of the first record. writeBuffers.setReadPoint(firstTailOffset); writeBuffers.skipVLong(); writeBuffers.skipVLong(); int lengthsLength = (int) (writeBuffers.getReadPoint() - firstTailOffset); // Create the list record, copy first record value/key lengths there. writeBuffers.writeBytes(firstTailOffset, lengthsLength); long lrPtrOffset = writeBuffers.getWritePoint(); // LOG.info("Creating list record: copying " + lengthsLength + ", lrPtrOffset " + lrPtrOffset); // Reserve 5 bytes for writeValueRecord to fill. There might be junk there so null them. writeBuffers.write(FIVE_ZEROES); // Link the list record to the first element. writeBuffers.writeFiveByteULong(firstTailOffset, lrPtrOffset - lengthsLength - firstTailOffset); return lrPtrOffset; }
/** Same as {@link #isSameKey(long, int, long, int)} but for externally stored key. */ private boolean isSameKey( byte[] key, int offset, int length, long ref, int hashCode, WriteBuffers.Position readPos) { if (!compareHashBits(ref, hashCode)) { return false; // Hash bits don't match. } writeBuffers.setReadPoint(getFirstRecordLengthsOffset(ref, readPos), readPos); int valueLength = (int) writeBuffers.readVLong(readPos), keyLength = (int) writeBuffers.readVLong(readPos); long keyOffset = Ref.getOffset(ref) - (valueLength + keyLength); // See the comment in the other isSameKey if (offset == 0) { return writeBuffers.isEqual(key, length, keyOffset, keyLength); } else { return writeBuffers.isEqual(key, offset, length, keyOffset, keyLength); } }
/** * Finds the slot to use for reading. * * @param key Read key array. * @param length Read key length. * @return The ref to use for reading. */ private long findKeyRefToRead(byte[] key, int offset, int length, WriteBuffers.Position readPos) { final int bucketMask = (refs.length - 1); int hashCode = writeBuffers.hashCode(key, offset, length); int slot = hashCode & bucketMask; // LOG.info("Read hash code for " + Utils.toStringBinary(key, 0, length) // + " is " + Integer.toBinaryString(hashCode) + " - " + slot); long probeSlot = slot; int i = 0; while (true) { long ref = refs[slot]; // When we were inserting the key, we would have inserted here; so, there's no key. if (ref == 0) { return 0; } if (isSameKey(key, offset, length, ref, hashCode, readPos)) { return ref; } ++metricGetConflict; probeSlot += (++i); if (i > largestNumberOfSteps) { // We know we never went that far when we were inserting. return 0; } slot = (int) (probeSlot & bucketMask); } }
/** * Verifies that the key matches a requisite key. * * @param cmpOffset The offset to the key to compare with. * @param cmpLength The length of the key to compare with. * @param ref The ref that can be used to retrieve the candidate key. * @param hashCode * @return -1 if the key referenced by ref is different than the one referenced by cmp... 0 if the * keys match, and there's only one value for this key (no list). Offset if the keys match, * and there are multiple values for this key (a list). */ private boolean isSameKey(long cmpOffset, int cmpLength, long ref, int hashCode) { if (!compareHashBits(ref, hashCode)) { return false; // Hash bits in ref don't match. } writeBuffers.setReadPoint(getFirstRecordLengthsOffset(ref, null)); int valueLength = (int) writeBuffers.readVLong(), keyLength = (int) writeBuffers.readVLong(); if (keyLength != cmpLength) { return false; } long keyOffset = Ref.getOffset(ref) - (valueLength + keyLength); // There's full hash code stored in front of the key. We could check that first. If keyLength // is <= 4 it obviously doesn't make sense, less bytes to check in a key. Then, if there's a // match, we check it in vain. But what is the proportion of matches? For writes it could be 0 // if all keys are unique, for reads we hope it's really high. Then if there's a mismatch what // probability is there that key mismatches in <4 bytes (so just checking the key is faster)? // We assume the latter is pretty high, so we don't check for now. return writeBuffers.isEqual(cmpOffset, cmpLength, keyOffset, keyLength); }
/**
 * Computes the murmur hash of the current key's bytes, excluding the trailing tag byte when
 * {@code hasTag} is set.
 *
 * @return The hash of the key bytes.
 * @throws SerDeException if the key is null or is not a {@code BinaryComparable}.
 */
@Override
public int getHashFromKey() throws SerDeException {
  if (!(key instanceof BinaryComparable)) {
    // instanceof is false for null, so guard the getClass() call: a null key must surface as
    // the documented SerDeException rather than a NullPointerException.
    String actualType = (key == null) ? "null" : key.getClass().getCanonicalName();
    throw new SerDeException("Unexpected type " + actualType);
  }
  sanityCheckKeyForTag();
  BinaryComparable b = (BinaryComparable) key;
  return WriteBuffers.murmurHash(b.getBytes(), 0, b.getLength() - (hasTag ? 1 : 0));
}
public void put(KvSource kv, int keyHashCode) throws SerDeException { if (resizeThreshold <= keysAssigned) { expandAndRehash(); } // Reserve 4 bytes for the hash (don't just reserve, there may be junk there) writeBuffers.write(FOUR_ZEROES); // Write key to buffer to compute hashcode and compare; if it's a new key, it will // become part of the record; otherwise, we will just write over it later. long keyOffset = writeBuffers.getWritePoint(); kv.writeKey(writeBuffers); int keyLength = (int) (writeBuffers.getWritePoint() - keyOffset); int hashCode = (keyHashCode == -1) ? writeBuffers.hashCode(keyOffset, keyLength) : keyHashCode; int slot = findKeySlotToWrite(keyOffset, keyLength, hashCode); // LOG.info("Write hash code is " + Integer.toBinaryString(hashCode) + " - " + slot); long ref = refs[slot]; if (ref == 0) { // This is a new key, keep writing the first record. long tailOffset = writeFirstValueRecord(kv, keyOffset, keyLength, hashCode); byte stateByte = kv.updateStateByte(null); refs[slot] = Ref.makeFirstRef(tailOffset, stateByte, hashCode, startingHashBitCount); ++keysAssigned; } else { // This is not a new key; we'll overwrite the key and hash bytes - not needed anymore. writeBuffers.setWritePoint(keyOffset - 4); long lrPtrOffset = createOrGetListRecord(ref); long tailOffset = writeValueAndLength(kv); addRecordToList(lrPtrOffset, tailOffset); byte oldStateByte = Ref.getStateByte(ref); byte stateByte = kv.updateStateByte(oldStateByte); if (oldStateByte != stateByte) { ref = Ref.setStateByte(ref, stateByte); } refs[slot] = Ref.setListFlag(ref); } ++numValues; }
private void expandAndRehashImpl(long capacity) { long expandTime = System.currentTimeMillis(); final long[] oldRefs = refs; validateCapacity(capacity); long[] newRefs = new long[(int) capacity]; // We store some hash bits in ref; for every expansion, we need to add one bit to hash. // If we have enough bits, we'll do that; if we don't, we'll rehash. // LOG.info("Expanding the hashtable to " + capacity + " capacity"); int newHashBitCount = hashBitCount + 1; // Relocate all assigned slots from the old hash table. int maxSteps = 0; for (int oldSlot = 0; oldSlot < oldRefs.length; ++oldSlot) { long oldRef = oldRefs[oldSlot]; if (oldRef == 0) { continue; } // TODO: we could actually store a bit flag in ref indicating whether this is a hash // match or a probe, and in the former case use hash bits (for a first few resizes). // int hashCodeOrPart = oldSlot | Ref.getNthHashBit(oldRef, startingHashBitCount, // newHashBitCount); writeBuffers.setReadPoint(getFirstRecordLengthsOffset(oldRef, null)); // Read the value and key length for the first record. int hashCode = (int) writeBuffers.readNByteLong( Ref.getOffset(oldRef) - writeBuffers.readVLong() - writeBuffers.readVLong() - 4, 4); int probeSteps = relocateKeyRef(newRefs, oldRef, hashCode); maxSteps = Math.max(probeSteps, maxSteps); } this.refs = newRefs; this.largestNumberOfSteps = maxSteps; this.hashBitCount = newHashBitCount; this.resizeThreshold = (int) (capacity * loadFactor); metricExpandsMs += (System.currentTimeMillis() - expandTime); ++metricExpands; }
/**
 * Looks up a key given directly as a byte range and records the outcome on this object.
 *
 * <p>{@code aliasFilter} is set to the byte returned by {@code hashMap.getValueResult}, or to
 * {@code 0xff} when the lookup produced no rows; {@code dummyRow} is cleared.
 *
 * @param bytes Array holding the key.
 * @param offset Offset of the key within {@code bytes}.
 * @param length Length of the key.
 * @param hashMapResult Result object filled in by the lookup.
 * @return {@code MATCH} if the result has rows, {@code NOMATCH} otherwise.
 */
public JoinUtil.JoinResult setDirect(
    byte[] bytes, int offset, int length, BytesBytesMultiHashMap.Result hashMapResult) {
  // The lookup hashes the key itself, so no hash is precomputed here.
  aliasFilter = hashMap.getValueResult(bytes, offset, length, hashMapResult);
  dummyRow = null;
  if (hashMapResult.hasRows()) {
    return JoinUtil.JoinResult.MATCH;
  }
  aliasFilter = (byte) 0xff;
  return JoinUtil.JoinResult.NOMATCH;
}
/** * Finds a key. Values can be read with the supplied result object. * * @param key Key buffer. * @param offset the offset to the key in the buffer * @param hashMapResult The object to fill in that can read the values. * @param readPos Holds mutable read position for thread safety. * @return The state byte. */ private byte getValueResult( byte[] key, int offset, int length, Result hashMapResult, WriteBuffers.Position readPos) { hashMapResult.forget(); // First, find first record for the key. long ref = findKeyRefToRead(key, offset, length, readPos); if (ref == 0) { return 0; } boolean hasList = Ref.hasList(ref); // This relies on findKeyRefToRead doing key equality check and leaving read ptr where needed. long offsetAfterListRecordKeyLen = hasList ? writeBuffers.getReadPoint(readPos) : 0; hashMapResult.set(this, Ref.getOffset(ref), hasList, offsetAfterListRecordKeyLen, readPos); return Ref.getStateByte(ref); }
/** * Writes first value and lengths to finish the first record after the key has been written. * * @param kv Key-value writer. * @param keyOffset * @param keyLength Key length (already written). * @param hashCode * @return The offset of the new record. */ private long writeFirstValueRecord(KvSource kv, long keyOffset, int keyLength, int hashCode) throws SerDeException { long valueOffset = writeBuffers.getWritePoint(); kv.writeValue(writeBuffers); long tailOffset = writeBuffers.getWritePoint(); int valueLength = (int) (tailOffset - valueOffset); // LOG.info("Writing value at " + valueOffset + " length " + valueLength); // In an unlikely case of 0-length key and value for the very first entry, we want to tell // this apart from an empty value. We'll just advance one byte; this byte will be lost. if (tailOffset == 0) { writeBuffers.reserve(1); ++tailOffset; } // LOG.info("First tail offset " + writeBuffers.getWritePoint()); writeBuffers.writeVLong(valueLength); writeBuffers.writeVLong(keyLength); long lengthsLength = writeBuffers.getWritePoint() - tailOffset; if (lengthsLength < 5) { // Reserve space for potential future list writeBuffers.reserve(5 - (int) lengthsLength); } // Finally write the hash code. writeBuffers.writeInt(keyOffset - 4, hashCode); return tailOffset; }
/**
 * Logs the probe path taken for a key: every slot visited after the initial one, up to and
 * including {@code finalSlot}, together with the running probe value and the ref in that slot.
 *
 * @param keyOffset Offset of the key in the write buffers.
 * @param keyLength Length of the key.
 * @param hashCode Hash code of the key.
 * @param finalSlot Slot at which the probe sequence ends.
 */
private void debugDumpKeyProbe(long keyOffset, int keyLength, int hashCode, int finalSlot) {
  final int mask = refs.length - 1;
  WriteBuffers.ByteSegmentRef keySegment = new WriteBuffers.ByteSegmentRef(keyOffset, keyLength);
  writeBuffers.populateValue(keySegment);

  StringBuilder sb = new StringBuilder("Probe path debug for [");
  sb.append(
          Utils.toStringBinary(
              keySegment.getBytes(), (int) keySegment.getOffset(), keySegment.getLength()))
      .append("] hashCode ")
      .append(Integer.toBinaryString(hashCode))
      .append(" is: ");

  int slot = hashCode & mask;
  long probe = slot;
  for (int step = 1; slot != finalSlot; ++step) {
    probe += step;
    slot = (int) (probe & mask);
    sb.append(slot);
    sb.append(" - ");
    sb.append(probe);
    sb.append(" - ");
    sb.append(Long.toBinaryString(refs[slot]));
    sb.append("\n");
  }
  LOG.info(sb.toString());
}
/**
 * Computes the murmur hash over the first {@code key.getLength()} bytes of the current key.
 *
 * @return The hash of the key bytes.
 * @throws SerDeException declared by the overridden method.
 */
@Override
public int getHashFromKey() throws SerDeException {
  return WriteBuffers.murmurHash(key.getBytes(), 0, key.getLength());
}
/**
 * Finds a key using the write buffers' own read position. Not thread-safe! Use
 * createGetterForThread.
 *
 * @param key Key buffer.
 * @param offset The offset to the key in the buffer.
 * @param length The length of the key.
 * @param hashMapResult The object to fill in that can read the values.
 * @return The state byte.
 */
public byte getValueResult(byte[] key, int offset, int length, Result hashMapResult) {
  final WriteBuffers.Position sharedReadPos = writeBuffers.getReadPosition();
  return getValueResult(key, offset, length, hashMapResult, sharedReadPos);
}
/**
 * Makes a segment reference obtained from {@link #getValueRefs(byte[], int, List)}
 * self-contained: the byte array holding the value is attached, and the offset is converted
 * from a "global" write buffers offset to an offset within that array. Delegates to the write
 * buffers.
 *
 * @param valueRef The segment reference to populate in place.
 */
public void populateValue(WriteBuffers.ByteSegmentRef valueRef) {
  this.writeBuffers.populateValue(valueRef);
}
/**
 * Estimates the number of bytes used by the hashmap. Two components take most of the memory:
 * writeBuffers and refs (8 bytes per slot). Instance fields are accounted for with a flat 100
 * bytes.
 *
 * @return number of bytes
 */
public long memorySize() {
  // Multiply in long arithmetic: refs.length * 8 as an int overflows once the table has
  // 2^28 or more slots.
  final long refsBytes = refs.length * 8L;
  return writeBuffers.size() + refsBytes + 100;
}
/** Seals the underlying write buffers by delegating to their {@code seal()} method. */
public void seal() {
  this.writeBuffers.seal();
}