/**
  * Appends the value of a non-first record to the write buffers, followed by the value length
  * as a vlong.
  *
  * @param kv Key-value writer; only its value is written here.
  * @return The offset of the new record, i.e. the write position right after the value bytes
  *     (where the length vlong starts).
  * @throws SerDeException Declared for failures while the value is being written.
  */
 private long writeValueAndLength(KvSource kv) throws SerDeException {
   final long start = writeBuffers.getWritePoint();
   kv.writeValue(writeBuffers);
   final long end = writeBuffers.getWritePoint();
   // The length goes after the value, so it is read starting from the returned offset.
   final long valueLength = end - start;
   writeBuffers.writeVLong(valueLength);
   return end;
 }
 /**
  * Writes the debug dump of the table into logs: one line per key with its ref and row count,
  * followed by any suspicious gaps between the byte ranges that the visited records account
  * for. Not thread-safe.
  */
 public void debugDumpTable() {
   StringBuilder out = new StringBuilder().append(keysAssigned).append(" keys\n");
   // Start offset -> length for every byte range attributed to some record.
   TreeMap<Long, Integer> coveredRanges = new TreeMap<Long, Integer>();
   int occupiedSlots = 0;
   for (long ref : refs) {
     if (ref == 0) {
       continue; // Unassigned slot.
     }
     ++occupiedSlots;
     long lengthsOffset = getFirstRecordLengthsOffset(ref, null);
     long recordTail = Ref.getOffset(ref);
     writeBuffers.setReadPoint(lengthsOffset);
     // The value length is stored first, then the key length.
     int valueLength = (int) writeBuffers.readVLong();
     int keyLength = (int) writeBuffers.readVLong();
     long afterLengths = writeBuffers.getReadPoint();
     if (Ref.hasList(ref)) {
       // For a list, account for the lengths plus the 5 bytes that follow them.
       coveredRanges.put(lengthsOffset, (int) (afterLengths + 5 - lengthsOffset));
     }
     long keyOffset = recordTail - valueLength - keyLength;
     byte[] key = new byte[keyLength];
     WriteBuffers.ByteSegmentRef keySegment =
         new WriteBuffers.ByteSegmentRef(keyOffset, keyLength);
     // The range for the key also covers the 4 bytes in front of it.
     coveredRanges.put(keyOffset - 4, keyLength + 4);
     writeBuffers.populateValue(keySegment);
     System.arraycopy(keySegment.getBytes(), (int) keySegment.getOffset(), key, 0, keyLength);
     out.append(Utils.toStringBinary(key, 0, key.length));
     out.append(" ref [");
     out.append(dumpRef(ref));
     out.append("]: ");

     // Look the key up again through the regular read path to enumerate its rows.
     Result lookup = new Result();
     getValueResult(key, 0, key.length, lookup);
     List<WriteBuffers.ByteSegmentRef> rows = new ArrayList<WriteBuffers.ByteSegmentRef>();
     for (WriteBuffers.ByteSegmentRef cur = lookup.first(); cur != null; cur = lookup.next()) {
       rows.add(lookup.byteSegmentRef);
     }
     out.append(rows.size()).append(" rows\n");
     int rowIndex = 0;
     for (WriteBuffers.ByteSegmentRef row : rows) {
       int stateByteLength = (rowIndex == 0) ? 1 : 0; // state byte in the first record
       coveredRanges.put(row.getOffset(), row.getLength() + stateByteLength);
       ++rowIndex;
     }
   }
   if (occupiedSlots != keysAssigned) {
     out.append("Found ").append(occupiedSlots).append(" keys!\n");
   }
   // Report suspicious gaps in writeBuffers: more than 4 bytes between consecutive ranges.
   long expectedNext = 0;
   for (Map.Entry<Long, Integer> range : coveredRanges.entrySet()) {
     long start = range.getKey();
     long length = range.getValue();
     if (start - expectedNext > 4) {
       out.append("Gap! [").append(expectedNext).append(", ").append(start).append(")\n");
     }
     expectedNext = start + length;
   }
   LOG.info("Hashtable dump:\n " + out.toString());
 }
 /**
  * Makes a newly-written record the head of an existing list.
  *
  * @param lrPtrOffset List record pointer offset; the 5 bytes there hold the current head.
  * @param tailOffset New record offset.
  */
 private void addRecordToList(long lrPtrOffset, long tailOffset) {
   final long oldHead = writeBuffers.readNByteLong(lrPtrOffset, 5);
   // The new record replaces an earlier element as head, so its offset must be higher.
   assert oldHead < tailOffset;
   // Point the list record at the new head ...
   writeBuffers.writeFiveByteULong(lrPtrOffset, tailOffset);
   // ... then write the distance back to the previous head, or 0 if there was none.
   final long distanceToOldHead = (oldHead == 0) ? 0 : (tailOffset - oldHead);
   writeBuffers.writeVLong(distanceToOldHead);
 }
 /**
  * Locates the value and key length vlongs of the first record referenced by a ref.
  *
  * @param ref Reference.
  * @param readPos Read position handed to the buffer read; when null, the read overload without
  *     a position argument is used.
  * @return The ref's offset if the ref has no list; otherwise the ref's offset plus the 5-byte
  *     relative offset stored at that location.
  */
 private long getFirstRecordLengthsOffset(long ref, WriteBuffers.Position readPos) {
   final long refOffset = Ref.getOffset(ref);
   if (!Ref.hasList(ref)) {
     return refOffset;
   }
   final long relativeOffset;
   if (readPos != null) {
     relativeOffset = writeBuffers.readNByteLong(refOffset, 5, readPos);
   } else {
     relativeOffset = writeBuffers.readNByteLong(refOffset, 5);
   }
   return refOffset + relativeOffset;
 }
  /**
   * @param ref The ref.
   * @return The offset to list record pointer; list record is created if it doesn't exist.
   */
  private long createOrGetListRecord(long ref) {
    if (Ref.hasList(ref)) {
      // LOG.info("Found list record at " + writeBuffers.getReadPoint());
      return writeBuffers.getReadPoint(); // Assumes we are here after key compare.
    }
    long firstTailOffset = Ref.getOffset(ref);
    // LOG.info("First tail offset to create list record is " + firstTailOffset);

    // Determine the length of storage for value and key lengths of the first record.
    writeBuffers.setReadPoint(firstTailOffset);
    writeBuffers.skipVLong();
    writeBuffers.skipVLong();
    int lengthsLength = (int) (writeBuffers.getReadPoint() - firstTailOffset);

    // Create the list record, copy first record value/key lengths there.
    writeBuffers.writeBytes(firstTailOffset, lengthsLength);
    long lrPtrOffset = writeBuffers.getWritePoint();
    // LOG.info("Creating list record: copying " + lengthsLength + ", lrPtrOffset " + lrPtrOffset);

    // Reserve 5 bytes for writeValueRecord to fill. There might be junk there so null them.
    writeBuffers.write(FIVE_ZEROES);
    // Link the list record to the first element.
    writeBuffers.writeFiveByteULong(firstTailOffset, lrPtrOffset - lengthsLength - firstTailOffset);
    return lrPtrOffset;
  }
 /**
  * Same as {@link #isSameKey(long, int, long, int)} but for externally stored key: checks whether
  * the key referenced by ref equals the given bytes.
  *
  * @param key Array holding the key to compare with.
  * @param offset Offset of the key in the array.
  * @param length Length of the key in the array.
  * @param ref The ref that can be used to retrieve the candidate key.
  * @param hashCode Hash code of the key to compare with; checked against the ref first.
  * @param readPos Read position used for all buffer reads done here.
  * @return Whether the hash bits match and the buffer comparison reports the keys equal.
  */
 private boolean isSameKey(
     byte[] key, int offset, int length, long ref, int hashCode, WriteBuffers.Position readPos) {
   if (!compareHashBits(ref, hashCode)) {
     return false; // Hash bits don't match.
   }
   final long lengthsOffset = getFirstRecordLengthsOffset(ref, readPos);
   writeBuffers.setReadPoint(lengthsOffset, readPos);
   final int storedValueLength = (int) writeBuffers.readVLong(readPos);
   final int storedKeyLength = (int) writeBuffers.readVLong(readPos);
   // The key sits right before the value, which ends at the ref's offset.
   final long storedKeyOffset = Ref.getOffset(ref) - (storedValueLength + storedKeyLength);
   // Only the key bytes are compared here, as in the other isSameKey.
   return (offset != 0)
       ? writeBuffers.isEqual(key, offset, length, storedKeyOffset, storedKeyLength)
       : writeBuffers.isEqual(key, length, storedKeyOffset, storedKeyLength);
 }
 /**
  * Finds the ref to use for reading a key, probing from the key's home slot.
  *
  * @param key Read key array.
  * @param offset Offset of the key in the array.
  * @param length Read key length.
  * @param readPos Read position passed on to the key comparison.
  * @return The ref to use for reading, or 0 if the key was not found.
  */
 private long findKeyRefToRead(byte[] key, int offset, int length, WriteBuffers.Position readPos) {
   final int mask = refs.length - 1;
   final int hashCode = writeBuffers.hashCode(key, offset, length);
   long probe = hashCode & mask;
   int slot = (int) probe;
   // The probe position advances by 1, 2, 3, ... on successive misses.
   for (int step = 1; ; ++step) {
     final long ref = refs[slot];
     if (ref == 0) {
       // An insert of this key would have landed here; so, there's no key.
       return 0;
     }
     if (isSameKey(key, offset, length, ref, hashCode, readPos)) {
       return ref;
     }
     ++metricGetConflict;
     probe += step;
     if (step > largestNumberOfSteps) {
       // We know we never went that far when we were inserting.
       return 0;
     }
     slot = (int) (probe & mask);
   }
 }
 /**
  * Verifies that the key referenced by a ref matches a key stored in the write buffers.
  *
  * @param cmpOffset The offset to the key to compare with.
  * @param cmpLength The length of the key to compare with.
  * @param ref The ref that can be used to retrieve the candidate key.
  * @param hashCode Hash code of the key to compare with; checked against the ref first.
  * @return true if the hash bits match, the key lengths are equal and the buffer comparison
  *     reports the keys equal; false otherwise.
  */
 private boolean isSameKey(long cmpOffset, int cmpLength, long ref, int hashCode) {
   if (!compareHashBits(ref, hashCode)) {
     return false; // Hash bits in ref don't match.
   }
   writeBuffers.setReadPoint(getFirstRecordLengthsOffset(ref, null));
   final int storedValueLength = (int) writeBuffers.readVLong();
   final int storedKeyLength = (int) writeBuffers.readVLong();
   if (storedKeyLength == cmpLength) {
     final long storedKeyOffset = Ref.getOffset(ref) - (storedValueLength + storedKeyLength);
     // The full hash code is stored in front of the key and could be checked first. That is
     // pointless for keys of <= 4 bytes, and wasted work whenever the keys do match (rare for
     // writes of unique keys, hopefully common for reads). On a mismatch the key bytes are
     // assumed likely to differ within the first few bytes anyway, so for now the hash is not
     // checked.
     return writeBuffers.isEqual(cmpOffset, cmpLength, storedKeyOffset, storedKeyLength);
   }
   return false;
 }
Exemplo n.º 9
0
 /**
  * Computes the murmur hash of the current key's bytes. When {@code hasTag} is set, the last
  * byte of the key is left out of the hash.
  *
  * @return The hash of the key bytes.
  * @throws SerDeException If the key is null or is not a {@code BinaryComparable}.
  */
 @Override
 public int getHashFromKey() throws SerDeException {
   if (!(key instanceof BinaryComparable)) {
     // instanceof is false for null as well, so guard the getClass() call: a missing key is
     // reported as a SerDeException instead of escaping as a NullPointerException.
     String typeName = (key == null) ? "null" : key.getClass().getCanonicalName();
     throw new SerDeException("Unexpected type " + typeName);
   }
   sanityCheckKeyForTag();
   BinaryComparable b = (BinaryComparable) key;
   return WriteBuffers.murmurHash(b.getBytes(), 0, b.getLength() - (hasTag ? 1 : 0));
 }
   /**
    * Adds one key-value pair to the map. The key is always written to the write buffers first so
    * it can be hashed and compared in place; what happens next depends on whether the slot found
    * for it is empty (new key) or already holds a ref (existing key).
    *
    * @param kv Source that writes the key and the value and supplies the state byte.
    * @param keyHashCode Hash code to use for the key, or -1 to compute it from the key bytes just
    *     written. Note that a supplied hash that is itself -1 is therefore recomputed as well.
    * @throws SerDeException Declared for failures while the key or value is being written.
    */
   public void put(KvSource kv, int keyHashCode) throws SerDeException {
     // Grow the table before inserting once the number of keys has reached the threshold.
     if (resizeThreshold <= keysAssigned) {
       expandAndRehash();
     }

     // Reserve 4 bytes for the hash (don't just reserve, there may be junk there)
     writeBuffers.write(FOUR_ZEROES);

     // Write key to buffer to compute hashcode and compare; if it's a new key, it will
     // become part of the record; otherwise, we will just write over it later.
     long keyOffset = writeBuffers.getWritePoint();

     kv.writeKey(writeBuffers);
     int keyLength = (int) (writeBuffers.getWritePoint() - keyOffset);
     int hashCode = (keyHashCode == -1) ? writeBuffers.hashCode(keyOffset, keyLength) : keyHashCode;

     int slot = findKeySlotToWrite(keyOffset, keyLength, hashCode);
     // LOG.info("Write hash code is " + Integer.toBinaryString(hashCode) + " - " + slot);

     long ref = refs[slot];
     if (ref == 0) {
       // This is a new key, keep writing the first record.
       long tailOffset = writeFirstValueRecord(kv, keyOffset, keyLength, hashCode);
       byte stateByte = kv.updateStateByte(null);
       refs[slot] = Ref.makeFirstRef(tailOffset, stateByte, hashCode, startingHashBitCount);
       ++keysAssigned;
     } else {
       // This is not a new key; we'll overwrite the key and hash bytes - not needed anymore.
       // Rewinding by 4 extra bytes also reclaims the hash placeholder written above.
       writeBuffers.setWritePoint(keyOffset - 4);
       long lrPtrOffset = createOrGetListRecord(ref);
       long tailOffset = writeValueAndLength(kv);
       addRecordToList(lrPtrOffset, tailOffset);
       // Let the source merge its state into the state byte already stored in the ref.
       byte oldStateByte = Ref.getStateByte(ref);
       byte stateByte = kv.updateStateByte(oldStateByte);
       if (oldStateByte != stateByte) {
         ref = Ref.setStateByte(ref, stateByte);
       }
       // The key now has a list record, whether or not it had one before this call.
       refs[slot] = Ref.setListFlag(ref);
     }
     // Counts every value added, for new and existing keys alike.
     ++numValues;
   }
  private void expandAndRehashImpl(long capacity) {
    long expandTime = System.currentTimeMillis();
    final long[] oldRefs = refs;
    validateCapacity(capacity);
    long[] newRefs = new long[(int) capacity];

    // We store some hash bits in ref; for every expansion, we need to add one bit to hash.
    // If we have enough bits, we'll do that; if we don't, we'll rehash.
    // LOG.info("Expanding the hashtable to " + capacity + " capacity");
    int newHashBitCount = hashBitCount + 1;

    // Relocate all assigned slots from the old hash table.
    int maxSteps = 0;
    for (int oldSlot = 0; oldSlot < oldRefs.length; ++oldSlot) {
      long oldRef = oldRefs[oldSlot];
      if (oldRef == 0) {
        continue;
      }
      // TODO: we could actually store a bit flag in ref indicating whether this is a hash
      //       match or a probe, and in the former case use hash bits (for a first few resizes).
      // int hashCodeOrPart = oldSlot | Ref.getNthHashBit(oldRef, startingHashBitCount,
      // newHashBitCount);
      writeBuffers.setReadPoint(getFirstRecordLengthsOffset(oldRef, null));
      // Read the value and key length for the first record.
      int hashCode =
          (int)
              writeBuffers.readNByteLong(
                  Ref.getOffset(oldRef) - writeBuffers.readVLong() - writeBuffers.readVLong() - 4,
                  4);
      int probeSteps = relocateKeyRef(newRefs, oldRef, hashCode);
      maxSteps = Math.max(probeSteps, maxSteps);
    }
    this.refs = newRefs;
    this.largestNumberOfSteps = maxSteps;
    this.hashBitCount = newHashBitCount;
    this.resizeThreshold = (int) (capacity * loadFactor);
    metricExpandsMs += (System.currentTimeMillis() - expandTime);
    ++metricExpands;
  }
Exemplo n.º 12
0
    public JoinUtil.JoinResult setDirect(
        byte[] bytes, int offset, int length, BytesBytesMultiHashMap.Result hashMapResult) {

      int keyHash = WriteBuffers.murmurHash(bytes, offset, length);
      aliasFilter = hashMap.getValueResult(bytes, offset, length, hashMapResult);
      dummyRow = null;
      if (hashMapResult.hasRows()) {
        return JoinUtil.JoinResult.MATCH;
      } else {
        aliasFilter = (byte) 0xff;
        return JoinUtil.JoinResult.NOMATCH;
      }
    }
  /**
   * Finds a key. Values can be read with the supplied result object.
   *
   * @param key Key buffer.
   * @param offset the offset to the key in the buffer
   * @param hashMapResult The object to fill in that can read the values.
   * @param readPos Holds mutable read position for thread safety.
   * @return The state byte.
   */
  private byte getValueResult(
      byte[] key, int offset, int length, Result hashMapResult, WriteBuffers.Position readPos) {

    hashMapResult.forget();

    // First, find first record for the key.
    long ref = findKeyRefToRead(key, offset, length, readPos);
    if (ref == 0) {
      return 0;
    }

    boolean hasList = Ref.hasList(ref);

    // This relies on findKeyRefToRead doing key equality check and leaving read ptr where needed.
    long offsetAfterListRecordKeyLen = hasList ? writeBuffers.getReadPoint(readPos) : 0;

    hashMapResult.set(this, Ref.getOffset(ref), hasList, offsetAfterListRecordKeyLen, readPos);

    return Ref.getStateByte(ref);
  }
 /**
  * Finishes the first record of a new key after the key has been written: appends the value,
  * then the value and key lengths as vlongs, pads the lengths to at least 5 bytes, and finally
  * fills in the hash code in the 4 bytes in front of the key.
  *
  * @param kv Key-value writer.
  * @param keyOffset Offset of the already written key.
  * @param keyLength Key length (already written).
  * @param hashCode Hash code to store in front of the key.
  * @return The offset of the new record, i.e. the position right after the value bytes.
  * @throws SerDeException Declared for failures while the value is being written.
  */
 private long writeFirstValueRecord(KvSource kv, long keyOffset, int keyLength, int hashCode)
     throws SerDeException {
   final long valueStart = writeBuffers.getWritePoint();
   kv.writeValue(writeBuffers);
   long recordTail = writeBuffers.getWritePoint();
   final int valueLength = (int) (recordTail - valueStart);
   // In an unlikely case of 0-length key and value for the very first entry, we want to tell
   // this apart from an empty value. Advance by one byte; that byte is lost.
   if (recordTail == 0) {
     writeBuffers.reserve(1);
     recordTail = 1;
   }
   writeBuffers.writeVLong(valueLength);
   writeBuffers.writeVLong(keyLength);
   final long lengthsSize = writeBuffers.getWritePoint() - recordTail;
   if (lengthsSize < 5) {
     // Reserve space so a potential future list can put its 5 bytes here.
     final int padding = 5 - (int) lengthsSize;
     writeBuffers.reserve(padding);
   }
   // Finally write the hash code.
   writeBuffers.writeInt(keyOffset - 4, hashCode);
   return recordTail;
 }
 /**
  * Logs the probe path for a key: every slot visited after the key's home slot, up to and
  * including finalSlot, with the running probe position and the slot's ref in binary.
  *
  * @param keyOffset Offset of the key in the write buffers.
  * @param keyLength Length of the key.
  * @param hashCode Hash code of the key; determines the home slot.
  * @param finalSlot Slot at which the walk stops.
  */
 private void debugDumpKeyProbe(long keyOffset, int keyLength, int hashCode, int finalSlot) {
   final int mask = refs.length - 1;
   WriteBuffers.ByteSegmentRef keySegment = new WriteBuffers.ByteSegmentRef(keyOffset, keyLength);
   writeBuffers.populateValue(keySegment);
   StringBuilder msg =
       new StringBuilder("Probe path debug for [")
           .append(
               Utils.toStringBinary(
                   keySegment.getBytes(), (int) keySegment.getOffset(), keySegment.getLength()))
           .append("] hashCode ")
           .append(Integer.toBinaryString(hashCode))
           .append(" is: ");
   long probe = hashCode & mask;
   int slot = (int) probe;
   // Same probe sequence as the lookup code: advance by 1, 2, 3, ... per step.
   for (int step = 1; slot != finalSlot; ++step) {
     probe += step;
     slot = (int) (probe & mask);
     msg.append(slot);
     msg.append(" - ");
     msg.append(probe);
     msg.append(" - ");
     msg.append(Long.toBinaryString(refs[slot]));
     msg.append("\n");
   }
   LOG.info(msg.toString());
 }
Exemplo n.º 16
0
 /**
  * Computes the murmur hash over the first {@code key.getLength()} bytes of the key's byte array.
  *
  * @return The hash of the key bytes.
  */
 @Override
 public int getHashFromKey() throws SerDeException {
   return WriteBuffers.murmurHash(key.getBytes(), 0, key.getLength());
 }
 /**
  * Finds a key using the read position obtained from the write buffers. Not thread-safe! Use
  * createGetterForThread.
  *
  * @param key Key buffer.
  * @param offset The offset to the key in the buffer.
  * @param length The length of the key.
  * @param hashMapResult The object to fill in that can read the values.
  * @return The byte returned by the position-taking overload of this method.
  */
 public byte getValueResult(byte[] key, int offset, int length, Result hashMapResult) {
   final WriteBuffers.Position sharedReadPos = writeBuffers.getReadPosition();
   return getValueResult(key, offset, length, hashMapResult, sharedReadPos);
 }
 /**
  * Makes a segment reference self-contained by delegating to the write buffers: the reference
  * gets the byte array where the value is stored, and its offset is changed from a "global"
  * write buffers offset to an offset within that array.
  *
  * @param valueRef The segment reference to populate, e.g. one taken from a lookup result.
  */
 public void populateValue(WriteBuffers.ByteSegmentRef valueRef) {
   this.writeBuffers.populateValue(valueRef);
 }
 /**
  * Number of bytes used by the hashmap. There are two main components that take most memory:
  * writeBuffers and refs (8 bytes per slot). Others include instance fields, estimated at 100
  * bytes.
  *
  * @return number of bytes
  */
 public long memorySize() {
   // Multiply as long: refs.length * 8 in int arithmetic overflows once the array has 2^28 or
   // more slots, which would make the reported size wrong (even negative).
   return writeBuffers.size() + refs.length * 8L + 100;
 }
 /** Seals the underlying write buffers; simply delegates to {@code writeBuffers.seal()}. */
 public void seal() {
   this.writeBuffers.seal();
 }