/** * Iterate backwards w.r.t the buffer, to minimize collision chains when filling another hash * container (ex. with putAll()) */ @Override protected KTypeCursor<KType> fetch() { if (this.cursor.index == KTypeHashSet.this.keys.length + 1) { if (KTypeHashSet.this.allocatedDefaultKey) { this.cursor.index = KTypeHashSet.this.keys.length; this.cursor.value = Intrinsics.<KType>empty(); return this.cursor; } // no value associated with the default key, continue iteration... this.cursor.index = KTypeHashSet.this.keys.length; } int i = this.cursor.index - 1; while (i >= 0 && !is_allocated(i, Intrinsics.<KType[]>cast(KTypeHashSet.this.keys))) { i--; } if (i == -1) { return done(); } this.cursor.index = i; this.cursor.value = Intrinsics.<KType>cast(KTypeHashSet.this.keys[i]); return this.cursor; }
/** * Check that the set is consistent, i.e all allocated slots are reachable by get(), and all * not-allocated contains nulls if Generic * * @param set */ @After public void checkConsistency() { if (this.set != null) { int occupied = 0; final int mask = getKeys(this.set).length - 1; for (int i = 0; i < getKeys(this.set).length; i++) { if (!is_allocated(i, Intrinsics.<KType[]>cast(getKeys(this.set)))) { // if not allocated, generic version if patched to null for GC sake /*! #if ($TemplateOptions.KTypeGeneric) !*/ TestUtils.assertEquals2(this.keyE, getKeys(this.set)[i]); /*! #end !*/ } else { // try to reach the key by contains() Assert.assertTrue(this.set.contains(Intrinsics.<KType>cast(getKeys(this.set)[i]))); occupied++; } } if (isAllocatedDefaultKey(this.set)) { // try to reach the key by contains() Assert.assertTrue(this.set.contains(this.keyE)); occupied++; } Assert.assertEquals(occupied, this.set.size()); } }
@Test public void testForEachProcedure() { // Test that the container do not resize if less that the initial size // 1) Choose a map to build /*! #if ($TemplateOptions.isKType("GENERIC", "int", "long", "float", "double")) !*/ final int NB_ELEMENTS = 2000; /*! #elseif ($TemplateOptions.isKType("short", "char")) int NB_ELEMENTS = 1000; #else int NB_ELEMENTS = 126; #end !*/ final KTypeSet<KType> newSet = createNewSetInstance(); newSet.add(this.keyE); // add a increasing number of key for (int i = 0; i < NB_ELEMENTS; i++) { final int KVpair = i; newSet.add(cast(KVpair)); } // List the keys in the reverse-order of the internal buffer, since forEach() is iterating in // reverse also: final KTypeArrayList<KType> keyList = new KTypeArrayList<KType>(); keyList.add(this.keyE); for (int i = getKeys(newSet).length - 1; i >= 0; i--) { if (is_allocated(i, Intrinsics.<KType[]>cast(getKeys(newSet)))) { keyList.add(Intrinsics.<KType>cast(getKeys(newSet)[i])); } } // Test forEach predicate and stop at each key in turn. final KTypeArrayList<KType> keyListTest = new KTypeArrayList<KType>(); keyListTest.clear(); // A) Run forEach(KType) newSet.forEach( new KTypeProcedure<KType>() { @Override public void apply(final KType key) { keyListTest.add(key); } }); // check that keyList/keyListTest and valueList/valueListTest are identical. Assert.assertEquals(keyList, keyListTest); }
/** {@inheritDoc} */ @Override public int removeAll(final KTypePredicate<? super KType> predicate) { final int before = this.size(); if (this.allocatedDefaultKey) { if (predicate.apply(Intrinsics.<KType>empty())) { this.allocatedDefaultKey = false; } } final KType[] keys = Intrinsics.<KType[]>cast(this.keys); for (int i = 0; i < keys.length; ) { KType existing; if (!Intrinsics.<KType>isEmpty(existing = keys[i]) && predicate.apply(existing)) { shiftConflictingKeys(i); // Shift, do not increment slot. } else { i++; } } return before - this.size(); }
/** {@inheritDoc} */ @Override public <T extends KTypePredicate<? super KType>> T forEach(final T predicate) { if (this.allocatedDefaultKey) { if (!predicate.apply(Intrinsics.<KType>empty())) { return predicate; } } final KType[] keys = Intrinsics.<KType[]>cast(this.keys); // Iterate in reverse for side-stepping the longest conflict chain // in another hash, in case apply() is actually used to fill another hash container. for (int i = keys.length - 1; i >= 0; i--) { KType existing; if (!Intrinsics.<KType>isEmpty(existing = keys[i])) { if (!predicate.apply(existing)) { break; } } } return predicate; }
/** {@inheritDoc} */ @Override public boolean contains(final KType key) { if (Intrinsics.<KType>isEmpty(key)) { return this.allocatedDefaultKey; } final int mask = this.keys.length - 1; final KType[] keys = Intrinsics.<KType[]>cast(this.keys); int slot = REHASH(key) & mask; KType existing; /*! #if ($RH) !*/ final int[] cached = this.hash_cache; int dist = 0; /*! #end !*/ while (!Intrinsics.<KType>isEmpty(existing = keys[slot]) /*! #if ($RH) !*/ && dist <= probe_distance(slot, cached) /*! #end !*/) { if (KEYEQUALS(key, existing)) { return true; } slot = (slot + 1) & mask; /*! #if ($RH) !*/ dist++; /*! #end !*/ } // end while true return false; }
/*! #if ($RH) !*/ private int probe_distance(final int slot, final int[] cached) { final int rh = cached[slot]; /*! #if($DEBUG) !*/ // Check : cached hashed slot is == computed value final int mask = cached.length - 1; assert rh == (REHASH(Intrinsics.<KType>cast(this.keys[slot])) & mask); /*! #end !*/ if (slot < rh) { // wrap around return slot - rh + cached.length; } return slot - rh; }
/** {@inheritDoc} */ @Override public int hashCode() { int h = 0; // allocated default key has hash = 0 final KType[] keys = Intrinsics.<KType[]>cast(this.keys); for (int i = keys.length; --i >= 0; ) { KType existing; if (!Intrinsics.<KType>isEmpty(existing = keys[i])) { h += BitMixer.mix(existing); } } return h; }
/** {@inheritDoc} */
@Override
public KType[] toArray(final KType[] target) {
  int written = 0;

  // The default key, when present, always goes first.
  if (this.allocatedDefaultKey) {
    target[written] = Intrinsics.<KType>empty();
    written++;
  }

  // Then copy every non-empty slot, in buffer order.
  final KType[] buffer = Intrinsics.<KType[]>cast(this.keys);
  for (final KType candidate : buffer) {
    if (Intrinsics.<KType>isEmpty(candidate)) {
      continue;
    }
    target[written] = candidate;
    written++;
  }

  assert written == this.size();

  return target;
}
@Test public void testForEachProcedureWithException() { // Test that the container do not resize if less that the initial size // 1) Choose a map to build /*! #if ($TemplateOptions.isKType("GENERIC", "int", "long", "float", "double")) !*/ final int NB_ELEMENTS = 2000; /*! #elseif ($TemplateOptions.isKType("short", "char")) int NB_ELEMENTS = 1000; #else int NB_ELEMENTS = 126; #end !*/ final KTypeSet<KType> newSet = createNewSetInstance(); newSet.add(this.keyE); // add a increasing number of key for (int i = 0; i < NB_ELEMENTS; i++) { final int KVpair = i; newSet.add(cast(KVpair)); } // List the keys in the reverse-order of the internal buffer, since forEach() is iterating in // reverse also: final KTypeArrayList<KType> keyList = new KTypeArrayList<KType>(); keyList.add(this.keyE); // Test forEach predicate and stop at each key in turn. final KTypeArrayList<KType> keyListTest = new KTypeArrayList<KType>(); for (int k = getKeys(newSet).length - 1; k >= 0; k--) { if (is_allocated(k, Intrinsics.<KType[]>cast(getKeys(newSet)))) { keyList.add(Intrinsics.<KType>cast(getKeys(newSet)[k])); } } final int size = keyList.size(); for (int i = 0; i < size; i++) { final int currentPairIndexSizeToIterate = i + 1; keyListTest.clear(); keyList.clear(); keyList.add(this.keyE); for (int k = getKeys(newSet).length - 1; k >= 0; k--) { if (is_allocated(k, Intrinsics.<KType[]>cast(getKeys(newSet)))) { keyList.add(Intrinsics.<KType>cast(getKeys(newSet)[k])); } } // A) Run forEach(KType) try { newSet.forEach( new KTypeProcedure<KType>() { @Override public void apply(final KType key) { keyListTest.add(key); // when the stopping key/value pair is encountered, add to list and stop iteration if (key == keyList.get(currentPairIndexSizeToIterate - 1)) { // interrupt iteration by an exception throw new RuntimeException("Interrupted treatment by test"); } } }); } catch (final RuntimeException e) { if (!e.getMessage().equals("Interrupted treatment by test")) { throw e; } } finally { // despite the 
exception, the procedure terminates cleanly // check that keyList/keyListTest and valueList/valueListTest are identical for the first // currentPairIndexToIterate + 1 elements Assert.assertEquals("i = " + i, currentPairIndexSizeToIterate, keyListTest.size()); for (int j = 0; j < currentPairIndexSizeToIterate; j++) { TestUtils.assertEquals2("j = " + j, keyList.get(j), keyListTest.get(j)); } } // end finally } // end for each index }
/**
 * Vacates <code>gapSlot</code>: the slot-conflicting keys that follow it are shifted back to
 * fill the gap, the last gap left by the shifts is set to the empty value, and <code>assigned
 * </code> is decremented.
 *
 * @param gapSlot index of the slot to vacate; reused internally as the current gap position
 */
private void shiftConflictingKeys(int gapSlot) {
  final int mask = this.keys.length - 1;
  final KType[] keys = Intrinsics.<KType[]>cast(this.keys);

  /*! #if ($RH) !*/
  final int[] cached = this.hash_cache;
  /*! #else
  final int perturb = this.perturbation;
  #end !*/

  // Perform shifts of conflicting keys to fill in the gap.
  // 'distance' counts the slots walked since the current gap.
  int distance = 0;

  while (true) {
    final int slot = (gapSlot + (++distance)) & mask;
    final KType existing = keys[slot];

    // An empty slot ends the run of keys that may need shifting.
    if (Intrinsics.<KType>isEmpty(existing)) {
      break;
    }

    /*! #if ($RH) !*/
    // use the cached value, no need to recompute
    final int idealSlotModMask = cached[slot];
    /*! #if($DEBUG) !*/
    // Check invariants
    assert idealSlotModMask == (REHASH(existing) & mask);
    /*! #end !*/
    /*! #else
    final int idealSlotModMask = REHASH2(existing, perturb) & mask;
    #end !*/

    // original HPPC code: shift = (slot - idealSlot) & mask;
    // equivalent to shift = (slot & mask - idealSlot & mask) & mask;
    // since slot and idealSlotModMask are already folded, we have :
    final int shift = (slot - idealSlotModMask) & mask;

    if (shift >= distance) {
      // Entry at this position was originally at or before the gap slot.
      // Move the conflict-shifted entry to the gap's position and repeat the procedure
      // for any entries to the right of the current position, treating it
      // as the new gap.
      keys[gapSlot] = existing;

      /*! #if ($RH) !*/
      // the cached slot value travels with its key
      cached[gapSlot] = idealSlotModMask;
      /*! #if($DEBUG) !*/
      assert cached[gapSlot] == (REHASH(existing) & mask);
      /*! #end !*/
      /*! #end !*/

      gapSlot = slot;
      distance = 0;
    }
  } // end while

  // Mark the last found gap slot without a conflict as empty.
  keys[gapSlot] = Intrinsics.<KType>empty();

  this.assigned--;
}
/**
 * Expands the internal storage: new buffers are allocated, <code>pendingKey</code> is stored at
 * <code>freeSlot</code> of the old key buffer, and every non-empty key of the old buffer is then
 * re-inserted into the new one.
 *
 * @param pendingKey the key being added when the resize was triggered; must not be the empty key
 * @param freeSlot the slot of the old key buffer that receives <code>pendingKey</code>
 */
private void expandAndAdd(final KType pendingKey, final int freeSlot) {
  assert this.assigned == this.resizeAt;

  // default sentinel value is never in the keys[] array, so never trigger reallocs
  assert (!Intrinsics.<KType>isEmpty(pendingKey));

  // Try to allocate new buffers first. If we OOM, it'll be now without
  // leaving the data structure in an inconsistent state.
  final KType[] oldKeys = Intrinsics.<KType[]>cast(this.keys);

  allocateBuffers(
      HashContainers.nextBufferSize(this.keys.length, this.assigned, this.loadFactor));

  // We have succeeded at allocating new data so insert the pending key at
  // the free slot in the old array before rehashing.
  this.assigned++;

  oldKeys[freeSlot] = pendingKey;

  // Variables for adding
  final int mask = this.keys.length - 1;

  KType key = Intrinsics.<KType>empty();
  // adding phase
  int slot = -1;

  final KType[] keys = Intrinsics.<KType[]>cast(this.keys);

  /*! #if ($RH) !*/
  final int[] cached = this.hash_cache;
  /*! #end !*/

  /*! #if ($RH) !*/
  KType tmpKey = Intrinsics.<KType>empty();
  int tmpAllocated = -1;
  int initial_slot = -1;
  int dist = -1;
  int existing_distance = -1;
  /*! #end !*/

  // iterate the old array to add its keys in the newly allocated buffer
  // It is important to iterate backwards to minimize the conflict chain length !
  final int perturb = this.perturbation;

  for (int i = oldKeys.length; --i >= 0; ) {
    // only consider non-empty slots, of course
    if (!Intrinsics.<KType>isEmpty(key = oldKeys[i])) {
      slot = REHASH2(key, perturb) & mask;

      /*! #if ($RH) !*/
      initial_slot = slot;
      dist = 0;
      /*! #end !*/

      // similar to add(), except all inserted keys are known to be unique.
      while (is_allocated(slot, keys)) {
        /*! #if ($RH) !*/
        // re-shuffle keys to minimize variance
        existing_distance = probe_distance(slot, cached);

        if (dist > existing_distance) {
          // swap current (key, initial_slot) with the content of slot; the displaced key
          // becomes the one to place
          tmpKey = keys[slot];
          keys[slot] = key;
          key = tmpKey;

          tmpAllocated = cached[slot];
          cached[slot] = initial_slot;
          initial_slot = tmpAllocated;

          /*! #if($DEBUG) !*/
          // Check invariants
          assert cached[slot] == (REHASH(keys[slot]) & mask);
          assert initial_slot == (REHASH(key) & mask);
          /*! #end !*/

          dist = existing_distance;
        } // endif
        /*! #end !*/

        slot = (slot + 1) & mask;

        /*! #if ($RH) !*/
        dist++;
        /*! #end !*/
      } // end while

      // place it at that position
      /*! #if ($RH) !*/
      cached[slot] = initial_slot;
      /*! #end !*/

      keys[slot] = key;

      /*! #if ($RH) !*/
      /*! #if($DEBUG) !*/
      // Check invariants
      assert cached[slot] == (REHASH(keys[slot]) & mask);
      /*! #end !*/
      /*! #end !*/
    }
  }
}
/** {@inheritDoc} */
@Override
public boolean add(KType key) {
  // The empty (default) key is never stored in the buffer: it is tracked by a flag only.
  if (Intrinsics.<KType>isEmpty(key)) {
    if (this.allocatedDefaultKey) {
      return false;
    }

    this.allocatedDefaultKey = true;

    return true;
  }

  final int mask = this.keys.length - 1;

  final KType[] keys = Intrinsics.<KType[]>cast(this.keys);

  int slot = REHASH(key) & mask;
  KType existing;

  /*! #if ($RH) !*/
  final int[] cached = this.hash_cache;
  KType tmpKey;
  int tmpAllocated;
  int initial_slot = slot;
  int dist = 0;
  int existing_distance = 0;
  /*! #if($DEBUG) !*/
  final KType originalKey = key;
  /*! #end !*/
  /*! #end !*/

  // Probe forward from the hashed slot until an empty slot is found.
  while (!Intrinsics.<KType>isEmpty(existing = keys[slot])) {
    /*! #if ($RH) !*/
    existing_distance = probe_distance(slot, cached);

    // When first entering the while loop, then key == original key to search.
    // So either:
    // 1) key is immediately found and the routine bails out,
    // or
    // 2) if the Robin-hood criteria of distance is not met, we search the next slot (usual
    // linear probing),
    // or
    // 3) else the criteria of distance is met, then (key) is swapped with the one in
    // slot position, which becomes the new (key) to consider. This is OK because keys are
    // swapped only if dist > existing_distance,
    // i.e. only if the key to add is NOT in the set, see contains(). So we steal from the rich
    // (a previously entered key, favored because it was inserted
    // in a less crowded array) to give to the poor, the now inserted key. Then, we start
    // searching again in the next slot.

    /*! #if($DEBUG) !*/
    // once the original key has been swapped by the Robin-hood process, the key being carried
    // must never be equal to a key already stored.
    if (!KEYEQUALS(key, originalKey)) {
      assert !KEYEQUALS(key, existing);
    }
    /*! #end !*/
    /*! #end !*/

    /*! #if($RH) !*/
    // Robin-hood shortcut: if key exists, it can only be found in dist <= existing_distance
    // range.
    // indeed we should expect to never see an existing element with a shorter probe count
    // (existing_distance)
    // than our current count (dist): if that had happened, there would have been a swap during
    // insertion, see below.
    // also see contains() and remove() for the same trick.
    /*! #end !*/
    if (
    /*! #if ($RH) !*/
    dist <= existing_distance &&
    /*! #end !*/
    KEYEQUALS(key, existing)) {
      return false;
    }

    /*! #if ($RH) !*/
    // re-shuffle keys to minimize variance
    if (dist > existing_distance) {
      // we actually enter here only if the key to add is NOT in the set.

      // swap current (key, initial_slot) with the content of slot; the displaced key becomes
      // the one to place
      tmpKey = keys[slot];
      keys[slot] = key;
      key = tmpKey;

      tmpAllocated = cached[slot];
      cached[slot] = initial_slot;
      initial_slot = tmpAllocated;

      /*! #if($DEBUG) !*/
      // Check invariants
      assert cached[slot] == (REHASH(keys[slot]) & mask);
      assert initial_slot == (REHASH(key) & mask);
      /*! #end !*/

      dist = existing_distance;
    }
    /*! #end !*/

    slot = (slot + 1) & mask;

    /*! #if ($RH) !*/
    dist++;
    /*! #end !*/
  }

  // Check if we need to grow. If so, reallocate new data,
  // fill in the last element and rehash.
  if (this.assigned == this.resizeAt) {
    expandAndAdd(key, slot);
  } else {
    this.assigned++;

    /*! #if ($RH) !*/
    cached[slot] = initial_slot;
    /*! #end !*/

    keys[slot] = key;

    /*! #if ($RH) !*/
    /*! #if($DEBUG) !*/
    // Check invariants
    assert cached[slot] == (REHASH(keys[slot]) & mask);
    /*! #end !*/
    /*! #end !*/
  }

  return true;
}