/**
 * Creates an empty map backed by two files obtained from {@code TempDir.createTempFile}:
 * one holding the hash buckets and one holding the string data.
 *
 * <p>If any initialisation step fails, whatever was already opened is released again via
 * {@link #delete()} before the exception propagates, because the caller never receives a
 * reference on which it could call {@code delete()} itself.
 *
 * @throws IOException if a backing file cannot be created, opened or initialised.
 */
public StringToLongMap() throws IOException {
  boolean initialized = false;
  try {
    hashFileName = TempDir.createTempFile("strToLong", ".hash");
    hashFile = IntFile.open(hashFileName);

    stringFileName = TempDir.createTempFile("strToLong", ".str");
    stringFile = new RandomAccessFile(stringFileName, "rw");

    nrBuckets = findPrime(MIN_NR_BUCKETS);
    clear();

    initialized = true;
  } finally {
    if (!initialized) {
      // Construction failed part-way: close and remove the files opened so far.
      // delete() skips any file handle that is still null and logs (rather than throws)
      // its own I/O errors, so the original exception is the one the caller sees.
      delete();
    }
  }
}
/**
 * Closes and deletes the files.
 *
 * <p>I/O errors are logged as warnings instead of being propagated. Each file handle field
 * is set to {@code null} once it has been processed, so calling this method again does
 * nothing for a file that was already handled.
 */
public void delete() {
  if (hashFile != null) {
    try {
      hashFile.delete();
    } catch (IOException ex) {
      logger.warn("An I/O error occurred while deleting: " + hashFileName, ex);
    } finally {
      hashFile = null;
    }
  }

  if (stringFile != null) {
    try {
      try {
        stringFile.close();
      } finally {
        // Attempt the delete even when close() threw; otherwise a close failure would
        // leave the string file behind on disk.
        // NOTE(review): if stringFileName is a java.io.File, delete() signals failure
        // through its boolean result, which is not inspected here - confirm and log if so.
        stringFileName.delete();
      }
    } catch (IOException ex) {
      logger.warn("An I/O error occurred while deleting: " + stringFileName, ex);
    } finally {
      stringFile = null;
    }
  }
}
/**
 * Reorganizes all the key/value pairs in the hash table to accommodate the new size.
 *
 * <p>The move is done in place in the hash file. As used here, a bucket spans four int
 * slots starting at int index {@code bucket * 4}: slot 0 holds the hash code, slot 1 the
 * string reference, and the remaining two are accessed as one long at long index
 * {@code bucket * 2 + 1} holding the value. A value of 0 marks the bucket as empty.
 *
 * <p>Does nothing if the new size equals the current size.
 *
 * @param newNrBuckets The new size.
 * @throws IOException if reading from or writing to the hash file fails.
 */
private void rehash(int newNrBuckets) throws IOException {
  if (newNrBuckets == nrBuckets) return;

  int oldNrBuckets = nrBuckets;
  // Switch to the new size first; the modulo below uses it, and calcBucket presumably
  // does too (calcBucket is not visible here - confirm).
  nrBuckets = newNrBuckets;

  // Iterate over the buckets, moving items to their correct locations.
  // The loop has no upper bound of its own; it ends at the first empty bucket found at
  // or beyond the old size (see the else branch).
  for (int bucket = 0; ; ++bucket) {
    long value;
    if ((value = getValue(bucket)) != 0) {
      // This bucket is in use.
      long offset = (long) bucket * 4;
      int hashCode = hashFile.getInt(offset);
      int destBucket = calcBucket(hashCode);

      // Work out where the item belongs. Abort if it ends up back here.
      while (destBucket != bucket) {
        if (getValue(destBucket) == 0) {
          // Found an empty bucket. Move the item here: hash code, string reference
          // (copied as a raw int), then the value.
          long destOffset = (long) destBucket * 4;
          hashFile.putInt(destOffset, hashCode);
          hashFile.putInt(destOffset + 1, hashFile.getInt(offset + 1));
          hashFile.putLong((long) destBucket * 2 + 1, value);

          // Clear the old bucket. Only the value is zeroed; that alone marks it empty.
          hashFile.putLong((long) bucket * 2 + 1, 0);
          break;
        }

        // Try the next bucket.
        destBucket = (destBucket + 1) % nrBuckets;
      }
    } else {
      // An empty bucket.
      // Stop if we have processed all of the old buckets.
      // NOTE: We potentially process more than oldNrBuckets buckets
      // so that we pick up any items that clashed and were bounced
      // past the end of this range 0..(oldNrBuckets-1).
      if (bucket >= oldNrBuckets) break;
    }
  }
}
/**
 * Reads the value stored for a bucket from the hash file.
 *
 * <p>The value is read as a long at long index {@code bucket * 2 + 1}. Callers in this
 * class treat a result of 0 as "bucket is empty".
 *
 * @param bucket the bucket number.
 * @return the long stored for that bucket.
 * @throws IOException if the hash file cannot be read.
 */
private long getValue(int bucket) throws IOException {
  final long valueIndex = 2L * bucket + 1;
  return hashFile.getLong(valueIndex);
}
/** * Returns the long associated with the String or 0 if there is no long associated with the * string. If newValue is not equal to 0 then this becomes the new value associated with the * string. * * @param str the String. * @param newValue if not equal to 0, the new value to be associated with the string. * @return the long currently associated with the String or 0 if there is no long associated with * the string. * @throws IOException if an I/O error occurs or the hash table is full. */ public long getAndPut(String str, long newValue) throws IOException { if (str == null) { throw new IllegalArgumentException("str is null"); } if (newValue == 0) { Long l = cache.get(str); if (l != null) { return l.longValue(); } } int hashCode = str.hashCode(); int startBucket = calcBucket(hashCode); int bucket = startBucket; long value; // Try buckets until we find the correct string or an empty bucket. while ((value = getValue(bucket)) != 0) { // Check the hash code. long offset = (long) bucket * 4; if (hashCode == hashFile.getInt(offset)) { // Fetch the string and compare with the target string. String bucketStr = readString(hashFile.getUInt(offset + 1)); if (str.equals(bucketStr)) { if (newValue != 0) { hashFile.putLong((long) bucket * 2 + 1, newValue); // Add the new value to the cache. cache.put(str, newValue); } else { // Add the value to the cache. cache.put(str, value); } return value; } } // Try the next bucket. bucket = (bucket + 1) % nrBuckets; if (bucket == startBucket) { throw new IOException("Hash table full"); } } if (newValue != 0) { // Add the new value to the cache. cache.put(str, newValue); // Add a new hash bucket. long bucketOffset = (long) bucket * 4; hashFile.putInt(bucketOffset, hashCode); hashFile.putUInt(bucketOffset + 1, writeString(str)); hashFile.putLong((long) bucket * 2 + 1, newValue); ++nrUsedBuckets; if (nrUsedBuckets >= (int) (nrBuckets * REHASH_LIMIT)) { rehash(findPrime(nrBuckets)); } } return 0; }
/**
 * Removes all entries from the map.
 *
 * <p>Sets the size of the hash file and the length of the string file to 0, resets the
 * count of used buckets and empties the in-memory cache. The current number of buckets is
 * left unchanged.
 *
 * @throws IOException if resizing either file fails.
 */
public void clear() throws IOException {
  hashFile.setSize(0);
  stringFile.setLength(0);
  // The table is empty again, so the used-bucket count must restart at zero; otherwise
  // the load check in getAndPut would still count the discarded entries and trigger a
  // rehash too early.
  nrUsedBuckets = 0;
  cache.clear();
}