/** * Finds and returns name matching the specified symbol, if such name already exists in the table; * or if not, creates name object, adds to the table, and returns it. * * <p>Note: this is the general purpose method that can be called for names of any length. * However, if name is less than 9 bytes long, it is preferable to call the version optimized for * short names. * * @param quads Array of int32s, each of which contain 4 bytes of encoded name * @param qlen Number of int32s, starting from index 0, in quads parameter * @return PName matching the symbol passed (or constructed for it) */ public ByteBasedPName findSymbol(int hash, int[] quads, int qlen) { if (qlen < 3) { // another sanity check return findSymbol(hash, quads[0], (qlen < 2) ? 0 : quads[1]); } // (for rest of comments regarding logic, see method above) int ix = (hash & mMainHashMask); int val = mMainHash[ix]; if ((((val >> 8) ^ hash) << 8) == 0) { ByteBasedPName pname = mMainNames[ix]; if (pname == null) { // main slot empty; no collision list then either return null; } if (pname.equals(quads, qlen)) { // should be match, let's verify return pname; } } else if (val == 0) { // empty slot? no match return null; } val &= 0xFF; if (val > 0) { // 0 means 'empty' val -= 1; // to convert from 1-based to 0... Bucket bucket = mCollList[val]; if (bucket != null) { return bucket.find(hash, quads, qlen); } } return null; }
/** * Finds and returns name matching the specified symbol, if such name already exists in the table; * or if not, creates name object, adds to the table, and returns it. * * <p>Note: separate methods to optimize common case of relatively short element/attribute names * (8 or less ascii characters) * * @param firstQuad int32 containing first 4 bytes of the pname; if the whole name less than 4 * bytes, padded with zero bytes in front (zero MSBs, ie. right aligned) * @param secondQuad int32 containing bytes 5 through 8 of the pname; if less than 8 bytes, padded * with up to 4 zero bytes in front (zero MSBs, ie. right aligned) * @return PName matching the symbol passed (or constructed for it) */ public ByteBasedPName findSymbol(int hash, int firstQuad, int secondQuad) { int ix = (hash & mMainHashMask); int val = mMainHash[ix]; /* High 24 bits of the value are low 24 bits of hash (low 8 bits * are bucket index)... match? */ if ((((val >> 8) ^ hash) << 8) == 0) { // match // Ok, but do we have an actual match? ByteBasedPName pname = mMainNames[ix]; if (pname == null) { // main slot empty; can't find return null; } if (pname.equals(firstQuad, secondQuad)) { return pname; } } else if (val == 0) { // empty slot? no match return null; } // Maybe a spill-over? val &= 0xFF; if (val > 0) { // 0 means 'empty' val -= 1; // to convert from 1-based to 0... Bucket bucket = mCollList[val]; if (bucket != null) { return bucket.find(hash, firstQuad, secondQuad); } } // Nope, no match whatsoever return null; }
/**
 * Searches this bucket entry and every entry linked after it for a name that has the given
 * hash code and matches the given quads.
 *
 * @param hash Hash code the name must have
 * @param quads Array of int32s holding the encoded name
 * @param qlen Number of int32s to use from {@code quads}
 * @return The first matching name in chain order, or {@code null} if none matches
 */
public ByteBasedPName find(int hash, int[] quads, int qlen) {
    // Start with this entry's own name, then walk the linked entries
    ByteBasedPName candidate = mName;
    Bucket following = mNext;
    while (true) {
        // Cheap hash comparison first; full comparison only when the hashes agree
        if (candidate.hashCode() == hash && candidate.equals(quads, qlen)) {
            return candidate;
        }
        if (following == null) {
            return null;
        }
        candidate = following.mName;
        following = following.mNext;
    }
}
/**
 * Searches this bucket entry and every entry linked after it for a name that has the given
 * hash code and matches the given pair of quads.
 *
 * @param hash Hash code the name must have
 * @param firstQuad int32 holding the first quad of the encoded name
 * @param secondQuad int32 holding the second quad of the encoded name
 * @return The first matching name in chain order, or {@code null} if none matches
 */
public ByteBasedPName find(int hash, int firstQuad, int secondQuad) {
    // Start with this entry's own name, then walk the linked entries
    ByteBasedPName candidate = mName;
    Bucket following = mNext;
    while (true) {
        // Cheap hash comparison first; full comparison only when the hashes agree
        if (candidate.hashCode() == hash && candidate.equals(firstQuad, secondQuad)) {
            return candidate;
        }
        if (following == null) {
            return null;
        }
        candidate = following.mName;
        following = following.mNext;
    }
}
private void rehash() { /* System.err.println("DEBUG: Rehashing!!!"); System.err.println("DEBUG: before rehash, "+toString()); System.err.println("-> ["); System.err.println(toDebugString()); System.err.println("DEBUG: ]"); */ mNeedRehash = false; // Note: since we'll make copies, no need to unshare, can just mark as such: mMainNamesShared = false; /* And then we can first deal with the main hash area. Since we * are expanding linearly (double up), we know there'll be no * collisions during this phase. */ int symbolsSeen = 0; // let's do a sanity check int[] oldMainHash = mMainHash; int len = oldMainHash.length; mMainHash = new int[len + len]; mMainHashMask = (len + len - 1); ByteBasedPName[] oldNames = mMainNames; mMainNames = new ByteBasedPName[len + len]; for (int i = 0; i < len; ++i) { ByteBasedPName symbol = oldNames[i]; if (symbol != null) { ++symbolsSeen; int hash = symbol.hashCode(); int ix = (hash & mMainHashMask); mMainNames[ix] = symbol; mMainHash[ix] = hash << 8; // will clear spill index } } /* And then the spill area. This may cause collisions, although * not necessarily as many as there were earlier. Let's allocate * same amount of space, however */ int oldEnd = mCollEnd; if (oldEnd == 0) { // no prior collisions... return; } mCollCount = 0; mCollEnd = 0; mCollListShared = false; Bucket[] oldBuckets = mCollList; mCollList = new Bucket[oldBuckets.length]; for (int i = 0; i < oldEnd; ++i) { for (Bucket curr = oldBuckets[i]; curr != null; curr = curr.mNext) { ++symbolsSeen; ByteBasedPName symbol = curr.mName; int hash = symbol.hashCode(); int ix = (hash & mMainHashMask); int val = mMainHash[ix]; if (mMainNames[ix] == null) { // no primary entry? mMainHash[ix] = (hash << 8); mMainNames[ix] = symbol; } else { // nope, it's a collision, need to spill over ++mCollCount; int bucket = val & 0xFF; if (bucket == 0) { // first spill over? if (mCollEnd <= LAST_VALID_BUCKET) { // yup, still unshared bucket bucket = mCollEnd; ++mCollEnd; // need to expand? 
if (bucket >= mCollList.length) { expandCollision(); } } else { // nope, have to share... let's find shortest? bucket = findBestBucket(); } // Need to mark the entry... and the spill index is 1-based mMainHash[ix] = (val & ~0xFF) | (bucket + 1); } else { --bucket; // 1-based index in value } // And then just need to link the new bucket entry in mCollList[bucket] = new Bucket(symbol, mCollList[bucket]); } } // for (... buckets in the chain ...) } // for (... list of bucket heads ... ) if (symbolsSeen != mCount) { // sanity check throw new Error( "Internal error: count after rehash " + symbolsSeen + "; should be " + mCount); } /* System.err.println("DEBUG: after rehash == ["); System.err.println(toDebugString()); System.err.println("DEBUG: ]"); */ }