// [core#191] @SuppressWarnings("deprecation") public void testShortNameCollisionsDirect() throws IOException { final int COUNT = 600; // First, char-based { CharsToNameCanonicalizer symbols = CharsToNameCanonicalizer.createRoot(1); for (int i = 0; i < COUNT; ++i) { String id = String.valueOf((char) i); char[] ch = id.toCharArray(); symbols.findSymbol(ch, 0, ch.length, symbols.calcHash(id)); } assertEquals(COUNT, symbols.size()); assertEquals(1024, symbols.bucketCount()); assertEquals(16, symbols.collisionCount()); assertEquals(1, symbols.maxCollisionLength()); } // then byte-based { BytesToNameCanonicalizer symbols = BytesToNameCanonicalizer.createRoot(1).makeChild(JsonFactory.Feature.collectDefaults()); for (int i = 0; i < COUNT; ++i) { String id = String.valueOf((char) i); int[] quads = calcQuads(id.getBytes("UTF-8")); symbols.addName(id, quads, quads.length); } assertEquals(COUNT, symbols.size()); assertEquals(1024, symbols.bucketCount()); assertEquals(209, symbols.collisionCount()); assertEquals(1, symbols.maxCollisionLength()); } }
// [core#191] public void testShortQuotedDirectChars() throws IOException { final int COUNT = 400; CharsToNameCanonicalizer symbols = CharsToNameCanonicalizer.createRoot(1); for (int i = 0; i < COUNT; ++i) { String id = String.format("\\u%04x", i); char[] ch = id.toCharArray(); symbols.findSymbol(ch, 0, ch.length, symbols.calcHash(id)); } assertEquals(COUNT, symbols.size()); assertEquals(1024, symbols.bucketCount()); assertEquals(50, symbols.collisionCount()); assertEquals(2, symbols.maxCollisionLength()); }
// [core#187]: unexpectedly high number of collisions for straight numbers public void testCollisionsWithChars187() throws IOException { CharsToNameCanonicalizer symbols = CharsToNameCanonicalizer.createRoot(1); final int COUNT = 30000; for (int i = 0; i < COUNT; ++i) { String id = String.valueOf(10000 + i); char[] ch = id.toCharArray(); symbols.findSymbol(ch, 0, ch.length, symbols.calcHash(id)); } assertEquals(COUNT, symbols.size()); assertEquals(65536, symbols.bucketCount()); // collision count rather high, but has to do assertEquals(7127, symbols.collisionCount()); // as well as collision counts assertEquals(4, symbols.maxCollisionLength()); }
// Test for verifying stability of hashCode, wrt collisions, using // synthetic field name generation and character-based input public void testSyntheticWithChars() { // pass seed, to keep results consistent: CharsToNameCanonicalizer symbols = CharsToNameCanonicalizer.createRoot(1); final int COUNT = 12000; for (int i = 0; i < COUNT; ++i) { String id = fieldNameFor(i); char[] ch = id.toCharArray(); symbols.findSymbol(ch, 0, ch.length, symbols.calcHash(id)); } assertEquals(16384, symbols.bucketCount()); assertEquals(COUNT, symbols.size()); // System.out.printf("Char stuff: collisions %d, max-coll %d\n", symbols.collisionCount(), // symbols.maxCollisionLength()); // holy guacamoley... there are way too many. 31 gives 3567 (!), 33 gives 2747 // ... at least before shuffling. Shuffling helps quite a lot, so: assertEquals(3431, symbols.collisionCount()); assertEquals(6, symbols.maxCollisionLength()); }