示例#1
0
  /**
   * [core#191]: adds {@code COUNT} distinct one-character names straight into a
   * char-based and then a byte-based symbol table (both created with seed 1), and
   * pins the size, bucket count, collision count and longest collision chain
   * reported by each table.
   *
   * @throws IOException declared by the signature; the by-name UTF-8 encoding
   *     lookup used for the byte-based pass can throw a subclass of it
   */
  @SuppressWarnings("deprecation")
  public void testShortNameCollisionsDirect() throws IOException {
    final int COUNT = 600;

    // Pass 1: names are handed over as char arrays, with an explicitly computed hash
    CharsToNameCanonicalizer charTable = CharsToNameCanonicalizer.createRoot(1);
    int code = 0;
    while (code < COUNT) {
      String name = String.valueOf((char) code);
      char[] nameChars = name.toCharArray();
      int hash = charTable.calcHash(name);
      charTable.findSymbol(nameChars, 0, nameChars.length, hash);
      code++;
    }
    assertEquals(COUNT, charTable.size());
    assertEquals(1024, charTable.bucketCount());

    assertEquals(16, charTable.collisionCount());
    assertEquals(1, charTable.maxCollisionLength());

    // Pass 2: the same names, UTF-8 encoded and packed into quads, go into a child
    // of a byte-based root table
    BytesToNameCanonicalizer byteRoot = BytesToNameCanonicalizer.createRoot(1);
    BytesToNameCanonicalizer byteTable =
        byteRoot.makeChild(JsonFactory.Feature.collectDefaults());
    for (int unit = 0; unit < COUNT; unit++) {
      String name = String.valueOf((char) unit);
      byte[] encoded = name.getBytes("UTF-8");
      int[] quads = calcQuads(encoded);
      byteTable.addName(name, quads, quads.length);
    }
    assertEquals(COUNT, byteTable.size());
    assertEquals(1024, byteTable.bucketCount());

    assertEquals(209, byteTable.collisionCount());
    assertEquals(1, byteTable.maxCollisionLength());
  }
示例#2
0
  /**
   * [core#191]: fills a char-based symbol table (seed 1) with {@code COUNT}
   * six-character names, each made of a literal backslash, the letter 'u' and the
   * loop index as four lower-case hex digits, then pins the table statistics.
   *
   * @throws IOException declared by the signature
   */
  public void testShortQuotedDirectChars() throws IOException {
    final int COUNT = 400;
    CharsToNameCanonicalizer table = CharsToNameCanonicalizer.createRoot(1);

    int index = 0;
    while (index < COUNT) {
      // The names are plain text that merely looks like an escape sequence
      String quoted = String.format("\\u%04x", index);
      char[] buffer = quoted.toCharArray();
      int hash = table.calcHash(quoted);
      table.findSymbol(buffer, 0, buffer.length, hash);
      index++;
    }

    assertEquals(COUNT, table.size());
    assertEquals(1024, table.bucketCount());

    assertEquals(50, table.collisionCount());
    assertEquals(2, table.maxCollisionLength());
  }
示例#3
0
  /**
   * [core#187]: straight decimal numbers used as names produced an unexpectedly
   * high number of collisions. Adds the 30000 names "10000" through "39999" to a
   * char-based table (seed 1) and pins the resulting statistics.
   *
   * @throws IOException declared by the signature
   */
  public void testCollisionsWithChars187() throws IOException {
    final int COUNT = 30000;
    final int FIRST = 10000;
    CharsToNameCanonicalizer table = CharsToNameCanonicalizer.createRoot(1);

    // Iterate over the numeric values themselves; same names, same order
    for (int number = FIRST; number < FIRST + COUNT; number++) {
      String digits = String.valueOf(number);
      char[] buffer = digits.toCharArray();
      int hash = table.calcHash(digits);
      table.findSymbol(buffer, 0, buffer.length, hash);
    }

    assertEquals(COUNT, table.size());
    assertEquals(65536, table.bucketCount());

    // The collision count is on the high side, but accepted as-is
    assertEquals(7127, table.collisionCount());
    // ... and the same goes for the longest collision chain
    assertEquals(4, table.maxCollisionLength());
  }
示例#4
0
  /**
   * [Issue#145]: simulates 100 "documents", each adding 250 unique names to a
   * fresh child of one shared root table and then releasing the child. After
   * every release the root is expected to report the running total of names
   * added, with that expectation dropping back to zero as soon as the total
   * would exceed {@code CharsToNameCanonicalizer.MAX_ENTRIES_FOR_REUSE}.
   *
   * @throws IOException declared by the signature
   */
  public void testThousandsOfSymbolsWithChars() throws IOException {
    final int SEED = 33333;
    final int DOC_COUNT = 100;
    final int NAMES_PER_DOC = 250;

    CharsToNameCanonicalizer root = CharsToNameCanonicalizer.createRoot(SEED);
    int expectedRootSize = 0;

    for (int doc = 0; doc < DOC_COUNT; doc++) {
      CharsToNameCanonicalizer child = root.makeChild(JsonFactory.Feature.collectDefaults());
      String prefix = "f_" + doc + "_";

      int field = 0;
      while (field < NAMES_PER_DOC) {
        String name = prefix + field;
        char[] buffer = name.toCharArray();
        int hash = child.calcHash(name);
        assertNotNull(child.findSymbol(buffer, 0, buffer.length, hash));
        field++;
      }
      child.release();

      // Expected root size grows by one document's worth, or resets past the limit
      int grown = expectedRootSize + NAMES_PER_DOC;
      expectedRootSize = (grown > CharsToNameCanonicalizer.MAX_ENTRIES_FOR_REUSE) ? 0 : grown;
      assertEquals(expectedRootSize, root.size());
    }
  }
示例#5
0
  /**
   * Verifies that hashing stays stable with respect to collisions, using
   * synthetically generated field names fed in as characters. A fixed seed is
   * passed so the pinned numbers stay consistent.
   */
  public void testSyntheticWithChars() {
    final int COUNT = 12000;
    // Fixed seed keeps the collision statistics reproducible
    CharsToNameCanonicalizer table = CharsToNameCanonicalizer.createRoot(1);

    int index = 0;
    while (index < COUNT) {
      String fieldName = fieldNameFor(index);
      char[] buffer = fieldName.toCharArray();
      int hash = table.calcHash(fieldName);
      table.findSymbol(buffer, 0, buffer.length, hash);
      index++;
    }

    assertEquals(16384, table.bucketCount());
    assertEquals(COUNT, table.size());

    // For debugging, the two statistics checked below can be printed from
    // table.collisionCount() and table.maxCollisionLength().

    // Original author's note: the collision count is uncomfortably high; 31 gave
    // 3567 and 33 gave 2747 (presumably hash multipliers; confirm against the
    // hash implementation), at least before shuffling was added. Shuffling
    // helps quite a lot, hence the figure pinned here.
    assertEquals(3431, table.collisionCount());

    assertEquals(6, table.maxCollisionLength());
  }