Пример #1
0
  // Since 2.6
  /**
   * Fills 100 successive child symbol tables, each created from the same root
   * via {@code makeChild} and populated with 250 distinct names, then released.
   *
   * <p>For every name the test checks that a lookup directly after
   * {@code addName} returns that same name. After each {@code release()} it
   * checks the root's size against a running total that grows by 250 per
   * document and is expected to drop back to 0 once it exceeds
   * {@code ByteQuadsCanonicalizer.MAX_ENTRIES_FOR_REUSE}. Finally, exact
   * size and per-slot counts of the last child are pinned.
   *
   * @throws IOException declared by the test signature
   */
  public void testThousandsOfSymbolsWithNew() throws IOException {
    final int seed = 33333;
    final int docCount = 100;
    final int namesPerDoc = 250;

    final Charset utf8 = Charset.forName("UTF-8");
    final ByteQuadsCanonicalizer root = ByteQuadsCanonicalizer.createRoot(seed);

    // Size the root is expected to report after the most recent release().
    int expectedRootSize = 0;
    // Child used for the most recent document; inspected after the loop.
    ByteQuadsCanonicalizer lastChild = null;

    for (int docIndex = 0; docIndex < docCount; ++docIndex) {
      final ByteQuadsCanonicalizer child =
          root.makeChild(JsonFactory.Feature.collectDefaults());

      for (int nameIndex = 0; nameIndex < namesPerDoc; ++nameIndex) {
        final String name = "f_" + docIndex + "_" + nameIndex;
        final int[] quads = calcQuads(name.getBytes(utf8));
        final int quadCount = quads.length;

        // Every freshly added name must be retrievable right away.
        child.addName(name, quads, quadCount);
        assertEquals(name, child.findName(quads, quadCount));
      }
      child.release();
      lastChild = child;

      // Running total restarts from zero once it passes the reuse limit.
      final int grown = expectedRootSize + namesPerDoc;
      expectedRootSize =
          (grown > ByteQuadsCanonicalizer.MAX_ENTRIES_FOR_REUSE) ? 0 : grown;
      assertEquals(expectedRootSize, root.size());
    }

    // Deliberately fragile (original note dated 05-Feb-2015): the exact
    // distribution is pinned so that an accidental increase in collision
    // rates is noticed. The three non-zero counts add up to the 6250 total.
    assertEquals(6250, lastChild.size());
    assertEquals(4761, lastChild.primaryCount()); // about 76% of 6250
    assertEquals(1190, lastChild.secondaryCount()); // about 19% of 6250
    assertEquals(299, lastChild.tertiaryCount()); // about 5% of 6250
    assertEquals(0, lastChild.spilloverCount()); // nothing expected in spill-over
  }