Пример #1
0
  /**
   * Builds a lookup from randomly generated numeric terms with random weights and checks that
   * every result returned for a non-empty proper prefix of an indexed term starts with that
   * prefix.
   */
  public void testRandom() throws Exception {
    List<TermFreq> freqs = new ArrayList<TermFreq>();
    Random rnd = random();
    // Draw the input size once. Calling rnd.nextInt() inside the loop condition would re-roll
    // the bound on every iteration instead of picking a single size in [2500, 5000).
    final int termCount = 2500 + rnd.nextInt(2500);
    for (int i = 0; i < termCount; i++) {
      int weight = rnd.nextInt(100);
      freqs.add(new TermFreq("" + rnd.nextLong(), weight));
    }

    FSTCompletionLookup lookup = new FSTCompletionLookup();
    lookup.build(new TermFreqArrayIterator(freqs));

    for (TermFreq tf : freqs) {
      final String term = tf.term.utf8ToString();
      // Prefixes of length 1 .. term.length() - 1 (the full term itself is not queried).
      for (int i = 1; i < term.length(); i++) {
        String prefix = term.substring(0, i);
        for (LookupResult lr : lookup.lookup(stringToCharSequence(prefix), true, 10)) {
          String key = lr.key.toString();
          assertTrue(
              "Result '" + key + "' does not start with prefix '" + prefix + "'",
              key.startsWith(prefix));
        }
      }
    }
  }
Пример #2
0
  /**
   * Builds a lookup from the input returned by {@code LookupBenchmarkTest.readTop50KWiki()} and
   * checks that every input term can be fetched with {@code get} and is the first result of a
   * single-result lookup on itself. Finally checks the first two of five results for the
   * prefix "wit".
   */
  public void testMultilingualInput() throws Exception {
    List<TermFreq> input = LookupBenchmarkTest.readTop50KWiki();

    FSTCompletionLookup lookup = new FSTCompletionLookup();
    lookup.build(new TermFreqArrayIterator(input));
    for (TermFreq tf : input) {
      // Decode once so that failure messages show the term as text.
      final String term = tf.term.utf8ToString();
      assertNotNull(
          "Not found: " + term,
          lookup.get(_TestUtil.bytesToCharSequence(tf.term, random())));

      List<LookupResult> top =
          lookup.lookup(_TestUtil.bytesToCharSequence(tf.term, random()), true, 1);
      // Fail with a readable assertion instead of an IndexOutOfBoundsException from get(0).
      assertTrue("No lookup results for: " + term, !top.isEmpty());
      assertEquals(term, top.get(0).key.toString());
    }

    List<LookupResult> result = lookup.lookup(stringToCharSequence("wit"), true, 5);
    assertEquals(5, result.size());
    // assertEquals reports the actual key on failure, unlike assertTrue(a.equals(b)).
    assertEquals("wit", result.get(0).key.toString()); // exact match.
    assertEquals("with", result.get(1).key.toString()); // highest count.
  }
Пример #3
0
  /**
   * Feeds 5000 random simple strings, all with the same weight of -1, into a lookup created
   * with {@code new FSTCompletionLookup(10, true)} and verifies that {@code get} yields the
   * same numeric value for every key.
   */
  public void testLargeInputConstantWeights() throws Exception {
    FSTCompletionLookup lookup = new FSTCompletionLookup(10, true);

    Random rnd = random();
    List<TermFreq> entries = new ArrayList<TermFreq>();
    int remaining = 5000;
    while (remaining > 0) {
      entries.add(new TermFreq(_TestUtil.randomSimpleString(rnd), -1));
      remaining--;
    }

    lookup.build(new TermFreqArrayIterator(entries));

    // Every weight is identical, so whatever value comes back for one key must come back
    // for all of them.
    Long expected = null;
    for (TermFreq entry : entries) {
      Number bucket = (Number) lookup.get(_TestUtil.bytesToCharSequence(entry.term, random()));
      Long actual = bucket.longValue();
      if (expected != null) {
        assertEquals(expected, actual);
      }
      expected = actual;
    }
  }