コード例 #1
0
  private void assertMatchEquals(List<Completion> res, String... expected) {
    String[] result = new String[res.size()];
    for (int i = 0; i < res.size(); i++) {
      result[i] = res.get(i).toString();
    }

    if (!Arrays.equals(stripScore(expected), stripScore(result))) {
      int colLen = Math.max(maxLen(expected), maxLen(result));

      StringBuilder b = new StringBuilder();
      String format = "%" + colLen + "s  " + "%" + colLen + "s\n";
      b.append(String.format(Locale.ROOT, format, "Expected", "Result"));
      for (int i = 0; i < Math.max(result.length, expected.length); i++) {
        b.append(
            String.format(
                Locale.ROOT,
                format,
                i < expected.length ? expected[i] : "--",
                i < result.length ? result[i] : "--"));
      }

      System.err.println(b.toString());
      fail("Expected different output:\n" + b.toString());
    }
  }
コード例 #2
0
 @Override
 public T get(int index) {
   int idx = distance + index;
   if (idx < 0 || idx >= in.size()) {
     idx -= in.size();
   }
   return in.get(idx);
 }
コード例 #3
0
  public void testThreeByte() throws Exception {
    String key =
        new String(new byte[] {(byte) 0xF0, (byte) 0xA4, (byte) 0xAD, (byte) 0xA2}, "UTF-8");
    FSTCompletionBuilder builder = new FSTCompletionBuilder();
    builder.add(new BytesRef(key), 0);

    FSTCompletion lookup = builder.build();
    List<Completion> result = lookup.lookup(stringToCharSequence(key), 1);
    assertEquals(1, result.size());
  }
コード例 #4
0
  /** Return a rotated view of the given list with the given distance. */
  public static <T> List<T> rotate(final List<T> list, int distance) {
    if (list.isEmpty()) {
      return list;
    }

    int d = distance % list.size();
    if (d < 0) {
      d += list.size();
    }

    if (d == 0) {
      return list;
    }

    return new RotatedList<>(list, d);
  }
コード例 #5
0
  public void testRandom() throws Exception {
    List<TermFreq> freqs = new ArrayList<TermFreq>();
    Random rnd = random();
    for (int i = 0; i < 2500 + rnd.nextInt(2500); i++) {
      int weight = rnd.nextInt(100);
      freqs.add(new TermFreq("" + rnd.nextLong(), weight));
    }

    FSTCompletionLookup lookup = new FSTCompletionLookup();
    lookup.build(new TermFreqArrayIterator(freqs.toArray(new TermFreq[freqs.size()])));

    for (TermFreq tf : freqs) {
      final String term = tf.term.utf8ToString();
      for (int i = 1; i < term.length(); i++) {
        String prefix = term.substring(0, i);
        for (LookupResult lr : lookup.lookup(stringToCharSequence(prefix), true, 10)) {
          assertTrue(lr.key.toString().startsWith(prefix));
        }
      }
    }
  }
コード例 #6
0
  public void testMultilingualInput() throws Exception {
    List<TermFreq> input = LookupBenchmarkTest.readTop50KWiki();

    FSTCompletionLookup lookup = new FSTCompletionLookup();
    lookup.build(new TermFreqArrayIterator(input));
    for (TermFreq tf : input) {
      assertNotNull(
          "Not found: " + tf.term.toString(),
          lookup.get(_TestUtil.bytesToCharSequence(tf.term, random())));
      assertEquals(
          tf.term.utf8ToString(),
          lookup
              .lookup(_TestUtil.bytesToCharSequence(tf.term, random()), true, 1)
              .get(0)
              .key
              .toString());
    }

    List<LookupResult> result = lookup.lookup(stringToCharSequence("wit"), true, 5);
    assertEquals(5, result.size());
    assertTrue(result.get(0).key.toString().equals("wit")); // exact match.
    assertTrue(result.get(1).key.toString().equals("with")); // highest count.
  }
コード例 #7
0
  public void testLargeInputConstantWeights() throws Exception {
    FSTCompletionLookup lookup = new FSTCompletionLookup(10, true);

    Random r = random();
    List<TermFreq> keys = new ArrayList<TermFreq>();
    for (int i = 0; i < 5000; i++) {
      keys.add(new TermFreq(_TestUtil.randomSimpleString(r), -1));
    }

    lookup.build(new TermFreqArrayIterator(keys));

    // All the weights were constant, so all returned buckets must be constant, whatever they
    // are.
    Long previous = null;
    for (TermFreq tf : keys) {
      Long current =
          ((Number) lookup.get(_TestUtil.bytesToCharSequence(tf.term, random()))).longValue();
      if (previous != null) {
        assertEquals(previous, current);
      }
      previous = current;
    }
  }
コード例 #8
0
  public void testRandom() throws Exception {

    int num = atLeast(2);
    for (int iter = 0; iter < num; iter++) {
      if (VERBOSE) {
        System.out.println("TEST: iter=" + iter);
      }

      Directory dir = newDirectory();

      IndexWriter w =
          new IndexWriter(
              dir,
              newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                  .setMergePolicy(NoMergePolicy.COMPOUND_FILES));
      _TestUtil.keepFullyDeletedSegments(w);

      Map<BytesRef, List<Integer>> docs = new HashMap<BytesRef, List<Integer>>();
      Set<Integer> deleted = new HashSet<Integer>();
      List<BytesRef> terms = new ArrayList<BytesRef>();

      int numDocs = _TestUtil.nextInt(random(), 1, 100 * RANDOM_MULTIPLIER);
      Document doc = new Document();
      Field f = newStringField("field", "", Field.Store.NO);
      doc.add(f);
      Field id = newStringField("id", "", Field.Store.NO);
      doc.add(id);

      boolean onlyUniqueTerms = random().nextBoolean();
      if (VERBOSE) {
        System.out.println("TEST: onlyUniqueTerms=" + onlyUniqueTerms + " numDocs=" + numDocs);
      }
      Set<BytesRef> uniqueTerms = new HashSet<BytesRef>();
      for (int i = 0; i < numDocs; i++) {

        if (!onlyUniqueTerms && random().nextBoolean() && terms.size() > 0) {
          // re-use existing term
          BytesRef term = terms.get(random().nextInt(terms.size()));
          docs.get(term).add(i);
          f.setStringValue(term.utf8ToString());
        } else {
          String s = _TestUtil.randomUnicodeString(random(), 10);
          BytesRef term = new BytesRef(s);
          if (!docs.containsKey(term)) {
            docs.put(term, new ArrayList<Integer>());
          }
          docs.get(term).add(i);
          terms.add(term);
          uniqueTerms.add(term);
          f.setStringValue(s);
        }
        id.setStringValue("" + i);
        w.addDocument(doc);
        if (random().nextInt(4) == 1) {
          w.commit();
        }
        if (i > 0 && random().nextInt(20) == 1) {
          int delID = random().nextInt(i);
          deleted.add(delID);
          w.deleteDocuments(new Term("id", "" + delID));
          if (VERBOSE) {
            System.out.println("TEST: delete " + delID);
          }
        }
      }

      if (VERBOSE) {
        List<BytesRef> termsList = new ArrayList<BytesRef>(uniqueTerms);
        Collections.sort(termsList, BytesRef.getUTF8SortedAsUTF16Comparator());
        System.out.println("TEST: terms in UTF16 order:");
        for (BytesRef b : termsList) {
          System.out.println("  " + UnicodeUtil.toHexString(b.utf8ToString()) + " " + b);
          for (int docID : docs.get(b)) {
            if (deleted.contains(docID)) {
              System.out.println("    " + docID + " (deleted)");
            } else {
              System.out.println("    " + docID);
            }
          }
        }
      }

      IndexReader reader = w.getReader();
      w.close();
      if (VERBOSE) {
        System.out.println("TEST: reader=" + reader);
      }

      Bits liveDocs = MultiFields.getLiveDocs(reader);
      for (int delDoc : deleted) {
        assertFalse(liveDocs.get(delDoc));
      }

      for (int i = 0; i < 100; i++) {
        BytesRef term = terms.get(random().nextInt(terms.size()));
        if (VERBOSE) {
          System.out.println(
              "TEST: seek term=" + UnicodeUtil.toHexString(term.utf8ToString()) + " " + term);
        }

        DocsEnum docsEnum = _TestUtil.docs(random(), reader, "field", term, liveDocs, null, 0);
        assertNotNull(docsEnum);

        for (int docID : docs.get(term)) {
          if (!deleted.contains(docID)) {
            assertEquals(docID, docsEnum.nextDoc());
          }
        }
        assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsEnum.nextDoc());
      }

      reader.close();
      dir.close();
    }
  }
コード例 #9
0
 @Override
 public int size() {
   return in.size();
 }
コード例 #10
0
 public RotatedList(List<T> list, int distance) {
   Preconditions.checkArgument(distance >= 0 && distance < list.size());
   Preconditions.checkArgument(list instanceof RandomAccess);
   this.in = list;
   this.distance = distance;
 }