/**
 * Asserts that the completions in {@code res} equal {@code expected} after both sides have been
 * passed through {@code stripScore}.
 *
 * <p>On a mismatch, a two-column "Expected" / "Result" table (built from the unstripped values)
 * is printed to {@code System.err} and the test is failed with the same table in its message.
 *
 * @param res completions to check; each one is rendered with {@code toString()}
 * @param expected the expected string forms, in order
 */
private void assertMatchEquals(List<Completion> res, String... expected) {
  final String[] actual = new String[res.size()];
  int next = 0;
  for (Completion completion : res) {
    actual[next++] = completion.toString();
  }

  if (Arrays.equals(stripScore(expected), stripScore(actual))) {
    return;
  }

  // Both columns are right-aligned to the widest entry on either side.
  final int width = Math.max(maxLen(expected), maxLen(actual));
  final String rowFormat = "%" + width + "s %" + width + "s\n";

  final StringBuilder table = new StringBuilder();
  table.append(String.format(Locale.ROOT, rowFormat, "Expected", "Result"));

  // The shorter side is padded with "--" so every row has two cells.
  final int rows = Math.max(actual.length, expected.length);
  for (int row = 0; row < rows; row++) {
    final String want = row < expected.length ? expected[row] : "--";
    final String got = row < actual.length ? actual[row] : "--";
    table.append(String.format(Locale.ROOT, rowFormat, want, got));
  }

  final String report = table.toString();
  System.err.println(report);
  fail("Expected different output:\n" + report);
}
/**
 * Returns the element at {@code index} of this rotated view, read from the backing list at
 * position {@code distance + index}, wrapped around the end of the list.
 *
 * @param index position in the rotated view, {@code 0 <= index < size()}
 * @return the backing list's element at the rotated position
 * @throws IndexOutOfBoundsException if {@code index} is negative or not less than the backing
 *     list's size
 */
@Override
public T get(int index) {
  final int size = in.size();
  // Reject bad indexes up front. Without this check the wrap-around below would silently map
  // an out-of-range index (e.g. -1 or size) onto some valid element instead of failing.
  if (index < 0 || index >= size) {
    throw new IndexOutOfBoundsException("Index: " + index + ", Size: " + size);
  }
  int idx = distance + index;
  // idx < 0 can only mean the int addition overflowed; subtracting size wraps it back in range.
  if (idx < 0 || idx >= size) {
    idx -= size;
  }
  return in.get(idx);
}
/**
 * Adds a single key consisting of one multi-byte UTF-8 sequence to an FST completion and checks
 * that looking up that same key yields exactly one result.
 */
public void testThreeByte() throws Exception {
  // Despite the method name, 0xF0 0xA4 0xAD 0xA2 is a single four-byte UTF-8 sequence.
  final byte[] utf8 = {(byte) 0xF0, (byte) 0xA4, (byte) 0xAD, (byte) 0xA2};
  final String key = new String(utf8, "UTF-8");

  final FSTCompletionBuilder completionBuilder = new FSTCompletionBuilder();
  completionBuilder.add(new BytesRef(key), 0);
  final FSTCompletion completion = completionBuilder.build();

  final List<Completion> matches = completion.lookup(stringToCharSequence(key), 1);
  assertEquals(1, matches.size());
}
/**
 * Return a rotated view of the given list with the given distance.
 *
 * <p>The distance is first normalized into the range {@code [0, list.size())}; negative
 * distances are therefore accepted. When the list is empty or the normalized distance is zero,
 * the list itself is returned rather than a wrapper.
 *
 * @param list the list to rotate
 * @param distance the rotation distance; any {@code int} value
 * @return {@code list} itself when no rotation is needed, otherwise a new {@code RotatedList}
 */
public static <T> List<T> rotate(final List<T> list, int distance) {
  if (list.isEmpty()) {
    return list;
  }

  final int size = list.size();
  int shift = distance % size;
  // Java's % keeps the sign of the dividend, so lift negative remainders into [0, size).
  if (shift < 0) {
    shift += size;
  }

  if (shift != 0) {
    return new RotatedList<>(list, shift);
  }
  return list;
}
/**
 * Builds an {@code FSTCompletionLookup} from a random number (2500 to 4999) of random numeric
 * terms with random weights, then checks that every suggestion returned for each non-empty
 * proper prefix of each term actually starts with that prefix.
 */
public void testRandom() throws Exception {
  List<TermFreq> freqs = new ArrayList<TermFreq>();
  Random rnd = random();

  // Draw the entry count once. Evaluating "2500 + rnd.nextInt(2500)" in the loop condition
  // re-rolled the bound on every iteration, which skewed the count towards the low end and
  // consumed an extra random value per pass.
  final int termCount = 2500 + rnd.nextInt(2500);
  for (int i = 0; i < termCount; i++) {
    int weight = rnd.nextInt(100);
    freqs.add(new TermFreq("" + rnd.nextLong(), weight));
  }

  FSTCompletionLookup lookup = new FSTCompletionLookup();
  lookup.build(new TermFreqArrayIterator(freqs.toArray(new TermFreq[freqs.size()])));

  for (TermFreq tf : freqs) {
    final String term = tf.term.utf8ToString();
    // Prefix lengths 1 .. length-1: the empty prefix and the full term are not queried.
    for (int i = 1; i < term.length(); i++) {
      String prefix = term.substring(0, i);
      // NOTE(review): the "true, 10" arguments are presumably a popularity flag and a result
      // limit -- confirm against FSTCompletionLookup.lookup.
      for (LookupResult lr : lookup.lookup(stringToCharSequence(prefix), true, 10)) {
        assertTrue(lr.key.toString().startsWith(prefix));
      }
    }
  }
}
public void testMultilingualInput() throws Exception { List<TermFreq> input = LookupBenchmarkTest.readTop50KWiki(); FSTCompletionLookup lookup = new FSTCompletionLookup(); lookup.build(new TermFreqArrayIterator(input)); for (TermFreq tf : input) { assertNotNull( "Not found: " + tf.term.toString(), lookup.get(_TestUtil.bytesToCharSequence(tf.term, random()))); assertEquals( tf.term.utf8ToString(), lookup .lookup(_TestUtil.bytesToCharSequence(tf.term, random()), true, 1) .get(0) .key .toString()); } List<LookupResult> result = lookup.lookup(stringToCharSequence("wit"), true, 5); assertEquals(5, result.size()); assertTrue(result.get(0).key.toString().equals("wit")); // exact match. assertTrue(result.get(1).key.toString().equals("with")); // highest count. }
public void testLargeInputConstantWeights() throws Exception { FSTCompletionLookup lookup = new FSTCompletionLookup(10, true); Random r = random(); List<TermFreq> keys = new ArrayList<TermFreq>(); for (int i = 0; i < 5000; i++) { keys.add(new TermFreq(_TestUtil.randomSimpleString(r), -1)); } lookup.build(new TermFreqArrayIterator(keys)); // All the weights were constant, so all returned buckets must be constant, whatever they // are. Long previous = null; for (TermFreq tf : keys) { Long current = ((Number) lookup.get(_TestUtil.bytesToCharSequence(tf.term, random()))).longValue(); if (previous != null) { assertEquals(previous, current); } previous = current; } }
/**
 * Randomized index test. For each of {@code atLeast(2)} iterations it indexes a random number
 * of documents, each carrying one term in "field" (either a fresh random unicode string or a
 * re-used earlier term) and its loop index in "id", while randomly committing and deleting
 * earlier documents by id. It then opens a reader and checks that (a) every deleted document
 * is reported as not live and (b) for 100 randomly chosen terms the {@code DocsEnum} returns
 * exactly the non-deleted document numbers recorded for that term, in order.
 */
public void testRandom() throws Exception {
  int num = atLeast(2);
  for (int iter = 0; iter < num; iter++) {
    if (VERBOSE) {
      System.out.println("TEST: iter=" + iter);
    }
    Directory dir = newDirectory();
    // NOTE(review): the checks below compare the loop index i with reader doc IDs, so the
    // no-merge policy and keepFullyDeletedSegments are presumably here to keep doc IDs equal
    // to insertion order -- confirm.
    IndexWriter w =
        new IndexWriter(
            dir,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                .setMergePolicy(NoMergePolicy.COMPOUND_FILES));
    _TestUtil.keepFullyDeletedSegments(w);
    // Expected state, maintained alongside the index:
    // term -> indexes of the documents that were given this term
    Map<BytesRef, List<Integer>> docs = new HashMap<BytesRef, List<Integer>>();
    // indexes of documents for which a delete was issued
    Set<Integer> deleted = new HashSet<Integer>();
    // every term created so far; used to pick terms for re-use and for the final seeks
    List<BytesRef> terms = new ArrayList<BytesRef>();
    int numDocs = _TestUtil.nextInt(random(), 1, 100 * RANDOM_MULTIPLIER);
    // A single Document and its two Field instances are re-used for every add; only the
    // field values change.
    Document doc = new Document();
    Field f = newStringField("field", "", Field.Store.NO);
    doc.add(f);
    Field id = newStringField("id", "", Field.Store.NO);
    doc.add(id);
    // When true, the term re-use branch below is never taken.
    boolean onlyUniqueTerms = random().nextBoolean();
    if (VERBOSE) {
      System.out.println("TEST: onlyUniqueTerms=" + onlyUniqueTerms + " numDocs=" + numDocs);
    }
    Set<BytesRef> uniqueTerms = new HashSet<BytesRef>();
    for (int i = 0; i < numDocs; i++) {
      if (!onlyUniqueTerms && random().nextBoolean() && terms.size() > 0) {
        // re-use existing term
        BytesRef term = terms.get(random().nextInt(terms.size()));
        docs.get(term).add(i);
        f.setStringValue(term.utf8ToString());
      } else {
        String s = _TestUtil.randomUnicodeString(random(), 10);
        BytesRef term = new BytesRef(s);
        // The random string may coincide with an earlier term; only create the list once.
        if (!docs.containsKey(term)) {
          docs.put(term, new ArrayList<Integer>());
        }
        docs.get(term).add(i);
        terms.add(term);
        uniqueTerms.add(term);
        f.setStringValue(s);
      }
      id.setStringValue("" + i);
      w.addDocument(doc);
      // Commit on roughly one in four documents.
      if (random().nextInt(4) == 1) {
        w.commit();
      }
      // On roughly one in twenty documents, delete a randomly chosen earlier document.
      if (i > 0 && random().nextInt(20) == 1) {
        int delID = random().nextInt(i);
        deleted.add(delID);
        w.deleteDocuments(new Term("id", "" + delID));
        if (VERBOSE) {
          System.out.println("TEST: delete " + delID);
        }
      }
    }
    // Diagnostic dump only: lists each unique term with its documents and deletion status.
    if (VERBOSE) {
      List<BytesRef> termsList = new ArrayList<BytesRef>(uniqueTerms);
      Collections.sort(termsList, BytesRef.getUTF8SortedAsUTF16Comparator());
      System.out.println("TEST: terms in UTF16 order:");
      for (BytesRef b : termsList) {
        System.out.println(" " + UnicodeUtil.toHexString(b.utf8ToString()) + " " + b);
        for (int docID : docs.get(b)) {
          if (deleted.contains(docID)) {
            System.out.println(" " + docID + " (deleted)");
          } else {
            System.out.println(" " + docID);
          }
        }
      }
    }
    // The reader is obtained from the writer before the writer is closed.
    IndexReader reader = w.getReader();
    w.close();
    if (VERBOSE) {
      System.out.println("TEST: reader=" + reader);
    }
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    // Every document a delete was issued for must be reported as not live.
    for (int delDoc : deleted) {
      assertFalse(liveDocs.get(delDoc));
    }
    // Seek 100 random terms (repeats possible) and compare postings with the expected lists.
    for (int i = 0; i < 100; i++) {
      BytesRef term = terms.get(random().nextInt(terms.size()));
      if (VERBOSE) {
        System.out.println(
            "TEST: seek term=" + UnicodeUtil.toHexString(term.utf8ToString()) + " " + term);
      }
      DocsEnum docsEnum = _TestUtil.docs(random(), reader, "field", term, liveDocs, null, 0);
      assertNotNull(docsEnum);
      // Expected documents were recorded in increasing order, so they are compared
      // one-by-one against successive nextDoc() calls, skipping the deleted ones.
      for (int docID : docs.get(term)) {
        if (!deleted.contains(docID)) {
          assertEquals(docID, docsEnum.nextDoc());
        }
      }
      // Nothing beyond the expected documents may remain.
      assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsEnum.nextDoc());
    }
    reader.close();
    dir.close();
  }
}
/** Returns the number of elements, which is the size of the wrapped list {@code in}. */
@Override
public int size() {
  return in.size();
}
/**
 * Creates a rotated view over {@code list}. The list is stored by reference, not copied.
 *
 * <p>Both arguments are validated with {@code Preconditions.checkArgument}.
 *
 * @param list the backing list; must implement {@link RandomAccess}
 * @param distance the rotation offset; must satisfy {@code 0 <= distance < list.size()}
 */
public RotatedList(List<T> list, int distance) {
  // Checked in the same order as a single short-circuiting "&&" would evaluate them.
  Preconditions.checkArgument(distance >= 0);
  Preconditions.checkArgument(distance < list.size());
  Preconditions.checkArgument(list instanceof RandomAccess);
  this.distance = distance;
  this.in = list;
}