/**
 * Adds a single key made of one supplementary code point (U+24B62, which takes four bytes in
 * UTF-8) and checks that looking up that same key yields exactly one completion.
 */
public void testThreeByte() throws Exception {
  final byte[] utf8 = {(byte) 0xF0, (byte) 0xA4, (byte) 0xAD, (byte) 0xA2};
  final String term = new String(utf8, "UTF-8");

  // Build an automaton that contains only this one term, in bucket 0.
  final FSTCompletionBuilder fstBuilder = new FSTCompletionBuilder();
  fstBuilder.add(new BytesRef(term), 0);
  final FSTCompletion fstCompletion = fstBuilder.build();

  final List<Completion> matches = fstCompletion.lookup(stringToCharSequence(term), 1);
  assertEquals(1, matches.size());
}
/**
 * Writes the automaton held by {@code normalCompletion} to the given stream.
 *
 * <p>The stream is handed to {@code IOUtils.close} on every path, including when there is nothing
 * to write.
 *
 * @param output destination stream
 * @return {@code true} if the automaton was saved; {@code false} if {@code normalCompletion} or
 *     its FST is {@code null}
 * @throws IOException if saving or closing fails
 */
@Override
public synchronized boolean store(OutputStream output) throws IOException {
  boolean stored = false;
  try {
    if (this.normalCompletion != null && normalCompletion.getFST() != null) {
      normalCompletion.getFST().save(new OutputStreamDataOutput(output));
      stored = true;
    }
  } finally {
    IOUtils.close(output);
  }
  return stored;
}
public void testRequestedCount() throws Exception { // 'one' is promoted after collecting two higher ranking results. assertMatchEquals(completion.lookup(stringToCharSequence("one"), 2), "one/0.0", "oneness/1.0"); // 'four' is collected in a bucket and then again as an exact match. assertMatchEquals( completion.lookup(stringToCharSequence("four"), 2), "four/0.0", "fourblah/1.0"); // Check reordering of exact matches. assertMatchEquals( completion.lookup(stringToCharSequence("four"), 4), "four/0.0", "fourblah/1.0", "fourteen/1.0", "fourier/0.0"); // 'one' is at the top after collecting all alphabetical results. assertMatchEquals( completionAlphabetical.lookup(stringToCharSequence("one"), 2), "one/0.0", "oneness/1.0"); // 'one' is not promoted after collecting two higher ranking results. FSTCompletion noPromotion = new FSTCompletion(completion.getFST(), true, false); assertMatchEquals( noPromotion.lookup(stringToCharSequence("one"), 2), "oneness/1.0", "onerous/1.0"); // 'one' is at the top after collecting all alphabetical results. assertMatchEquals( completionAlphabetical.lookup(stringToCharSequence("one"), 2), "one/0.0", "oneness/1.0"); }
/** Requests an unbounded number of completions for "one" and checks the full, ordered list. */
public void testFullMatchList() throws Exception {
  final CharSequence prefix = stringToCharSequence("one");
  final int unlimited = Integer.MAX_VALUE;
  assertMatchEquals(
      completion.lookup(prefix, unlimited),
      "oneness/1.0",
      "onerous/1.0",
      "onesimus/1.0",
      "one/0.0");
}
public void testExactMatchReordering() throws Exception { // Check reordering of exact matches. assertMatchEquals( completion.lookup(stringToCharSequence("four"), 4), "four/0.0", "fourblah/1.0", "fourteen/1.0", "fourier/0.0"); }
/**
 * Looks up completions for {@code key} and converts them to {@link LookupResult}s.
 *
 * @param key the prefix to complete
 * @param higherWeightsFirst if {@code true}, {@code higherWeightsCompletion} answers the query;
 *     otherwise {@code normalCompletion} does
 * @param num the number of completions requested from the underlying completion
 * @return one result per completion, carrying the completion's text (converted from UTF-8 to
 *     UTF-16) and its bucket
 */
@Override
public List<LookupResult> lookup(CharSequence key, boolean higherWeightsFirst, int num) {
  final List<Completion> matches =
      higherWeightsFirst
          ? higherWeightsCompletion.lookup(key, num)
          : normalCompletion.lookup(key, num);

  final List<LookupResult> converted = new ArrayList<LookupResult>(matches.size());
  // A single scratch buffer is reused for every UTF-8 to UTF-16 conversion.
  final CharsRef scratch = new CharsRef();
  for (Completion match : matches) {
    scratch.grow(match.utf8.length);
    UnicodeUtil.UTF8toUTF16(match.utf8, scratch);
    final String text = scratch.toString();
    converted.add(new LookupResult(text, match.bucket));
  }
  return converted;
}
/**
 * Builds the shared fixtures: {@code completion} from every entry returned by {@code evalKeys()},
 * and {@code completionAlphabetical} as a second view over the same FST.
 */
public void setUp() throws Exception {
  super.setUp();

  final FSTCompletionBuilder fstBuilder = new FSTCompletionBuilder();
  for (TermFreq entry : evalKeys()) {
    // The entry's value is narrowed to int before being handed to the builder.
    final int weight = (int) entry.v;
    fstBuilder.add(entry.term, weight);
  }

  completion = fstBuilder.build();
  completionAlphabetical = new FSTCompletion(completion.getFST(), false, true);
}
/**
 * Reads an FST from the given stream and rebuilds both completion views on top of it.
 *
 * <p>The stream is handed to {@code IOUtils.close} whether or not reading succeeds.
 *
 * @param input source stream containing a previously saved FST
 * @return always {@code true} when no exception is thrown
 * @throws IOException if reading or closing fails
 */
@Override
public synchronized boolean load(InputStream input) throws IOException {
  try {
    final FST<Object> automaton =
        new FST<Object>(new InputStreamDataInput(input), NoOutputs.getSingleton());
    this.higherWeightsCompletion = new FSTCompletion(automaton);
    // The second view is created over the FST exposed by the first one.
    this.normalCompletion =
        new FSTCompletion(higherWeightsCompletion.getFST(), false, exactMatchFirst);
    return true;
  } finally {
    IOUtils.close(input);
  }
}
/** Expects the exact match "one" (bucket 0) to be listed ahead of "oneness" (bucket 1). */
public void testExactMatchLowPriority() throws Exception {
  final CharSequence prefix = stringToCharSequence("one");
  assertMatchEquals(completion.lookup(prefix, 2), "one/0.0", "oneness/1.0");
}
/** Expects "two" to be the only completion returned when a single result is requested. */
public void testExactMatchHighPriority() throws Exception {
  final CharSequence prefix = stringToCharSequence("two");
  assertMatchEquals(completion.lookup(prefix, 1), "two/1.0");
}
/** Replaces {@code completion} with one built from no input and expects an empty lookup result. */
public void testEmptyInput() throws Exception {
  final FSTCompletionBuilder emptyBuilder = new FSTCompletionBuilder();
  completion = emptyBuilder.build();

  final CharSequence emptyPrefix = stringToCharSequence("");
  assertMatchEquals(completion.lookup(emptyPrefix, 10));
}
/** Expects the alphabetical completion to return no results for the prefix "xyz". */
public void testAlphabeticWithWeights() throws Exception {
  final CharSequence prefix = stringToCharSequence("xyz");
  final int matchCount = completionAlphabetical.lookup(prefix, 1).size();
  assertEquals(0, matchCount);
}
/** Expects the default completion to return no results for the prefix "xyz". */
public void testMiss() throws Exception {
  final CharSequence prefix = stringToCharSequence("xyz");
  assertMatchEquals(completion.lookup(prefix, 1));
}
/**
 * Looks up the bucket (weight) stored for {@code key}.
 *
 * @param key the key to look up
 * @return the bucket as a {@link Long}, or {@code null} when {@code normalCompletion} reports
 *     {@code -1} for the key
 */
public Object get(CharSequence key) {
  final int bucket = normalCompletion.getBucket(key);
  if (bucket == -1) {
    return null;
  }
  return Long.valueOf(bucket);
}
/**
 * Builds both completion automata from the terms and weights supplied by {@code tfit}.
 *
 * <p>The method works in three phases:
 *
 * <ol>
 *   <li>every term is written to a temporary file, prefixed with its encoded weight;
 *   <li>that file is sorted into a second temporary file;
 *   <li>the sorted entries are read back, assigned a bucket derived from their position in the
 *       sorted file, and added to an {@link FSTCompletionBuilder}.
 * </ol>
 *
 * <p>On success {@code higherWeightsCompletion} and {@code normalCompletion} are replaced. Both
 * temporary files are deleted before the method returns, whether or not it succeeds.
 *
 * @param tfit iterator over the terms and their weights
 * @throws IllegalArgumentException if {@code tfit} is a {@link TermFreqPayloadIterator}
 * @throws IOException if writing, sorting or reading the temporary files fails
 */
@Override
public void build(TermFreqIterator tfit) throws IOException {
  if (tfit instanceof TermFreqPayloadIterator) {
    throw new IllegalArgumentException("this suggester doesn't support payloads");
  }
  // Unsorted (weight, term) records go to tempInput; the sorted copy goes to tempSorted.
  File tempInput =
      File.createTempFile(
          FSTCompletionLookup.class.getSimpleName(), ".input", Sort.defaultTempDir());
  File tempSorted =
      File.createTempFile(
          FSTCompletionLookup.class.getSimpleName(), ".sorted", Sort.defaultTempDir());

  Sort.ByteSequencesWriter writer = new Sort.ByteSequencesWriter(tempInput);
  // reader and sorter are created inside the try block; they stay null if an earlier step fails.
  Sort.ByteSequencesReader reader = null;
  ExternalRefSorter sorter = null;

  // Push floats up front before sequences to sort them. For now, assume they are non-negative.
  // If negative floats are allowed some trickery needs to be done to find their byte order.
  boolean success = false;
  try {
    // Phase 1: write one record per term, laid out as [encoded weight][term bytes].
    byte[] buffer = new byte[0];
    ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
    BytesRef spare;
    while ((spare = tfit.next()) != null) {
      // Presumably the extra 4 bytes make room for the int written by writeInt below.
      if (spare.length + 4 >= buffer.length) {
        buffer = ArrayUtil.grow(buffer, spare.length + 4);
      }

      // Re-point the output at the (possibly reallocated) buffer before writing the record.
      output.reset(buffer);
      output.writeInt(encodeWeight(tfit.weight()));
      output.writeBytes(spare.bytes, spare.offset, spare.length);
      writer.write(buffer, 0, output.getPosition());
    }
    writer.close();

    // Phase 2: sort the records.
    // We don't know the distribution of scores and we need to bucket them, so we'll sort
    // and divide into equal buckets.
    SortInfo info = new Sort().sort(tempInput, tempSorted);
    // The unsorted file is no longer needed once the sorted copy exists.
    tempInput.delete();
    FSTCompletionBuilder builder =
        new FSTCompletionBuilder(
            buckets, sorter = new ExternalRefSorter(new Sort()), sharedTailLength);

    // Phase 3: read the sorted records back and assign each one a bucket.
    final int inputLines = info.lines;
    reader = new Sort.ByteSequencesReader(tempSorted);
    long line = 0;
    int previousBucket = 0;
    int previousScore = 0;
    ByteArrayDataInput input = new ByteArrayDataInput();
    BytesRef tmp1 = new BytesRef();
    BytesRef tmp2 = new BytesRef();
    while (reader.read(tmp1)) {
      // NOTE(review): reset() is given the whole backing array and ignores tmp1.offset; this
      // assumes reader.read() fills tmp1 starting at offset 0 -- confirm.
      input.reset(tmp1.bytes);
      int currentScore = input.readInt();

      // Entries with the same score as the previous entry stay in the previous entry's bucket;
      // otherwise the bucket is the entry's relative position in the sorted file scaled to the
      // number of buckets.
      int bucket;
      if (line > 0 && currentScore == previousScore) {
        bucket = previousBucket;
      } else {
        bucket = (int) (line * buckets / inputLines);
      }
      previousScore = currentScore;
      previousBucket = bucket;

      // Only append the input, discard the weight.
      // tmp2 is a view over tmp1's bytes that starts right after the weight that was just read.
      tmp2.bytes = tmp1.bytes;
      tmp2.offset = input.getPosition();
      tmp2.length = tmp1.length - input.getPosition();
      builder.add(tmp2, bucket);

      line++;
    }

    // The two FSTCompletions share the same automaton.
    this.higherWeightsCompletion = builder.build();
    this.normalCompletion =
        new FSTCompletion(higherWeightsCompletion.getFST(), false, exactMatchFirst);

    success = true;
  } finally {
    // NOTE(review): on the success path writer was already closed above and is passed to close
    // again here; this relies on a second close being harmless -- confirm.
    if (success) IOUtils.close(reader, writer, sorter);
    else IOUtils.closeWhileHandlingException(reader, writer, sorter);

    // Remove both temporary files; the return values of delete() are not checked.
    tempInput.delete();
    tempSorted.delete();
  }
}
/**
 * Creates a lookup on top of an automaton that has already been built.
 *
 * <p>Both internal completion views are created over the FST of the supplied instance; they
 * differ only in the second constructor argument ({@code false} for {@code normalCompletion},
 * {@code true} for {@code higherWeightsCompletion}).
 *
 * @param completion An instance of {@link FSTCompletion}.
 * @param exactMatchFirst If <code>true</code> exact matches are promoted to the top of the
 *     suggestions list. Otherwise they appear in the order of discretized weight and alphabetical
 *     within the bucket.
 */
public FSTCompletionLookup(FSTCompletion completion, boolean exactMatchFirst) {
  this(INVALID_BUCKETS_COUNT, exactMatchFirst);

  normalCompletion = new FSTCompletion(completion.getFST(), false, exactMatchFirst);
  higherWeightsCompletion = new FSTCompletion(completion.getFST(), true, exactMatchFirst);
}