Ejemplo n.º 1
0
  /**
   * Checks that a key decoded from a single multi-byte UTF-8 sequence can be added to a builder
   * and is returned by a lookup for that same key.
   */
  public void testThreeByte() throws Exception {
    // The byte sequence below is decoded as UTF-8 to form the key under test.
    final byte[] encoded = {(byte) 0xF0, (byte) 0xA4, (byte) 0xAD, (byte) 0xA2};
    final String key = new String(encoded, "UTF-8");

    final FSTCompletionBuilder fstBuilder = new FSTCompletionBuilder();
    fstBuilder.add(new BytesRef(key), 0);
    final FSTCompletion built = fstBuilder.build();

    // Ask for at most one completion of the full key; exactly one must come back.
    final List<Completion> result = built.lookup(stringToCharSequence(key), 1);
    assertEquals(1, result.size());
  }
  /**
   * Saves the automaton held by {@code normalCompletion} to the given stream.
   *
   * <p>The stream is closed before this method returns, whether or not anything was written.
   *
   * @param output the stream to write to
   * @return {@code false} when there is no completion or no automaton to save, {@code true}
   *     otherwise
   * @throws IOException propagated from saving the automaton or from closing the stream
   */
  @Override
  public synchronized boolean store(OutputStream output) throws IOException {
    try {
      final boolean nothingToSave =
          this.normalCompletion == null || normalCompletion.getFST() == null;
      if (nothingToSave) {
        return false;
      }
      normalCompletion.getFST().save(new OutputStreamDataOutput(output));
      return true;
    } finally {
      // Runs on every exit path, including the early "nothing to save" return.
      IOUtils.close(output);
    }
  }
Ejemplo n.º 3
0
  /**
   * Exercises lookups that are limited to a requested number of results, using {@code completion},
   * {@code completionAlphabetical} and a variant of {@code completion} created without exact-match
   * promotion.
   */
  public void testRequestedCount() throws Exception {
    final String one = "one";
    final String four = "four";

    // With a limit of two, 'one' is promoted ahead of the higher ranking results.
    assertMatchEquals(completion.lookup(stringToCharSequence(one), 2), "one/0.0", "oneness/1.0");

    // 'four' is collected in a bucket and then again as an exact match.
    assertMatchEquals(completion.lookup(stringToCharSequence(four), 2), "four/0.0", "fourblah/1.0");

    // Exact matches are reordered to the front when more results are requested.
    assertMatchEquals(
        completion.lookup(stringToCharSequence(four), 4),
        "four/0.0",
        "fourblah/1.0",
        "fourteen/1.0",
        "fourier/0.0");

    // In the alphabetical completion 'one' comes first.
    assertMatchEquals(
        completionAlphabetical.lookup(stringToCharSequence(one), 2), "one/0.0", "oneness/1.0");

    // Without promotion, 'one' does not make it into the first two results.
    final FSTCompletion noPromotion = new FSTCompletion(completion.getFST(), true, false);
    assertMatchEquals(
        noPromotion.lookup(stringToCharSequence(one), 2), "oneness/1.0", "onerous/1.0");

    // The alphabetical lookup is repeated and must give the same answer as before.
    assertMatchEquals(
        completionAlphabetical.lookup(stringToCharSequence(one), 2), "one/0.0", "oneness/1.0");
  }
Ejemplo n.º 4
0
 /** Requests every completion of "one" and checks the complete, ordered result list. */
 public void testFullMatchList() throws Exception {
   final String prefix = "one";
   // Integer.MAX_VALUE as the limit means the result count is effectively uncapped.
   final int unlimited = Integer.MAX_VALUE;
   assertMatchEquals(
       completion.lookup(stringToCharSequence(prefix), unlimited),
       "oneness/1.0",
       "onerous/1.0",
       "onesimus/1.0",
       "one/0.0");
 }
Ejemplo n.º 5
0
 /** Checks that the exact match "four" is placed first among the four requested completions. */
 public void testExactMatchReordering() throws Exception {
   final String prefix = "four";
   final int limit = 4;
   // The exact match is expected ahead of the entries from higher buckets.
   assertMatchEquals(
       completion.lookup(stringToCharSequence(prefix), limit),
       "four/0.0",
       "fourblah/1.0",
       "fourteen/1.0",
       "fourier/0.0");
 }
  /**
   * Looks up completions for {@code key} and converts them into {@link LookupResult}s.
   *
   * @param key the prefix to complete
   * @param higherWeightsFirst selects {@code higherWeightsCompletion} when {@code true}, otherwise
   *     {@code normalCompletion}
   * @param num passed through as the result limit of the underlying lookup
   * @return one result per completion, carrying the completion's text as UTF-16 and its bucket
   */
  @Override
  public List<LookupResult> lookup(CharSequence key, boolean higherWeightsFirst, int num) {
    final List<Completion> completions =
        higherWeightsFirst
            ? higherWeightsCompletion.lookup(key, num)
            : normalCompletion.lookup(key, num);

    final ArrayList<LookupResult> converted = new ArrayList<LookupResult>(completions.size());
    // A single scratch buffer is reused for every UTF-8 to UTF-16 conversion.
    final CharsRef scratch = new CharsRef();
    for (Completion match : completions) {
      scratch.grow(match.utf8.length);
      UnicodeUtil.UTF8toUTF16(match.utf8, scratch);
      // toString() copies the text out, so reusing the scratch buffer is safe.
      final String text = scratch.toString();
      converted.add(new LookupResult(text, match.bucket));
    }
    return converted;
  }
Ejemplo n.º 7
0
  /**
   * Populates {@code completion} and {@code completionAlphabetical} from the entries returned by
   * {@code evalKeys()}.
   */
  public void setUp() throws Exception {
    super.setUp();

    final FSTCompletionBuilder fstBuilder = new FSTCompletionBuilder();
    for (TermFreq entry : evalKeys()) {
      // The entry's value is narrowed to int because the builder takes an int bucket.
      fstBuilder.add(entry.term, (int) entry.v);
    }

    completion = fstBuilder.build();
    // The alphabetical variant reuses the automaton that was just built.
    completionAlphabetical = new FSTCompletion(completion.getFST(), false, true);
  }
 /**
  * Reads an automaton from the given stream and recreates both completion views from it.
  *
  * <p>The stream is closed before this method returns, also when reading fails.
  *
  * @param input the stream to read the automaton from
  * @return always {@code true}
  * @throws IOException propagated from reading the automaton or from closing the stream
  */
 @Override
 public synchronized boolean load(InputStream input) throws IOException {
   try {
     final FST<Object> automaton =
         new FST<Object>(new InputStreamDataInput(input), NoOutputs.getSingleton());
     this.higherWeightsCompletion = new FSTCompletion(automaton);
     // The second view is created from the automaton held by the first one.
     this.normalCompletion =
         new FSTCompletion(higherWeightsCompletion.getFST(), false, exactMatchFirst);
     return true;
   } finally {
     IOUtils.close(input);
   }
 }
Ejemplo n.º 9
0
 /** Checks that the exact match "one" (bucket 0) is listed before "oneness" (bucket 1). */
 public void testExactMatchLowPriority() throws Exception {
   final String prefix = "one";
   final int limit = 2;
   assertMatchEquals(
       completion.lookup(stringToCharSequence(prefix), limit), "one/0.0", "oneness/1.0");
 }
Ejemplo n.º 10
0
 /** Checks that a single-result lookup for "two" returns the exact match "two" in bucket 1. */
 public void testExactMatchHighPriority() throws Exception {
   final String prefix = "two";
   final int limit = 1;
   assertMatchEquals(completion.lookup(stringToCharSequence(prefix), limit), "two/1.0");
 }
Ejemplo n.º 11
0
 /** Checks that a completion built from a builder with no entries returns no matches. */
 public void testEmptyInput() throws Exception {
   // Replace the shared completion with one built from a builder that received no entries.
   final FSTCompletionBuilder emptyBuilder = new FSTCompletionBuilder();
   completion = emptyBuilder.build();

   final String prefix = "";
   final int limit = 10;
   assertMatchEquals(completion.lookup(stringToCharSequence(prefix), limit));
 }
Ejemplo n.º 12
0
 /** Checks that the alphabetical completion returns nothing for the prefix "xyz". */
 public void testAlphabeticWithWeights() throws Exception {
   final String prefix = "xyz";
   final int hits = completionAlphabetical.lookup(stringToCharSequence(prefix), 1).size();
   assertEquals(0, hits);
 }
Ejemplo n.º 13
0
 /** Checks that a lookup for the prefix "xyz" yields no completions. */
 public void testMiss() throws Exception {
   final String prefix = "xyz";
   final int limit = 1;
   // No expected entries are passed, so the result list must be empty.
   assertMatchEquals(completion.lookup(stringToCharSequence(prefix), limit));
 }
Ejemplo n.º 14
0
 /**
  * Looks up the bucket stored for the given key in {@code normalCompletion}.
  *
  * @param key the key to look up
  * @return the bucket as a {@link Long}, or {@code null} when the bucket lookup reports -1
  */
 public Object get(CharSequence key) {
   final int bucket = normalCompletion.getBucket(key);
   if (bucket == -1) {
     // -1 is treated as "no bucket for this key".
     return null;
   }
   return Long.valueOf(bucket);
 }
Ejemplo n.º 15
0
  /**
   * Builds {@code higherWeightsCompletion} and {@code normalCompletion} from the given iterator.
   *
   * <p>Each term is written to a temporary file, prefixed with its encoded weight. That file is
   * sorted into a second temporary file, and the sorted entries are then read back, assigned a
   * bucket based on their position in the sorted order, and added to an {@link
   * FSTCompletionBuilder}. Both temporary files are deleted before the method returns.
   *
   * @param tfit source of terms and weights
   * @throws IllegalArgumentException if {@code tfit} is a {@link TermFreqPayloadIterator}
   * @throws IOException propagated from temp-file creation, writing, sorting or reading
   */
  @Override
  public void build(TermFreqIterator tfit) throws IOException {
    if (tfit instanceof TermFreqPayloadIterator) {
      throw new IllegalArgumentException("this suggester doesn't support payloads");
    }
    // Two scratch files: the raw (weight + term) records and their sorted counterpart.
    File tempInput =
        File.createTempFile(
            FSTCompletionLookup.class.getSimpleName(), ".input", Sort.defaultTempDir());
    File tempSorted =
        File.createTempFile(
            FSTCompletionLookup.class.getSimpleName(), ".sorted", Sort.defaultTempDir());

    Sort.ByteSequencesWriter writer = new Sort.ByteSequencesWriter(tempInput);
    Sort.ByteSequencesReader reader = null;
    ExternalRefSorter sorter = null;

    // Push floats up front before sequences to sort them. For now, assume they are non-negative.
    // If negative floats are allowed some trickery needs to be done to find their byte order.
    // NOTE(review): the code below writes the weight as an int via encodeWeight(), not as a
    // float; the comment above may predate that. Confirm and reword.
    boolean success = false;
    try {
      byte[] buffer = new byte[0];
      ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
      BytesRef spare;
      while ((spare = tfit.next()) != null) {
        // Make sure the buffer can hold the 4-byte weight followed by the term bytes.
        if (spare.length + 4 >= buffer.length) {
          buffer = ArrayUtil.grow(buffer, spare.length + 4);
        }

        // Record layout: encoded weight (int), then the raw term bytes.
        output.reset(buffer);
        output.writeInt(encodeWeight(tfit.weight()));
        output.writeBytes(spare.bytes, spare.offset, spare.length);
        writer.write(buffer, 0, output.getPosition());
      }
      writer.close();

      // We don't know the distribution of scores and we need to bucket them, so we'll sort
      // and divide into equal buckets.
      SortInfo info = new Sort().sort(tempInput, tempSorted);
      // The unsorted file is no longer needed once the sort has finished.
      tempInput.delete();
      FSTCompletionBuilder builder =
          new FSTCompletionBuilder(
              buckets, sorter = new ExternalRefSorter(new Sort()), sharedTailLength);

      final int inputLines = info.lines;
      reader = new Sort.ByteSequencesReader(tempSorted);
      long line = 0;
      int previousBucket = 0;
      int previousScore = 0;
      ByteArrayDataInput input = new ByteArrayDataInput();
      BytesRef tmp1 = new BytesRef();
      BytesRef tmp2 = new BytesRef();
      while (reader.read(tmp1)) {
        // Decode the weight that was written in front of the term.
        input.reset(tmp1.bytes);
        int currentScore = input.readInt();

        int bucket;
        if (line > 0 && currentScore == previousScore) {
          // Entries with the same score as the preceding one stay in the same bucket.
          bucket = previousBucket;
        } else {
          // Otherwise the bucket is proportional to the entry's position in the sorted input.
          bucket = (int) (line * buckets / inputLines);
        }
        previousScore = currentScore;
        previousBucket = bucket;

        // Only append the input, discard the weight.
        // tmp2 is a view over tmp1's bytes starting right after the weight; nothing is copied.
        tmp2.bytes = tmp1.bytes;
        tmp2.offset = input.getPosition();
        tmp2.length = tmp1.length - input.getPosition();
        builder.add(tmp2, bucket);

        line++;
      }

      // The two FSTCompletions share the same automaton.
      this.higherWeightsCompletion = builder.build();
      this.normalCompletion =
          new FSTCompletion(higherWeightsCompletion.getFST(), false, exactMatchFirst);

      success = true;
    } finally {
      // On success close normally; on failure use the closeWhileHandlingException variant.
      if (success) IOUtils.close(reader, writer, sorter);
      else IOUtils.closeWhileHandlingException(reader, writer, sorter);

      // Remove both scratch files regardless of the outcome.
      tempInput.delete();
      tempSorted.delete();
    }
  }
Ejemplo n.º 16
0
 /**
  * Creates a lookup backed by the automaton of an already built {@link FSTCompletion}.
  *
  * @param completion the completion whose automaton is reused
  * @param exactMatchFirst if <code>true</code>, exact matches are moved to the top of the
  *     suggestions list; otherwise they are ordered by discretized weight and alphabetically
  *     within a bucket
  */
 public FSTCompletionLookup(FSTCompletion completion, boolean exactMatchFirst) {
   this(INVALID_BUCKETS_COUNT, exactMatchFirst);
   // Both views are created from completion.getFST(); only the second constructor argument
   // differs between them.
   this.higherWeightsCompletion = new FSTCompletion(completion.getFST(), true, exactMatchFirst);
   this.normalCompletion = new FSTCompletion(completion.getFST(), false, exactMatchFirst);
 }