예제 #1
0
 /**
  * Copies {@code length} bytes from the pool, starting at pool position {@code offset}, into the
  * given {@link BytesRef} and returns it. On return the destination's {@code offset} is
  * {@code 0} and its {@code length} is {@code length}.
  *
  * <p>Note: the copied range may cross any number of block boundaries.
  *
  * @param bytes destination; {@code grow(length)} is called on it and its backing array is
  *     written starting at index {@code 0}
  * @param offset start position in the pool; the high bits select the block and the low bits
  *     the position inside that block
  * @param length number of bytes to copy
  * @return the given {@code bytes} instance
  */
 public final BytesRef copyFrom(final BytesRef bytes, final int offset, final int length) {
   bytes.offset = 0;
   bytes.grow(length);
   bytes.length = length;

   int bufferIndex = offset >> BYTE_BLOCK_SHIFT;
   int pos = offset & BYTE_BLOCK_MASK;
   int destPos = 0;
   int remaining = length;

   // Copy block by block. The destination cursor and the remaining count are tracked in locals
   // so that bytes.offset/bytes.length always hold their final values, and so that every chunk
   // is bounded by what is left in the *current* block (a range may span three or more blocks).
   // A do/while keeps the original behavior of touching the first block even when length is 0
   // and of failing inside System.arraycopy when length is negative.
   do {
     final byte[] buffer = buffers[bufferIndex++];
     final int chunk = Math.min(remaining, BYTE_BLOCK_SIZE - pos);
     System.arraycopy(buffer, pos, bytes.bytes, destPos, chunk);
     destPos += chunk;
     remaining -= chunk;
     pos = 0; // every block after the first is read from its beginning
   } while (remaining > 0);

   return bytes;
 }
예제 #2
0
 // Points the given BytesRef at a term stored in a byte block. The term's
 // bytes are preceded by its length, which is encoded in one or two bytes.
 public final BytesRef setBytesRef(BytesRef term, int textStart) {
   final byte[] block = buffers[textStart >> BYTE_BLOCK_SHIFT];
   term.bytes = block;
   final int start = textStart & BYTE_BLOCK_MASK;
   final byte lead = block[start];
   if (lead < 0) {
     // high bit set: length takes 2 bytes (low 7 bits first, then 8 more bits)
     term.length = (lead & 0x7f) | ((block[start + 1] & 0xff) << 7);
     term.offset = start + 2;
   } else {
     // high bit clear: this single byte is the length
     term.length = lead;
     term.offset = start + 1;
   }
   assert term.length >= 0;
   return term;
 }
  /**
   * Builds the completion automata from the given term/weight iterator.
   *
   * <p>Every term is written to a temporary file, prefixed by its encoded weight. That file is
   * sorted into a second temporary file, and the sorted entries are then assigned to one of
   * {@code buckets} buckets according to their line position (an entry whose encoded weight
   * equals the previous entry's weight reuses the previous bucket). The weight prefix is
   * stripped before each entry is handed to the {@link FSTCompletionBuilder}.
   *
   * <p>Both temporary files are deleted before this method returns, whether it succeeds or not.
   *
   * @param tfit source of terms and their weights
   * @throws IllegalArgumentException if {@code tfit} is a {@link TermFreqPayloadIterator}
   * @throws IOException if creating, writing, sorting or reading the temporary files fails
   */
  @Override
  public void build(TermFreqIterator tfit) throws IOException {
    if (tfit instanceof TermFreqPayloadIterator) {
      throw new IllegalArgumentException("this suggester doesn't support payloads");
    }

    // Everything that needs cleanup is declared up front and acquired inside the try block, so
    // a failure while creating the second temp file or the writer cannot leak the first file.
    File tempInput = null;
    File tempSorted = null;
    Sort.ByteSequencesWriter writer = null;
    Sort.ByteSequencesReader reader = null;
    ExternalRefSorter sorter = null;

    boolean success = false;
    try {
      tempInput =
          File.createTempFile(
              FSTCompletionLookup.class.getSimpleName(), ".input", Sort.defaultTempDir());
      tempSorted =
          File.createTempFile(
              FSTCompletionLookup.class.getSimpleName(), ".sorted", Sort.defaultTempDir());
      writer = new Sort.ByteSequencesWriter(tempInput);

      // Push floats up front before sequences to sort them. For now, assume they are
      // non-negative. If negative floats are allowed some trickery needs to be done to find
      // their byte order.
      byte[] buffer = new byte[0];
      ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
      BytesRef spare;
      while ((spare = tfit.next()) != null) {
        // Make room for the 4-byte weight followed by the term bytes.
        if (spare.length + 4 >= buffer.length) {
          buffer = ArrayUtil.grow(buffer, spare.length + 4);
        }

        output.reset(buffer);
        output.writeInt(encodeWeight(tfit.weight()));
        output.writeBytes(spare.bytes, spare.offset, spare.length);
        writer.write(buffer, 0, output.getPosition());
      }
      // The writer must be closed before the file is sorted.
      writer.close();

      // We don't know the distribution of scores and we need to bucket them, so we'll sort
      // and divide into equal buckets.
      SortInfo info = new Sort().sort(tempInput, tempSorted);
      tempInput.delete();
      FSTCompletionBuilder builder =
          new FSTCompletionBuilder(
              buckets, sorter = new ExternalRefSorter(new Sort()), sharedTailLength);

      final int inputLines = info.lines;
      reader = new Sort.ByteSequencesReader(tempSorted);
      long line = 0;
      int previousBucket = 0;
      int previousScore = 0;
      ByteArrayDataInput input = new ByteArrayDataInput();
      BytesRef tmp1 = new BytesRef();
      BytesRef tmp2 = new BytesRef();
      while (reader.read(tmp1)) {
        input.reset(tmp1.bytes);
        int currentScore = input.readInt();

        // Entries with the same score as the previous one stay in the previous bucket;
        // otherwise the bucket is derived from the entry's position in the sorted file.
        int bucket;
        if (line > 0 && currentScore == previousScore) {
          bucket = previousBucket;
        } else {
          bucket = (int) (line * buckets / inputLines);
        }
        previousScore = currentScore;
        previousBucket = bucket;

        // Only append the input, discard the weight.
        tmp2.bytes = tmp1.bytes;
        tmp2.offset = input.getPosition();
        tmp2.length = tmp1.length - input.getPosition();
        builder.add(tmp2, bucket);

        line++;
      }

      // The two FSTCompletions share the same automaton.
      this.higherWeightsCompletion = builder.build();
      this.normalCompletion =
          new FSTCompletion(higherWeightsCompletion.getFST(), false, exactMatchFirst);

      success = true;
    } finally {
      try {
        if (success) {
          IOUtils.close(reader, writer, sorter);
        } else {
          IOUtils.closeWhileHandlingException(reader, writer, sorter);
        }
      } finally {
        // Best-effort removal of the temp files, performed even if closing threw.
        if (tempInput != null) {
          tempInput.delete();
        }
        if (tempSorted != null) {
          tempSorted.delete();
        }
      }
    }
  }