Esempio n. 1
0
      IntersectTermsEnum(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
        super();
        // if (TEST) System.out.println("Enum init, startTerm=" + startTerm);
        this.fst = dict;
        this.fstReader = fst.getBytesReader();
        this.fstOutputs = dict.outputs;
        this.fsa = compiled.runAutomaton;
        this.level = -1;
        this.stack = new Frame[16];
        for (int i = 0; i < stack.length; i++) {
          this.stack[i] = new Frame();
        }

        Frame frame;
        frame = loadVirtualFrame(newFrame());
        this.level++;
        frame = loadFirstFrame(newFrame());
        pushFrame(frame);

        this.meta = null;
        this.metaUpto = 1;
        this.decoded = false;
        this.pending = false;

        if (startTerm == null) {
          pending = isAccept(topFrame());
        } else {
          doSeekCeil(startTerm);
          pending = !startTerm.equals(term) && isValid(topFrame()) && isAccept(topFrame());
        }
      }
  /**
   * @param input input tokenstream
   * @param synonyms synonym map
   * @param ignoreCase case-folds input for matching with {@link Character#toLowerCase(int)}. Note,
   *     if you set this to true, it's your responsibility to lowercase the input entries when you
   *     create the {@link SynonymMap}
   */
  public SynonymFilter(TokenStream input, SynonymMap synonyms, boolean ignoreCase) {
    super(input);
    this.synonyms = synonyms;
    this.ignoreCase = ignoreCase;
    this.fst = synonyms.fst;
    if (fst == null) {
      throw new IllegalArgumentException("fst must be non-null");
    }
    this.fstReader = fst.getBytesReader();

    // Must be 1+ so that when roll buffer is at full
    // lookahead we can distinguish this full buffer from
    // the empty buffer:
    rollBufferSize = 1 + synonyms.maxHorizontalContext;

    futureInputs = new PendingInput[rollBufferSize];
    futureOutputs = new PendingOutputs[rollBufferSize];
    for (int pos = 0; pos < rollBufferSize; pos++) {
      futureInputs[pos] = new PendingInput();
      futureOutputs[pos] = new PendingOutputs();
    }

    // System.out.println("FSTFilt maxH=" + synonyms.maxHorizontalContext);

    scratchArc = new FST.Arc<>();
  }
  private Long lookupPrefix(BytesRef scratch, Arc<Long> arc) throws /*Bogus*/ IOException {
    assert 0 == fst.outputs.getNoOutput().longValue();
    long output = 0;
    BytesReader bytesReader = fst.getBytesReader(0);

    fst.getFirstArc(arc);

    byte[] bytes = scratch.bytes;
    int pos = scratch.offset;
    int end = pos + scratch.length;
    while (pos < end) {
      if (fst.findTargetArc(bytes[pos++] & 0xff, arc, arc, bytesReader) == null) {
        return null;
      } else {
        output += arc.output.longValue();
      }
    }

    return output;
  }
Esempio n. 4
0
 static <T> void walk(FST<T> fst) throws IOException {
   final ArrayList<FST.Arc<T>> queue = new ArrayList<>();
   final BitSet seen = new BitSet();
   final FST.BytesReader reader = fst.getBytesReader();
   final FST.Arc<T> startArc = fst.getFirstArc(new FST.Arc<T>());
   queue.add(startArc);
   while (!queue.isEmpty()) {
     final FST.Arc<T> arc = queue.remove(0);
     final long node = arc.target;
     // System.out.println(arc);
     if (FST.targetHasArcs(arc) && !seen.get((int) node)) {
       seen.set((int) node);
       fst.readFirstRealTargetArc(node, arc, reader);
       while (true) {
         queue.add(new FST.Arc<T>().copyFrom(arc));
         if (arc.isLast()) {
           break;
         } else {
           fst.readNextRealArc(arc, reader);
         }
       }
     }
   }
 }
 FSTTermsEnum(FST<Long> fst) {
   this.fst = fst;
   in = new BytesRefFSTEnum<>(fst);
   bytesReader = fst.getBytesReader();
 }
  @Override
  public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
    final FSTEntry entry = fsts.get(field.name);
    if (entry.numOrds == 0) {
      return DocValues.emptySortedSet(); // empty FST!
    }
    FST<Long> instance;
    synchronized (this) {
      instance = fstInstances.get(field.name);
      if (instance == null) {
        data.seek(entry.offset);
        instance = new FST<>(data, PositiveIntOutputs.getSingleton());
        if (!merging) {
          ramBytesUsed.addAndGet(instance.ramBytesUsed());
          fstInstances.put(field.name, instance);
        }
      }
    }
    final BinaryDocValues docToOrds = getBinary(field);
    final FST<Long> fst = instance;

    // per-thread resources
    final BytesReader in = fst.getBytesReader();
    final Arc<Long> firstArc = new Arc<>();
    final Arc<Long> scratchArc = new Arc<>();
    final IntsRefBuilder scratchInts = new IntsRefBuilder();
    final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<>(fst);
    final ByteArrayDataInput input = new ByteArrayDataInput();
    return new SortedSetDocValues() {
      final BytesRefBuilder term = new BytesRefBuilder();
      BytesRef ordsRef;
      long currentOrd;

      @Override
      public long nextOrd() {
        if (input.eof()) {
          return NO_MORE_ORDS;
        } else {
          currentOrd += input.readVLong();
          return currentOrd;
        }
      }

      @Override
      public void setDocument(int docID) {
        ordsRef = docToOrds.get(docID);
        input.reset(ordsRef.bytes, ordsRef.offset, ordsRef.length);
        currentOrd = 0;
      }

      @Override
      public BytesRef lookupOrd(long ord) {
        try {
          in.setPosition(0);
          fst.getFirstArc(firstArc);
          IntsRef output = Util.getByOutput(fst, ord, in, firstArc, scratchArc, scratchInts);
          term.grow(output.length);
          term.clear();
          return Util.toBytesRef(output, term);
        } catch (IOException bogus) {
          throw new RuntimeException(bogus);
        }
      }

      @Override
      public long lookupTerm(BytesRef key) {
        try {
          InputOutput<Long> o = fstEnum.seekCeil(key);
          if (o == null) {
            return -getValueCount() - 1;
          } else if (o.input.equals(key)) {
            return o.output.intValue();
          } else {
            return -o.output - 1;
          }
        } catch (IOException bogus) {
          throw new RuntimeException(bogus);
        }
      }

      @Override
      public long getValueCount() {
        return entry.numOrds;
      }

      @Override
      public TermsEnum termsEnum() {
        return new FSTTermsEnum(fst);
      }
    };
  }
  @Override
  public SortedDocValues getSorted(FieldInfo field) throws IOException {
    final FSTEntry entry = fsts.get(field.name);
    FST<Long> instance;
    synchronized (this) {
      instance = fstInstances.get(field.name);
      if (instance == null) {
        data.seek(entry.offset);
        instance = new FST<>(data, PositiveIntOutputs.getSingleton());
        if (!merging) {
          ramBytesUsed.addAndGet(instance.ramBytesUsed());
          fstInstances.put(field.name, instance);
        }
      }
    }
    final NumericDocValues docToOrd = getNumeric(field);
    final FST<Long> fst = instance;

    // per-thread resources
    final BytesReader in = fst.getBytesReader();
    final Arc<Long> firstArc = new Arc<>();
    final Arc<Long> scratchArc = new Arc<>();
    final IntsRefBuilder scratchInts = new IntsRefBuilder();
    final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<>(fst);

    return new SortedDocValues() {

      final BytesRefBuilder term = new BytesRefBuilder();

      @Override
      public int getOrd(int docID) {
        return (int) docToOrd.get(docID);
      }

      @Override
      public BytesRef lookupOrd(int ord) {
        try {
          in.setPosition(0);
          fst.getFirstArc(firstArc);
          IntsRef output = Util.getByOutput(fst, ord, in, firstArc, scratchArc, scratchInts);
          term.grow(output.length);
          term.clear();
          return Util.toBytesRef(output, term);
        } catch (IOException bogus) {
          throw new RuntimeException(bogus);
        }
      }

      @Override
      public int lookupTerm(BytesRef key) {
        try {
          InputOutput<Long> o = fstEnum.seekCeil(key);
          if (o == null) {
            return -getValueCount() - 1;
          } else if (o.input.equals(key)) {
            return o.output.intValue();
          } else {
            return (int) -o.output - 1;
          }
        } catch (IOException bogus) {
          throw new RuntimeException(bogus);
        }
      }

      @Override
      public int getValueCount() {
        return (int) entry.numOrds;
      }

      @Override
      public TermsEnum termsEnum() {
        return new FSTTermsEnum(fst);
      }
    };
  }