@Override
  public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
    PostingsWriterBase postingsWriter = new SepPostingsWriter(state, new MockIntFactory(blockSize));

    boolean success = false;
    TermsIndexWriterBase indexWriter;
    try {
      indexWriter = new FixedGapTermsIndexWriter(state);
      success = true;
    } finally {
      if (!success) {
        postingsWriter.close();
      }
    }

    success = false;
    try {
      FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter);
      success = true;
      return ret;
    } finally {
      if (!success) {
        try {
          postingsWriter.close();
        } finally {
          indexWriter.close();
        }
      }
    }
  }
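The nested try/finally blocks above guarantee that the already-opened postingsWriter (and, in the second block, indexWriter) is closed if a later constructor throws, so a failed open never leaks file handles. A minimal sketch of the same pattern using Lucene's IOUtils helper (assuming both writers implement Closeable):

    boolean success = false;
    try {
      FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter);
      success = true;
      return ret;
    } finally {
      if (!success) {
        // Closes all arguments, suppressing secondary exceptions so the
        // original failure is the one that propagates:
        IOUtils.closeWhileHandlingException(postingsWriter, indexWriter);
      }
    }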
Example #2
 @Override
 public void finishDoc() throws IOException {
   if (DEBUG) System.out.println("PW     finishDoc");
   if (pendingCount == -1) {
     wrappedPostingsWriter.finishDoc();
   }
 }
Example #3
 @Override
 public void start(IndexOutput termsOut) throws IOException {
   this.termsOut = termsOut;
   CodecUtil.writeHeader(termsOut, CODEC, VERSION_CURRENT);
   termsOut.writeVInt(pending.length); // encode maxPositions in header
   wrappedPostingsWriter.start(termsOut);
 }
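The reader side has to consume this header in the same order. A minimal sketch of the matching read logic (an assumption based on CodecUtil's symmetry; VERSION_START, maxPositions, and wrappedPostingsReader are illustrative names, not taken from this listing):

    public void init(IndexInput termsIn) throws IOException {
      // Must mirror the writes in start() exactly:
      CodecUtil.checkHeader(termsIn, CODEC, VERSION_START, VERSION_CURRENT);
      maxPositions = termsIn.readVInt(); // the pending.length written above
      wrappedPostingsReader.init(termsIn);
    }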
Example #4
 // Currently, this instance is re-used across fields, so
 // our parent calls setField whenever the field changes
 @Override
 public void setField(FieldInfo fieldInfo) {
   this.indexOptions = fieldInfo.getIndexOptions();
   if (DEBUG) System.out.println("PW field=" + fieldInfo.name + " indexOptions=" + indexOptions);
   storePayloads = fieldInfo.hasPayloads();
   wrappedPostingsWriter.setField(fieldInfo);
   // DEBUG = BlockTreeTermsWriter.DEBUG;
 }
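Because the instance is reused across fields, a single field's lifecycle is driven entirely by the caller. An illustrative call sequence (an assumption about the terms dictionary, consistent with the methods shown in these examples):

    writer.setField(fieldInfo);           // once per field
    writer.startTerm();                   // for each term...
    writer.startDoc(docID, termDocFreq);  //   for each doc containing the term...
    writer.addPosition(position, payload, startOffset, endOffset);
    writer.finishDoc();
    writer.finishTerm(stats);             // inline or delegate the term's postings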
Example #5
  @Override
  public void flushTermsBlock(int start, int count) throws IOException {
    if (DEBUG)
      System.out.println(
          "PW: flushTermsBlock start="
              + start
              + " count="
              + count
              + " pendingTerms.size()="
              + pendingTerms.size());
    int wrappedCount = 0;
    assert buffer.getFilePointer() == 0;
    assert start >= count;

    final int limit = pendingTerms.size() - start + count;

    for (int idx = pendingTerms.size() - start; idx < limit; idx++) {
      final PendingTerm term = pendingTerms.get(idx);
      if (term == null) {
        wrappedCount++;
      } else {
        buffer.writeVInt(term.bytes.length);
        buffer.writeBytes(term.bytes, 0, term.bytes.length);
      }
    }

    final int blockBytes = (int) buffer.getFilePointer();
    termsOut.writeVInt(blockBytes);
    buffer.writeTo(termsOut);
    buffer.reset();

    // TODO: this could be somewhat costly since
    // pendingTerms.size() could be biggish?
    int futureWrappedCount = 0;
    final int limit2 = pendingTerms.size();
    for (int idx = limit; idx < limit2; idx++) {
      if (pendingTerms.get(idx) == null) {
        futureWrappedCount++;
      }
    }

    // Remove the terms we just wrote:
    pendingTerms.subList(pendingTerms.size() - start, limit).clear();

    if (DEBUG)
      System.out.println(
          "PW:   len="
              + blockBytes // captured before buffer.reset() zeroed the file pointer
              + " fp="
              + termsOut.getFilePointer()
              + " futureWrappedCount="
              + futureWrappedCount
              + " wrappedCount="
              + wrappedCount);
    // TODO: can we avoid calling this if all terms
    // were inlined...?  Eg for a "primary key" field, the
    // wrapped codec is never invoked...
    wrappedPostingsWriter.flushTermsBlock(futureWrappedCount + wrappedCount, wrappedCount);
  }
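Each flushed block is therefore one VInt byte count followed by the concatenated inlined entries, where every inlined term contributes a VInt length plus its pulsed bytes and every wrapped term contributes nothing. A hypothetical decode of one block (names are illustrative):

    int inlinedBytes = termsIn.readVInt();          // total inlined bytes in this block
    byte[] blockData = new byte[inlinedBytes];
    termsIn.readBytes(blockData, 0, inlinedBytes);
    // Inside blockData, each inlined term is a VInt length followed by that
    // many bytes of pulsed postings; terms handled by the wrapped writer
    // have no entry here.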
Example #6
 @Override
 public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
   PostingsWriterBase docsWriter = new Lucene40PostingsWriter(state);
   boolean success = false;
   try {
     FieldsConsumer ret =
         new AppendingTermsWriter(
             state,
             docsWriter,
             BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE,
             BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
     success = true;
     return ret;
   } finally {
     if (!success) {
       docsWriter.close();
     }
   }
 }
Example #7
  @Override
  public void startDoc(int docID, int termDocFreq) throws IOException {
    assert docID >= 0 : "got docID=" + docID;

    /*
    if (termID != -1) {
      if (docID == 0) {
        baseDocID = termID;
      } else if (baseDocID + docID != termID) {
        throw new RuntimeException("WRITE: baseDocID=" + baseDocID + " docID=" + docID + " termID=" + termID);
      }
    }
    */

    if (DEBUG) System.out.println("PW     doc=" + docID);

    if (pendingCount == pending.length) {
      push();
      if (DEBUG) System.out.println("PW: wrapped.finishDoc");
      wrappedPostingsWriter.finishDoc();
    }

    if (pendingCount != -1) {
      assert pendingCount < pending.length;
      currentDoc = pending[pendingCount];
      currentDoc.docID = docID;
      if (indexOptions == IndexOptions.DOCS_ONLY) {
        pendingCount++;
      } else if (indexOptions == IndexOptions.DOCS_AND_FREQS) {
        pendingCount++;
        currentDoc.termFreq = termDocFreq;
      } else {
        currentDoc.termFreq = termDocFreq;
      }
    } else {
      // We've already seen too many docs for this term --
      // just forward to our fallback writer
      wrappedPostingsWriter.startDoc(docID, termDocFreq);
    }
  }
Example #8
  @Override
  public void addPosition(int position, BytesRef payload, int startOffset, int endOffset)
      throws IOException {

    if (DEBUG)
      System.out.println(
          "PW       pos="
              + position
              + " payload="
              + (payload == null ? "null" : payload.length + " bytes"));
    if (pendingCount == pending.length) {
      push();
    }

    if (pendingCount == -1) {
      // We've already seen too many docs for this term --
      // just forward to our fallback writer
      wrappedPostingsWriter.addPosition(position, payload, startOffset, endOffset);
    } else {
      // buffer up
      final Position pos = pending[pendingCount++];
      pos.pos = position;
      pos.startOffset = startOffset;
      pos.endOffset = endOffset;
      pos.docID = currentDoc.docID;
      if (payload != null && payload.length > 0) {
        if (pos.payload == null) {
          pos.payload = BytesRef.deepCopyOf(payload);
        } else {
          pos.payload.copyBytes(payload);
        }
      } else if (pos.payload != null) {
        pos.payload.length = 0;
      }
    }
  }
Example #9
  // Pushes pending positions to the wrapped codec
  private void push() throws IOException {
    if (DEBUG)
      System.out.println("PW now push @ " + pendingCount + " wrapped=" + wrappedPostingsWriter);
    assert pendingCount == pending.length;

    wrappedPostingsWriter.startTerm();

    // Flush all buffered docs
    if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
      Position doc = null;
      for (Position pos : pending) {
        if (doc == null) {
          doc = pos;
          if (DEBUG)
            System.out.println("PW: wrapped.startDoc docID=" + doc.docID + " tf=" + doc.termFreq);
          wrappedPostingsWriter.startDoc(doc.docID, doc.termFreq);
        } else if (doc.docID != pos.docID) {
          assert pos.docID > doc.docID;
          if (DEBUG) System.out.println("PW: wrapped.finishDoc");
          wrappedPostingsWriter.finishDoc();
          doc = pos;
          if (DEBUG)
            System.out.println("PW: wrapped.startDoc docID=" + doc.docID + " tf=" + doc.termFreq);
          wrappedPostingsWriter.startDoc(doc.docID, doc.termFreq);
        }
        if (DEBUG) System.out.println("PW:   wrapped.addPos pos=" + pos.pos);
        wrappedPostingsWriter.addPosition(pos.pos, pos.payload, pos.startOffset, pos.endOffset);
      }
      // wrappedPostingsWriter.finishDoc();
    } else {
      for (Position doc : pending) {
        wrappedPostingsWriter.startDoc(
            doc.docID, indexOptions == IndexOptions.DOCS_ONLY ? 0 : doc.termFreq);
      }
    }
    pendingCount = -1;
  }
Example #10
  @Override
  public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
    // we pull this before the seed intentionally: because it's not consumed at runtime
    // (the skipInterval is written into the postings header)
    int skipInterval = _TestUtil.nextInt(seedRandom, 2, 10);

    if (LuceneTestCase.VERBOSE) {
      System.out.println("MockRandomCodec: skipInterval=" + skipInterval);
    }

    final long seed = seedRandom.nextLong();

    if (LuceneTestCase.VERBOSE) {
      System.out.println(
          "MockRandomCodec: writing to seg="
              + state.segmentName
              + " formatID="
              + state.segmentSuffix
              + " seed="
              + seed);
    }

    final String seedFileName =
        IndexFileNames.segmentFileName(state.segmentName, state.segmentSuffix, SEED_EXT);
    final IndexOutput out = state.directory.createOutput(seedFileName, state.context);
    try {
      out.writeLong(seed);
    } finally {
      out.close();
    }

    final Random random = new Random(seed);

    random.nextInt(); // consume a random for buffersize

    PostingsWriterBase postingsWriter;
    if (random.nextBoolean()) {
      postingsWriter = new SepPostingsWriter(state, new MockIntStreamFactory(random), skipInterval);
    } else {
      if (LuceneTestCase.VERBOSE) {
        System.out.println("MockRandomCodec: writing Standard postings");
      }
      postingsWriter = new Lucene40PostingsWriter(state, skipInterval);
    }

    if (random.nextBoolean()) {
      final int totTFCutoff = _TestUtil.nextInt(random, 1, 20);
      if (LuceneTestCase.VERBOSE) {
        System.out.println(
            "MockRandomCodec: writing pulsing postings with totTFCutoff=" + totTFCutoff);
      }
      postingsWriter = new PulsingPostingsWriter(totTFCutoff, postingsWriter);
    }

    final FieldsConsumer fields;

    if (random.nextBoolean()) {
      // Use BlockTree terms dict

      if (LuceneTestCase.VERBOSE) {
        System.out.println("MockRandomCodec: writing BlockTree terms dict");
      }

      // TODO: would be nice to allow 1 but this is very
      // slow to write
      final int minTermsInBlock = _TestUtil.nextInt(random, 2, 100);
      final int maxTermsInBlock = Math.max(2, (minTermsInBlock - 1) * 2 + random.nextInt(100));

      boolean success = false;
      try {
        fields = new BlockTreeTermsWriter(state, postingsWriter, minTermsInBlock, maxTermsInBlock);
        success = true;
      } finally {
        if (!success) {
          postingsWriter.close();
        }
      }
    } else {

      if (LuceneTestCase.VERBOSE) {
        System.out.println("MockRandomCodec: writing Block terms dict");
      }

      boolean success = false;

      final TermsIndexWriterBase indexWriter;
      try {
        if (random.nextBoolean()) {
          state.termIndexInterval = _TestUtil.nextInt(random, 1, 100);
          if (LuceneTestCase.VERBOSE) {
            System.out.println(
                "MockRandomCodec: fixed-gap terms index (tii=" + state.termIndexInterval + ")");
          }
          indexWriter = new FixedGapTermsIndexWriter(state);
        } else {
          final VariableGapTermsIndexWriter.IndexTermSelector selector;
          final int n2 = random.nextInt(3);
          if (n2 == 0) {
            final int tii = _TestUtil.nextInt(random, 1, 100);
            selector = new VariableGapTermsIndexWriter.EveryNTermSelector(tii);
            if (LuceneTestCase.VERBOSE) {
              System.out.println("MockRandomCodec: variable-gap terms index (tii=" + tii + ")");
            }
          } else if (n2 == 1) {
            final int docFreqThresh = _TestUtil.nextInt(random, 2, 100);
            final int tii = _TestUtil.nextInt(random, 1, 100);
            selector =
                new VariableGapTermsIndexWriter.EveryNOrDocFreqTermSelector(docFreqThresh, tii);
          } else {
            final long seed2 = random.nextLong();
            final int gap = _TestUtil.nextInt(random, 2, 40);
            if (LuceneTestCase.VERBOSE) {
              System.out.println("MockRandomCodec: random-gap terms index (max gap=" + gap + ")");
            }
            selector =
                new VariableGapTermsIndexWriter.IndexTermSelector() {
                  final Random rand = new Random(seed2);

                  @Override
                  public boolean isIndexTerm(BytesRef term, TermStats stats) {
                    return rand.nextInt(gap) == gap / 2;
                  }

                  @Override
                  public void newField(FieldInfo fieldInfo) {}
                };
          }
          indexWriter = new VariableGapTermsIndexWriter(state, selector);
        }
        success = true;
      } finally {
        if (!success) {
          postingsWriter.close();
        }
      }

      success = false;
      try {
        fields = new BlockTermsWriter(indexWriter, state, postingsWriter);
        success = true;
      } finally {
        if (!success) {
          try {
            postingsWriter.close();
          } finally {
            indexWriter.close();
          }
        }
      }
    }

    return fields;
  }
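On the read side, fieldsProducer has to recover the same seed so it can replay identical random choices. A minimal sketch (the read-state field names are assumptions; only the readLong of the stored seed is dictated by the writer above):

    final IndexInput in = readState.directory.openInput(seedFileName, readState.context);
    final long seed;
    try {
      seed = in.readLong(); // the value written by fieldsConsumer
    } finally {
      in.close();
    }
    final Random random = new Random(seed); // now replays the writer's choices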
Example #11
 @Override
 public void close() throws IOException {
   wrappedPostingsWriter.close();
 }
Example #12
  /** Called when we are done adding docs to this term */
  @Override
  public void finishTerm(TermStats stats) throws IOException {
    if (DEBUG)
      System.out.println(
          "PW   finishTerm docCount="
              + stats.docFreq
              + " pendingCount="
              + pendingCount
              + " pendingTerms.size()="
              + pendingTerms.size());

    assert pendingCount > 0 || pendingCount == -1;

    if (pendingCount == -1) {
      wrappedPostingsWriter.finishTerm(stats);
      // Must add null entry to record terms that our
      // wrapped postings impl added
      pendingTerms.add(null);
    } else {

      // There were few enough total occurrences for this
      // term, so we fully inline our postings data into
      // terms dict, now:

      // TODO: it'd be better to share this encoding logic
      // in some inner codec that knows how to write a
      // single doc / single position, etc.  This way if a
      // given codec wants to store other interesting
      // stuff, it could use this pulsing codec to do so

      if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
        int lastDocID = 0;
        int pendingIDX = 0;
        int lastPayloadLength = -1;
        int lastOffsetLength = -1;
        while (pendingIDX < pendingCount) {
          final Position doc = pending[pendingIDX];

          final int delta = doc.docID - lastDocID;
          lastDocID = doc.docID;

          if (DEBUG) System.out.println("  write doc=" + doc.docID + " freq=" + doc.termFreq);

          if (doc.termFreq == 1) {
            buffer.writeVInt((delta << 1) | 1);
          } else {
            buffer.writeVInt(delta << 1);
            buffer.writeVInt(doc.termFreq);
          }

          int lastPos = 0;
          int lastOffset = 0;
          for (int posIDX = 0; posIDX < doc.termFreq; posIDX++) {
            final Position pos = pending[pendingIDX++];
            assert pos.docID == doc.docID;
            final int posDelta = pos.pos - lastPos;
            lastPos = pos.pos;
            if (DEBUG) System.out.println("    write pos=" + pos.pos);
            final int payloadLength = pos.payload == null ? 0 : pos.payload.length;
            if (storePayloads) {
              if (payloadLength != lastPayloadLength) {
                buffer.writeVInt((posDelta << 1) | 1);
                buffer.writeVInt(payloadLength);
                lastPayloadLength = payloadLength;
              } else {
                buffer.writeVInt(posDelta << 1);
              }
            } else {
              buffer.writeVInt(posDelta);
            }

            if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
                >= 0) {
              // System.out.println("write=" + pos.startOffset + "," + pos.endOffset);
              int offsetDelta = pos.startOffset - lastOffset;
              int offsetLength = pos.endOffset - pos.startOffset;
              if (offsetLength != lastOffsetLength) {
                buffer.writeVInt(offsetDelta << 1 | 1);
                buffer.writeVInt(offsetLength);
              } else {
                buffer.writeVInt(offsetDelta << 1);
              }
              lastOffset = pos.startOffset;
              lastOffsetLength = offsetLength;
            }

            if (payloadLength > 0) {
              assert storePayloads;
              buffer.writeBytes(pos.payload.bytes, 0, pos.payload.length);
            }
          }
        }
      } else if (indexOptions == IndexOptions.DOCS_AND_FREQS) {
        int lastDocID = 0;
        for (int posIDX = 0; posIDX < pendingCount; posIDX++) {
          final Position doc = pending[posIDX];
          final int delta = doc.docID - lastDocID;
          assert doc.termFreq != 0;
          if (doc.termFreq == 1) {
            buffer.writeVInt((delta << 1) | 1);
          } else {
            buffer.writeVInt(delta << 1);
            buffer.writeVInt(doc.termFreq);
          }
          lastDocID = doc.docID;
        }
      } else if (indexOptions == IndexOptions.DOCS_ONLY) {
        int lastDocID = 0;
        for (int posIDX = 0; posIDX < pendingCount; posIDX++) {
          final Position doc = pending[posIDX];
          buffer.writeVInt(doc.docID - lastDocID);
          lastDocID = doc.docID;
        }
      }

      final byte[] bytes = new byte[(int) buffer.getFilePointer()];
      buffer.writeTo(bytes, 0);
      pendingTerms.add(new PendingTerm(bytes));
      buffer.reset();
    }

    pendingCount = 0;
  }
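Throughout the inlined encodings above, the low bit of each doc delta flags the termFreq == 1 case so the separate freq VInt can be skipped. A self-contained sketch of that trick (illustrative helper, not from this codebase):

    static void writeDocDelta(DataOutput out, int delta, int freq) throws IOException {
      if (freq == 1) {
        out.writeVInt((delta << 1) | 1); // low bit set: freq == 1 is implied
      } else {
        out.writeVInt(delta << 1);       // low bit clear: an explicit freq follows
        out.writeVInt(freq);
      }
    }
    // e.g. docIDs {5, 8} with freqs {1, 3} encode as the VInts 11, 6, 3.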