/**
 * Indexes a random number of line-file documents through an {@link NRTCachingDirectory},
 * occasionally opening or refreshing a near-real-time reader along the way, then checks that
 * closing the writer leaves no cached files behind and that every indexed docid occurs exactly
 * once when the wrapped directory is read directly.
 */
public void testNRTAndCommit() throws Exception {
    Directory delegate = newDirectory();
    NRTCachingDirectory nrtDir = new NRTCachingDirectory(delegate, 2.0, 25.0);
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
    IndexWriterConfig config = newIndexWriterConfig(analyzer);
    RandomIndexWriter writer = new RandomIndexWriter(random(), nrtDir, config);
    final LineFileDocs lineDocs = new LineFileDocs(random(), true);
    final int numDocs = TestUtil.nextInt(random(), 100, 400);

    if (VERBOSE) {
      System.out.println("TEST: numDocs=" + numDocs);
    }

    final List<BytesRef> indexedIds = new ArrayList<>();
    DirectoryReader nrtReader = null;
    int indexed = 0;
    while (indexed < numDocs) {
      final Document doc = lineDocs.nextDoc();
      indexedIds.add(new BytesRef(doc.get("docid")));
      writer.addDocument(doc);
      indexed++;

      // Only roughly one iteration in twenty goes on to exercise the NRT reader.
      if (random().nextInt(20) != 17) {
        continue;
      }

      if (nrtReader == null) {
        nrtReader = DirectoryReader.open(writer.w);
      } else {
        final DirectoryReader refreshed = DirectoryReader.openIfChanged(nrtReader);
        if (refreshed != null) {
          nrtReader.close();
          nrtReader = refreshed;
        }
      }

      // The reader must see every document added so far.
      assertEquals(indexed, nrtReader.numDocs());

      // Just make sure a search can run; the hit count is not asserted since it could be 0.
      newSearcher(nrtReader).search(new TermQuery(new Term("body", "the")), 10);
    }

    if (nrtReader != null) {
      nrtReader.close();
    }

    // Close should force the cache to clear since all files are sync'd
    writer.close();

    final String[] leftovers = nrtDir.listCachedFiles();
    for (int i = 0; i < leftovers.length; i++) {
      System.out.println("FAIL: cached file " + leftovers[i] + " remains after sync");
    }
    assertEquals(0, leftovers.length);

    // Read through the wrapped directory directly, bypassing the caching layer.
    final DirectoryReader committed = DirectoryReader.open(delegate);
    for (BytesRef id : indexedIds) {
      assertEquals(1, committed.docFreq(new Term("docid", id)));
    }
    committed.close();
    nrtDir.close();
    lineDocs.close();
  }
  /**
   * Duels {@link MemoryIndex} against a regular index: each round writes one line-file document
   * (keeping only its indexed fields, some of them randomly added twice) into a fresh directory
   * from {@code newDirectory()} and into the memory index, then compares both readers with {@code
   * duellReaders}.
   */
  public void testDuellMemIndex() throws IOException {
    LineFileDocs lineDocs = new LineFileDocs(random());
    int rounds = atLeast(10);
    MemoryIndex memIndex = randomMemoryIndex();
    for (int round = 0; round < rounds; round++) {
      Directory directory = newDirectory();
      MockAnalyzer analyzer = new MockAnalyzer(random());
      analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
      IndexWriter indexWriter =
          new IndexWriter(directory, newIndexWriterConfig(random(), analyzer));

      // Build the document to index from the indexed fields of the next line-file document.
      Document indexedDoc = new Document();
      for (IndexableField candidate : lineDocs.nextDoc().getFields()) {
        if (candidate.fieldType().indexOptions() == IndexOptions.NONE) {
          continue;
        }
        indexedDoc.add(candidate);
        if (random().nextInt(3) == 0) {
          indexedDoc.add(candidate); // randomly add the same field twice
        }
      }

      indexWriter.addDocument(indexedDoc);
      indexWriter.close();

      // Feed exactly the same field instances into the memory index.
      for (IndexableField indexedField : indexedDoc) {
        memIndex.addField(indexedField.name(), ((Field) indexedField).stringValue(), analyzer);
      }

      DirectoryReader directoryReader = DirectoryReader.open(directory);
      LeafReader memReader = (LeafReader) memIndex.createSearcher().getIndexReader();
      TestUtil.checkReader(memReader);
      duellReaders(directoryReader, memReader);
      IOUtils.close(directoryReader, memReader);

      // The memory index is reused across rounds, so clear it before the next document.
      memIndex.reset();
      directory.close();
    }
    lineDocs.close();
  }
  /**
   * Runs {@code forceMerge(1)} while a background thread keeps updating documents and pulling
   * near-real-time readers, and asserts that the writer's {@code mergeCount} is at most 1
   * afterwards.
   *
   * <p>The test returns early, after releasing every resource it opened, when the configured merge
   * policy is neither a {@link TieredMergePolicy} nor a {@link LogMergePolicy}.
   */
  public void test() throws Exception {
    final Directory d = newDirectory();
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));

    final MyIndexWriter w =
        new MyIndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));

    // Try to make an index that requires merging:
    w.getConfig().setMaxBufferedDocs(TestUtil.nextInt(random(), 2, 11));
    final int numStartDocs = atLeast(20);
    final LineFileDocs docs = new LineFileDocs(random(), true);
    for (int docIDX = 0; docIDX < numStartDocs; docIDX++) {
      w.addDocument(docs.nextDoc());
    }

    // Allow one more segment per merge than currently exists.
    MergePolicy mp = w.getConfig().getMergePolicy();
    final int mergeAtOnce = 1 + w.segmentInfos.size();
    if (mp instanceof TieredMergePolicy) {
      ((TieredMergePolicy) mp).setMaxMergeAtOnce(mergeAtOnce);
    } else if (mp instanceof LogMergePolicy) {
      ((LogMergePolicy) mp).setMergeFactor(mergeAtOnce);
    } else {
      // skip test; release everything opened so far, including the line-file docs
      w.close();
      d.close();
      docs.close();
      return;
    }

    final AtomicBoolean doStop = new AtomicBoolean();
    w.getConfig().setMaxBufferedDocs(2);
    Thread t =
        new Thread() {
          @Override
          public void run() {
            try {
              while (!doStop.get()) {
                w.updateDocument(
                    new Term("docid", "" + random().nextInt(numStartDocs)), docs.nextDoc());
                // Force deletes to apply
                w.getReader().close();
              }
            } catch (Throwable t) {
              throw new RuntimeException(t);
            }
          }
        };
    t.start();
    try {
      w.forceMerge(1);
    } finally {
      // Always stop and join the updater, even if forceMerge failed, so the thread does not
      // outlive the test.
      doStop.set(true);
      t.join();
    }
    assertTrue("merge count is " + w.mergeCount.get(), w.mergeCount.get() <= 1);
    w.close();
    d.close();
    docs.close();
  }
  // Example no. 4
  /**
   * Makes sure a {@link PostingsEnum} is never reused from another reader, even if it is the same
   * field and codec: for every term of the first reader, the reuse candidate is produced by {@code
   * randomDocsEnum} from the second reader's leaves, and each returned enum must be a distinct
   * instance. The check runs once with {@code null} live docs and once with a match-no-docs
   * {@link Bits}.
   */
  public void testReuseDocsEnumDifferentReader() throws IOException {
    Directory directory = newDirectory();
    Codec codec = TestUtil.alwaysPostingsFormat(new Lucene40RWPostingsFormat());
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));

    RandomIndexWriter indexWriter =
        new RandomIndexWriter(random(), directory, newIndexWriterConfig(analyzer).setCodec(codec));
    int docCount = atLeast(20);
    createRandomIndex(docCount, indexWriter, random());
    indexWriter.commit();

    DirectoryReader firstReader = DirectoryReader.open(directory);
    DirectoryReader secondReader = DirectoryReader.open(directory);
    List<LeafReaderContext> firstLeaves = firstReader.leaves();
    List<LeafReaderContext> secondLeaves = secondReader.leaves();

    for (LeafReaderContext leaf : firstLeaves) {
      Terms bodyTerms = leaf.reader().terms("body");
      TermsEnum termsEnum = bodyTerms.iterator();
      IdentityHashMap<PostingsEnum, Boolean> distinctEnums = new IdentityHashMap<>();
      MatchNoBits noDocs = new Bits.MatchNoBits(firstReader.maxDoc());

      // First pass: no live-docs filter.
      termsEnum = bodyTerms.iterator();
      for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
        distinctEnums.put(
            termsEnum.postings(
                null,
                randomDocsEnum("body", term, secondLeaves, noDocs),
                random().nextBoolean() ? PostingsEnum.FREQS : PostingsEnum.NONE),
            true);
      }
      // The map is identity-based, so equal sizes mean one distinct enum per term.
      assertEquals(bodyTerms.size(), distinctEnums.size());

      // Second pass: same check with the match-no-docs bits as live docs.
      termsEnum = bodyTerms.iterator();
      distinctEnums.clear();
      for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
        distinctEnums.put(
            termsEnum.postings(
                noDocs,
                randomDocsEnum("body", term, secondLeaves, noDocs),
                random().nextBoolean() ? PostingsEnum.FREQS : PostingsEnum.NONE),
            true);
      }
      assertEquals(bodyTerms.size(), distinctEnums.size());
    }
    indexWriter.close();
    IOUtils.close(firstReader, secondReader, directory);
  }
  /**
   * Indexes the same two values of one field both into a regular index and into a {@link
   * MemoryIndex}, then compares the term vectors of document 0 from both readers with {@code
   * compareTermVectors}.
   */
  public void testDuelMemoryIndexCoreDirectoryWithArrayField() throws Exception {

    final String fieldName = "text";
    final String firstValue = "la la";
    final String secondValue = "foo bar foo bar foo";

    MockAnalyzer analyzer = new MockAnalyzer(random());
    if (random().nextBoolean()) {
      analyzer.setOffsetGap(random().nextInt(100));
    }

    // Stored text field whose term vectors carry offsets and positions but no payloads.
    FieldType vectorType = new FieldType(TextField.TYPE_STORED);
    vectorType.setStoreTermVectorOffsets(true);
    vectorType.setStoreTermVectorPayloads(false);
    vectorType.setStoreTermVectorPositions(true);
    vectorType.setStoreTermVectors(true);
    vectorType.freeze();

    Document twoValueDoc = new Document();
    twoValueDoc.add(new Field(fieldName, firstValue, vectorType));
    twoValueDoc.add(new Field(fieldName, secondValue, vectorType));

    // Index the document into a random directory.
    Directory directory = newDirectory();
    IndexWriter indexWriter =
        new IndexWriter(directory, newIndexWriterConfig(random(), analyzer));
    indexWriter.updateDocument(new Term("id", "1"), twoValueDoc);
    indexWriter.commit();
    indexWriter.close();
    DirectoryReader directoryReader = DirectoryReader.open(directory);

    // Index the same two values into the memory index.
    MemoryIndex memoryIndex = new MemoryIndex(true);
    memoryIndex.addField(fieldName, firstValue, analyzer);
    memoryIndex.addField(fieldName, secondValue, analyzer);

    // Compare the term vectors of the single document in each index.
    Terms directoryVector = directoryReader.getTermVector(0, fieldName);
    IndexReader memoryReader = memoryIndex.createSearcher().getIndexReader();
    TestUtil.checkReader(memoryReader);
    Terms memoryVector = memoryReader.getTermVector(0, fieldName);

    compareTermVectors(directoryVector, memoryVector, fieldName);
    memoryReader.close();
    directoryReader.close();
    directory.close();
  }
 /**
  * Adds two values under the same field name to a {@link MemoryIndex} and checks that they are
  * indexed as one field: the summed total term frequency is 7 and a phrase spanning the two
  * values matches. After a reset and with a position increment gap of at least 1, the exact
  * phrase must score 0 while the same phrase with slop 10 must still match.
  */
 public void testSameFieldAddedMultipleTimes() throws IOException {
   final String fieldName = "field";
   MemoryIndex memoryIndex = randomMemoryIndex();
   MockAnalyzer analyzer = new MockAnalyzer(random());

   // Without a position gap the last term of the first value is adjacent to the second value.
   memoryIndex.addField(fieldName, "the quick brown fox", analyzer);
   memoryIndex.addField(fieldName, "jumps over the", analyzer);
   LeafReader leafReader = (LeafReader) memoryIndex.createSearcher().getIndexReader();
   TestUtil.checkReader(leafReader);
   assertEquals(7, leafReader.terms(fieldName).getSumTotalTermFreq());
   PhraseQuery exactPhrase = new PhraseQuery(fieldName, "fox", "jumps");
   assertTrue(memoryIndex.search(exactPhrase) > 0.1);

   // Re-index the same values with a random position increment gap between them.
   memoryIndex.reset();
   analyzer.setPositionIncrementGap(1 + random().nextInt(10));
   memoryIndex.addField(fieldName, "the quick brown fox", analyzer);
   memoryIndex.addField(fieldName, "jumps over the", analyzer);
   assertEquals(0, memoryIndex.search(exactPhrase), 0.00001f);
   PhraseQuery sloppyPhrase = new PhraseQuery(10, fieldName, "fox", "jumps");
   assertTrue(
       "posGap" + analyzer.getPositionIncrementGap(fieldName),
       memoryIndex.search(sloppyPhrase) > 0.0001);
   TestUtil.checkReader(memoryIndex.createSearcher().getIndexReader());
 }
  // Example no. 7
  /**
   * Repeatedly replaces documents whose ids cycle through {@code 0..SIZE-1}. Each replacement uses
   * either {@code tryDeleteDocument} followed by {@code addDocument}, {@code updateDocument}, or
   * {@code deleteDocuments} followed by {@code addDocument}. A near-real-time reader is
   * occasionally reopened, with or without applying deletions.
   *
   * <p>At the end the test checks that the writer holds exactly {@code SIZE} documents, that no
   * unreferenced files remain, and (LUCENE-4455) that the sum of {@code sizeInBytes()} over all
   * segments equals the total length of the codec files listed in the directory.
   */
  @Test
  public void testRollingUpdates() throws Exception {
    Random random = new Random(random().nextLong());
    final BaseDirectoryWrapper dir = newDirectory();
    // test checks for no unref'ed files with the IW helper method, which isn't aware of "tried to
    // delete files"
    if (dir instanceof MockDirectoryWrapper) {
      ((MockDirectoryWrapper) dir).setEnableVirusScanner(false);
    }

    final LineFileDocs docs = new LineFileDocs(random, true);

    // provider.register(new MemoryCodec());
    if (random().nextBoolean()) {
      Codec.setDefault(
          TestUtil.alwaysPostingsFormat(
              new MemoryPostingsFormat(random().nextBoolean(), random.nextFloat())));
    }

    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));

    final IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(analyzer));
    final int SIZE = atLeast(20);
    int id = 0;
    IndexReader r = null;
    IndexSearcher s = null;
    final int numUpdates =
        (int)
            (SIZE * (2 + (TEST_NIGHTLY ? 200 * random().nextDouble() : 5 * random().nextDouble())));
    if (VERBOSE) {
      System.out.println("TEST: numUpdates=" + numUpdates);
    }
    // Number of updates issued since the current reader/searcher was opened.
    int updateCount = 0;
    // TODO: sometimes update ids not in order...
    for (int docIter = 0; docIter < numUpdates; docIter++) {
      final Document doc = docs.nextDoc();
      final String myID = Integer.toString(id);
      // Advance the rolling id for the next iteration, wrapping around after SIZE - 1.
      if (id == SIZE - 1) {
        id = 0;
      } else {
        id++;
      }
      if (VERBOSE) {
        // Report the id of the document handled in this iteration, not the already-advanced one.
        System.out.println("  docIter=" + docIter + " id=" + myID);
      }
      ((Field) doc.getField("docid")).setStringValue(myID);

      Term idTerm = new Term("docid", myID);

      final boolean doUpdate;
      if (s != null && updateCount < SIZE) {
        // The searcher is only consulted for the first SIZE updates after it was opened.
        TopDocs hits = s.search(new TermQuery(idTerm), 1);
        assertEquals(1, hits.totalHits);
        doUpdate = !w.tryDeleteDocument(r, hits.scoreDocs[0].doc);
        if (VERBOSE) {
          if (doUpdate) {
            System.out.println("  tryDeleteDocument failed");
          } else {
            System.out.println("  tryDeleteDocument succeeded");
          }
        }
      } else {
        doUpdate = true;
        if (VERBOSE) {
          System.out.println("  no searcher: doUpdate=true");
        }
      }

      updateCount++;

      if (doUpdate) {
        if (random().nextBoolean()) {
          w.updateDocument(idTerm, doc);
        } else {
          // It's OK to not be atomic for this test (no separate thread reopening readers):
          w.deleteDocuments(new TermQuery(idTerm));
          w.addDocument(doc);
        }
      } else {
        // tryDeleteDocument already removed the old version, so only the add is needed.
        w.addDocument(doc);
      }

      if (docIter >= SIZE && random().nextInt(50) == 17) {
        if (r != null) {
          r.close();
        }

        final boolean applyDeletions = random().nextBoolean();

        if (VERBOSE) {
          System.out.println("TEST: reopen applyDeletions=" + applyDeletions);
        }

        r = w.getReader(applyDeletions);
        // A searcher is only kept when deletions were applied to the reader.
        if (applyDeletions) {
          s = newSearcher(r);
        } else {
          s = null;
        }
        assertTrue(
            "applyDeletions=" + applyDeletions + " r.numDocs()=" + r.numDocs() + " vs SIZE=" + SIZE,
            !applyDeletions || r.numDocs() == SIZE);
        updateCount = 0;
      }
    }

    if (r != null) {
      r.close();
    }

    w.commit();
    assertEquals(SIZE, w.numDocs());

    w.close();

    TestIndexWriter.assertNoUnreferencedFiles(dir, "leftover files after rolling updates");

    docs.close();

    // LUCENE-4455:
    SegmentInfos infos = SegmentInfos.readLatestCommit(dir);
    long totalBytes = 0;
    for (SegmentCommitInfo sipc : infos) {
      totalBytes += sipc.sizeInBytes();
    }
    long totalBytes2 = 0;

    for (String fileName : dir.listAll()) {
      if (IndexFileNames.CODEC_FILE_PATTERN.matcher(fileName).matches()) {
        totalBytes2 += dir.fileLength(fileName);
      }
    }
    assertEquals(totalBytes2, totalBytes);
    dir.close();
  }
  /**
   * Drives a multi-threaded indexing and searching run and then verifies the resulting index.
   *
   * <p>Steps, in order: reset the shared failure flag and counters; create the line-file docs, a
   * temp directory and the directory under test; configure the writer (no commit on close, an info
   * stream, and a merged-segment warmer); start the indexing threads via {@code
   * launchIndexingThreads}; run {@code doSearching} until the stop time; join the indexing
   * threads; use the searcher from {@code getFinalSearcher()} to verify deleted ids, deleted
   * packs, sub-document ordering, and that non-deleted ids are still present; compare the document
   * counts against {@code addCount - delCount}; finally commit, close the writer, shut down the
   * executor, check the index, and remove the temp directory.
   *
   * @param testName used as the temp directory name and as the name given to the executor's
   *     thread factory
   * @throws Exception if any step of the run or the verification fails
   */
  public void runTest(String testName) throws Exception {

    // Reset the shared failure flag and counters.
    failed.set(false);
    addCount.set(0);
    delCount.set(0);
    packCount.set(0);

    final long t0 = System.currentTimeMillis();

    // Separate Random instance for LineFileDocs, seeded from the test's random().
    Random random = new Random(random().nextLong());
    final LineFileDocs docs = new LineFileDocs(random, true);
    final Path tempDir = createTempDir(testName);
    dir = getDirectory(newMockFSDirectory(tempDir)); // some subclasses rely on this being MDW
    if (dir instanceof BaseDirectoryWrapper) {
      ((BaseDirectoryWrapper) dir)
          .setCheckIndexOnClose(false); // don't double-checkIndex, we do it ourselves.
    }
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
    final IndexWriterConfig conf = newIndexWriterConfig(analyzer).setCommitOnClose(false);
    conf.setInfoStream(new FailOnNonBulkMergesInfoStream());
    if (conf.getMergePolicy() instanceof MockRandomMergePolicy) {
      ((MockRandomMergePolicy) conf.getMergePolicy()).setDoNonBulkMerges(false);
    }

    if (LuceneTestCase.TEST_NIGHTLY) {
      // newIWConfig makes smallish max seg size, which
      // results in tons and tons of segments for this test
      // when run nightly:
      MergePolicy mp = conf.getMergePolicy();
      if (mp instanceof TieredMergePolicy) {
        ((TieredMergePolicy) mp).setMaxMergedSegmentMB(5000.);
      } else if (mp instanceof LogByteSizeMergePolicy) {
        ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1000.);
      } else if (mp instanceof LogMergePolicy) {
        ((LogMergePolicy) mp).setMaxMergeDocs(100000);
      }
    }

    // Warmer for merged segments: records the reader's core in `warmed`, loads a sample of
    // stored documents (about every maxDoc/50-th doc), and runs one term query.
    conf.setMergedSegmentWarmer(
        new IndexWriter.IndexReaderWarmer() {
          @Override
          public void warm(LeafReader reader) throws IOException {
            if (VERBOSE) {
              System.out.println("TEST: now warm merged reader=" + reader);
            }
            warmed.put(((SegmentReader) reader).core, Boolean.TRUE);
            final int maxDoc = reader.maxDoc();
            final Bits liveDocs = reader.getLiveDocs();
            // Only accumulated so the work has a visible result; printed when VERBOSE.
            int sum = 0;
            final int inc = Math.max(1, maxDoc / 50);
            for (int docID = 0; docID < maxDoc; docID += inc) {
              if (liveDocs == null || liveDocs.get(docID)) {
                final StoredDocument doc = reader.document(docID);
                sum += doc.getFields().size();
              }
            }

            IndexSearcher searcher = newSearcher(reader);
            sum += searcher.search(new TermQuery(new Term("body", "united")), 10).totalHits;

            if (VERBOSE) {
              System.out.println("TEST: warm visited " + sum + " fields");
            }
          }
        });

    // NOTE(review): this second setInfoStream call presumably replaces the
    // FailOnNonBulkMergesInfoStream installed above whenever VERBOSE is on -- confirm that
    // losing the non-bulk-merge check in verbose runs is intended.
    if (VERBOSE) {
      conf.setInfoStream(
          new PrintStreamInfoStream(System.out) {
            @Override
            public void message(String component, String message) {
              if ("TP".equals(component)) {
                return; // ignore test points!
              }
              super.message(component, message);
            }
          });
    }
    writer = new IndexWriter(dir, conf);
    TestUtil.reduceOpenFiles(writer);

    // Randomly run with an executor or with none (null); the value is handed to doAfterWriter
    // and doSearching.
    final ExecutorService es =
        random().nextBoolean()
            ? null
            : Executors.newCachedThreadPool(new NamedThreadFactory(testName));

    doAfterWriter(es);

    final int NUM_INDEX_THREADS = TestUtil.nextInt(random(), 2, 4);

    // Run time in seconds: 300 for nightly runs, otherwise RANDOM_MULTIPLIER.
    final int RUN_TIME_SEC = LuceneTestCase.TEST_NIGHTLY ? 300 : RANDOM_MULTIPLIER;

    // Shared with the indexing threads, hence the synchronized wrappers.
    final Set<String> delIDs = Collections.synchronizedSet(new HashSet<String>());
    final Set<String> delPackIDs = Collections.synchronizedSet(new HashSet<String>());
    final List<SubDocs> allSubDocs = Collections.synchronizedList(new ArrayList<SubDocs>());

    // NOTE(review): RUN_TIME_SEC * 1000 is evaluated in int arithmetic before being widened to
    // long; fine for the values above, but it would overflow for very large multipliers.
    final long stopTime = System.currentTimeMillis() + RUN_TIME_SEC * 1000;

    final Thread[] indexThreads =
        launchIndexingThreads(docs, NUM_INDEX_THREADS, stopTime, delIDs, delPackIDs, allSubDocs);

    if (VERBOSE) {
      System.out.println(
          "TEST: DONE start "
              + NUM_INDEX_THREADS
              + " indexing threads ["
              + (System.currentTimeMillis() - t0)
              + " ms]");
    }

    // Let index build up a bit
    Thread.sleep(100);

    doSearching(es, stopTime);

    if (VERBOSE) {
      System.out.println("TEST: all searching done [" + (System.currentTimeMillis() - t0) + " ms]");
    }

    for (int thread = 0; thread < indexThreads.length; thread++) {
      indexThreads[thread].join();
    }

    if (VERBOSE) {
      System.out.println(
          "TEST: done join indexing threads ["
              + (System.currentTimeMillis() - t0)
              + " ms]; addCount="
              + addCount
              + " delCount="
              + delCount);
    }

    final IndexSearcher s = getFinalSearcher();
    if (VERBOSE) {
      System.out.println("TEST: finalSearcher=" + s);
    }

    assertFalse(failed.get());

    // Verification problems are printed and collected in doFail so that all of them are
    // reported before the single assertFalse(doFail) further down.
    boolean doFail = false;

    // Verify: make sure delIDs are in fact deleted:
    for (String id : delIDs) {
      final TopDocs hits = s.search(new TermQuery(new Term("docid", id)), 1);
      if (hits.totalHits != 0) {
        System.out.println(
            "doc id="
                + id
                + " is supposed to be deleted, but got "
                + hits.totalHits
                + " hits; first docID="
                + hits.scoreDocs[0].doc);
        doFail = true;
      }
    }

    // Verify: make sure delPackIDs are in fact deleted:
    for (String id : delPackIDs) {
      final TopDocs hits = s.search(new TermQuery(new Term("packID", id)), 1);
      if (hits.totalHits != 0) {
        System.out.println(
            "packID=" + id + " is supposed to be deleted, but got " + hits.totalHits + " matches");
        doFail = true;
      }
    }

    // Verify: make sure each group of sub-docs are still in docID order:
    for (SubDocs subDocs : allSubDocs) {
      TopDocs hits = s.search(new TermQuery(new Term("packID", subDocs.packID)), 20);
      if (!subDocs.deleted) {
        // We sort by relevance but the scores should be identical so sort falls back to by docID:
        if (hits.totalHits != subDocs.subIDs.size()) {
          System.out.println(
              "packID="
                  + subDocs.packID
                  + ": expected "
                  + subDocs.subIDs.size()
                  + " hits but got "
                  + hits.totalHits);
          doFail = true;
        } else {
          // First check that the pack's hits occupy consecutive docIDs and carry the packID.
          int lastDocID = -1;
          int startDocID = -1;
          for (ScoreDoc scoreDoc : hits.scoreDocs) {
            final int docID = scoreDoc.doc;
            if (lastDocID != -1) {
              assertEquals(1 + lastDocID, docID);
            } else {
              startDocID = docID;
            }
            lastDocID = docID;
            final StoredDocument doc = s.doc(docID);
            assertEquals(subDocs.packID, doc.get("packID"));
          }

          // Then check that the sub-doc ids, in their recorded order, map onto those same
          // consecutive docIDs starting at startDocID.
          lastDocID = startDocID - 1;
          for (String subID : subDocs.subIDs) {
            hits = s.search(new TermQuery(new Term("docid", subID)), 1);
            assertEquals(1, hits.totalHits);
            final int docID = hits.scoreDocs[0].doc;
            if (lastDocID != -1) {
              assertEquals(1 + lastDocID, docID);
            }
            lastDocID = docID;
          }
        }
      } else {
        // Pack was deleted -- make sure its docs are
        // deleted.  We can't verify packID is deleted
        // because we can re-use packID for update:
        for (String subID : subDocs.subIDs) {
          assertEquals(0, s.search(new TermQuery(new Term("docid", subID)), 1).totalHits);
        }
      }
    }

    // Verify: make sure all not-deleted docs are in fact
    // not deleted:
    // The docid of the next unread line-file document is used as the exclusive upper bound.
    final int endID = Integer.parseInt(docs.nextDoc().get("docid"));
    docs.close();

    for (int id = 0; id < endID; id++) {
      String stringID = "" + id;
      if (!delIDs.contains(stringID)) {
        final TopDocs hits = s.search(new TermQuery(new Term("docid", stringID)), 1);
        if (hits.totalHits != 1) {
          System.out.println(
              "doc id="
                  + stringID
                  + " is not supposed to be deleted, but got hitCount="
                  + hits.totalHits
                  + "; delIDs="
                  + delIDs);
          doFail = true;
        }
      }
    }
    assertFalse(doFail);

    // The final searcher must see exactly the added-minus-deleted documents.
    assertEquals(
        "index=" + writer.segString() + " addCount=" + addCount + " delCount=" + delCount,
        addCount.get() - delCount.get(),
        s.getIndexReader().numDocs());
    releaseSearcher(s);

    writer.commit();

    // The same count must hold for the writer after the commit.
    assertEquals(
        "index=" + writer.segString() + " addCount=" + addCount + " delCount=" + delCount,
        addCount.get() - delCount.get(),
        writer.numDocs());

    doClose();

    // The config disabled commit-on-close, so commit explicitly; close the writer even if that
    // commit fails.
    try {
      writer.commit();
    } finally {
      writer.close();
    }

    // Cannot close until after writer is closed because
    // writer has merged segment warmer that uses IS to run
    // searches, and that IS may be using this es!
    if (es != null) {
      es.shutdown();
      // NOTE(review): the boolean result of awaitTermination is ignored, so a pool that is still
      // busy after one second goes unnoticed here.
      es.awaitTermination(1, TimeUnit.SECONDS);
    }

    TestUtil.checkIndex(dir);
    dir.close();
    IOUtils.rm(tempDir);

    if (VERBOSE) {
      System.out.println("TEST: done [" + (System.currentTimeMillis() - t0) + " ms]");
    }
  }