// test the simple case
public void testSimpleCase() throws IOException {
  String[] keywords = {"1", "2"};
  String[] unindexed = {"Netherlands", "Italy"};
  String[] unstored = {"Amsterdam has lots of bridges", "Venice has lots of canals"};
  String[] text = {"Amsterdam", "Venice"};

  Directory dir = new MockRAMDirectory();
  IndexWriter modifier = new IndexWriter(dir, new WhitespaceAnalyzer(), true,
                                         IndexWriter.MaxFieldLength.UNLIMITED);
  modifier.setUseCompoundFile(true);
  modifier.setMaxBufferedDeleteTerms(1);

  for (int i = 0; i < keywords.length; i++) {
    Document doc = new Document();
    doc.add(new Field("id", keywords[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.add(new Field("country", unindexed[i], Field.Store.YES, Field.Index.NO));
    doc.add(new Field("contents", unstored[i], Field.Store.NO, Field.Index.ANALYZED));
    doc.add(new Field("city", text[i], Field.Store.YES, Field.Index.ANALYZED));
    modifier.addDocument(doc);
  }
  modifier.optimize();
  modifier.commit();

  Term term = new Term("city", "Amsterdam");
  int hitCount = getHitCount(dir, term);
  assertEquals(1, hitCount);

  modifier.deleteDocuments(term);
  modifier.commit();

  hitCount = getHitCount(dir, term);
  assertEquals(0, hitCount);

  modifier.close();
  dir.close();
}
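// The getHitCount helper used above (and again in testErrorAfterApplyDeletes
// below) is not defined in this section. A plausible minimal sketch, assuming
// the same Lucene 3.x search API; the top-1000 cutoff is an assumption:
private int getHitCount(Directory dir, Term term) throws IOException {
  // Open a read-only searcher, count hits for the single term, clean up.
  IndexSearcher searcher = new IndexSearcher(dir, true);
  int hitCount = searcher.search(new TermQuery(term), null, 1000).totalHits;
  searcher.close();
  return hitCount;
}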
/*
 * Test: make sure when we run out of disk space or hit
 * random IOExceptions in any of the addIndexes(*) calls
 * that 1) index is not corrupt (searcher can open/search
 * it) and 2) transactional semantics are followed: either
 * all or none of the incoming documents were in fact
 * added.
 */
public void testAddIndexOnDiskFull() throws IOException {
  int START_COUNT = 57;
  int NUM_DIR = 50;
  int END_COUNT = START_COUNT + NUM_DIR * 25;

  // Build up a bunch of dirs that have indexes which we
  // will then merge together by calling addIndexes(*):
  Directory[] dirs = new Directory[NUM_DIR];
  long inputDiskUsage = 0;
  for (int i = 0; i < NUM_DIR; i++) {
    dirs[i] = newDirectory();
    IndexWriter writer = new IndexWriter(
        dirs[i],
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    for (int j = 0; j < 25; j++) {
      addDocWithIndex(writer, 25 * i + j);
    }
    writer.close();
    String[] files = dirs[i].listAll();
    for (int j = 0; j < files.length; j++) {
      inputDiskUsage += dirs[i].fileLength(files[j]);
    }
  }

  // Now, build a starting index that has START_COUNT docs. We
  // will then try to addIndexes into a copy of this:
  MockDirectoryWrapper startDir = newDirectory();
  IndexWriter writer = new IndexWriter(
      startDir,
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
  for (int j = 0; j < START_COUNT; j++) {
    addDocWithIndex(writer, j);
  }
  writer.close();

  // Make sure starting index seems to be working properly:
  Term searchTerm = new Term("content", "aaa");
  IndexReader reader = IndexReader.open(startDir, true);
  assertEquals("first docFreq", 57, reader.docFreq(searchTerm));

  IndexSearcher searcher = newSearcher(reader);
  ScoreDoc[] hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs;
  assertEquals("first number of hits", 57, hits.length);
  searcher.close();
  reader.close();

  // Iterate with larger and larger amounts of free
  // disk space. With little free disk space,
  // addIndexes will certainly run out of space &
  // fail. Verify that when this happens, index is
  // not corrupt and index in fact has added no
  // documents. Then, we increase the allotted disk
  // space each iteration. At some point there is
  // enough free disk space and addIndexes should
  // succeed and index should show all documents were
  // added.

  long diskUsage = startDir.sizeInBytes();
  long startDiskUsage = 0;
  String[] files = startDir.listAll();
  for (int i = 0; i < files.length; i++) {
    startDiskUsage += startDir.fileLength(files[i]);
  }

  for (int iter = 0; iter < 3; iter++) {

    if (VERBOSE)
      System.out.println("TEST: iter=" + iter);

    // Start with 50-200 bytes more than we are currently using:
    long diskFree = diskUsage + _TestUtil.nextInt(random, 50, 200);

    int method = iter;

    boolean success = false;
    boolean done = false;

    String methodName;
    if (0 == method) {
      methodName = "addIndexes(Directory[]) + optimize()";
    } else if (1 == method) {
      methodName = "addIndexes(IndexReader[])";
    } else {
      methodName = "addIndexes(Directory[])";
    }

    while (!done) {
      if (VERBOSE) {
        System.out.println("TEST: cycle...");
      }

      // Make a new dir that will enforce disk usage:
      MockDirectoryWrapper dir = new MockDirectoryWrapper(random, new RAMDirectory(startDir));
      writer = new IndexWriter(
          dir,
          newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
              .setOpenMode(OpenMode.APPEND)
              .setMergePolicy(newLogMergePolicy()));
      IOException err = null;
      writer.setInfoStream(VERBOSE ? System.out : null);

      MergeScheduler ms = writer.getConfig().getMergeScheduler();
      for (int x = 0; x < 2; x++) {
        if (ms instanceof ConcurrentMergeScheduler) {
          // This test intentionally produces exceptions
          // in the threads that CMS launches; we don't
          // want to pollute test output with these.
          if (0 == x)
            ((ConcurrentMergeScheduler) ms).setSuppressExceptions();
          else
            ((ConcurrentMergeScheduler) ms).clearSuppressExceptions();
        }

        // Two loops: first time, limit disk space &
        // throw random IOExceptions; second time, no
        // disk space limit:

        double rate = 0.05;
        double diskRatio = ((double) diskFree) / diskUsage;
        long thisDiskFree;

        String testName = null;

        if (0 == x) {
          thisDiskFree = diskFree;
          if (diskRatio >= 2.0) {
            rate /= 2;
          }
          if (diskRatio >= 4.0) {
            rate /= 2;
          }
          if (diskRatio >= 6.0) {
            rate = 0.0;
          }
          if (VERBOSE)
            testName = "disk full test " + methodName + " with disk full at " + diskFree + " bytes";
        } else {
          thisDiskFree = 0;
          rate = 0.0;
          if (VERBOSE)
            testName = "disk full test " + methodName + " with unlimited disk space";
        }

        if (VERBOSE)
          System.out.println("\ncycle: " + testName);

        dir.setTrackDiskUsage(true);
        dir.setMaxSizeInBytes(thisDiskFree);
        dir.setRandomIOExceptionRate(rate);

        try {
          if (0 == method) {
            writer.addIndexes(dirs);
            writer.optimize();
          } else if (1 == method) {
            IndexReader readers[] = new IndexReader[dirs.length];
            for (int i = 0; i < dirs.length; i++) {
              readers[i] = IndexReader.open(dirs[i], true);
            }
            try {
              writer.addIndexes(readers);
            } finally {
              for (int i = 0; i < dirs.length; i++) {
                readers[i].close();
              }
            }
          } else {
            writer.addIndexes(dirs);
          }

          success = true;
          if (VERBOSE) {
            System.out.println("  success!");
          }

          if (0 == x) {
            done = true;
          }

        } catch (IOException e) {
          success = false;
          err = e;
          if (VERBOSE) {
            System.out.println("  hit IOException: " + e);
            e.printStackTrace(System.out);
          }

          if (1 == x) {
            e.printStackTrace(System.out);
            fail(methodName + " hit IOException after disk space was freed up");
          }
        }

        // Make sure all threads from
        // ConcurrentMergeScheduler are done
        _TestUtil.syncConcurrentMerges(writer);

        if (VERBOSE) {
          System.out.println("  now test readers");
        }

        // Finally, verify index is not corrupt, and, if
        // we succeeded, we see all docs added, and if we
        // failed, we see either all docs or no docs added
        // (transactional semantics):
        try {
          reader = IndexReader.open(dir, true);
        } catch (IOException e) {
          e.printStackTrace(System.out);
          fail(testName + ": exception when creating IndexReader: " + e);
        }
        int result = reader.docFreq(searchTerm);
        if (success) {
          if (result != START_COUNT) {
            fail(testName + ": method did not throw exception but docFreq('aaa') is "
                + result + " instead of expected " + START_COUNT);
          }
        } else {
          // On hitting exception we still may have added
          // all docs:
          if (result != START_COUNT && result != END_COUNT) {
            err.printStackTrace(System.out);
            fail(testName + ": method did throw exception but docFreq('aaa') is "
                + result + " instead of expected " + START_COUNT + " or " + END_COUNT);
          }
        }

        searcher = newSearcher(reader);
        try {
          hits = searcher.search(new TermQuery(searchTerm), null, END_COUNT).scoreDocs;
        } catch (IOException e) {
          e.printStackTrace(System.out);
          fail(testName + ": exception when searching: " + e);
        }
        int result2 = hits.length;
        if (success) {
          if (result2 != result) {
            fail(testName
                + ": method did not throw exception but hits.length for search on term 'aaa' is "
                + result2 + " instead of expected " + result);
          }
        } else {
          // On hitting exception we still may have added
          // all docs:
          if (result2 != result) {
            err.printStackTrace(System.out);
            fail(testName
                + ": method did throw exception but hits.length for search on term 'aaa' is "
                + result2 + " instead of expected " + result);
          }
        }

        searcher.close();
        reader.close();
        if (VERBOSE) {
          System.out.println("  count is " + result);
        }

        if (done || result == END_COUNT) {
          break;
        }
      }

      if (VERBOSE) {
        System.out.println("  start disk = " + startDiskUsage + "; input disk = " + inputDiskUsage
            + "; max used = " + dir.getMaxUsedSizeInBytes());
      }

      if (done) {
        // Javadocs state that temp free Directory space
        // required is at most 2X total input size of
        // indices so let's make sure:
        assertTrue(
            "max temp disk space required exceeded 2X the total input index sizes during "
                + methodName
                + ": max temp usage = " + (dir.getMaxUsedSizeInBytes() - startDiskUsage)
                + " bytes vs limit=" + (2 * (startDiskUsage + inputDiskUsage))
                + "; starting disk usage = " + startDiskUsage + " bytes; "
                + "input index disk usage = " + inputDiskUsage + " bytes",
            (dir.getMaxUsedSizeInBytes() - startDiskUsage) < 2 * (startDiskUsage + inputDiskUsage));
      }

      // Make sure we don't hit disk full during close below:
      dir.setMaxSizeInBytes(0);
      dir.setRandomIOExceptionRate(0.0);

      writer.close();

      // Wait for all BG threads to finish else
      // dir.close() will throw IOException because
      // there are still open files
      _TestUtil.syncConcurrentMerges(ms);

      dir.close();

      // Try again with more free space:
      diskFree += TEST_NIGHTLY ? _TestUtil.nextInt(random, 4000, 8000)
                               : _TestUtil.nextInt(random, 40000, 80000);
    }
  }

  startDir.close();
  for (Directory dir : dirs)
    dir.close();
}
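// addDocWithIndex is not defined in this section. A minimal sketch, assuming
// only what the assertions above require: every document adds one "aaa"
// occurrence to the "content" field (so docFreq("content", "aaa") equals the
// doc count); the stored "id" field is a hypothetical extra:
private void addDocWithIndex(IndexWriter writer, int index) throws IOException {
  Document doc = new Document();
  doc.add(newField("content", "aaa " + index, Field.Store.YES, Field.Index.ANALYZED));
  doc.add(newField("id", "" + index, Field.Store.YES, Field.Index.ANALYZED));
  writer.addDocument(doc);
}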
public void testDeletedDocs() throws IOException {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(
      dir,
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMaxBufferedDocs(2));
  Document doc = new Document();
  doc.add(newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED,
                   Field.TermVector.WITH_POSITIONS_OFFSETS));
  for (int i = 0; i < 19; i++) {
    writer.addDocument(doc);
  }
  writer.optimize();
  writer.close();

  IndexReader reader = IndexReader.open(dir, false);
  reader.deleteDocument(5);
  reader.close();

  ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
  CheckIndex checker = new CheckIndex(dir);
  checker.setInfoStream(new PrintStream(bos));
  if (VERBOSE) checker.setInfoStream(System.out);
  CheckIndex.Status indexStatus = checker.checkIndex();
  if (indexStatus.clean == false) {
    System.out.println("CheckIndex failed");
    System.out.println(bos.toString());
    fail();
  }

  final CheckIndex.Status.SegmentInfoStatus seg = indexStatus.segmentInfos.get(0);
  assertTrue(seg.openReaderPassed);

  assertNotNull(seg.diagnostics);

  assertNotNull(seg.fieldNormStatus);
  assertNull(seg.fieldNormStatus.error);
  assertEquals(1, seg.fieldNormStatus.totFields);

  assertNotNull(seg.termIndexStatus);
  assertNull(seg.termIndexStatus.error);
  assertEquals(1, seg.termIndexStatus.termCount);
  assertEquals(19, seg.termIndexStatus.totFreq);
  assertEquals(18, seg.termIndexStatus.totPos);

  assertNotNull(seg.storedFieldStatus);
  assertNull(seg.storedFieldStatus.error);
  assertEquals(18, seg.storedFieldStatus.docCount);
  assertEquals(18, seg.storedFieldStatus.totFields);

  assertNotNull(seg.termVectorStatus);
  assertNull(seg.termVectorStatus.error);
  assertEquals(18, seg.termVectorStatus.docCount);
  assertEquals(18, seg.termVectorStatus.totVectors);

  assertTrue(seg.diagnostics.size() > 0);
  final List<String> onlySegments = new ArrayList<String>();
  onlySegments.add("_0");

  assertTrue(checker.checkIndex(onlySegments).clean == true);
  dir.close();
}
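// For context, the CheckIndex API exercised above can also be driven outside
// a test. A minimal sketch, assuming a Lucene 3.x classpath plus java.io.File
// and org.apache.lucene.store.FSDirectory on the import list; the index path
// is a placeholder, not taken from these tests:
static void checkIndexExample() throws IOException {
  Directory dir = FSDirectory.open(new File("/path/to/index"));
  CheckIndex checker = new CheckIndex(dir);
  checker.setInfoStream(new PrintStream(System.out, true));
  CheckIndex.Status status = checker.checkIndex();
  // clean is true only if every segment passed all checks; fixIndex(status)
  // would drop broken segments (losing their documents), so use with care.
  System.out.println(status.clean ? "Index is clean" : "Index has problems");
  dir.close();
}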
public void testSkipTo(int indexDivisor) throws IOException {
  Directory dir = new RAMDirectory();

  IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true,
                                       IndexWriter.MaxFieldLength.LIMITED);

  Term ta = new Term("content", "aaa");
  for (int i = 0; i < 10; i++)
    addDoc(writer, "aaa aaa aaa aaa");

  Term tb = new Term("content", "bbb");
  for (int i = 0; i < 16; i++)
    addDoc(writer, "bbb bbb bbb bbb");

  Term tc = new Term("content", "ccc");
  for (int i = 0; i < 50; i++)
    addDoc(writer, "ccc ccc ccc ccc");

  // assure that we deal with a single segment
  writer.optimize();
  writer.close();

  IndexReader reader = IndexReader.open(dir);
  reader.setTermInfosIndexDivisor(indexDivisor);
  assertEquals(indexDivisor, reader.getTermInfosIndexDivisor());

  TermDocs tdocs = reader.termDocs();

  // without optimization (assumption skipInterval == 16)

  // with next
  tdocs.seek(ta);
  assertTrue(tdocs.next());
  assertEquals(0, tdocs.doc());
  assertEquals(4, tdocs.freq());
  assertTrue(tdocs.next());
  assertEquals(1, tdocs.doc());
  assertEquals(4, tdocs.freq());
  assertTrue(tdocs.skipTo(0));
  assertEquals(2, tdocs.doc());
  assertTrue(tdocs.skipTo(4));
  assertEquals(4, tdocs.doc());
  assertTrue(tdocs.skipTo(9));
  assertEquals(9, tdocs.doc());
  assertFalse(tdocs.skipTo(10));

  // without next
  tdocs.seek(ta);
  assertTrue(tdocs.skipTo(0));
  assertEquals(0, tdocs.doc());
  assertTrue(tdocs.skipTo(4));
  assertEquals(4, tdocs.doc());
  assertTrue(tdocs.skipTo(9));
  assertEquals(9, tdocs.doc());
  assertFalse(tdocs.skipTo(10));

  // exactly skipInterval documents and therefore with optimization

  // with next
  tdocs.seek(tb);
  assertTrue(tdocs.next());
  assertEquals(10, tdocs.doc());
  assertEquals(4, tdocs.freq());
  assertTrue(tdocs.next());
  assertEquals(11, tdocs.doc());
  assertEquals(4, tdocs.freq());
  assertTrue(tdocs.skipTo(5));
  assertEquals(12, tdocs.doc());
  assertTrue(tdocs.skipTo(15));
  assertEquals(15, tdocs.doc());
  assertTrue(tdocs.skipTo(24));
  assertEquals(24, tdocs.doc());
  assertTrue(tdocs.skipTo(25));
  assertEquals(25, tdocs.doc());
  assertFalse(tdocs.skipTo(26));

  // without next
  tdocs.seek(tb);
  assertTrue(tdocs.skipTo(5));
  assertEquals(10, tdocs.doc());
  assertTrue(tdocs.skipTo(15));
  assertEquals(15, tdocs.doc());
  assertTrue(tdocs.skipTo(24));
  assertEquals(24, tdocs.doc());
  assertTrue(tdocs.skipTo(25));
  assertEquals(25, tdocs.doc());
  assertFalse(tdocs.skipTo(26));

  // much more than skipInterval documents and therefore with optimization

  // with next
  tdocs.seek(tc);
  assertTrue(tdocs.next());
  assertEquals(26, tdocs.doc());
  assertEquals(4, tdocs.freq());
  assertTrue(tdocs.next());
  assertEquals(27, tdocs.doc());
  assertEquals(4, tdocs.freq());
  assertTrue(tdocs.skipTo(5));
  assertEquals(28, tdocs.doc());
  assertTrue(tdocs.skipTo(40));
  assertEquals(40, tdocs.doc());
  assertTrue(tdocs.skipTo(57));
  assertEquals(57, tdocs.doc());
  assertTrue(tdocs.skipTo(74));
  assertEquals(74, tdocs.doc());
  assertTrue(tdocs.skipTo(75));
  assertEquals(75, tdocs.doc());
  assertFalse(tdocs.skipTo(76));

  // without next
  tdocs.seek(tc);
  assertTrue(tdocs.skipTo(5));
  assertEquals(26, tdocs.doc());
  assertTrue(tdocs.skipTo(40));
  assertEquals(40, tdocs.doc());
  assertTrue(tdocs.skipTo(57));
  assertEquals(57, tdocs.doc());
  assertTrue(tdocs.skipTo(74));
  assertEquals(74, tdocs.doc());
  assertTrue(tdocs.skipTo(75));
  assertEquals(75, tdocs.doc());
  assertFalse(tdocs.skipTo(76));

  tdocs.close();
  reader.close();
  dir.close();
}
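// addDoc is another helper that is not shown here. A minimal sketch, assuming
// it only needs to index the given text into the "content" field that the
// ta/tb/tc terms above query (unstored, whitespace-analyzed):
private void addDoc(IndexWriter writer, String value) throws IOException {
  Document doc = new Document();
  doc.add(new Field("content", value, Field.Store.NO, Field.Index.ANALYZED));
  writer.addDocument(doc);
}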
// This test tests that buffered deletes are cleared when
// an Exception is hit during flush.
public void testErrorAfterApplyDeletes() throws IOException {
  MockRAMDirectory.Failure failure = new MockRAMDirectory.Failure() {
    boolean sawMaybe = false;
    boolean failed = false;

    @Override
    public MockRAMDirectory.Failure reset() {
      sawMaybe = false;
      failed = false;
      return this;
    }

    @Override
    public void eval(MockRAMDirectory dir) throws IOException {
      if (sawMaybe && !failed) {
        boolean seen = false;
        StackTraceElement[] trace = new Exception().getStackTrace();
        for (int i = 0; i < trace.length; i++) {
          if ("applyDeletes".equals(trace[i].getMethodName())) {
            seen = true;
            break;
          }
        }
        if (!seen) {
          // Only fail once we are no longer in applyDeletes
          failed = true;
          throw new IOException("fail after applyDeletes");
        }
      }
      if (!failed) {
        StackTraceElement[] trace = new Exception().getStackTrace();
        for (int i = 0; i < trace.length; i++) {
          if ("applyDeletes".equals(trace[i].getMethodName())) {
            sawMaybe = true;
            break;
          }
        }
      }
    }
  };

  // create a couple of files
  String[] keywords = {"1", "2"};
  String[] unindexed = {"Netherlands", "Italy"};
  String[] unstored = {"Amsterdam has lots of bridges", "Venice has lots of canals"};
  String[] text = {"Amsterdam", "Venice"};

  MockRAMDirectory dir = new MockRAMDirectory();
  IndexWriter modifier = new IndexWriter(dir, new WhitespaceAnalyzer(), true,
                                         IndexWriter.MaxFieldLength.UNLIMITED);
  modifier.setUseCompoundFile(true);
  modifier.setMaxBufferedDeleteTerms(2);

  dir.failOn(failure.reset());

  for (int i = 0; i < keywords.length; i++) {
    Document doc = new Document();
    doc.add(new Field("id", keywords[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.add(new Field("country", unindexed[i], Field.Store.YES, Field.Index.NO));
    doc.add(new Field("contents", unstored[i], Field.Store.NO, Field.Index.ANALYZED));
    doc.add(new Field("city", text[i], Field.Store.YES, Field.Index.ANALYZED));
    modifier.addDocument(doc);
  }
  // flush (and commit if ac)
  modifier.optimize();
  modifier.commit();

  // one of the two files hits
  Term term = new Term("city", "Amsterdam");
  int hitCount = getHitCount(dir, term);
  assertEquals(1, hitCount);

  // delete the doc
  // max buf del terms is two, so this is buffered
  modifier.deleteDocuments(term);

  // add a doc (needed for the !ac case; see below)
  // doc remains buffered
  Document doc = new Document();
  modifier.addDocument(doc);

  // commit the changes, the buffered deletes, and the new doc

  // The failure object will fail on the first write after the del
  // file gets created when processing the buffered delete

  // in the ac case, this will be when writing the new segments
  // files so we really don't need the new doc, but it's harmless

  // in the !ac case, a new segments file won't be created but in
  // this case, creation of the cfs file happens next so we need
  // the doc (to test that it's okay that we don't lose deletes if
  // failing while creating the cfs file)
  boolean failed = false;
  try {
    modifier.commit();
  } catch (IOException ioe) {
    failed = true;
  }
  assertTrue(failed);

  // The commit above failed, so we need to retry it (which will
  // succeed, because the failure is a one-shot)
  modifier.commit();

  hitCount = getHitCount(dir, term);
  // Make sure the delete was successfully flushed:
  assertEquals(0, hitCount);

  modifier.close();
  dir.close();
}
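// For readers unfamiliar with the fault-injection hook above: the mock
// directory invokes Failure.eval() during its file operations, and throwing
// from eval() injects the fault. A minimal one-shot variant (a sketch, not
// part of the original test) that fails the first operation it observes:
MockRAMDirectory.Failure failOnce = new MockRAMDirectory.Failure() {
  boolean failed = false;

  @Override
  public void eval(MockRAMDirectory dir) throws IOException {
    if (!failed) {
      failed = true;
      throw new IOException("injected one-shot failure");
    }
  }
};
// Installing it with dir.failOn(failOnce) makes the next operation fail once.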