private void updateDoc(IndexWriter modifier, int id, int value) throws IOException {
   Document doc = new Document();
   doc.add(new Field("content", "aaa", Field.Store.NO, Field.Index.ANALYZED));
   doc.add(new Field("id", String.valueOf(id), Field.Store.YES, Field.Index.NOT_ANALYZED));
   doc.add(new Field("value", String.valueOf(value), Field.Store.NO, Field.Index.NOT_ANALYZED));
   modifier.updateDocument(new Term("id", String.valueOf(id)), doc);
 }
  /**
   * We assume that the initial indexing has been done and a set of reference objects has been found
   * and indexed in the separate directory. However further documents were added and they now need
   * to get a ranked list of reference objects. So we (i) get all these new documents missing the
   * field "ro-order" and (ii) add this field.
   *
   * @param indexPath the index to update
   * @throws IOException
   */
  public void updateIndex(String indexPath) throws IOException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    int numDocs = reader.numDocs();
    boolean hasDeletions = reader.hasDeletions();
    int countUpdated = 0;

    IndexReader readerRo = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-ro")));
    ImageSearcher searcher =
        new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);
    Map<String, Analyzer> perField = new HashMap<String, Analyzer>(1);
    perField.put("ro-order", new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
    PerFieldAnalyzerWrapper aWrapper =
        new PerFieldAnalyzerWrapper(new SimpleAnalyzer(LuceneUtils.LUCENE_VERSION), perField);

    IndexWriter iw =
        new IndexWriter(
            FSDirectory.open(new File(indexPath)),
            new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, aWrapper)
                .setOpenMode(IndexWriterConfig.OpenMode.CREATE));
    StringBuilder sb = new StringBuilder(256);
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);

    for (int i = 0; i < numDocs; i++) {
      if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it.
      Document document = reader.document(i);
      if (document.getField("ro-order") == null) { // if the field is not here we create it.
        ImageSearchHits hits = searcher.search(document, readerRo);
        sb.delete(0, sb.length());
        for (int j = 0; j < numReferenceObjectsUsed; j++) {
          sb.append(hits.doc(j).getValues("ro-id")[0]);
          sb.append(' ');
        }
        // System.out.println(sb.toString());
        document.add(new TextField("ro-order", sb.toString(), Field.Store.YES));
        iw.updateDocument(
            new Term(
                DocumentBuilder.FIELD_NAME_IDENTIFIER,
                document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]),
            document);
        countUpdated++;
      }

      // progress report
      progress.setNumDocsProcessed(progress.getNumDocsProcessed() + 1);

      // debug:
      System.out.println("countUpdated = " + countUpdated);
    }
    iw.commit();
    iw.close();
  }
  protected DirectoryReader indexDocsWithLucene(TestDoc[] testDocs) throws IOException {

    Map<String, Analyzer> mapping = new HashMap<String, Analyzer>();
    for (TestFieldSetting field : testDocs[0].fieldSettings) {
      if (field.storedPayloads) {
        mapping.put(
            field.name,
            new Analyzer() {
              @Override
              protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
                Tokenizer tokenizer = new StandardTokenizer(Version.CURRENT.luceneVersion, reader);
                TokenFilter filter = new LowerCaseFilter(Version.CURRENT.luceneVersion, tokenizer);
                filter = new TypeAsPayloadTokenFilter(filter);
                return new TokenStreamComponents(tokenizer, filter);
              }
            });
      }
    }
    PerFieldAnalyzerWrapper wrapper =
        new PerFieldAnalyzerWrapper(
            new StandardAnalyzer(Version.CURRENT.luceneVersion, CharArraySet.EMPTY_SET), mapping);

    Directory dir = new RAMDirectory();
    IndexWriterConfig conf = new IndexWriterConfig(Version.CURRENT.luceneVersion, wrapper);

    conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(dir, conf);

    for (TestDoc doc : testDocs) {
      Document d = new Document();
      d.add(new Field("id", doc.id, StringField.TYPE_STORED));
      for (int i = 0; i < doc.fieldContent.length; i++) {
        FieldType type = new FieldType(TextField.TYPE_STORED);
        TestFieldSetting fieldSetting = doc.fieldSettings[i];

        type.setStoreTermVectorOffsets(fieldSetting.storedOffset);
        type.setStoreTermVectorPayloads(fieldSetting.storedPayloads);
        type.setStoreTermVectorPositions(
            fieldSetting.storedPositions
                || fieldSetting.storedPayloads
                || fieldSetting.storedOffset);
        type.setStoreTermVectors(true);
        type.freeze();
        d.add(new Field(fieldSetting.name, doc.fieldContent[i], type));
      }
      writer.updateDocument(new Term("id", doc.id), d);
      writer.commit();
    }
    writer.close();

    return DirectoryReader.open(dir);
  }
Beispiel #4
0
  public void testWithPendingDeletes3() throws IOException {
    // main directory
    Directory dir = newDirectory();
    // auxiliary directory
    Directory aux = newDirectory();

    setUpDirs(dir, aux);
    IndexWriter writer =
        newWriter(
            dir,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                .setOpenMode(OpenMode.APPEND));

    // Adds 10 docs, then replaces them with another 10
    // docs, so 10 pending deletes:
    for (int i = 0; i < 20; i++) {
      Document doc = new Document();
      doc.add(newStringField("id", "" + (i % 10), Field.Store.NO));
      doc.add(newTextField("content", "bbb " + i, Field.Store.NO));
      writer.updateDocument(new Term("id", "" + (i % 10)), doc);
    }

    // Deletes one of the 10 added docs, leaving 9:
    PhraseQuery q = new PhraseQuery();
    q.add(new Term("content", "bbb"));
    q.add(new Term("content", "14"));
    writer.deleteDocuments(q);

    writer.addIndexes(aux);

    writer.forceMerge(1);
    writer.commit();

    verifyNumDocs(dir, 1039);
    verifyTermDocs(dir, new Term("content", "aaa"), 1030);
    verifyTermDocs(dir, new Term("content", "bbb"), 9);

    writer.close();
    dir.close();
    aux.close();
  }
 @Override
 public void run() {
   try {
     DirectoryReader open = null;
     for (int i = 0; i < num; i++) {
       Document doc = new Document(); // docs.nextDoc();
       BytesRef br = new BytesRef("test");
       doc.add(newStringField("id", br, Field.Store.NO));
       writer.updateDocument(new Term("id", br), doc);
       if (random().nextInt(3) == 0) {
         if (open == null) {
           open = DirectoryReader.open(writer, true);
         }
         DirectoryReader reader = DirectoryReader.openIfChanged(open);
         if (reader != null) {
           open.close();
           open = reader;
         }
         assertEquals(
             "iter: "
                 + i
                 + " numDocs: "
                 + open.numDocs()
                 + " del: "
                 + open.numDeletedDocs()
                 + " max: "
                 + open.maxDoc(),
             1,
             open.numDocs());
       }
     }
     if (open != null) {
       open.close();
     }
   } catch (Exception e) {
     throw new RuntimeException(e);
   }
 }
  public static void updateDocument(String searchEngineId, long companyId, Document document)
      throws SearchException {

    if (isIndexReadOnly()) {
      return;
    }

    if (_log.isDebugEnabled()) {
      _log.debug("Document " + document.toString());
    }

    SearchEngine searchEngine = getSearchEngine(searchEngineId);

    IndexWriter indexWriter = searchEngine.getIndexWriter();

    _searchPermissionChecker.addPermissionFields(companyId, document);

    SearchContext searchContext = new SearchContext();

    searchContext.setCompanyId(companyId);
    searchContext.setSearchEngineId(searchEngineId);

    indexWriter.updateDocument(searchContext, document);
  }
  SingleIndex(String directoryName, String fileName, String indexPath) {
    try {
      Directory dir = FSDirectory.open(new File(indexPath)); // write to an directory for checking

      Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_48);
      IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_48, analyzer);

      IndexWriter writer = new IndexWriter(dir, iwc);
      String[] files = directoryName.list();

      FileInputStream fis;
      try {
        fis = new FileInputStream(fileName);
      } catch (FileNotFoundException fnfe) {
        // at least on windows, some temporary files raise this exception with an "access denied"
        // message
        // checking if the file can be read doesn't help
        return;
      }

      try {

        // make a new, empty document
        Document doc = new Document();

        // Add the path of the file as a field named "path".  Use a
        // field that is indexed (i.e. searchable), but don't tokenize
        // the field into separate words and don't index term frequency
        // or positional information:
        Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
        doc.add(pathField);

        // Add the last modified date of the file a field named "modified".
        // Use a LongField that is indexed (i.e. efficiently filterable with
        // NumericRangeFilter).  This indexes to milli-second resolution, which
        // is often too fine.  You could instead create a number based on
        // year/month/day/hour/minutes/seconds, down the resolution you require.
        // For example the long value 2011021714 would mean
        // February 17, 2011, 2-3 PM.
        doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));

        // Add the contents of the file to a field named "contents".  Specify a Reader,
        // so that the text of the file is tokenized and indexed, but not stored.
        // Note that FileReader expects the file to be in UTF-8 encoding.
        // If that's not the case searching for special characters will fail.
        doc.add(
            new TextField(
                "contents",
                new BufferedReader(new InputStreamReader(fis, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
          // New index, so we just add the document (no old document can be there):
          System.out.println("adding " + file);
          writer.addDocument(doc);
        } else {
          // Existing index (an old copy of this document may have been indexed) so
          // we use updateDocument instead to replace the old one matching the exact
          // path, if present:
          System.out.println("updating " + file);
          writer.updateDocument(new Term("path", file.getPath()), doc);
        }

      } finally {
        fis.close();
      }

      writer.close();

    } catch (Exception e) {
      e.printStackTrace();
    }
  }
  /**
   * Creates a set of reference objects and stores it in a new index (name "<indexPath>-ro"). Then
   * creates ordered lists of reference object positions for each data item in the index with given
   * feature. Finally a new index (name "<indexPath>-ms") is created where all the original
   * documents as well as the new data are stored.
   *
   * @param indexPath the path to the original index
   * @throws IOException
   */
  public void createIndex(String indexPath) throws IOException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    int numDocs = reader.numDocs();

    if (numDocs < numReferenceObjects) {
      throw new UnsupportedOperationException("Too few documents in index.");
    }

    // progress report
    progress.setNumDocsAll(numDocs);
    progress.setCurrentState(State.RoSelection);

    boolean hasDeletions = reader.hasDeletions();

    // init reference objects:
    IndexWriter iw = LuceneUtils.createIndexWriter(indexPath + "-ro", true);
    HashSet<Integer> referenceObjsIds = new HashSet<Integer>(numReferenceObjects);

    double numDocsDouble = (double) numDocs;
    while (referenceObjsIds.size() < numReferenceObjects) {
      referenceObjsIds.add((int) (numDocsDouble * Math.random()));
    }
    int count = 0;

    if (hasDeletions) {
      System.err.println(
          "WARNING: There are deleted docs in your index. You should "
              + "optimize your index before using this method.");
    }

    // progress report
    progress.setCurrentState(State.RoIndexing);

    // find them in the index and put them into a separate index:
    for (int i : referenceObjsIds) {
      count++;
      Document document = reader.document(i);
      document.add(new Field("ro-id", count + "", StringField.TYPE_STORED));
      iw.addDocument(document);
    }
    iw.commit();
    iw.close();

    // progress report
    progress.setCurrentState(State.Indexing);

    // now find the reference objects for each entry ;)
    IndexReader readerRo = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-ro")));
    ImageSearcher searcher =
        new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);
    Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>();
    analyzerPerField.put("ro-order", new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
    PerFieldAnalyzerWrapper aWrapper =
        new PerFieldAnalyzerWrapper(
            new SimpleAnalyzer(LuceneUtils.LUCENE_VERSION), analyzerPerField);

    iw =
        new IndexWriter(
            FSDirectory.open(new File(indexPath)),
            new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, aWrapper)
                .setOpenMode(IndexWriterConfig.OpenMode.CREATE));
    StringBuilder sb = new StringBuilder(256);
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);

    for (int i = 0; i < numDocs; i++) {
      if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it.
      Document document = reader.document(i);
      ImageSearchHits hits = searcher.search(document, readerRo);
      sb.delete(0, sb.length());
      for (int j = 0; j < numReferenceObjectsUsed; j++) {
        sb.append(hits.doc(j).getValues("ro-id")[0]);
        sb.append(' ');
      }
      // System.out.println(sb.toString());
      document.add(new TextField("ro-order", sb.toString(), Field.Store.YES));
      iw.updateDocument(
          new Term(
              DocumentBuilder.FIELD_NAME_IDENTIFIER,
              document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]),
          document);

      // progress report
      progress.setNumDocsProcessed(progress.getNumDocsProcessed() + 1);
    }
    iw.commit();
    iw.close();

    // progress report
    progress.setCurrentState(State.Idle);
  }
Beispiel #9
0
  @Test
  public void testMixedChecksums() throws IOException {
    final ShardId shardId = new ShardId(new Index("index"), 1);
    DirectoryService directoryService = new LuceneManagedDirectoryService(random());
    Store store =
        new Store(
            shardId,
            ImmutableSettings.EMPTY,
            directoryService,
            randomDistributor(directoryService),
            new DummyShardLock(shardId));
    // this time random codec....
    IndexWriter writer =
        new IndexWriter(
            store.directory(),
            newIndexWriterConfig(random(), new MockAnalyzer(random()))
                .setCodec(actualDefaultCodec()));
    int docs = 1 + random().nextInt(100);

    for (int i = 0; i < docs; i++) {
      Document doc = new Document();
      doc.add(
          new TextField("id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
      doc.add(
          new TextField(
              "body",
              TestUtil.randomRealisticUnicodeString(random()),
              random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
      doc.add(
          new SortedDocValuesField(
              "dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random()))));
      writer.addDocument(doc);
    }
    if (random().nextBoolean()) {
      for (int i = 0; i < docs; i++) {
        if (random().nextBoolean()) {
          Document doc = new Document();
          doc.add(
              new TextField(
                  "id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
          doc.add(
              new TextField(
                  "body",
                  TestUtil.randomRealisticUnicodeString(random()),
                  random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
          writer.updateDocument(new Term("id", "" + i), doc);
        }
      }
    }
    if (random().nextBoolean()) {
      DirectoryReader.open(writer, random().nextBoolean()).close(); // flush
    }
    Store.MetadataSnapshot metadata;
    // check before we committed
    try {
      store.getMetadata();
      fail("no index present - expected exception");
    } catch (IndexNotFoundException ex) {
      // expected
    }
    assertThat(store.getMetadataOrEmpty(), is(Store.MetadataSnapshot.EMPTY)); // nothing committed
    writer.commit();
    writer.close();
    Store.LegacyChecksums checksums = new Store.LegacyChecksums();
    metadata = store.getMetadata();
    assertThat(metadata.asMap().isEmpty(), is(false));
    for (StoreFileMetaData meta : metadata) {
      try (IndexInput input = store.directory().openInput(meta.name(), IOContext.DEFAULT)) {
        if (meta.checksum() == null) {
          String checksum = null;
          try {
            CodecUtil.retrieveChecksum(input);
            fail("expected a corrupt index - posting format has not checksums");
          } catch (CorruptIndexException
              | IndexFormatTooOldException
              | IndexFormatTooNewException ex) {
            try (ChecksumIndexInput checksumIndexInput =
                store.directory().openChecksumInput(meta.name(), IOContext.DEFAULT)) {
              checksumIndexInput.seek(meta.length());
              checksum = Store.digestToString(checksumIndexInput.getChecksum());
            }
            // fine - it's a postings format without checksums
            checksums.add(new StoreFileMetaData(meta.name(), meta.length(), checksum, null));
          }
        } else {
          String checksum = Store.digestToString(CodecUtil.retrieveChecksum(input));
          assertThat(
              "File: " + meta.name() + " has a different checksum",
              meta.checksum(),
              equalTo(checksum));
          assertThat(meta.hasLegacyChecksum(), equalTo(false));
          assertThat(meta.writtenBy(), equalTo(Version.LATEST));
        }
      }
    }
    assertConsistent(store, metadata);
    checksums.write(store);
    metadata = store.getMetadata();
    assertThat(metadata.asMap().isEmpty(), is(false));
    for (StoreFileMetaData meta : metadata) {
      assertThat(
          "file: " + meta.name() + " has a null checksum", meta.checksum(), not(nullValue()));
      if (meta.hasLegacyChecksum()) {
        try (ChecksumIndexInput checksumIndexInput =
            store.directory().openChecksumInput(meta.name(), IOContext.DEFAULT)) {
          checksumIndexInput.seek(meta.length());
          assertThat(
              meta.checksum(), equalTo(Store.digestToString(checksumIndexInput.getChecksum())));
        }
      } else {
        try (IndexInput input = store.directory().openInput(meta.name(), IOContext.DEFAULT)) {
          String checksum = Store.digestToString(CodecUtil.retrieveChecksum(input));
          assertThat(
              "File: " + meta.name() + " has a different checksum",
              meta.checksum(),
              equalTo(checksum));
          assertThat(meta.hasLegacyChecksum(), equalTo(false));
          assertThat(meta.writtenBy(), equalTo(Version.LATEST));
        }
      }
    }
    assertConsistent(store, metadata);
    TestUtil.checkIndex(store.directory());
    assertDeleteContent(store, directoryService);
    IOUtils.close(store);
  }
  /**
   * Make sure if modifier tries to commit but hits disk full that modifier remains consistent and
   * usable. Similar to TestIndexReader.testDiskFull().
   */
  private void testOperationsOnDiskFull(boolean updates) throws IOException {

    boolean debug = false;
    Term searchTerm = new Term("content", "aaa");
    int START_COUNT = 157;
    int END_COUNT = 144;

    // First build up a starting index:
    MockRAMDirectory startDir = new MockRAMDirectory();
    IndexWriter writer =
        new IndexWriter(
            startDir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
    for (int i = 0; i < 157; i++) {
      Document d = new Document();
      d.add(new Field("id", Integer.toString(i), Field.Store.YES, Field.Index.NOT_ANALYZED));
      d.add(new Field("content", "aaa " + i, Field.Store.NO, Field.Index.ANALYZED));
      writer.addDocument(d);
    }
    writer.close();

    long diskUsage = startDir.sizeInBytes();
    long diskFree = diskUsage + 10;

    IOException err = null;

    boolean done = false;

    // Iterate w/ ever increasing free disk space:
    while (!done) {
      MockRAMDirectory dir = new MockRAMDirectory(startDir);
      dir.setPreventDoubleWrite(false);
      IndexWriter modifier =
          new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);

      modifier.setMaxBufferedDocs(1000); // use flush or close
      modifier.setMaxBufferedDeleteTerms(1000); // use flush or close

      // For each disk size, first try to commit against
      // dir that will hit random IOExceptions & disk
      // full; after, give it infinite disk space & turn
      // off random IOExceptions & retry w/ same reader:
      boolean success = false;

      for (int x = 0; x < 2; x++) {

        double rate = 0.1;
        double diskRatio = ((double) diskFree) / diskUsage;
        long thisDiskFree;
        String testName;

        if (0 == x) {
          thisDiskFree = diskFree;
          if (diskRatio >= 2.0) {
            rate /= 2;
          }
          if (diskRatio >= 4.0) {
            rate /= 2;
          }
          if (diskRatio >= 6.0) {
            rate = 0.0;
          }
          if (debug) {
            System.out.println("\ncycle: " + diskFree + " bytes");
          }
          testName = "disk full during reader.close() @ " + thisDiskFree + " bytes";
        } else {
          thisDiskFree = 0;
          rate = 0.0;
          if (debug) {
            System.out.println("\ncycle: same writer: unlimited disk space");
          }
          testName = "reader re-use after disk full";
        }

        dir.setMaxSizeInBytes(thisDiskFree);
        dir.setRandomIOExceptionRate(rate, diskFree);

        try {
          if (0 == x) {
            int docId = 12;
            for (int i = 0; i < 13; i++) {
              if (updates) {
                Document d = new Document();
                d.add(
                    new Field(
                        "id", Integer.toString(i), Field.Store.YES, Field.Index.NOT_ANALYZED));
                d.add(new Field("content", "bbb " + i, Field.Store.NO, Field.Index.ANALYZED));
                modifier.updateDocument(new Term("id", Integer.toString(docId)), d);
              } else { // deletes
                modifier.deleteDocuments(new Term("id", Integer.toString(docId)));
                // modifier.setNorm(docId, "contents", (float)2.0);
              }
              docId += 12;
            }
          }
          modifier.close();
          success = true;
          if (0 == x) {
            done = true;
          }
        } catch (IOException e) {
          if (debug) {
            System.out.println("  hit IOException: " + e);
            e.printStackTrace(System.out);
          }
          err = e;
          if (1 == x) {
            e.printStackTrace();
            fail(testName + " hit IOException after disk space was freed up");
          }
        }

        // If the close() succeeded, make sure there are
        // no unreferenced files.
        if (success) {
          _TestUtil.checkIndex(dir);
          TestIndexWriter.assertNoUnreferencedFiles(dir, "after writer.close");
        }

        // Finally, verify index is not corrupt, and, if
        // we succeeded, we see all docs changed, and if
        // we failed, we see either all docs or no docs
        // changed (transactional semantics):
        IndexReader newReader = null;
        try {
          newReader = IndexReader.open(dir, true);
        } catch (IOException e) {
          e.printStackTrace();
          fail(
              testName + ":exception when creating IndexReader after disk full during close: " + e);
        }

        IndexSearcher searcher = new IndexSearcher(newReader);
        ScoreDoc[] hits = null;
        try {
          hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs;
        } catch (IOException e) {
          e.printStackTrace();
          fail(testName + ": exception when searching: " + e);
        }
        int result2 = hits.length;
        if (success) {
          if (x == 0 && result2 != END_COUNT) {
            fail(
                testName
                    + ": method did not throw exception but hits.length for search on term 'aaa' is "
                    + result2
                    + " instead of expected "
                    + END_COUNT);
          } else if (x == 1 && result2 != START_COUNT && result2 != END_COUNT) {
            // It's possible that the first exception was
            // "recoverable" wrt pending deletes, in which
            // case the pending deletes are retained and
            // then re-flushing (with plenty of disk
            // space) will succeed in flushing the
            // deletes:
            fail(
                testName
                    + ": method did not throw exception but hits.length for search on term 'aaa' is "
                    + result2
                    + " instead of expected "
                    + START_COUNT
                    + " or "
                    + END_COUNT);
          }
        } else {
          // On hitting exception we still may have added
          // all docs:
          if (result2 != START_COUNT && result2 != END_COUNT) {
            err.printStackTrace();
            fail(
                testName
                    + ": method did throw exception but hits.length for search on term 'aaa' is "
                    + result2
                    + " instead of expected "
                    + START_COUNT
                    + " or "
                    + END_COUNT);
          }
        }

        searcher.close();
        newReader.close();

        if (result2 == END_COUNT) {
          break;
        }
      }

      dir.close();

      // Try again with 10 more bytes of free space:
      diskFree += 10;
    }
  }
Beispiel #11
0
  // IF THIS TEST FAILS ON UPGRADE GO LOOK AT THE
  // OldSIMockingCodec!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  @Test
  public void testWriteLegacyChecksums() throws IOException {
    final ShardId shardId = new ShardId(new Index("index"), 1);
    DirectoryService directoryService = new LuceneManagedDirectoryService(random());
    Store store =
        new Store(
            shardId,
            ImmutableSettings.EMPTY,
            directoryService,
            randomDistributor(directoryService),
            new DummyShardLock(shardId));
    // set default codec - all segments need checksums
    final boolean usesOldCodec = randomBoolean();
    IndexWriter writer =
        new IndexWriter(
            store.directory(),
            newIndexWriterConfig(random(), new MockAnalyzer(random()))
                .setCodec(usesOldCodec ? new OldSIMockingCodec() : actualDefaultCodec()));
    int docs = 1 + random().nextInt(100);

    for (int i = 0; i < docs; i++) {
      Document doc = new Document();
      doc.add(
          new TextField("id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
      doc.add(
          new TextField(
              "body",
              TestUtil.randomRealisticUnicodeString(random()),
              random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
      doc.add(
          new SortedDocValuesField(
              "dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random()))));
      writer.addDocument(doc);
    }
    if (random().nextBoolean()) {
      for (int i = 0; i < docs; i++) {
        if (random().nextBoolean()) {
          Document doc = new Document();
          doc.add(
              new TextField(
                  "id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
          doc.add(
              new TextField(
                  "body",
                  TestUtil.randomRealisticUnicodeString(random()),
                  random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
          writer.updateDocument(new Term("id", "" + i), doc);
        }
      }
    }
    if (random().nextBoolean()) {
      DirectoryReader.open(writer, random().nextBoolean()).close(); // flush
    }
    Store.MetadataSnapshot metadata;
    // check before we committed
    try {
      store.getMetadata();
      fail("no index present - expected exception");
    } catch (IndexNotFoundException ex) {
      // expected
    }
    assertThat(store.getMetadataOrEmpty(), is(Store.MetadataSnapshot.EMPTY)); // nothing committed

    writer.close();
    Store.LegacyChecksums checksums = new Store.LegacyChecksums();
    Map<String, StoreFileMetaData> legacyMeta = new HashMap<>();
    for (String file : store.directory().listAll()) {
      if (file.equals("write.lock") || file.equals(IndexFileNames.OLD_SEGMENTS_GEN)) {
        continue;
      }
      BytesRef hash = new BytesRef();
      if (file.startsWith("segments")) {
        hash = Store.MetadataSnapshot.hashFile(store.directory(), file);
      }
      StoreFileMetaData storeFileMetaData =
          new StoreFileMetaData(
              file, store.directory().fileLength(file), file + "checksum", null, hash);
      legacyMeta.put(file, storeFileMetaData);
      checksums.add(storeFileMetaData);
    }
    checksums.write(store);

    metadata = store.getMetadata();
    Map<String, StoreFileMetaData> stringStoreFileMetaDataMap = metadata.asMap();
    assertThat(legacyMeta.size(), equalTo(stringStoreFileMetaDataMap.size()));
    if (usesOldCodec) {
      for (StoreFileMetaData meta : legacyMeta.values()) {
        assertTrue(meta.toString(), stringStoreFileMetaDataMap.containsKey(meta.name()));
        assertEquals(meta.name() + "checksum", meta.checksum());
        assertTrue(
            meta + " vs. " + stringStoreFileMetaDataMap.get(meta.name()),
            stringStoreFileMetaDataMap.get(meta.name()).isSame(meta));
      }
    } else {

      // even if we have a legacy checksum - if we use a new codec we should reuse
      for (StoreFileMetaData meta : legacyMeta.values()) {
        assertTrue(meta.toString(), stringStoreFileMetaDataMap.containsKey(meta.name()));
        assertFalse(
            meta + " vs. " + stringStoreFileMetaDataMap.get(meta.name()),
            stringStoreFileMetaDataMap.get(meta.name()).isSame(meta));
        StoreFileMetaData storeFileMetaData = metadata.get(meta.name());
        try (IndexInput input =
            store.openVerifyingInput(meta.name(), IOContext.DEFAULT, storeFileMetaData)) {
          assertTrue(storeFileMetaData.toString(), input instanceof Store.VerifyingIndexInput);
          input.seek(meta.length());
          Store.verify(input);
        }
      }
    }
    assertDeleteContent(store, directoryService);
    IOUtils.close(store);
  }
Beispiel #12
0
  @Test
  public void testCleanupFromSnapshot() throws IOException {
    final ShardId shardId = new ShardId(new Index("index"), 1);
    DirectoryService directoryService = new LuceneManagedDirectoryService(random());
    Store store =
        new Store(
            shardId,
            ImmutableSettings.EMPTY,
            directoryService,
            randomDistributor(directoryService),
            new DummyShardLock(shardId));
    // this time random codec....
    IndexWriterConfig indexWriterConfig =
        newIndexWriterConfig(random(), new MockAnalyzer(random())).setCodec(actualDefaultCodec());
    // we keep all commits and that allows us clean based on multiple snapshots
    indexWriterConfig.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
    IndexWriter writer = new IndexWriter(store.directory(), indexWriterConfig);
    int docs = 1 + random().nextInt(100);
    int numCommits = 0;
    for (int i = 0; i < docs; i++) {
      if (i > 0 && randomIntBetween(0, 10) == 0) {
        writer.commit();
        numCommits++;
      }
      Document doc = new Document();
      doc.add(
          new TextField("id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
      doc.add(
          new TextField(
              "body",
              TestUtil.randomRealisticUnicodeString(random()),
              random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
      doc.add(
          new SortedDocValuesField(
              "dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random()))));
      writer.addDocument(doc);
    }
    if (numCommits < 1) {
      writer.commit();
      Document doc = new Document();
      doc.add(
          new TextField(
              "id", "" + docs++, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
      doc.add(
          new TextField(
              "body",
              TestUtil.randomRealisticUnicodeString(random()),
              random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
      doc.add(
          new SortedDocValuesField(
              "dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random()))));
      writer.addDocument(doc);
    }

    Store.MetadataSnapshot firstMeta = store.getMetadata();

    if (random().nextBoolean()) {
      for (int i = 0; i < docs; i++) {
        if (random().nextBoolean()) {
          Document doc = new Document();
          doc.add(
              new TextField(
                  "id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
          doc.add(
              new TextField(
                  "body",
                  TestUtil.randomRealisticUnicodeString(random()),
                  random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
          writer.updateDocument(new Term("id", "" + i), doc);
        }
      }
    }
    writer.commit();
    writer.close();

    Store.MetadataSnapshot secondMeta = store.getMetadata();

    Store.LegacyChecksums checksums = new Store.LegacyChecksums();
    Map<String, StoreFileMetaData> legacyMeta = new HashMap<>();
    for (String file : store.directory().listAll()) {
      if (file.equals("write.lock") || file.equals(IndexFileNames.OLD_SEGMENTS_GEN)) {
        continue;
      }
      BytesRef hash = new BytesRef();
      if (file.startsWith("segments")) {
        hash = Store.MetadataSnapshot.hashFile(store.directory(), file);
      }
      StoreFileMetaData storeFileMetaData =
          new StoreFileMetaData(
              file, store.directory().fileLength(file), file + "checksum", null, hash);
      legacyMeta.put(file, storeFileMetaData);
      checksums.add(storeFileMetaData);
    }
    checksums.write(
        store); // write one checksum file here - we expect it to survive all the cleanups

    if (randomBoolean()) {
      store.cleanupAndVerify("test", firstMeta);
      String[] strings = store.directory().listAll();
      int numChecksums = 0;
      int numNotFound = 0;
      for (String file : strings) {
        assertTrue(firstMeta.contains(file) || Store.isChecksum(file));
        if (Store.isChecksum(file)) {
          numChecksums++;
        } else if (secondMeta.contains(file) == false) {
          numNotFound++;
        }
      }
      assertTrue(
          "at least one file must not be in here since we have two commits?", numNotFound > 0);
      assertEquals(
          "we wrote one checksum but it's gone now? - checksums are supposed to be kept",
          numChecksums,
          1);
    } else {
      store.cleanupAndVerify("test", secondMeta);
      String[] strings = store.directory().listAll();
      int numChecksums = 0;
      int numNotFound = 0;
      for (String file : strings) {
        assertTrue(secondMeta.contains(file) || Store.isChecksum(file));
        if (Store.isChecksum(file)) {
          numChecksums++;
        } else if (firstMeta.contains(file) == false) {
          numNotFound++;
        }
      }
      assertTrue(
          "at least one file must not be in here since we have two commits?", numNotFound > 0);
      assertEquals(
          "we wrote one checksum but it's gone now? - checksums are supposed to be kept",
          numChecksums,
          1);
    }

    store.deleteContent();
    IOUtils.close(store);
  }
    public void indexDoc() throws IOException {
      Document d = new Document();

      FieldType customType1 = new FieldType(TextField.TYPE_STORED);
      customType1.setTokenized(false);
      customType1.setOmitNorms(true);

      ArrayList<Field> fields = new ArrayList<>();
      String idString = getIdString();
      Field idField = newField("id", idString, customType1);
      fields.add(idField);

      int nFields = nextInt(maxFields);
      for (int i = 0; i < nFields; i++) {

        FieldType customType = new FieldType();
        switch (nextInt(4)) {
          case 0:
            break;
          case 1:
            customType.setStoreTermVectors(true);
            break;
          case 2:
            customType.setStoreTermVectors(true);
            customType.setStoreTermVectorPositions(true);
            break;
          case 3:
            customType.setStoreTermVectors(true);
            customType.setStoreTermVectorOffsets(true);
            break;
        }

        switch (nextInt(4)) {
          case 0:
            customType.setStored(true);
            customType.setOmitNorms(true);
            customType.setIndexed(true);
            fields.add(newField("f" + nextInt(100), getString(1), customType));
            break;
          case 1:
            customType.setIndexed(true);
            customType.setTokenized(true);
            fields.add(newField("f" + nextInt(100), getString(0), customType));
            break;
          case 2:
            customType.setStored(true);
            customType.setStoreTermVectors(false);
            customType.setStoreTermVectorOffsets(false);
            customType.setStoreTermVectorPositions(false);
            fields.add(newField("f" + nextInt(100), getString(0), customType));
            break;
          case 3:
            customType.setStored(true);
            customType.setIndexed(true);
            customType.setTokenized(true);
            fields.add(newField("f" + nextInt(100), getString(bigFieldSize), customType));
            break;
        }
      }

      if (sameFieldOrder) {
        Collections.sort(fields, fieldNameComparator);
      } else {
        // random placement of id field also
        Collections.swap(fields, nextInt(fields.size()), 0);
      }

      for (int i = 0; i < fields.size(); i++) {
        d.add(fields.get(i));
      }
      if (VERBOSE) {
        System.out.println(Thread.currentThread().getName() + ": indexing id:" + idString);
      }
      w.updateDocument(new Term("id", idString), d);
      // System.out.println(Thread.currentThread().getName() + ": indexing "+d);
      docs.put(idString, d);
    }
 protected void updateDocument(Term term, IndexDocument doc) throws Exception {
   writer.updateDocument(term, doc);
 }
 /**
  * Calls {@link IndexWriter#updateDocument(Term,Iterable)} and returns the generation that
  * reflects this change.
  */
 public long updateDocument(Term t, Iterable<? extends IndexableField> d) throws IOException {
   writer.updateDocument(t, d);
   // Return gen as of when indexing finished:
   return indexingGen.get();
 }
Beispiel #16
0
  @Test
  public void testRollingUpdates() throws Exception {
    Random random = new Random(random().nextLong());
    final BaseDirectoryWrapper dir = newDirectory();
    // test checks for no unref'ed files with the IW helper method, which isn't aware of "tried to
    // delete files"
    if (dir instanceof MockDirectoryWrapper) {
      ((MockDirectoryWrapper) dir).setEnableVirusScanner(false);
    }

    final LineFileDocs docs = new LineFileDocs(random, true);

    // provider.register(new MemoryCodec());
    if (random().nextBoolean()) {
      Codec.setDefault(
          TestUtil.alwaysPostingsFormat(
              new MemoryPostingsFormat(random().nextBoolean(), random.nextFloat())));
    }

    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));

    final IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(analyzer));
    final int SIZE = atLeast(20);
    int id = 0;
    IndexReader r = null;
    IndexSearcher s = null;
    final int numUpdates =
        (int)
            (SIZE * (2 + (TEST_NIGHTLY ? 200 * random().nextDouble() : 5 * random().nextDouble())));
    if (VERBOSE) {
      System.out.println("TEST: numUpdates=" + numUpdates);
    }
    int updateCount = 0;
    // TODO: sometimes update ids not in order...
    for (int docIter = 0; docIter < numUpdates; docIter++) {
      final Document doc = docs.nextDoc();
      final String myID = Integer.toString(id);
      if (id == SIZE - 1) {
        id = 0;
      } else {
        id++;
      }
      if (VERBOSE) {
        System.out.println("  docIter=" + docIter + " id=" + id);
      }
      ((Field) doc.getField("docid")).setStringValue(myID);

      Term idTerm = new Term("docid", myID);

      final boolean doUpdate;
      if (s != null && updateCount < SIZE) {
        TopDocs hits = s.search(new TermQuery(idTerm), 1);
        assertEquals(1, hits.totalHits);
        doUpdate = !w.tryDeleteDocument(r, hits.scoreDocs[0].doc);
        if (VERBOSE) {
          if (doUpdate) {
            System.out.println("  tryDeleteDocument failed");
          } else {
            System.out.println("  tryDeleteDocument succeeded");
          }
        }
      } else {
        doUpdate = true;
        if (VERBOSE) {
          System.out.println("  no searcher: doUpdate=true");
        }
      }

      updateCount++;

      if (doUpdate) {
        if (random().nextBoolean()) {
          w.updateDocument(idTerm, doc);
        } else {
          // It's OK to not be atomic for this test (no separate thread reopening readers):
          w.deleteDocuments(new TermQuery(idTerm));
          w.addDocument(doc);
        }
      } else {
        w.addDocument(doc);
      }

      if (docIter >= SIZE && random().nextInt(50) == 17) {
        if (r != null) {
          r.close();
        }

        final boolean applyDeletions = random().nextBoolean();

        if (VERBOSE) {
          System.out.println("TEST: reopen applyDeletions=" + applyDeletions);
        }

        r = w.getReader(applyDeletions);
        if (applyDeletions) {
          s = newSearcher(r);
        } else {
          s = null;
        }
        assertTrue(
            "applyDeletions=" + applyDeletions + " r.numDocs()=" + r.numDocs() + " vs SIZE=" + SIZE,
            !applyDeletions || r.numDocs() == SIZE);
        updateCount = 0;
      }
    }

    if (r != null) {
      r.close();
    }

    w.commit();
    assertEquals(SIZE, w.numDocs());

    w.close();

    TestIndexWriter.assertNoUnreferencedFiles(dir, "leftover files after rolling updates");

    docs.close();

    // LUCENE-4455:
    SegmentInfos infos = SegmentInfos.readLatestCommit(dir);
    long totalBytes = 0;
    for (SegmentCommitInfo sipc : infos) {
      totalBytes += sipc.sizeInBytes();
    }
    long totalBytes2 = 0;

    for (String fileName : dir.listAll()) {
      if (IndexFileNames.CODEC_FILE_PATTERN.matcher(fileName).matches()) {
        totalBytes2 += dir.fileLength(fileName);
      }
    }
    assertEquals(totalBytes2, totalBytes);
    dir.close();
  }
Beispiel #17
0
  @Test
  public void testNewChecksums() throws IOException {
    final ShardId shardId = new ShardId(new Index("index"), 1);
    DirectoryService directoryService = new LuceneManagedDirectoryService(random());
    Store store =
        new Store(
            shardId,
            ImmutableSettings.EMPTY,
            directoryService,
            randomDistributor(directoryService),
            new DummyShardLock(shardId));
    // set default codec - all segments need checksums
    IndexWriter writer =
        new IndexWriter(
            store.directory(),
            newIndexWriterConfig(random(), new MockAnalyzer(random()))
                .setCodec(actualDefaultCodec()));
    int docs = 1 + random().nextInt(100);

    for (int i = 0; i < docs; i++) {
      Document doc = new Document();
      doc.add(
          new TextField("id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
      doc.add(
          new TextField(
              "body",
              TestUtil.randomRealisticUnicodeString(random()),
              random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
      doc.add(
          new SortedDocValuesField(
              "dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random()))));
      writer.addDocument(doc);
    }
    if (random().nextBoolean()) {
      for (int i = 0; i < docs; i++) {
        if (random().nextBoolean()) {
          Document doc = new Document();
          doc.add(
              new TextField(
                  "id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
          doc.add(
              new TextField(
                  "body",
                  TestUtil.randomRealisticUnicodeString(random()),
                  random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
          writer.updateDocument(new Term("id", "" + i), doc);
        }
      }
    }
    if (random().nextBoolean()) {
      DirectoryReader.open(writer, random().nextBoolean()).close(); // flush
    }
    Store.MetadataSnapshot metadata;
    // check before we committed
    try {
      store.getMetadata();
      fail("no index present - expected exception");
    } catch (IndexNotFoundException ex) {
      // expected
    }
    assertThat(store.getMetadataOrEmpty(), is(Store.MetadataSnapshot.EMPTY)); // nothing committed
    writer.commit();
    writer.close();
    metadata = store.getMetadata();
    assertThat(metadata.asMap().isEmpty(), is(false));
    for (StoreFileMetaData meta : metadata) {
      try (IndexInput input = store.directory().openInput(meta.name(), IOContext.DEFAULT)) {
        String checksum = Store.digestToString(CodecUtil.retrieveChecksum(input));
        assertThat(
            "File: " + meta.name() + " has a different checksum",
            meta.checksum(),
            equalTo(checksum));
        assertThat(meta.hasLegacyChecksum(), equalTo(false));
        assertThat(meta.writtenBy(), equalTo(Version.LATEST));
        if (meta.name().endsWith(".si") || meta.name().startsWith("segments_")) {
          assertThat(meta.hash().length, greaterThan(0));
        }
      }
    }
    assertConsistent(store, metadata);

    TestUtil.checkIndex(store.directory());
    assertDeleteContent(store, directoryService);
    IOUtils.close(store);
  }