/**
   * We assume that the initial indexing has been done and a set of reference objects has been found
   * and indexed in the separate directory. However further documents were added and they now need
   * to get a ranked list of reference objects. So we (i) get all these new documents missing the
   * field "ro-order" and (ii) add this field.
   *
   * @param indexPath the index to update
   * @throws IOException
   */
  public void updateIndex(String indexPath) throws IOException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    int numDocs = reader.numDocs();
    boolean hasDeletions = reader.hasDeletions();
    int countUpdated = 0;

    IndexReader readerRo = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-ro")));
    ImageSearcher searcher =
        new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);
    Map<String, Analyzer> perField = new HashMap<String, Analyzer>(1);
    perField.put("ro-order", new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
    PerFieldAnalyzerWrapper aWrapper =
        new PerFieldAnalyzerWrapper(new SimpleAnalyzer(LuceneUtils.LUCENE_VERSION), perField);

    IndexWriter iw =
        new IndexWriter(
            FSDirectory.open(new File(indexPath)),
            new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, aWrapper)
                .setOpenMode(IndexWriterConfig.OpenMode.CREATE));
    StringBuilder sb = new StringBuilder(256);
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);

    for (int i = 0; i < numDocs; i++) {
      if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it.
      Document document = reader.document(i);
      if (document.getField("ro-order") == null) { // if the field is not here we create it.
        ImageSearchHits hits = searcher.search(document, readerRo);
        sb.delete(0, sb.length());
        for (int j = 0; j < numReferenceObjectsUsed; j++) {
          sb.append(hits.doc(j).getValues("ro-id")[0]);
          sb.append(' ');
        }
        // System.out.println(sb.toString());
        document.add(new TextField("ro-order", sb.toString(), Field.Store.YES));
        iw.updateDocument(
            new Term(
                DocumentBuilder.FIELD_NAME_IDENTIFIER,
                document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]),
            document);
        countUpdated++;
      }

      // progress report
      progress.setNumDocsProcessed(progress.getNumDocsProcessed() + 1);

      // debug:
      System.out.println("countUpdated = " + countUpdated);
    }
    iw.commit();
    iw.close();
  }
  protected DirectoryReader indexDocsWithLucene(TestDoc[] testDocs) throws IOException {

    Map<String, Analyzer> mapping = new HashMap<String, Analyzer>();
    for (TestFieldSetting field : testDocs[0].fieldSettings) {
      if (field.storedPayloads) {
        mapping.put(
            field.name,
            new Analyzer() {
              @Override
              protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
                Tokenizer tokenizer = new StandardTokenizer(Version.CURRENT.luceneVersion, reader);
                TokenFilter filter = new LowerCaseFilter(Version.CURRENT.luceneVersion, tokenizer);
                filter = new TypeAsPayloadTokenFilter(filter);
                return new TokenStreamComponents(tokenizer, filter);
              }
            });
      }
    }
    PerFieldAnalyzerWrapper wrapper =
        new PerFieldAnalyzerWrapper(
            new StandardAnalyzer(Version.CURRENT.luceneVersion, CharArraySet.EMPTY_SET), mapping);

    Directory dir = new RAMDirectory();
    IndexWriterConfig conf = new IndexWriterConfig(Version.CURRENT.luceneVersion, wrapper);

    conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(dir, conf);

    for (TestDoc doc : testDocs) {
      Document d = new Document();
      d.add(new Field("id", doc.id, StringField.TYPE_STORED));
      for (int i = 0; i < doc.fieldContent.length; i++) {
        FieldType type = new FieldType(TextField.TYPE_STORED);
        TestFieldSetting fieldSetting = doc.fieldSettings[i];

        type.setStoreTermVectorOffsets(fieldSetting.storedOffset);
        type.setStoreTermVectorPayloads(fieldSetting.storedPayloads);
        type.setStoreTermVectorPositions(
            fieldSetting.storedPositions
                || fieldSetting.storedPayloads
                || fieldSetting.storedOffset);
        type.setStoreTermVectors(true);
        type.freeze();
        d.add(new Field(fieldSetting.name, doc.fieldContent[i], type));
      }
      writer.updateDocument(new Term("id", doc.id), d);
      writer.commit();
    }
    writer.close();

    return DirectoryReader.open(dir);
  }
 public void testTermDocsEnum() throws Exception {
   Directory dir = newDirectory();
   IndexWriter w =
       new IndexWriter(
           dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
   Document d = new Document();
   d.add(newStringField("f", "j", Field.Store.NO));
   w.addDocument(d);
   w.commit();
   w.addDocument(d);
   IndexReader r = w.getReader();
   w.close();
   DocsEnum de = MultiFields.getTermDocsEnum(r, null, "f", new BytesRef("j"));
   assertEquals(0, de.nextDoc());
   assertEquals(1, de.nextDoc());
   assertEquals(DocIdSetIterator.NO_MORE_DOCS, de.nextDoc());
   r.close();
   dir.close();
 }
 public void testSeparateEnums() throws Exception {
   Directory dir = newDirectory();
   IndexWriter w =
       new IndexWriter(
           dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
   Document d = new Document();
   d.add(newStringField("f", "j", Field.Store.NO));
   w.addDocument(d);
   w.commit();
   w.addDocument(d);
   IndexReader r = w.getReader();
   w.close();
   DocsEnum d1 = _TestUtil.docs(random(), r, "f", new BytesRef("j"), null, null, 0);
   DocsEnum d2 = _TestUtil.docs(random(), r, "f", new BytesRef("j"), null, null, 0);
   assertEquals(0, d1.nextDoc());
   assertEquals(0, d2.nextDoc());
   r.close();
   dir.close();
 }
  public void testRandom() throws Exception {

    int num = atLeast(2);
    for (int iter = 0; iter < num; iter++) {
      if (VERBOSE) {
        System.out.println("TEST: iter=" + iter);
      }

      Directory dir = newDirectory();

      IndexWriter w =
          new IndexWriter(
              dir,
              newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                  .setMergePolicy(NoMergePolicy.COMPOUND_FILES));
      _TestUtil.keepFullyDeletedSegments(w);

      Map<BytesRef, List<Integer>> docs = new HashMap<BytesRef, List<Integer>>();
      Set<Integer> deleted = new HashSet<Integer>();
      List<BytesRef> terms = new ArrayList<BytesRef>();

      int numDocs = _TestUtil.nextInt(random(), 1, 100 * RANDOM_MULTIPLIER);
      Document doc = new Document();
      Field f = newStringField("field", "", Field.Store.NO);
      doc.add(f);
      Field id = newStringField("id", "", Field.Store.NO);
      doc.add(id);

      boolean onlyUniqueTerms = random().nextBoolean();
      if (VERBOSE) {
        System.out.println("TEST: onlyUniqueTerms=" + onlyUniqueTerms + " numDocs=" + numDocs);
      }
      Set<BytesRef> uniqueTerms = new HashSet<BytesRef>();
      for (int i = 0; i < numDocs; i++) {

        if (!onlyUniqueTerms && random().nextBoolean() && terms.size() > 0) {
          // re-use existing term
          BytesRef term = terms.get(random().nextInt(terms.size()));
          docs.get(term).add(i);
          f.setStringValue(term.utf8ToString());
        } else {
          String s = _TestUtil.randomUnicodeString(random(), 10);
          BytesRef term = new BytesRef(s);
          if (!docs.containsKey(term)) {
            docs.put(term, new ArrayList<Integer>());
          }
          docs.get(term).add(i);
          terms.add(term);
          uniqueTerms.add(term);
          f.setStringValue(s);
        }
        id.setStringValue("" + i);
        w.addDocument(doc);
        if (random().nextInt(4) == 1) {
          w.commit();
        }
        if (i > 0 && random().nextInt(20) == 1) {
          int delID = random().nextInt(i);
          deleted.add(delID);
          w.deleteDocuments(new Term("id", "" + delID));
          if (VERBOSE) {
            System.out.println("TEST: delete " + delID);
          }
        }
      }

      if (VERBOSE) {
        List<BytesRef> termsList = new ArrayList<BytesRef>(uniqueTerms);
        Collections.sort(termsList, BytesRef.getUTF8SortedAsUTF16Comparator());
        System.out.println("TEST: terms in UTF16 order:");
        for (BytesRef b : termsList) {
          System.out.println("  " + UnicodeUtil.toHexString(b.utf8ToString()) + " " + b);
          for (int docID : docs.get(b)) {
            if (deleted.contains(docID)) {
              System.out.println("    " + docID + " (deleted)");
            } else {
              System.out.println("    " + docID);
            }
          }
        }
      }

      IndexReader reader = w.getReader();
      w.close();
      if (VERBOSE) {
        System.out.println("TEST: reader=" + reader);
      }

      Bits liveDocs = MultiFields.getLiveDocs(reader);
      for (int delDoc : deleted) {
        assertFalse(liveDocs.get(delDoc));
      }

      for (int i = 0; i < 100; i++) {
        BytesRef term = terms.get(random().nextInt(terms.size()));
        if (VERBOSE) {
          System.out.println(
              "TEST: seek term=" + UnicodeUtil.toHexString(term.utf8ToString()) + " " + term);
        }

        DocsEnum docsEnum = _TestUtil.docs(random(), reader, "field", term, liveDocs, null, 0);
        assertNotNull(docsEnum);

        for (int docID : docs.get(term)) {
          if (!deleted.contains(docID)) {
            assertEquals(docID, docsEnum.nextDoc());
          }
        }
        assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsEnum.nextDoc());
      }

      reader.close();
      dir.close();
    }
  }
  /**
   * Assert that the {@code scoreType} operates as expected and parents are found in the expected
   * order.
   *
   * <p>This will use the test index's parent/child types to create parents with multiple children.
   * Each child will have a randomly generated scored stored in {@link #CHILD_SCORE_NAME}, which is
   * used to score based on the {@code scoreType} by using a {@link MockScorer} to determine the
   * expected scores.
   *
   * @param scoreType The score type to use within the query to score parents relative to their
   *     children.
   * @throws IOException if any unexpected error occurs
   */
  private void assertScoreType(ScoreType scoreType) throws IOException {
    SearchContext context = SearchContext.current();
    Directory directory = newDirectory();
    IndexWriter writer =
        new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random())));

    // calculates the expected score per parent
    MockScorer scorer = new MockScorer(scoreType);
    scorer.scores = new FloatArrayList(10);

    // number of parents to generate
    int parentDocs = scaledRandomIntBetween(2, 10);
    // unique child ID
    int childDocId = 0;

    // Parent ID to expected score
    Map<String, Float> parentScores = new TreeMap<>();

    // Add a few random parents to ensure that the children's score is appropriately taken into
    // account
    for (int parentDocId = 0; parentDocId < parentDocs; ++parentDocId) {
      String parent = Integer.toString(parentDocId);

      // Create the parent
      Document parentDocument = new Document();

      parentDocument.add(
          new StringField(UidFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.YES));
      parentDocument.add(new StringField(IdFieldMapper.NAME, parent, Field.Store.YES));
      parentDocument.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO));

      // add the parent to the index
      writer.addDocument(parentDocument);

      int numChildDocs = scaledRandomIntBetween(1, 10);

      // forget any parent's previous scores
      scorer.scores.clear();

      // associate children with the parent
      for (int i = 0; i < numChildDocs; ++i) {
        int childScore = random().nextInt(128);

        Document childDocument = new Document();

        childDocument.add(
            new StringField(
                UidFieldMapper.NAME,
                Uid.createUid("child", Integer.toString(childDocId++)),
                Field.Store.NO));
        childDocument.add(new StringField(TypeFieldMapper.NAME, "child", Field.Store.NO));
        // parent association:
        childDocument.add(
            new StringField(
                ParentFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.NO));
        childDocument.add(new DoubleField(CHILD_SCORE_NAME, childScore, Field.Store.NO));

        // remember the score to be calculated
        scorer.scores.add(childScore);

        // add the associated child to the index
        writer.addDocument(childDocument);
      }

      // this score that should be returned for this parent
      parentScores.put(parent, scorer.score());
    }

    writer.commit();

    IndexReader reader = DirectoryReader.open(writer, true);
    IndexSearcher searcher = new IndexSearcher(reader);

    // setup to read the parent/child map
    Engine.Searcher engineSearcher =
        new Engine.Searcher(ChildrenQueryTests.class.getSimpleName(), searcher);
    ((TestSearchContext) context).setSearcher(new ContextIndexSearcher(context, engineSearcher));

    // child query that returns the score as the value of "childScore" for each child document, with
    // the parent's score determined by the score type
    QueryBuilder childQueryBuilder =
        functionScoreQuery(typeFilter("child"))
            .add(new FieldValueFactorFunctionBuilder(CHILD_SCORE_NAME));
    QueryBuilder queryBuilder =
        hasChildQuery("child", childQueryBuilder)
            .scoreType(scoreType.name().toLowerCase(Locale.ENGLISH))
            .setShortCircuitCutoff(parentDocs);

    // Perform the search for the documents using the selected score type
    TopDocs docs = searcher.search(parseQuery(queryBuilder), parentDocs);
    assertThat("Expected all parents", docs.totalHits, is(parentDocs));

    // score should be descending (just a sanity check)
    float topScore = docs.scoreDocs[0].score;

    // ensure each score is returned as expected
    for (int i = 0; i < parentDocs; ++i) {
      ScoreDoc scoreDoc = docs.scoreDocs[i];
      // get the ID from the document to get its expected score; remove it so we cannot double-count
      // it
      float score = parentScores.remove(reader.document(scoreDoc.doc).get(IdFieldMapper.NAME));

      // expect exact match
      assertThat("Unexpected score", scoreDoc.score, is(score));
      assertThat("Not descending", score, lessThanOrEqualTo(topScore));

      // it had better keep descending
      topScore = score;
    }

    reader.close();
    writer.close();
    directory.close();
  }
  /**
   * Creates a set of reference objects and stores it in a new index (name "<indexPath>-ro"). Then
   * creates ordered lists of reference object positions for each data item in the index with given
   * feature. Finally a new index (name "<indexPath>-ms") is created where all the original
   * documents as well as the new data are stored.
   *
   * @param indexPath the path to the original index
   * @throws IOException
   */
  public void createIndex(String indexPath) throws IOException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    int numDocs = reader.numDocs();

    if (numDocs < numReferenceObjects) {
      throw new UnsupportedOperationException("Too few documents in index.");
    }

    // progress report
    progress.setNumDocsAll(numDocs);
    progress.setCurrentState(State.RoSelection);

    boolean hasDeletions = reader.hasDeletions();

    // init reference objects:
    IndexWriter iw = LuceneUtils.createIndexWriter(indexPath + "-ro", true);
    HashSet<Integer> referenceObjsIds = new HashSet<Integer>(numReferenceObjects);

    double numDocsDouble = (double) numDocs;
    while (referenceObjsIds.size() < numReferenceObjects) {
      referenceObjsIds.add((int) (numDocsDouble * Math.random()));
    }
    int count = 0;

    if (hasDeletions) {
      System.err.println(
          "WARNING: There are deleted docs in your index. You should "
              + "optimize your index before using this method.");
    }

    // progress report
    progress.setCurrentState(State.RoIndexing);

    // find them in the index and put them into a separate index:
    for (int i : referenceObjsIds) {
      count++;
      Document document = reader.document(i);
      document.add(new Field("ro-id", count + "", StringField.TYPE_STORED));
      iw.addDocument(document);
    }
    iw.commit();
    iw.close();

    // progress report
    progress.setCurrentState(State.Indexing);

    // now find the reference objects for each entry ;)
    IndexReader readerRo = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-ro")));
    ImageSearcher searcher =
        new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);
    Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>();
    analyzerPerField.put("ro-order", new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
    PerFieldAnalyzerWrapper aWrapper =
        new PerFieldAnalyzerWrapper(
            new SimpleAnalyzer(LuceneUtils.LUCENE_VERSION), analyzerPerField);

    iw =
        new IndexWriter(
            FSDirectory.open(new File(indexPath)),
            new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, aWrapper)
                .setOpenMode(IndexWriterConfig.OpenMode.CREATE));
    StringBuilder sb = new StringBuilder(256);
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);

    for (int i = 0; i < numDocs; i++) {
      if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it.
      Document document = reader.document(i);
      ImageSearchHits hits = searcher.search(document, readerRo);
      sb.delete(0, sb.length());
      for (int j = 0; j < numReferenceObjectsUsed; j++) {
        sb.append(hits.doc(j).getValues("ro-id")[0]);
        sb.append(' ');
      }
      // System.out.println(sb.toString());
      document.add(new TextField("ro-order", sb.toString(), Field.Store.YES));
      iw.updateDocument(
          new Term(
              DocumentBuilder.FIELD_NAME_IDENTIFIER,
              document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]),
          document);

      // progress report
      progress.setNumDocsProcessed(progress.getNumDocsProcessed() + 1);
    }
    iw.commit();
    iw.close();

    // progress report
    progress.setCurrentState(State.Idle);
  }
示例#8
0
  @Test
  public void testRecoveryDiff() throws IOException, InterruptedException {
    int numDocs = 2 + random().nextInt(100);
    List<Document> docs = new ArrayList<>();
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      doc.add(
          new StringField("id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
      doc.add(
          new TextField(
              "body",
              TestUtil.randomRealisticUnicodeString(random()),
              random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
      doc.add(
          new SortedDocValuesField(
              "dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random()))));
      docs.add(doc);
    }
    long seed = random().nextLong();
    Store.MetadataSnapshot first;
    {
      Random random = new Random(seed);
      IndexWriterConfig iwc =
          new IndexWriterConfig(new MockAnalyzer(random)).setCodec(actualDefaultCodec());
      iwc.setMergePolicy(NoMergePolicy.INSTANCE);
      iwc.setUseCompoundFile(random.nextBoolean());
      iwc.setMaxThreadStates(1);
      final ShardId shardId = new ShardId(new Index("index"), 1);
      DirectoryService directoryService = new LuceneManagedDirectoryService(random);
      Store store =
          new Store(
              shardId,
              ImmutableSettings.EMPTY,
              directoryService,
              randomDistributor(random, directoryService),
              new DummyShardLock(shardId));
      IndexWriter writer = new IndexWriter(store.directory(), iwc);
      final boolean lotsOfSegments = rarely(random);
      for (Document d : docs) {
        writer.addDocument(d);
        if (lotsOfSegments && random.nextBoolean()) {
          writer.commit();
        } else if (rarely(random)) {
          writer.commit();
        }
      }
      writer.commit();
      writer.close();
      first = store.getMetadata();
      assertDeleteContent(store, directoryService);
      store.close();
    }
    long time = new Date().getTime();
    while (time == new Date().getTime()) {
      Thread.sleep(10); // bump the time
    }
    Store.MetadataSnapshot second;
    Store store;
    {
      Random random = new Random(seed);
      IndexWriterConfig iwc =
          new IndexWriterConfig(new MockAnalyzer(random)).setCodec(actualDefaultCodec());
      iwc.setMergePolicy(NoMergePolicy.INSTANCE);
      iwc.setUseCompoundFile(random.nextBoolean());
      iwc.setMaxThreadStates(1);
      final ShardId shardId = new ShardId(new Index("index"), 1);
      DirectoryService directoryService = new LuceneManagedDirectoryService(random);
      store =
          new Store(
              shardId,
              ImmutableSettings.EMPTY,
              directoryService,
              randomDistributor(random, directoryService),
              new DummyShardLock(shardId));
      IndexWriter writer = new IndexWriter(store.directory(), iwc);
      final boolean lotsOfSegments = rarely(random);
      for (Document d : docs) {
        writer.addDocument(d);
        if (lotsOfSegments && random.nextBoolean()) {
          writer.commit();
        } else if (rarely(random)) {
          writer.commit();
        }
      }
      writer.commit();
      writer.close();
      second = store.getMetadata();
    }
    Store.RecoveryDiff diff = first.recoveryDiff(second);
    assertThat(first.size(), equalTo(second.size()));
    for (StoreFileMetaData md : first) {
      assertThat(second.get(md.name()), notNullValue());
      // si files are different - containing timestamps etc
      assertThat(second.get(md.name()).isSame(md), equalTo(false));
    }
    assertThat(diff.different.size(), equalTo(first.size()));
    assertThat(
        diff.identical.size(),
        equalTo(0)); // in lucene 5 nothing is identical - we use random ids in file headers
    assertThat(diff.missing, empty());

    // check the self diff
    Store.RecoveryDiff selfDiff = first.recoveryDiff(first);
    assertThat(selfDiff.identical.size(), equalTo(first.size()));
    assertThat(selfDiff.different, empty());
    assertThat(selfDiff.missing, empty());

    // lets add some deletes
    Random random = new Random(seed);
    IndexWriterConfig iwc =
        new IndexWriterConfig(new MockAnalyzer(random)).setCodec(actualDefaultCodec());
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    iwc.setUseCompoundFile(random.nextBoolean());
    iwc.setMaxThreadStates(1);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
    IndexWriter writer = new IndexWriter(store.directory(), iwc);
    writer.deleteDocuments(new Term("id", Integer.toString(random().nextInt(numDocs))));
    writer.commit();
    writer.close();
    Store.MetadataSnapshot metadata = store.getMetadata();
    StoreFileMetaData delFile = null;
    for (StoreFileMetaData md : metadata) {
      if (md.name().endsWith(".liv")) {
        delFile = md;
        break;
      }
    }
    Store.RecoveryDiff afterDeleteDiff = metadata.recoveryDiff(second);
    if (delFile != null) {
      assertThat(
          afterDeleteDiff.identical.size(), equalTo(metadata.size() - 2)); // segments_N + del file
      assertThat(afterDeleteDiff.different.size(), equalTo(0));
      assertThat(afterDeleteDiff.missing.size(), equalTo(2));
    } else {
      // an entire segment must be missing (single doc segment got dropped)
      assertThat(afterDeleteDiff.identical.size(), greaterThan(0));
      assertThat(afterDeleteDiff.different.size(), equalTo(0));
      assertThat(afterDeleteDiff.missing.size(), equalTo(1)); // the commit file is different
    }

    // check the self diff
    selfDiff = metadata.recoveryDiff(metadata);
    assertThat(selfDiff.identical.size(), equalTo(metadata.size()));
    assertThat(selfDiff.different, empty());
    assertThat(selfDiff.missing, empty());

    // add a new commit
    iwc = new IndexWriterConfig(new MockAnalyzer(random)).setCodec(actualDefaultCodec());
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    iwc.setUseCompoundFile(
        true); // force CFS - easier to test here since we know it will add 3 files
    iwc.setMaxThreadStates(1);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
    writer = new IndexWriter(store.directory(), iwc);
    writer.addDocument(docs.get(0));
    writer.close();

    Store.MetadataSnapshot newCommitMetaData = store.getMetadata();
    Store.RecoveryDiff newCommitDiff = newCommitMetaData.recoveryDiff(metadata);
    if (delFile != null) {
      assertThat(
          newCommitDiff.identical.size(),
          equalTo(
              newCommitMetaData.size()
                  - 5)); // segments_N, del file, cfs, cfe, si for the new segment
      assertThat(newCommitDiff.different.size(), equalTo(1)); // the del file must be different
      assertThat(newCommitDiff.different.get(0).name(), endsWith(".liv"));
      assertThat(
          newCommitDiff.missing.size(), equalTo(4)); // segments_N,cfs, cfe, si for the new segment
    } else {
      assertThat(
          newCommitDiff.identical.size(),
          equalTo(newCommitMetaData.size() - 4)); // segments_N, cfs, cfe, si for the new segment
      assertThat(newCommitDiff.different.size(), equalTo(0));
      assertThat(
          newCommitDiff.missing.size(),
          equalTo(
              4)); // an entire segment must be missing (single doc segment got dropped)  plus the
                   // commit is different
    }

    store.deleteContent();
    IOUtils.close(store);
  }
示例#9
0
  @Test
  public void testMixedChecksums() throws IOException {
    final ShardId shardId = new ShardId(new Index("index"), 1);
    DirectoryService directoryService = new LuceneManagedDirectoryService(random());
    Store store =
        new Store(
            shardId,
            ImmutableSettings.EMPTY,
            directoryService,
            randomDistributor(directoryService),
            new DummyShardLock(shardId));
    // this time random codec....
    IndexWriter writer =
        new IndexWriter(
            store.directory(),
            newIndexWriterConfig(random(), new MockAnalyzer(random()))
                .setCodec(actualDefaultCodec()));
    int docs = 1 + random().nextInt(100);

    for (int i = 0; i < docs; i++) {
      Document doc = new Document();
      doc.add(
          new TextField("id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
      doc.add(
          new TextField(
              "body",
              TestUtil.randomRealisticUnicodeString(random()),
              random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
      doc.add(
          new SortedDocValuesField(
              "dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random()))));
      writer.addDocument(doc);
    }
    if (random().nextBoolean()) {
      for (int i = 0; i < docs; i++) {
        if (random().nextBoolean()) {
          Document doc = new Document();
          doc.add(
              new TextField(
                  "id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
          doc.add(
              new TextField(
                  "body",
                  TestUtil.randomRealisticUnicodeString(random()),
                  random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
          writer.updateDocument(new Term("id", "" + i), doc);
        }
      }
    }
    if (random().nextBoolean()) {
      DirectoryReader.open(writer, random().nextBoolean()).close(); // flush
    }
    Store.MetadataSnapshot metadata;
    // check before we committed
    try {
      store.getMetadata();
      fail("no index present - expected exception");
    } catch (IndexNotFoundException ex) {
      // expected
    }
    assertThat(store.getMetadataOrEmpty(), is(Store.MetadataSnapshot.EMPTY)); // nothing committed
    writer.commit();
    writer.close();
    Store.LegacyChecksums checksums = new Store.LegacyChecksums();
    metadata = store.getMetadata();
    assertThat(metadata.asMap().isEmpty(), is(false));
    for (StoreFileMetaData meta : metadata) {
      try (IndexInput input = store.directory().openInput(meta.name(), IOContext.DEFAULT)) {
        if (meta.checksum() == null) {
          String checksum = null;
          try {
            CodecUtil.retrieveChecksum(input);
            fail("expected a corrupt index - posting format has not checksums");
          } catch (CorruptIndexException
              | IndexFormatTooOldException
              | IndexFormatTooNewException ex) {
            try (ChecksumIndexInput checksumIndexInput =
                store.directory().openChecksumInput(meta.name(), IOContext.DEFAULT)) {
              checksumIndexInput.seek(meta.length());
              checksum = Store.digestToString(checksumIndexInput.getChecksum());
            }
            // fine - it's a postings format without checksums
            checksums.add(new StoreFileMetaData(meta.name(), meta.length(), checksum, null));
          }
        } else {
          String checksum = Store.digestToString(CodecUtil.retrieveChecksum(input));
          assertThat(
              "File: " + meta.name() + " has a different checksum",
              meta.checksum(),
              equalTo(checksum));
          assertThat(meta.hasLegacyChecksum(), equalTo(false));
          assertThat(meta.writtenBy(), equalTo(Version.LATEST));
        }
      }
    }
    assertConsistent(store, metadata);
    checksums.write(store);
    metadata = store.getMetadata();
    assertThat(metadata.asMap().isEmpty(), is(false));
    for (StoreFileMetaData meta : metadata) {
      assertThat(
          "file: " + meta.name() + " has a null checksum", meta.checksum(), not(nullValue()));
      if (meta.hasLegacyChecksum()) {
        try (ChecksumIndexInput checksumIndexInput =
            store.directory().openChecksumInput(meta.name(), IOContext.DEFAULT)) {
          checksumIndexInput.seek(meta.length());
          assertThat(
              meta.checksum(), equalTo(Store.digestToString(checksumIndexInput.getChecksum())));
        }
      } else {
        try (IndexInput input = store.directory().openInput(meta.name(), IOContext.DEFAULT)) {
          String checksum = Store.digestToString(CodecUtil.retrieveChecksum(input));
          assertThat(
              "File: " + meta.name() + " has a different checksum",
              meta.checksum(),
              equalTo(checksum));
          assertThat(meta.hasLegacyChecksum(), equalTo(false));
          assertThat(meta.writtenBy(), equalTo(Version.LATEST));
        }
      }
    }
    assertConsistent(store, metadata);
    TestUtil.checkIndex(store.directory());
    assertDeleteContent(store, directoryService);
    IOUtils.close(store);
  }
示例#10
0
  @Test
  public void testNewChecksums() throws IOException {
    final ShardId shardId = new ShardId(new Index("index"), 1);
    DirectoryService directoryService = new LuceneManagedDirectoryService(random());
    Store store =
        new Store(
            shardId,
            ImmutableSettings.EMPTY,
            directoryService,
            randomDistributor(directoryService),
            new DummyShardLock(shardId));
    // set default codec - all segments need checksums
    IndexWriter writer =
        new IndexWriter(
            store.directory(),
            newIndexWriterConfig(random(), new MockAnalyzer(random()))
                .setCodec(actualDefaultCodec()));
    int docs = 1 + random().nextInt(100);

    for (int i = 0; i < docs; i++) {
      Document doc = new Document();
      doc.add(
          new TextField("id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
      doc.add(
          new TextField(
              "body",
              TestUtil.randomRealisticUnicodeString(random()),
              random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
      doc.add(
          new SortedDocValuesField(
              "dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random()))));
      writer.addDocument(doc);
    }
    if (random().nextBoolean()) {
      for (int i = 0; i < docs; i++) {
        if (random().nextBoolean()) {
          Document doc = new Document();
          doc.add(
              new TextField(
                  "id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
          doc.add(
              new TextField(
                  "body",
                  TestUtil.randomRealisticUnicodeString(random()),
                  random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
          writer.updateDocument(new Term("id", "" + i), doc);
        }
      }
    }
    if (random().nextBoolean()) {
      DirectoryReader.open(writer, random().nextBoolean()).close(); // flush
    }
    Store.MetadataSnapshot metadata;
    // check before we committed
    try {
      store.getMetadata();
      fail("no index present - expected exception");
    } catch (IndexNotFoundException ex) {
      // expected
    }
    assertThat(store.getMetadataOrEmpty(), is(Store.MetadataSnapshot.EMPTY)); // nothing committed
    writer.commit();
    writer.close();
    metadata = store.getMetadata();
    assertThat(metadata.asMap().isEmpty(), is(false));
    for (StoreFileMetaData meta : metadata) {
      try (IndexInput input = store.directory().openInput(meta.name(), IOContext.DEFAULT)) {
        String checksum = Store.digestToString(CodecUtil.retrieveChecksum(input));
        assertThat(
            "File: " + meta.name() + " has a different checksum",
            meta.checksum(),
            equalTo(checksum));
        assertThat(meta.hasLegacyChecksum(), equalTo(false));
        assertThat(meta.writtenBy(), equalTo(Version.LATEST));
        if (meta.name().endsWith(".si") || meta.name().startsWith("segments_")) {
          assertThat(meta.hash().length, greaterThan(0));
        }
      }
    }
    assertConsistent(store, metadata);

    TestUtil.checkIndex(store.directory());
    assertDeleteContent(store, directoryService);
    IOUtils.close(store);
  }
示例#11
0
  @Test
  public void testCleanupFromSnapshot() throws IOException {
    final ShardId shardId = new ShardId(new Index("index"), 1);
    DirectoryService directoryService = new LuceneManagedDirectoryService(random());
    Store store =
        new Store(
            shardId,
            ImmutableSettings.EMPTY,
            directoryService,
            randomDistributor(directoryService),
            new DummyShardLock(shardId));
    // this time random codec....
    IndexWriterConfig indexWriterConfig =
        newIndexWriterConfig(random(), new MockAnalyzer(random())).setCodec(actualDefaultCodec());
    // we keep all commits and that allows us clean based on multiple snapshots
    indexWriterConfig.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
    IndexWriter writer = new IndexWriter(store.directory(), indexWriterConfig);
    int docs = 1 + random().nextInt(100);
    int numCommits = 0;
    for (int i = 0; i < docs; i++) {
      if (i > 0 && randomIntBetween(0, 10) == 0) {
        writer.commit();
        numCommits++;
      }
      Document doc = new Document();
      doc.add(
          new TextField("id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
      doc.add(
          new TextField(
              "body",
              TestUtil.randomRealisticUnicodeString(random()),
              random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
      doc.add(
          new SortedDocValuesField(
              "dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random()))));
      writer.addDocument(doc);
    }
    if (numCommits < 1) {
      writer.commit();
      Document doc = new Document();
      doc.add(
          new TextField(
              "id", "" + docs++, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
      doc.add(
          new TextField(
              "body",
              TestUtil.randomRealisticUnicodeString(random()),
              random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
      doc.add(
          new SortedDocValuesField(
              "dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random()))));
      writer.addDocument(doc);
    }

    Store.MetadataSnapshot firstMeta = store.getMetadata();

    if (random().nextBoolean()) {
      for (int i = 0; i < docs; i++) {
        if (random().nextBoolean()) {
          Document doc = new Document();
          doc.add(
              new TextField(
                  "id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
          doc.add(
              new TextField(
                  "body",
                  TestUtil.randomRealisticUnicodeString(random()),
                  random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
          writer.updateDocument(new Term("id", "" + i), doc);
        }
      }
    }
    writer.commit();
    writer.close();

    Store.MetadataSnapshot secondMeta = store.getMetadata();

    Store.LegacyChecksums checksums = new Store.LegacyChecksums();
    Map<String, StoreFileMetaData> legacyMeta = new HashMap<>();
    for (String file : store.directory().listAll()) {
      if (file.equals("write.lock") || file.equals(IndexFileNames.OLD_SEGMENTS_GEN)) {
        continue;
      }
      BytesRef hash = new BytesRef();
      if (file.startsWith("segments")) {
        hash = Store.MetadataSnapshot.hashFile(store.directory(), file);
      }
      StoreFileMetaData storeFileMetaData =
          new StoreFileMetaData(
              file, store.directory().fileLength(file), file + "checksum", null, hash);
      legacyMeta.put(file, storeFileMetaData);
      checksums.add(storeFileMetaData);
    }
    checksums.write(
        store); // write one checksum file here - we expect it to survive all the cleanups

    if (randomBoolean()) {
      store.cleanupAndVerify("test", firstMeta);
      String[] strings = store.directory().listAll();
      int numChecksums = 0;
      int numNotFound = 0;
      for (String file : strings) {
        assertTrue(firstMeta.contains(file) || Store.isChecksum(file));
        if (Store.isChecksum(file)) {
          numChecksums++;
        } else if (secondMeta.contains(file) == false) {
          numNotFound++;
        }
      }
      assertTrue(
          "at least one file must not be in here since we have two commits?", numNotFound > 0);
      assertEquals(
          "we wrote one checksum but it's gone now? - checksums are supposed to be kept",
          numChecksums,
          1);
    } else {
      store.cleanupAndVerify("test", secondMeta);
      String[] strings = store.directory().listAll();
      int numChecksums = 0;
      int numNotFound = 0;
      for (String file : strings) {
        assertTrue(secondMeta.contains(file) || Store.isChecksum(file));
        if (Store.isChecksum(file)) {
          numChecksums++;
        } else if (firstMeta.contains(file) == false) {
          numNotFound++;
        }
      }
      assertTrue(
          "at least one file must not be in here since we have two commits?", numNotFound > 0);
      assertEquals(
          "we wrote one checksum but it's gone now? - checksums are supposed to be kept",
          numChecksums,
          1);
    }

    store.deleteContent();
    IOUtils.close(store);
  }
  public void runTest(String testName) throws Exception {

    failed.set(false);
    addCount.set(0);
    delCount.set(0);
    packCount.set(0);

    final long t0 = System.currentTimeMillis();

    Random random = new Random(random().nextLong());
    final LineFileDocs docs = new LineFileDocs(random, true);
    final Path tempDir = createTempDir(testName);
    dir = getDirectory(newMockFSDirectory(tempDir)); // some subclasses rely on this being MDW
    if (dir instanceof BaseDirectoryWrapper) {
      ((BaseDirectoryWrapper) dir)
          .setCheckIndexOnClose(false); // don't double-checkIndex, we do it ourselves.
    }
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
    final IndexWriterConfig conf = newIndexWriterConfig(analyzer).setCommitOnClose(false);
    conf.setInfoStream(new FailOnNonBulkMergesInfoStream());
    if (conf.getMergePolicy() instanceof MockRandomMergePolicy) {
      ((MockRandomMergePolicy) conf.getMergePolicy()).setDoNonBulkMerges(false);
    }

    if (LuceneTestCase.TEST_NIGHTLY) {
      // newIWConfig makes smallish max seg size, which
      // results in tons and tons of segments for this test
      // when run nightly:
      MergePolicy mp = conf.getMergePolicy();
      if (mp instanceof TieredMergePolicy) {
        ((TieredMergePolicy) mp).setMaxMergedSegmentMB(5000.);
      } else if (mp instanceof LogByteSizeMergePolicy) {
        ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1000.);
      } else if (mp instanceof LogMergePolicy) {
        ((LogMergePolicy) mp).setMaxMergeDocs(100000);
      }
    }

    conf.setMergedSegmentWarmer(
        new IndexWriter.IndexReaderWarmer() {
          @Override
          public void warm(LeafReader reader) throws IOException {
            if (VERBOSE) {
              System.out.println("TEST: now warm merged reader=" + reader);
            }
            warmed.put(((SegmentReader) reader).core, Boolean.TRUE);
            final int maxDoc = reader.maxDoc();
            final Bits liveDocs = reader.getLiveDocs();
            int sum = 0;
            final int inc = Math.max(1, maxDoc / 50);
            for (int docID = 0; docID < maxDoc; docID += inc) {
              if (liveDocs == null || liveDocs.get(docID)) {
                final StoredDocument doc = reader.document(docID);
                sum += doc.getFields().size();
              }
            }

            IndexSearcher searcher = newSearcher(reader);
            sum += searcher.search(new TermQuery(new Term("body", "united")), 10).totalHits;

            if (VERBOSE) {
              System.out.println("TEST: warm visited " + sum + " fields");
            }
          }
        });

    if (VERBOSE) {
      conf.setInfoStream(
          new PrintStreamInfoStream(System.out) {
            @Override
            public void message(String component, String message) {
              if ("TP".equals(component)) {
                return; // ignore test points!
              }
              super.message(component, message);
            }
          });
    }
    writer = new IndexWriter(dir, conf);
    TestUtil.reduceOpenFiles(writer);

    final ExecutorService es =
        random().nextBoolean()
            ? null
            : Executors.newCachedThreadPool(new NamedThreadFactory(testName));

    doAfterWriter(es);

    final int NUM_INDEX_THREADS = TestUtil.nextInt(random(), 2, 4);

    final int RUN_TIME_SEC = LuceneTestCase.TEST_NIGHTLY ? 300 : RANDOM_MULTIPLIER;

    final Set<String> delIDs = Collections.synchronizedSet(new HashSet<String>());
    final Set<String> delPackIDs = Collections.synchronizedSet(new HashSet<String>());
    final List<SubDocs> allSubDocs = Collections.synchronizedList(new ArrayList<SubDocs>());

    final long stopTime = System.currentTimeMillis() + RUN_TIME_SEC * 1000;

    final Thread[] indexThreads =
        launchIndexingThreads(docs, NUM_INDEX_THREADS, stopTime, delIDs, delPackIDs, allSubDocs);

    if (VERBOSE) {
      System.out.println(
          "TEST: DONE start "
              + NUM_INDEX_THREADS
              + " indexing threads ["
              + (System.currentTimeMillis() - t0)
              + " ms]");
    }

    // Let index build up a bit
    Thread.sleep(100);

    doSearching(es, stopTime);

    if (VERBOSE) {
      System.out.println("TEST: all searching done [" + (System.currentTimeMillis() - t0) + " ms]");
    }

    for (int thread = 0; thread < indexThreads.length; thread++) {
      indexThreads[thread].join();
    }

    if (VERBOSE) {
      System.out.println(
          "TEST: done join indexing threads ["
              + (System.currentTimeMillis() - t0)
              + " ms]; addCount="
              + addCount
              + " delCount="
              + delCount);
    }

    final IndexSearcher s = getFinalSearcher();
    if (VERBOSE) {
      System.out.println("TEST: finalSearcher=" + s);
    }

    assertFalse(failed.get());

    boolean doFail = false;

    // Verify: make sure delIDs are in fact deleted:
    for (String id : delIDs) {
      final TopDocs hits = s.search(new TermQuery(new Term("docid", id)), 1);
      if (hits.totalHits != 0) {
        System.out.println(
            "doc id="
                + id
                + " is supposed to be deleted, but got "
                + hits.totalHits
                + " hits; first docID="
                + hits.scoreDocs[0].doc);
        doFail = true;
      }
    }

    // Verify: make sure delPackIDs are in fact deleted:
    for (String id : delPackIDs) {
      final TopDocs hits = s.search(new TermQuery(new Term("packID", id)), 1);
      if (hits.totalHits != 0) {
        System.out.println(
            "packID=" + id + " is supposed to be deleted, but got " + hits.totalHits + " matches");
        doFail = true;
      }
    }

    // Verify: make sure each group of sub-docs are still in docID order:
    for (SubDocs subDocs : allSubDocs) {
      TopDocs hits = s.search(new TermQuery(new Term("packID", subDocs.packID)), 20);
      if (!subDocs.deleted) {
        // We sort by relevance but the scores should be identical so sort falls back to by docID:
        if (hits.totalHits != subDocs.subIDs.size()) {
          System.out.println(
              "packID="
                  + subDocs.packID
                  + ": expected "
                  + subDocs.subIDs.size()
                  + " hits but got "
                  + hits.totalHits);
          doFail = true;
        } else {
          int lastDocID = -1;
          int startDocID = -1;
          for (ScoreDoc scoreDoc : hits.scoreDocs) {
            final int docID = scoreDoc.doc;
            if (lastDocID != -1) {
              assertEquals(1 + lastDocID, docID);
            } else {
              startDocID = docID;
            }
            lastDocID = docID;
            final StoredDocument doc = s.doc(docID);
            assertEquals(subDocs.packID, doc.get("packID"));
          }

          lastDocID = startDocID - 1;
          for (String subID : subDocs.subIDs) {
            hits = s.search(new TermQuery(new Term("docid", subID)), 1);
            assertEquals(1, hits.totalHits);
            final int docID = hits.scoreDocs[0].doc;
            if (lastDocID != -1) {
              assertEquals(1 + lastDocID, docID);
            }
            lastDocID = docID;
          }
        }
      } else {
        // Pack was deleted -- make sure its docs are
        // deleted.  We can't verify packID is deleted
        // because we can re-use packID for update:
        for (String subID : subDocs.subIDs) {
          assertEquals(0, s.search(new TermQuery(new Term("docid", subID)), 1).totalHits);
        }
      }
    }

    // Verify: make sure all not-deleted docs are in fact
    // not deleted:
    final int endID = Integer.parseInt(docs.nextDoc().get("docid"));
    docs.close();

    for (int id = 0; id < endID; id++) {
      String stringID = "" + id;
      if (!delIDs.contains(stringID)) {
        final TopDocs hits = s.search(new TermQuery(new Term("docid", stringID)), 1);
        if (hits.totalHits != 1) {
          System.out.println(
              "doc id="
                  + stringID
                  + " is not supposed to be deleted, but got hitCount="
                  + hits.totalHits
                  + "; delIDs="
                  + delIDs);
          doFail = true;
        }
      }
    }
    assertFalse(doFail);

    assertEquals(
        "index=" + writer.segString() + " addCount=" + addCount + " delCount=" + delCount,
        addCount.get() - delCount.get(),
        s.getIndexReader().numDocs());
    releaseSearcher(s);

    writer.commit();

    assertEquals(
        "index=" + writer.segString() + " addCount=" + addCount + " delCount=" + delCount,
        addCount.get() - delCount.get(),
        writer.numDocs());

    doClose();

    try {
      writer.commit();
    } finally {
      writer.close();
    }

    // Cannot close until after writer is closed because
    // writer has merged segment warmer that uses IS to run
    // searches, and that IS may be using this es!
    if (es != null) {
      es.shutdown();
      es.awaitTermination(1, TimeUnit.SECONDS);
    }

    TestUtil.checkIndex(dir);
    dir.close();
    IOUtils.rm(tempDir);

    if (VERBOSE) {
      System.out.println("TEST: done [" + (System.currentTimeMillis() - t0) + " ms]");
    }
  }