@Test
  public void testVectorHighlighter() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter =
        new IndexWriter(dir, new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER));

    Document document = new Document();
    document.add(new TextField("_id", "1", Field.Store.YES));
    document.add(
        new Field(
            "content",
            "the big bad dog",
            Field.Store.YES,
            Field.Index.ANALYZED,
            Field.TermVector.WITH_POSITIONS_OFFSETS));
    indexWriter.addDocument(document);

    IndexReader reader = DirectoryReader.open(indexWriter, true);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);

    assertThat(topDocs.totalHits, equalTo(1));

    XFastVectorHighlighter highlighter = new XFastVectorHighlighter();
    String fragment =
        highlighter.getBestFragment(
            highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))),
            reader,
            topDocs.scoreDocs[0].doc,
            "content",
            30);
    assertThat(fragment, notNullValue());
    assertThat(fragment, equalTo("the big <b>bad</b> dog"));
  }
Ejemplo n.º 2
0
  public void testDuelGeoPoints() throws Exception {
    final String mapping =
        XContentFactory.jsonBuilder()
            .startObject()
            .startObject("type")
            .startObject("properties")
            .startObject("geopoint")
            .field("type", "geo_point")
            .startObject("fielddata")
            .field("format", "doc_values")
            .endObject()
            .endObject()
            .endObject()
            .endObject()
            .endObject()
            .string();

    final DocumentMapper mapper = mapperService.documentMapperParser().parse(mapping);

    Random random = getRandom();
    int atLeast = scaledRandomIntBetween(1000, 1500);
    int maxValuesPerDoc = randomBoolean() ? 1 : randomIntBetween(2, 40);
    // to test deduplication
    double defaultLat = randomDouble() * 180 - 90;
    double defaultLon = randomDouble() * 360 - 180;
    for (int i = 0; i < atLeast; i++) {
      final int numValues = randomInt(maxValuesPerDoc);
      XContentBuilder doc = XContentFactory.jsonBuilder().startObject().startArray("geopoint");
      for (int j = 0; j < numValues; ++j) {
        if (randomBoolean()) {
          doc.startObject().field("lat", defaultLat).field("lon", defaultLon).endObject();
        } else {
          doc.startObject()
              .field("lat", randomDouble() * 180 - 90)
              .field("lon", randomDouble() * 360 - 180)
              .endObject();
        }
      }
      doc = doc.endArray().endObject();
      final ParsedDocument d = mapper.parse("type", Integer.toString(i), doc.bytes());

      writer.addDocument(d.rootDoc());
      if (random.nextInt(10) == 0) {
        refreshReader();
      }
    }
    AtomicReaderContext context = refreshReader();
    Map<FieldDataType, Type> typeMap = new HashMap<>();
    final Distance precision = new Distance(1, randomFrom(DistanceUnit.values()));
    typeMap.put(
        new FieldDataType("geo_point", ImmutableSettings.builder().put("format", "array")),
        Type.GeoPoint);
    typeMap.put(
        new FieldDataType(
            "geo_point",
            ImmutableSettings.builder().put("format", "compressed").put("precision", precision)),
        Type.GeoPoint);
    typeMap.put(
        new FieldDataType("geo_point", ImmutableSettings.builder().put("format", "doc_values")),
        Type.GeoPoint);

    ArrayList<Entry<FieldDataType, Type>> list = new ArrayList<>(typeMap.entrySet());
    while (!list.isEmpty()) {
      Entry<FieldDataType, Type> left;
      Entry<FieldDataType, Type> right;
      if (list.size() > 1) {
        left = list.remove(random.nextInt(list.size()));
        right = list.remove(random.nextInt(list.size()));
      } else {
        right = left = list.remove(0);
      }
      ifdService.clear();
      IndexGeoPointFieldData leftFieldData =
          getForField(left.getKey(), left.getValue().name().toLowerCase(Locale.ROOT));

      ifdService.clear();
      IndexGeoPointFieldData rightFieldData =
          getForField(right.getKey(), right.getValue().name().toLowerCase(Locale.ROOT));

      duelFieldDataGeoPoint(random, context, leftFieldData, rightFieldData, precision);
      duelFieldDataGeoPoint(random, context, rightFieldData, leftFieldData, precision);

      DirectoryReader perSegment = DirectoryReader.open(writer, true);
      CompositeReaderContext composite = perSegment.getContext();
      List<AtomicReaderContext> leaves = composite.leaves();
      for (AtomicReaderContext atomicReaderContext : leaves) {
        duelFieldDataGeoPoint(
            random, atomicReaderContext, leftFieldData, rightFieldData, precision);
      }
      perSegment.close();
    }
  }
Ejemplo n.º 3
0
  @Test
  public void testDuelAllTypesSingleValue() throws Exception {
    final String mapping =
        XContentFactory.jsonBuilder()
            .startObject()
            .startObject("type")
            .startObject("properties")
            .startObject("bytes")
            .field("type", "string")
            .field("index", "not_analyzed")
            .startObject("fielddata")
            .field("format", LuceneTestCase.defaultCodecSupportsSortedSet() ? "doc_values" : "fst")
            .endObject()
            .endObject()
            .startObject("byte")
            .field("type", "byte")
            .startObject("fielddata")
            .field("format", "doc_values")
            .endObject()
            .endObject()
            .startObject("short")
            .field("type", "short")
            .startObject("fielddata")
            .field("format", "doc_values")
            .endObject()
            .endObject()
            .startObject("integer")
            .field("type", "integer")
            .startObject("fielddata")
            .field("format", "doc_values")
            .endObject()
            .endObject()
            .startObject("long")
            .field("type", "long")
            .startObject("fielddata")
            .field("format", "doc_values")
            .endObject()
            .endObject()
            .startObject("float")
            .field("type", "float")
            .startObject("fielddata")
            .field("format", "doc_values")
            .endObject()
            .endObject()
            .startObject("double")
            .field("type", "double")
            .startObject("fielddata")
            .field("format", "doc_values")
            .endObject()
            .endObject()
            .endObject()
            .endObject()
            .endObject()
            .string();
    final DocumentMapper mapper = mapperService.documentMapperParser().parse(mapping);
    Random random = getRandom();
    int atLeast = scaledRandomIntBetween(1000, 1500);
    for (int i = 0; i < atLeast; i++) {
      String s = Integer.toString(randomByte());

      XContentBuilder doc = XContentFactory.jsonBuilder().startObject();
      for (String fieldName :
          Arrays.asList("bytes", "byte", "short", "integer", "long", "float", "double")) {
        doc = doc.field(fieldName, s);
      }

      doc = doc.endObject();

      final ParsedDocument d = mapper.parse("type", Integer.toString(i), doc.bytes());

      writer.addDocument(d.rootDoc());

      if (random.nextInt(10) == 0) {
        refreshReader();
      }
    }
    AtomicReaderContext context = refreshReader();
    Map<FieldDataType, Type> typeMap = new HashMap<>();
    typeMap.put(
        new FieldDataType("string", ImmutableSettings.builder().put("format", "fst")), Type.Bytes);
    typeMap.put(
        new FieldDataType("string", ImmutableSettings.builder().put("format", "paged_bytes")),
        Type.Bytes);
    typeMap.put(
        new FieldDataType("byte", ImmutableSettings.builder().put("format", "array")),
        Type.Integer);
    typeMap.put(
        new FieldDataType("short", ImmutableSettings.builder().put("format", "array")),
        Type.Integer);
    typeMap.put(
        new FieldDataType("int", ImmutableSettings.builder().put("format", "array")), Type.Integer);
    typeMap.put(
        new FieldDataType("long", ImmutableSettings.builder().put("format", "array")), Type.Long);
    typeMap.put(
        new FieldDataType("double", ImmutableSettings.builder().put("format", "array")),
        Type.Double);
    typeMap.put(
        new FieldDataType("float", ImmutableSettings.builder().put("format", "array")), Type.Float);
    typeMap.put(
        new FieldDataType("byte", ImmutableSettings.builder().put("format", "doc_values")),
        Type.Integer);
    typeMap.put(
        new FieldDataType("short", ImmutableSettings.builder().put("format", "doc_values")),
        Type.Integer);
    typeMap.put(
        new FieldDataType("int", ImmutableSettings.builder().put("format", "doc_values")),
        Type.Integer);
    typeMap.put(
        new FieldDataType("long", ImmutableSettings.builder().put("format", "doc_values")),
        Type.Long);
    typeMap.put(
        new FieldDataType("double", ImmutableSettings.builder().put("format", "doc_values")),
        Type.Double);
    typeMap.put(
        new FieldDataType("float", ImmutableSettings.builder().put("format", "doc_values")),
        Type.Float);
    typeMap.put(
        new FieldDataType("string", ImmutableSettings.builder().put("format", "doc_values")),
        Type.Bytes);
    ArrayList<Entry<FieldDataType, Type>> list = new ArrayList<>(typeMap.entrySet());
    Preprocessor pre = new ToDoublePreprocessor();
    while (!list.isEmpty()) {
      Entry<FieldDataType, Type> left;
      Entry<FieldDataType, Type> right;
      if (list.size() > 1) {
        left = list.remove(random.nextInt(list.size()));
        right = list.remove(random.nextInt(list.size()));
      } else {
        right = left = list.remove(0);
      }

      ifdService.clear();
      IndexFieldData<?> leftFieldData =
          getForField(left.getKey(), left.getValue().name().toLowerCase(Locale.ROOT));
      ifdService.clear();
      IndexFieldData<?> rightFieldData =
          getForField(right.getKey(), right.getValue().name().toLowerCase(Locale.ROOT));
      duelFieldDataBytes(random, context, leftFieldData, rightFieldData, pre);
      duelFieldDataBytes(random, context, rightFieldData, leftFieldData, pre);

      DirectoryReader perSegment = DirectoryReader.open(writer, true);
      CompositeReaderContext composite = perSegment.getContext();
      List<AtomicReaderContext> leaves = composite.leaves();
      for (AtomicReaderContext atomicReaderContext : leaves) {
        duelFieldDataBytes(random, atomicReaderContext, leftFieldData, rightFieldData, pre);
      }
    }
  }
Ejemplo n.º 4
0
  @Test
  public void testDuelStrings() throws Exception {
    Random random = getRandom();
    int atLeast = scaledRandomIntBetween(1000, 1500);
    for (int i = 0; i < atLeast; i++) {
      Document d = new Document();
      d.add(new StringField("_id", "" + i, Field.Store.NO));
      if (random.nextInt(15) != 0) {
        int[] numbers = getNumbers(random, Integer.MAX_VALUE);
        for (int j : numbers) {
          final String s = English.longToEnglish(j);
          d.add(new StringField("bytes", s, Field.Store.NO));
          d.add(new SortedSetDocValuesField("bytes", new BytesRef(s)));
        }
        if (random.nextInt(10) == 0) {
          d.add(new StringField("bytes", "", Field.Store.NO));
          d.add(new SortedSetDocValuesField("bytes", new BytesRef()));
        }
      }
      writer.addDocument(d);
      if (random.nextInt(10) == 0) {
        refreshReader();
      }
    }
    AtomicReaderContext context = refreshReader();
    Map<FieldDataType, Type> typeMap = new HashMap<>();
    typeMap.put(
        new FieldDataType("string", ImmutableSettings.builder().put("format", "fst")), Type.Bytes);
    typeMap.put(
        new FieldDataType("string", ImmutableSettings.builder().put("format", "paged_bytes")),
        Type.Bytes);
    typeMap.put(
        new FieldDataType("string", ImmutableSettings.builder().put("format", "doc_values")),
        Type.Bytes);
    // TODO add filters
    ArrayList<Entry<FieldDataType, Type>> list = new ArrayList<>(typeMap.entrySet());
    Preprocessor pre = new Preprocessor();
    while (!list.isEmpty()) {
      Entry<FieldDataType, Type> left;
      Entry<FieldDataType, Type> right;
      if (list.size() > 1) {
        left = list.remove(random.nextInt(list.size()));
        right = list.remove(random.nextInt(list.size()));
      } else {
        right = left = list.remove(0);
      }
      ifdService.clear();
      IndexFieldData<?> leftFieldData =
          getForField(left.getKey(), left.getValue().name().toLowerCase(Locale.ROOT));

      ifdService.clear();
      IndexFieldData<?> rightFieldData =
          getForField(right.getKey(), right.getValue().name().toLowerCase(Locale.ROOT));

      duelFieldDataBytes(random, context, leftFieldData, rightFieldData, pre);
      duelFieldDataBytes(random, context, rightFieldData, leftFieldData, pre);

      DirectoryReader perSegment = DirectoryReader.open(writer, true);
      CompositeReaderContext composite = perSegment.getContext();
      List<AtomicReaderContext> leaves = composite.leaves();
      for (AtomicReaderContext atomicReaderContext : leaves) {
        duelFieldDataBytes(random, atomicReaderContext, leftFieldData, rightFieldData, pre);
      }
      perSegment.close();
    }
  }
Ejemplo n.º 5
0
  @Test
  public void testDuelDoubles() throws Exception {
    final String mapping =
        XContentFactory.jsonBuilder()
            .startObject()
            .startObject("type")
            .startObject("properties")
            .startObject("float")
            .field("type", "float")
            .startObject("fielddata")
            .field("format", "doc_values")
            .endObject()
            .endObject()
            .startObject("double")
            .field("type", "double")
            .startObject("fielddata")
            .field("format", "doc_values")
            .endObject()
            .endObject()
            .endObject()
            .endObject()
            .endObject()
            .string();

    final DocumentMapper mapper = mapperService.documentMapperParser().parse(mapping);
    Random random = getRandom();
    int atLeast = scaledRandomIntBetween(1000, 1500);
    final int maxNumValues = randomBoolean() ? 1 : randomIntBetween(2, 40);
    float[] values = new float[maxNumValues];
    for (int i = 0; i < atLeast; i++) {
      int numValues = randomInt(maxNumValues);
      float def = randomBoolean() ? randomFloat() : Float.NaN;
      // FD loses values if they are duplicated, so we must deduplicate for this test
      Set<Float> vals = new HashSet<Float>();
      for (int j = 0; j < numValues; ++j) {
        if (randomBoolean()) {
          vals.add(def);
        } else {
          vals.add(randomFloat());
        }
      }
      numValues = vals.size();
      int upto = 0;
      for (Float f : vals) {
        values[upto++] = f.floatValue();
      }

      XContentBuilder doc = XContentFactory.jsonBuilder().startObject().startArray("float");
      for (int j = 0; j < numValues; ++j) {
        doc = doc.value(values[j]);
      }
      doc = doc.endArray().startArray("double");
      for (int j = 0; j < numValues; ++j) {
        doc = doc.value(values[j]);
      }
      doc = doc.endArray().endObject();

      final ParsedDocument d = mapper.parse("type", Integer.toString(i), doc.bytes());

      writer.addDocument(d.rootDoc());
      if (random.nextInt(10) == 0) {
        refreshReader();
      }
    }
    AtomicReaderContext context = refreshReader();
    Map<FieldDataType, Type> typeMap = new HashMap<>();
    typeMap.put(
        new FieldDataType("double", ImmutableSettings.builder().put("format", "array")),
        Type.Double);
    typeMap.put(
        new FieldDataType("float", ImmutableSettings.builder().put("format", "array")), Type.Float);
    typeMap.put(
        new FieldDataType("double", ImmutableSettings.builder().put("format", "doc_values")),
        Type.Double);
    typeMap.put(
        new FieldDataType("float", ImmutableSettings.builder().put("format", "doc_values")),
        Type.Float);
    ArrayList<Entry<FieldDataType, Type>> list = new ArrayList<>(typeMap.entrySet());
    while (!list.isEmpty()) {
      Entry<FieldDataType, Type> left;
      Entry<FieldDataType, Type> right;
      if (list.size() > 1) {
        left = list.remove(random.nextInt(list.size()));
        right = list.remove(random.nextInt(list.size()));
      } else {
        right = left = list.remove(0);
      }
      ifdService.clear();
      IndexNumericFieldData leftFieldData =
          getForField(left.getKey(), left.getValue().name().toLowerCase(Locale.ROOT));

      ifdService.clear();
      IndexNumericFieldData rightFieldData =
          getForField(right.getKey(), right.getValue().name().toLowerCase(Locale.ROOT));

      duelFieldDataDouble(random, context, leftFieldData, rightFieldData);
      duelFieldDataDouble(random, context, rightFieldData, leftFieldData);

      DirectoryReader perSegment = DirectoryReader.open(writer, true);
      CompositeReaderContext composite = perSegment.getContext();
      List<AtomicReaderContext> leaves = composite.leaves();
      for (AtomicReaderContext atomicReaderContext : leaves) {
        duelFieldDataDouble(random, atomicReaderContext, leftFieldData, rightFieldData);
      }
    }
  }
  @Test
  public void testVectorHighlighterPrefixQuery() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter =
        new IndexWriter(dir, new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER));

    Document document = new Document();
    document.add(new TextField("_id", "1", Field.Store.YES));
    document.add(
        new Field(
            "content",
            "the big bad dog",
            Field.Store.YES,
            Field.Index.ANALYZED,
            Field.TermVector.WITH_POSITIONS_OFFSETS));
    indexWriter.addDocument(document);

    IndexReader reader = DirectoryReader.open(indexWriter, true);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);

    assertThat(topDocs.totalHits, equalTo(1));

    XFastVectorHighlighter highlighter = new XFastVectorHighlighter();

    PrefixQuery prefixQuery = new PrefixQuery(new Term("content", "ba"));
    assertThat(
        prefixQuery.getRewriteMethod().getClass().getName(),
        equalTo(PrefixQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT.getClass().getName()));
    String fragment =
        highlighter.getBestFragment(
            highlighter.getFieldQuery(prefixQuery),
            reader,
            topDocs.scoreDocs[0].doc,
            "content",
            30);
    assertThat(fragment, nullValue());

    prefixQuery.setRewriteMethod(PrefixQuery.SCORING_BOOLEAN_QUERY_REWRITE);
    Query rewriteQuery = prefixQuery.rewrite(reader);
    fragment =
        highlighter.getBestFragment(
            highlighter.getFieldQuery(rewriteQuery),
            reader,
            topDocs.scoreDocs[0].doc,
            "content",
            30);
    assertThat(fragment, notNullValue());

    // now check with the custom field query
    prefixQuery = new PrefixQuery(new Term("content", "ba"));
    assertThat(
        prefixQuery.getRewriteMethod().getClass().getName(),
        equalTo(PrefixQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT.getClass().getName()));
    fragment =
        highlighter.getBestFragment(
            new CustomFieldQuery(prefixQuery, reader, highlighter),
            reader,
            topDocs.scoreDocs[0].doc,
            "content",
            30);
    assertThat(fragment, notNullValue());
  }
Ejemplo n.º 7
0
  @Test
  public void testMixedChecksums() throws IOException {
    final ShardId shardId = new ShardId(new Index("index"), 1);
    DirectoryService directoryService = new LuceneManagedDirectoryService(random());
    Store store =
        new Store(
            shardId,
            ImmutableSettings.EMPTY,
            directoryService,
            randomDistributor(directoryService),
            new DummyShardLock(shardId));
    // this time random codec....
    IndexWriter writer =
        new IndexWriter(
            store.directory(),
            newIndexWriterConfig(random(), new MockAnalyzer(random()))
                .setCodec(actualDefaultCodec()));
    int docs = 1 + random().nextInt(100);

    for (int i = 0; i < docs; i++) {
      Document doc = new Document();
      doc.add(
          new TextField("id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
      doc.add(
          new TextField(
              "body",
              TestUtil.randomRealisticUnicodeString(random()),
              random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
      doc.add(
          new SortedDocValuesField(
              "dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random()))));
      writer.addDocument(doc);
    }
    if (random().nextBoolean()) {
      for (int i = 0; i < docs; i++) {
        if (random().nextBoolean()) {
          Document doc = new Document();
          doc.add(
              new TextField(
                  "id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
          doc.add(
              new TextField(
                  "body",
                  TestUtil.randomRealisticUnicodeString(random()),
                  random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
          writer.updateDocument(new Term("id", "" + i), doc);
        }
      }
    }
    if (random().nextBoolean()) {
      DirectoryReader.open(writer, random().nextBoolean()).close(); // flush
    }
    Store.MetadataSnapshot metadata;
    // check before we committed
    try {
      store.getMetadata();
      fail("no index present - expected exception");
    } catch (IndexNotFoundException ex) {
      // expected
    }
    assertThat(store.getMetadataOrEmpty(), is(Store.MetadataSnapshot.EMPTY)); // nothing committed
    writer.commit();
    writer.close();
    Store.LegacyChecksums checksums = new Store.LegacyChecksums();
    metadata = store.getMetadata();
    assertThat(metadata.asMap().isEmpty(), is(false));
    for (StoreFileMetaData meta : metadata) {
      try (IndexInput input = store.directory().openInput(meta.name(), IOContext.DEFAULT)) {
        if (meta.checksum() == null) {
          String checksum = null;
          try {
            CodecUtil.retrieveChecksum(input);
            fail("expected a corrupt index - posting format has not checksums");
          } catch (CorruptIndexException
              | IndexFormatTooOldException
              | IndexFormatTooNewException ex) {
            try (ChecksumIndexInput checksumIndexInput =
                store.directory().openChecksumInput(meta.name(), IOContext.DEFAULT)) {
              checksumIndexInput.seek(meta.length());
              checksum = Store.digestToString(checksumIndexInput.getChecksum());
            }
            // fine - it's a postings format without checksums
            checksums.add(new StoreFileMetaData(meta.name(), meta.length(), checksum, null));
          }
        } else {
          String checksum = Store.digestToString(CodecUtil.retrieveChecksum(input));
          assertThat(
              "File: " + meta.name() + " has a different checksum",
              meta.checksum(),
              equalTo(checksum));
          assertThat(meta.hasLegacyChecksum(), equalTo(false));
          assertThat(meta.writtenBy(), equalTo(Version.LATEST));
        }
      }
    }
    assertConsistent(store, metadata);
    checksums.write(store);
    metadata = store.getMetadata();
    assertThat(metadata.asMap().isEmpty(), is(false));
    for (StoreFileMetaData meta : metadata) {
      assertThat(
          "file: " + meta.name() + " has a null checksum", meta.checksum(), not(nullValue()));
      if (meta.hasLegacyChecksum()) {
        try (ChecksumIndexInput checksumIndexInput =
            store.directory().openChecksumInput(meta.name(), IOContext.DEFAULT)) {
          checksumIndexInput.seek(meta.length());
          assertThat(
              meta.checksum(), equalTo(Store.digestToString(checksumIndexInput.getChecksum())));
        }
      } else {
        try (IndexInput input = store.directory().openInput(meta.name(), IOContext.DEFAULT)) {
          String checksum = Store.digestToString(CodecUtil.retrieveChecksum(input));
          assertThat(
              "File: " + meta.name() + " has a different checksum",
              meta.checksum(),
              equalTo(checksum));
          assertThat(meta.hasLegacyChecksum(), equalTo(false));
          assertThat(meta.writtenBy(), equalTo(Version.LATEST));
        }
      }
    }
    assertConsistent(store, metadata);
    TestUtil.checkIndex(store.directory());
    assertDeleteContent(store, directoryService);
    IOUtils.close(store);
  }
Ejemplo n.º 8
0
  @Test
  public void testNewChecksums() throws IOException {
    final ShardId shardId = new ShardId(new Index("index"), 1);
    DirectoryService directoryService = new LuceneManagedDirectoryService(random());
    Store store =
        new Store(
            shardId,
            ImmutableSettings.EMPTY,
            directoryService,
            randomDistributor(directoryService),
            new DummyShardLock(shardId));
    // set default codec - all segments need checksums
    IndexWriter writer =
        new IndexWriter(
            store.directory(),
            newIndexWriterConfig(random(), new MockAnalyzer(random()))
                .setCodec(actualDefaultCodec()));
    int docs = 1 + random().nextInt(100);

    for (int i = 0; i < docs; i++) {
      Document doc = new Document();
      doc.add(
          new TextField("id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
      doc.add(
          new TextField(
              "body",
              TestUtil.randomRealisticUnicodeString(random()),
              random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
      doc.add(
          new SortedDocValuesField(
              "dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random()))));
      writer.addDocument(doc);
    }
    if (random().nextBoolean()) {
      for (int i = 0; i < docs; i++) {
        if (random().nextBoolean()) {
          Document doc = new Document();
          doc.add(
              new TextField(
                  "id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
          doc.add(
              new TextField(
                  "body",
                  TestUtil.randomRealisticUnicodeString(random()),
                  random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
          writer.updateDocument(new Term("id", "" + i), doc);
        }
      }
    }
    if (random().nextBoolean()) {
      DirectoryReader.open(writer, random().nextBoolean()).close(); // flush
    }
    Store.MetadataSnapshot metadata;
    // check before we committed
    try {
      store.getMetadata();
      fail("no index present - expected exception");
    } catch (IndexNotFoundException ex) {
      // expected
    }
    assertThat(store.getMetadataOrEmpty(), is(Store.MetadataSnapshot.EMPTY)); // nothing committed
    writer.commit();
    writer.close();
    metadata = store.getMetadata();
    assertThat(metadata.asMap().isEmpty(), is(false));
    for (StoreFileMetaData meta : metadata) {
      try (IndexInput input = store.directory().openInput(meta.name(), IOContext.DEFAULT)) {
        String checksum = Store.digestToString(CodecUtil.retrieveChecksum(input));
        assertThat(
            "File: " + meta.name() + " has a different checksum",
            meta.checksum(),
            equalTo(checksum));
        assertThat(meta.hasLegacyChecksum(), equalTo(false));
        assertThat(meta.writtenBy(), equalTo(Version.LATEST));
        if (meta.name().endsWith(".si") || meta.name().startsWith("segments_")) {
          assertThat(meta.hash().length, greaterThan(0));
        }
      }
    }
    assertConsistent(store, metadata);

    TestUtil.checkIndex(store.directory());
    assertDeleteContent(store, directoryService);
    IOUtils.close(store);
  }
Ejemplo n.º 9
0
  // IF THIS TEST FAILS ON UPGRADE GO LOOK AT THE
  // OldSIMockingCodec!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  @Test
  public void testWriteLegacyChecksums() throws IOException {
    final ShardId shardId = new ShardId(new Index("index"), 1);
    DirectoryService directoryService = new LuceneManagedDirectoryService(random());
    Store store =
        new Store(
            shardId,
            ImmutableSettings.EMPTY,
            directoryService,
            randomDistributor(directoryService),
            new DummyShardLock(shardId));
    // set default codec - all segments need checksums
    final boolean usesOldCodec = randomBoolean();
    IndexWriter writer =
        new IndexWriter(
            store.directory(),
            newIndexWriterConfig(random(), new MockAnalyzer(random()))
                .setCodec(usesOldCodec ? new OldSIMockingCodec() : actualDefaultCodec()));
    int docs = 1 + random().nextInt(100);

    for (int i = 0; i < docs; i++) {
      Document doc = new Document();
      doc.add(
          new TextField("id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
      doc.add(
          new TextField(
              "body",
              TestUtil.randomRealisticUnicodeString(random()),
              random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
      doc.add(
          new SortedDocValuesField(
              "dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random()))));
      writer.addDocument(doc);
    }
    if (random().nextBoolean()) {
      for (int i = 0; i < docs; i++) {
        if (random().nextBoolean()) {
          Document doc = new Document();
          doc.add(
              new TextField(
                  "id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
          doc.add(
              new TextField(
                  "body",
                  TestUtil.randomRealisticUnicodeString(random()),
                  random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
          writer.updateDocument(new Term("id", "" + i), doc);
        }
      }
    }
    if (random().nextBoolean()) {
      DirectoryReader.open(writer, random().nextBoolean()).close(); // flush
    }
    Store.MetadataSnapshot metadata;
    // check before we committed
    try {
      store.getMetadata();
      fail("no index present - expected exception");
    } catch (IndexNotFoundException ex) {
      // expected
    }
    assertThat(store.getMetadataOrEmpty(), is(Store.MetadataSnapshot.EMPTY)); // nothing committed

    writer.close();
    Store.LegacyChecksums checksums = new Store.LegacyChecksums();
    Map<String, StoreFileMetaData> legacyMeta = new HashMap<>();
    for (String file : store.directory().listAll()) {
      if (file.equals("write.lock") || file.equals(IndexFileNames.OLD_SEGMENTS_GEN)) {
        continue;
      }
      BytesRef hash = new BytesRef();
      if (file.startsWith("segments")) {
        hash = Store.MetadataSnapshot.hashFile(store.directory(), file);
      }
      StoreFileMetaData storeFileMetaData =
          new StoreFileMetaData(
              file, store.directory().fileLength(file), file + "checksum", null, hash);
      legacyMeta.put(file, storeFileMetaData);
      checksums.add(storeFileMetaData);
    }
    checksums.write(store);

    metadata = store.getMetadata();
    Map<String, StoreFileMetaData> stringStoreFileMetaDataMap = metadata.asMap();
    assertThat(legacyMeta.size(), equalTo(stringStoreFileMetaDataMap.size()));
    if (usesOldCodec) {
      for (StoreFileMetaData meta : legacyMeta.values()) {
        assertTrue(meta.toString(), stringStoreFileMetaDataMap.containsKey(meta.name()));
        assertEquals(meta.name() + "checksum", meta.checksum());
        assertTrue(
            meta + " vs. " + stringStoreFileMetaDataMap.get(meta.name()),
            stringStoreFileMetaDataMap.get(meta.name()).isSame(meta));
      }
    } else {

      // even if we have a legacy checksum - if we use a new codec we should reuse
      for (StoreFileMetaData meta : legacyMeta.values()) {
        assertTrue(meta.toString(), stringStoreFileMetaDataMap.containsKey(meta.name()));
        assertFalse(
            meta + " vs. " + stringStoreFileMetaDataMap.get(meta.name()),
            stringStoreFileMetaDataMap.get(meta.name()).isSame(meta));
        StoreFileMetaData storeFileMetaData = metadata.get(meta.name());
        try (IndexInput input =
            store.openVerifyingInput(meta.name(), IOContext.DEFAULT, storeFileMetaData)) {
          assertTrue(storeFileMetaData.toString(), input instanceof Store.VerifyingIndexInput);
          input.seek(meta.length());
          Store.verify(input);
        }
      }
    }
    assertDeleteContent(store, directoryService);
    IOUtils.close(store);
  }