public void testDocValuesUnstored() throws IOException {
   Directory dir = newDirectory();
   IndexWriterConfig iwconfig =
       newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
   iwconfig.setMergePolicy(newLogMergePolicy());
   IndexWriter writer = new IndexWriter(dir, iwconfig);
   for (int i = 0; i < 50; i++) {
     Document doc = new Document();
     doc.add(new NumericDocValuesField("dv", i));
     doc.add(new TextField("docId", "" + i, Field.Store.YES));
     writer.addDocument(doc);
   }
   DirectoryReader r = writer.getReader();
   SlowCompositeReaderWrapper slow = new SlowCompositeReaderWrapper(r);
   FieldInfos fi = slow.getFieldInfos();
   FieldInfo dvInfo = fi.fieldInfo("dv");
   assertTrue(dvInfo.hasDocValues());
   NumericDocValues dv = slow.getNumericDocValues("dv");
   for (int i = 0; i < 50; i++) {
     assertEquals(i, dv.get(i));
     StoredDocument d = slow.document(i);
     // cannot use d.get("dv") due to another bug!
     assertNull(d.getField("dv"));
     assertEquals(Integer.toString(i), d.get("docId"));
   }
   slow.close();
   writer.close();
   dir.close();
 }
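A minimal sketch (not part of the test above) of doing the same doc-values check per leaf instead of through SlowCompositeReaderWrapper. It assumes a Lucene 4.x-style API (AtomicReaderContext, AtomicReader.getFieldInfos, AtomicReader.getNumericDocValues); the helper name is made up for illustration.

  // hypothetical helper: true if every segment that has the field exposes numeric doc values for it
  static boolean hasNumericDocValues(DirectoryReader reader, String field) throws IOException {
    for (AtomicReaderContext ctx : reader.leaves()) {
      FieldInfo info = ctx.reader().getFieldInfos().fieldInfo(field);
      if (info == null) {
        continue; // field is absent from this segment
      }
      if (!info.hasDocValues() || ctx.reader().getNumericDocValues(field) == null) {
        return false; // field exists here but has no numeric doc values
      }
    }
    return true;
  }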
  /** make sure we downgrade positions and payloads correctly */
  public void testMixing() throws Exception {
    // no positions
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);

    Directory dir = newDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);

    for (int i = 0; i < 20; i++) {
      Document doc = new Document();
      if (i < 19 && random().nextBoolean()) {
        for (int j = 0; j < 50; j++) {
          doc.add(new TextField("foo", "i have positions", Field.Store.NO));
        }
      } else {
        for (int j = 0; j < 50; j++) {
          doc.add(new Field("foo", "i have no positions", ft));
        }
      }
      iw.addDocument(doc);
      iw.commit();
    }

    if (random().nextBoolean()) {
      iw.forceMerge(1);
    }

    DirectoryReader ir = iw.getReader();
    FieldInfos fis = MultiFields.getMergedFieldInfos(ir);
    assertEquals(IndexOptions.DOCS_AND_FREQS, fis.fieldInfo("foo").getIndexOptions());
    assertFalse(fis.fieldInfo("foo").hasPayloads());
    iw.close();
    ir.close();
    dir.close(); // checkindex
  }
  public SepPostingsReader(
      Directory dir,
      FieldInfos fieldInfos,
      SegmentInfo segmentInfo,
      IOContext context,
      IntStreamFactory intFactory,
      String segmentSuffix)
      throws IOException {
    boolean success = false;
    try {

      final String docFileName =
          IndexFileNames.segmentFileName(
              segmentInfo.name, segmentSuffix, SepPostingsWriter.DOC_EXTENSION);
      docIn = intFactory.openInput(dir, docFileName, context);

      skipIn =
          dir.openInput(
              IndexFileNames.segmentFileName(
                  segmentInfo.name, segmentSuffix, SepPostingsWriter.SKIP_EXTENSION),
              context);

      if (fieldInfos.hasFreq()) {
        freqIn =
            intFactory.openInput(
                dir,
                IndexFileNames.segmentFileName(
                    segmentInfo.name, segmentSuffix, SepPostingsWriter.FREQ_EXTENSION),
                context);
      } else {
        freqIn = null;
      }
      if (fieldInfos.hasProx()) {
        posIn =
            intFactory.openInput(
                dir,
                IndexFileNames.segmentFileName(
                    segmentInfo.name, segmentSuffix, SepPostingsWriter.POS_EXTENSION),
                context);
        payloadIn =
            dir.openInput(
                IndexFileNames.segmentFileName(
                    segmentInfo.name, segmentSuffix, SepPostingsWriter.PAYLOAD_EXTENSION),
                context);
      } else {
        posIn = null;
        payloadIn = null;
      }
      success = true;
    } finally {
      if (!success) {
        close();
      }
    }
  }
Example #4
  // Tests whether merging of docs that have different
  // omitTermFreqAndPositions for the same field works
  public void testMixedMerge() throws Exception {
    Directory ram = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriter writer =
        new IndexWriter(
            ram,
            newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
                .setMaxBufferedDocs(3)
                .setMergePolicy(newLogMergePolicy(2)));
    Document d = new Document();

    // this field will have Tf
    Field f1 = newField("f1", "This field has term freqs", normalType);
    d.add(f1);

    // this field will NOT have Tf
    Field f2 = newField("f2", "This field has NO Tf in all docs", omitType);
    d.add(f2);

    for (int i = 0; i < 30; i++) writer.addDocument(d);

    // now add another document which has term freqs for field f2 and not for f1,
    // and verify that the SegmentMerger keeps things consistent
    d = new Document();

    // Reversed
    f1 = newField("f1", "This field has term freqs", omitType);
    d.add(f1);

    f2 = newField("f2", "This field has NO Tf in all docs", normalType);
    d.add(f2);

    for (int i = 0; i < 30; i++) writer.addDocument(d);

    // force merge
    writer.forceMerge(1);
    // flush
    writer.close();

    SegmentReader reader = getOnlySegmentReader(DirectoryReader.open(ram));
    FieldInfos fi = reader.getFieldInfos();
    assertEquals(
        "OmitTermFreqAndPositions field bit should be set.",
        IndexOptions.DOCS_ONLY,
        fi.fieldInfo("f1").getIndexOptions());
    assertEquals(
        "OmitTermFreqAndPositions field bit should be set.",
        IndexOptions.DOCS_ONLY,
        fi.fieldInfo("f2").getIndexOptions());

    reader.close();
    ram.close();
  }
  // Tests whether merging of docs that have different
  // omitNorms for the same field works
  public void testMixedMerge() throws Exception {
    Directory ram = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriter writer =
        new IndexWriter(
            ram,
            newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
                .setMaxBufferedDocs(3)
                .setMergePolicy(newLogMergePolicy(2)));
    Document d = new Document();

    // this field will have norms
    Field f1 = newTextField("f1", "This field has norms", Field.Store.NO);
    d.add(f1);

    // this field will NOT have norms
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.setOmitNorms(true);
    Field f2 = newField("f2", "This field has NO norms in all docs", customType);
    d.add(f2);

    for (int i = 0; i < 30; i++) {
      writer.addDocument(d);
    }

    // now add another document which has norms for field f2 and not for f1,
    // and verify that the SegmentMerger keeps things consistent
    d = new Document();

    // Reversed
    d.add(newField("f1", "This field has norms", customType));

    d.add(newTextField("f2", "This field has NO norms in all docs", Field.Store.NO));

    for (int i = 0; i < 30; i++) {
      writer.addDocument(d);
    }

    // force merge
    writer.forceMerge(1);
    // flush
    writer.close();

    SegmentReader reader = getOnlySegmentReader(DirectoryReader.open(ram));
    FieldInfos fi = reader.getFieldInfos();
    assertTrue("OmitNorms field bit should be set.", fi.fieldInfo("f1").omitsNorms());
    assertTrue("OmitNorms field bit should be set.", fi.fieldInfo("f2").omitsNorms());

    reader.close();
    ram.close();
  }
 private void readFields(IndexInput meta, FieldInfos infos) throws IOException {
   for (int fieldNumber = meta.readInt(); fieldNumber != -1; fieldNumber = meta.readInt()) {
     FieldInfo info = infos.fieldInfo(fieldNumber);
     if (info == null) {
       throw new CorruptIndexException("Invalid field number: " + fieldNumber, meta);
     } else if (!info.hasNorms()) {
       throw new CorruptIndexException("Invalid field: " + info.name, meta);
     }
     NormsEntry entry = new NormsEntry();
     entry.docsWithFieldOffset = meta.readLong();
     entry.numDocsWithField = meta.readInt();
     entry.bytesPerNorm = meta.readByte();
     switch (entry.bytesPerNorm) {
       case 0:
       case 1:
       case 2:
       case 4:
       case 8:
         break;
       default:
         throw new CorruptIndexException(
             "Invalid bytesPerValue: " + entry.bytesPerNorm + ", field: " + info.name, meta);
     }
     entry.normsOffset = meta.readLong();
     norms.put(info.number, entry);
   }
 }
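Restating the reads above, each per-field entry in the norms metadata stream has the following layout (the field list is terminated by a field number of -1):

  // int   fieldNumber          (-1 terminates the list)
  // long  docsWithFieldOffset
  // int   numDocsWithField
  // byte  bytesPerNorm         (must be 0, 1, 2, 4, or 8)
  // long  normsOffset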
 public void testFieldNames() throws Exception {
   Directory dir1 = getDir1(random());
   Directory dir2 = getDir2(random());
   ParallelLeafReader pr =
       new ParallelLeafReader(
           SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir1)),
           SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir2)));
   FieldInfos fieldInfos = pr.getFieldInfos();
   assertEquals(4, fieldInfos.size());
   assertNotNull(fieldInfos.fieldInfo("f1"));
   assertNotNull(fieldInfos.fieldInfo("f2"));
   assertNotNull(fieldInfos.fieldInfo("f3"));
   assertNotNull(fieldInfos.fieldInfo("f4"));
   pr.close();
   dir1.close();
   dir2.close();
 }
  public FSTTermsReader(SegmentReadState state, PostingsReaderBase postingsReader)
      throws IOException {
    final String termsFileName =
        IndexFileNames.segmentFileName(
            state.segmentInfo.name, state.segmentSuffix, FSTTermsWriter.TERMS_EXTENSION);

    this.postingsReader = postingsReader;
    final IndexInput in = state.directory.openInput(termsFileName, state.context);

    boolean success = false;
    try {
      version = readHeader(in);
      if (version >= FSTTermsWriter.TERMS_VERSION_CHECKSUM) {
        CodecUtil.checksumEntireFile(in);
      }
      this.postingsReader.init(in);
      seekDir(in);

      final FieldInfos fieldInfos = state.fieldInfos;
      final int numFields = in.readVInt();
      for (int i = 0; i < numFields; i++) {
        int fieldNumber = in.readVInt();
        FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
        long numTerms = in.readVLong();
        long sumTotalTermFreq =
            fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY ? -1 : in.readVLong();
        long sumDocFreq = in.readVLong();
        int docCount = in.readVInt();
        int longsSize = in.readVInt();
        TermsReader current =
            new TermsReader(
                fieldInfo, in, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize);
        TermsReader previous = fields.put(fieldInfo.name, current);
        checkFieldSummary(state.segmentInfo, in, current, previous);
      }
      success = true;
    } finally {
      if (success) {
        IOUtils.close(in);
      } else {
        IOUtils.closeWhileHandlingException(in);
      }
    }
  }
  void createDocumentNode(final DocumentDescriptor inDescriptor) throws IOException {
    try {
      _document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
      _rootNode = _document.createElement("document");
    } catch (ParserConfigurationException e) {
      e.printStackTrace();
      System.exit(1);
    }
    AtomicReader segmentReader = _indexReader.leaves().get(inDescriptor.segmentNumber).reader();
    _rootNode.setAttribute("id", DocumentIdOperations.documentDescriptorToId(inDescriptor));
    // TODO: implement the proper way of building a title from the production report
    _rootNode.setAttribute("title", buildDocumentTitle(segmentReader, inDescriptor));
    _rootNode.setAttribute("path", "ruscorpora.ru");
    _rootNode.setAttribute(
        "tagging", Attributes.getDocAttribute(segmentReader, inDescriptor.localId, "tagging"));
    _rootNode.setAttribute("snippets", "0");
    Element attributesNode = _document.createElement("attributes");

    _rootNode.appendChild(attributesNode);

    FieldInfos fields = segmentReader.getFieldInfos();
    for (int fieldIndex = 0; fieldIndex != fields.size(); ++fieldIndex) {
      FieldInfo field = fields.fieldInfo(fieldIndex);
      // TODO: understand why field may turn into null
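      // (field numbers are not guaranteed to be dense, so looking up by loop index can return null)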
      if (field == null) {
        continue;
      }
      String name = field.name;
      if (Attributes.ATTRIBUTES.contains(name)
          || Attributes.ATTRIBUTES_FOR_REPORT.contains(name)
          || Attributes.ATTRIBUTES_FOR_WORD_INFO.contains(name)
          || !field.hasDocValues()) {
        // it's a word attribute
        continue;
      }
      Element attrNode = _document.createElement("attr");
      attrNode.setAttribute("name", name);
      attrNode.setAttribute(
          "value", Attributes.getDocAttribute(segmentReader, inDescriptor.localId, name));
      attributesNode.appendChild(attrNode);
    }
  }
Example #10
  public void testDocValues() throws IOException {
    assertU(adoc("id", "1"));
    assertU(commit());
    try (SolrCore core = h.getCoreInc()) {
      final RefCounted<SolrIndexSearcher> searcherRef = core.openNewSearcher(true, true);
      final SolrIndexSearcher searcher = searcherRef.get();
      try {
        final LeafReader reader = searcher.getLeafReader();
        assertEquals(1, reader.numDocs());
        final FieldInfos infos = reader.getFieldInfos();
        assertEquals(DocValuesType.NUMERIC, infos.fieldInfo("floatdv").getDocValuesType());
        assertEquals(DocValuesType.NUMERIC, infos.fieldInfo("intdv").getDocValuesType());
        assertEquals(DocValuesType.NUMERIC, infos.fieldInfo("doubledv").getDocValuesType());
        assertEquals(DocValuesType.NUMERIC, infos.fieldInfo("longdv").getDocValuesType());
        assertEquals(DocValuesType.SORTED, infos.fieldInfo("stringdv").getDocValuesType());

        assertEquals((long) Float.floatToIntBits(1), reader.getNumericDocValues("floatdv").get(0));
        assertEquals(2L, reader.getNumericDocValues("intdv").get(0));
        assertEquals(Double.doubleToLongBits(3), reader.getNumericDocValues("doubledv").get(0));
        assertEquals(4L, reader.getNumericDocValues("longdv").get(0));

        final IndexSchema schema = core.getLatestSchema();
        final SchemaField floatDv = schema.getField("floatdv");
        final SchemaField intDv = schema.getField("intdv");
        final SchemaField doubleDv = schema.getField("doubledv");
        final SchemaField longDv = schema.getField("longdv");

        FunctionValues values =
            floatDv
                .getType()
                .getValueSource(floatDv, null)
                .getValues(null, searcher.getLeafReader().leaves().get(0));
        assertEquals(1f, values.floatVal(0), 0f);
        assertEquals(1f, values.objectVal(0));
        values =
            intDv
                .getType()
                .getValueSource(intDv, null)
                .getValues(null, searcher.getLeafReader().leaves().get(0));
        assertEquals(2, values.intVal(0));
        assertEquals(2, values.objectVal(0));
        values =
            doubleDv
                .getType()
                .getValueSource(doubleDv, null)
                .getValues(null, searcher.getLeafReader().leaves().get(0));
        assertEquals(3d, values.doubleVal(0), 0d);
        assertEquals(3d, values.objectVal(0));
        values =
            longDv
                .getType()
                .getValueSource(longDv, null)
                .getValues(null, searcher.getLeafReader().leaves().get(0));
        assertEquals(4L, values.longVal(0));
        assertEquals(4L, values.objectVal(0));
      } finally {
        searcherRef.decref();
      }
    }
  }
 private int readFields(IndexInput meta, FieldInfos infos) throws IOException {
   int numEntries = 0;
   int fieldNumber = meta.readVInt();
   while (fieldNumber != -1) {
     numEntries++;
     FieldInfo info = infos.fieldInfo(fieldNumber);
     if (info == null) {
       // trickier to validate more: because we re-use for norms, because we use multiple entries
       // for "composite" types like sortedset, etc.
       throw new CorruptIndexException("Invalid field number: " + fieldNumber, meta);
     }
     int fieldType = meta.readByte();
     if (fieldType == NUMBER) {
       NumericEntry entry = new NumericEntry();
       entry.offset = meta.readLong();
       entry.format = meta.readByte();
       switch (entry.format) {
         case DELTA_COMPRESSED:
         case TABLE_COMPRESSED:
         case GCD_COMPRESSED:
         case UNCOMPRESSED:
           break;
         default:
           throw new CorruptIndexException("Unknown format: " + entry.format, meta);
       }
       if (entry.format != UNCOMPRESSED) {
         entry.packedIntsVersion = meta.readVInt();
       }
       numerics.put(info.name, entry);
     } else if (fieldType == BYTES) {
       BinaryEntry entry = new BinaryEntry();
       entry.offset = meta.readLong();
       entry.numBytes = meta.readLong();
       entry.minLength = meta.readVInt();
       entry.maxLength = meta.readVInt();
       if (entry.minLength != entry.maxLength) {
         entry.packedIntsVersion = meta.readVInt();
         entry.blockSize = meta.readVInt();
       }
       binaries.put(info.name, entry);
     } else if (fieldType == FST) {
       FSTEntry entry = new FSTEntry();
       entry.offset = meta.readLong();
       entry.numOrds = meta.readVLong();
       fsts.put(info.name, entry);
     } else {
       throw new CorruptIndexException("invalid entry type: " + fieldType, meta);
     }
     fieldNumber = meta.readVInt();
   }
   return numEntries;
 }
  @Override
  public void write(
      Directory directory,
      SegmentInfo segmentInfo,
      String segmentSuffix,
      FieldInfos infos,
      IOContext context)
      throws IOException {
    final String fileName =
        IndexFileNames.segmentFileName(
            segmentInfo.name, segmentSuffix, Lucene46FieldInfosFormat.EXTENSION);
    try (IndexOutput output = directory.createOutput(fileName, context)) {
      CodecUtil.writeHeader(
          output, Lucene46FieldInfosFormat.CODEC_NAME, Lucene46FieldInfosFormat.FORMAT_CURRENT);
      output.writeVInt(infos.size());
      for (FieldInfo fi : infos) {
        IndexOptions indexOptions = fi.getIndexOptions();
        byte bits = 0x0;
        if (fi.hasVectors()) bits |= Lucene46FieldInfosFormat.STORE_TERMVECTOR;
        if (fi.omitsNorms()) bits |= Lucene46FieldInfosFormat.OMIT_NORMS;
        if (fi.hasPayloads()) bits |= Lucene46FieldInfosFormat.STORE_PAYLOADS;
        if (fi.getIndexOptions() != IndexOptions.NONE) {
          bits |= Lucene46FieldInfosFormat.IS_INDEXED;
          assert indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0
              || !fi.hasPayloads();
          if (indexOptions == IndexOptions.DOCS) {
            bits |= Lucene46FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS;
          } else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
            bits |= Lucene46FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS;
          } else if (indexOptions == IndexOptions.DOCS_AND_FREQS) {
            bits |= Lucene46FieldInfosFormat.OMIT_POSITIONS;
          }
        }
        output.writeString(fi.name);
        output.writeVInt(fi.number);
        output.writeByte(bits);

        // pack the DV types in one byte
        final byte dv = docValuesByte(fi.getDocValuesType());
        final byte nrm = docValuesByte(fi.hasNorms() ? DocValuesType.NUMERIC : DocValuesType.NONE);
        assert (dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0;
        byte val = (byte) (0xff & ((nrm << 4) | dv));
        output.writeByte(val);
        output.writeLong(fi.getDocValuesGen());
        output.writeStringStringMap(fi.attributes());
      }
      CodecUtil.writeFooter(output);
    }
  }
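A sketch of how the packed byte written above could be unpacked on the read side: the low nibble carries the doc-values type and the high nibble the norms type. The IndexInput `input` and the `docValuesType(byte)` helper (an assumed inverse of docValuesByte) are hypothetical here, not the actual Lucene46FieldInfosFormat reader code.

  // hypothetical decode mirroring the write path above
  byte val = input.readByte();
  DocValuesType dvType  = docValuesType((byte) (val & 0x0F));          // low nibble: doc values type
  DocValuesType nrmType = docValuesType((byte) ((val >>> 4) & 0x0F));  // high nibble: norms type
  boolean hasNorms = nrmType != DocValuesType.NONE;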
  @Override
  public void visitDocument(int n, StoredFieldVisitor visitor) throws IOException {
    in.seek(offsets[n]);

    while (true) {
      readLine();
      if (StringHelper.startsWith(scratch.get(), FIELD) == false) {
        break;
      }
      int fieldNumber = parseIntAt(FIELD.length);
      FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
      readLine();
      assert StringHelper.startsWith(scratch.get(), NAME);
      readLine();
      assert StringHelper.startsWith(scratch.get(), TYPE);

      final BytesRef type;
      if (equalsAt(TYPE_STRING, scratch.get(), TYPE.length)) {
        type = TYPE_STRING;
      } else if (equalsAt(TYPE_BINARY, scratch.get(), TYPE.length)) {
        type = TYPE_BINARY;
      } else if (equalsAt(TYPE_INT, scratch.get(), TYPE.length)) {
        type = TYPE_INT;
      } else if (equalsAt(TYPE_LONG, scratch.get(), TYPE.length)) {
        type = TYPE_LONG;
      } else if (equalsAt(TYPE_FLOAT, scratch.get(), TYPE.length)) {
        type = TYPE_FLOAT;
      } else if (equalsAt(TYPE_DOUBLE, scratch.get(), TYPE.length)) {
        type = TYPE_DOUBLE;
      } else {
        throw new RuntimeException("unknown field type");
      }

      switch (visitor.needsField(fieldInfo)) {
        case YES:
          readField(type, fieldInfo, visitor);
          break;
        case NO:
          readLine();
          assert StringHelper.startsWith(scratch.get(), VALUE);
          break;
        case STOP:
          return;
      }
    }
  }
Example #14
  final Document doc(int n, FieldSelector fieldSelector) throws IOException {
    indexStream.seek(n * 8L);
    long position = indexStream.readLong();
    fieldsStream.seek(position);

    Document doc = new Document();
    int numFields = fieldsStream.readVInt();
    for (int i = 0; i < numFields; i++) {
      int fieldNumber = fieldsStream.readVInt();
      FieldInfo fi = fieldInfos.fieldInfo(fieldNumber);
      FieldSelectorResult acceptField =
          fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.accept(fi.name);

      byte bits = fieldsStream.readByte();
      boolean compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
      boolean tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
      boolean binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
      // TODO: Find an alternative approach here if this list continues to grow beyond the
      // list of 5 or 6 currently here.  See LUCENE-762 for discussion
      if (acceptField.equals(FieldSelectorResult.LOAD)) {
        addField(doc, fi, binary, compressed, tokenize);
      } else if (acceptField.equals(FieldSelectorResult.LOAD_FOR_MERGE)) {
        addFieldForMerge(doc, fi, binary, compressed, tokenize);
      } else if (acceptField.equals(FieldSelectorResult.LOAD_AND_BREAK)) {
        addField(doc, fi, binary, compressed, tokenize);
        break; // Get out of this loop
      } else if (acceptField.equals(FieldSelectorResult.LAZY_LOAD)) {
        addFieldLazy(doc, fi, binary, compressed, tokenize);
      } else if (acceptField.equals(FieldSelectorResult.SIZE)) {
        skipField(binary, compressed, addFieldSize(doc, fi, binary, compressed));
      } else if (acceptField.equals(FieldSelectorResult.SIZE_AND_BREAK)) {
        addFieldSize(doc, fi, binary, compressed);
        break;
      } else {
        skipField(binary, compressed);
      }
    }

    return doc;
  }
  @Override
  public void merge(MergeState mergeState) throws IOException {
    if (mergeState.segmentInfo.getIndexSort() != null) {
      // TODO: can we gain back some optos even if index is sorted?  E.g. if sort results in large
      // chunks of contiguous docs from one sub
      // being copied over...?
      super.merge(mergeState);
      return;
    }

    for (PointsReader reader : mergeState.pointsReaders) {
      if (reader instanceof Lucene60PointsReader == false) {
        // We can only bulk merge when all to-be-merged segments use our format:
        super.merge(mergeState);
        return;
      }
    }
    for (PointsReader reader : mergeState.pointsReaders) {
      if (reader != null) {
        reader.checkIntegrity();
      }
    }

    for (FieldInfo fieldInfo : mergeState.mergeFieldInfos) {
      if (fieldInfo.getPointDimensionCount() != 0) {
        if (fieldInfo.getPointDimensionCount() == 1) {

          boolean singleValuePerDoc = true;

          // Worst case total maximum size (if none of the points are deleted):
          long totMaxSize = 0;
          for (int i = 0; i < mergeState.pointsReaders.length; i++) {
            PointsReader reader = mergeState.pointsReaders[i];
            if (reader != null) {
              FieldInfos readerFieldInfos = mergeState.fieldInfos[i];
              FieldInfo readerFieldInfo = readerFieldInfos.fieldInfo(fieldInfo.name);
              if (readerFieldInfo != null && readerFieldInfo.getPointDimensionCount() > 0) {
                PointValues values = reader.getValues(fieldInfo.name);
                if (values != null) {
                  totMaxSize += values.size();
                  singleValuePerDoc &= values.size() == values.getDocCount();
                }
              }
            }
          }

          // System.out.println("MERGE: field=" + fieldInfo.name);
          // Optimize the 1D case to use BKDWriter.merge, which does a single merge sort of the
          // already sorted incoming segments, instead of trying to sort all points again as if
          // we were simply reindexing them:
          try (BKDWriter writer =
              new BKDWriter(
                  writeState.segmentInfo.maxDoc(),
                  writeState.directory,
                  writeState.segmentInfo.name,
                  fieldInfo.getPointDimensionCount(),
                  fieldInfo.getPointNumBytes(),
                  maxPointsInLeafNode,
                  maxMBSortInHeap,
                  totMaxSize,
                  singleValuePerDoc)) {
            List<BKDReader> bkdReaders = new ArrayList<>();
            List<MergeState.DocMap> docMaps = new ArrayList<>();
            for (int i = 0; i < mergeState.pointsReaders.length; i++) {
              PointsReader reader = mergeState.pointsReaders[i];

              if (reader != null) {

                // we confirmed this up above
                assert reader instanceof Lucene60PointsReader;
                Lucene60PointsReader reader60 = (Lucene60PointsReader) reader;

                // NOTE: we cannot just use the merged fieldInfo.number (instead of resolving to
                // this
                // reader's FieldInfo as we do below) because field numbers can easily be different
                // when addIndexes(Directory...) copies over segments from another index:

                FieldInfos readerFieldInfos = mergeState.fieldInfos[i];
                FieldInfo readerFieldInfo = readerFieldInfos.fieldInfo(fieldInfo.name);
                if (readerFieldInfo != null && readerFieldInfo.getPointDimensionCount() > 0) {
                  BKDReader bkdReader = reader60.readers.get(readerFieldInfo.number);
                  if (bkdReader != null) {
                    bkdReaders.add(bkdReader);
                    docMaps.add(mergeState.docMaps[i]);
                  }
                }
              }
            }

            long fp = writer.merge(dataOut, docMaps, bkdReaders);
            if (fp != -1) {
              indexFPs.put(fieldInfo.name, fp);
            }
          }
        } else {
          mergeOneField(mergeState, fieldInfo);
        }
      }
    }

    finish();
  }
  // Tests whether the DocumentWriter correctly enables the
  // omitTermFreqAndPositions bit in the FieldInfo
  public void testPositions() throws Exception {
    Directory ram = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig(analyzer));
    Document d = new Document();

    // f1,f2,f3: docs only
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.setIndexOptions(IndexOptions.DOCS);

    Field f1 = newField("f1", "This field has docs only", ft);
    d.add(f1);

    Field f2 = newField("f2", "This field has docs only", ft);
    d.add(f2);

    Field f3 = newField("f3", "This field has docs only", ft);
    d.add(f3);

    FieldType ft2 = new FieldType(TextField.TYPE_NOT_STORED);
    ft2.setIndexOptions(IndexOptions.DOCS_AND_FREQS);

    // f4,f5,f6 docs and freqs
    Field f4 = newField("f4", "This field has docs and freqs", ft2);
    d.add(f4);

    Field f5 = newField("f5", "This field has docs and freqs", ft2);
    d.add(f5);

    Field f6 = newField("f6", "This field has docs and freqs", ft2);
    d.add(f6);

    FieldType ft3 = new FieldType(TextField.TYPE_NOT_STORED);
    ft3.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);

    // f7,f8,f9 docs/freqs/positions
    Field f7 = newField("f7", "This field has docs and freqs and positions", ft3);
    d.add(f7);

    Field f8 = newField("f8", "This field has docs and freqs and positions", ft3);
    d.add(f8);

    Field f9 = newField("f9", "This field has docs and freqs and positions", ft3);
    d.add(f9);

    writer.addDocument(d);
    writer.forceMerge(1);

    // now we add another document which has docs-only for f1, f4, f7, docs/freqs for f2, f5, f8,
    // and docs/freqs/positions for f3, f6, f9
    d = new Document();

    // f1,f4,f7: docs only
    f1 = newField("f1", "This field has docs only", ft);
    d.add(f1);

    f4 = newField("f4", "This field has docs only", ft);
    d.add(f4);

    f7 = newField("f7", "This field has docs only", ft);
    d.add(f7);

    // f2, f5, f8: docs and freqs
    f2 = newField("f2", "This field has docs and freqs", ft2);
    d.add(f2);

    f5 = newField("f5", "This field has docs and freqs", ft2);
    d.add(f5);

    f8 = newField("f8", "This field has docs and freqs", ft2);
    d.add(f8);

    // f3, f6, f9: docs and freqs and positions
    f3 = newField("f3", "This field has docs and freqs and positions", ft3);
    d.add(f3);

    f6 = newField("f6", "This field has docs and freqs and positions", ft3);
    d.add(f6);

    f9 = newField("f9", "This field has docs and freqs and positions", ft3);
    d.add(f9);

    writer.addDocument(d);

    // force merge
    writer.forceMerge(1);
    // flush
    writer.close();

    SegmentReader reader = getOnlySegmentReader(DirectoryReader.open(ram));
    FieldInfos fi = reader.getFieldInfos();
    // docs + docs = docs
    assertEquals(IndexOptions.DOCS, fi.fieldInfo("f1").getIndexOptions());
    // docs + docs/freqs = docs
    assertEquals(IndexOptions.DOCS, fi.fieldInfo("f2").getIndexOptions());
    // docs + docs/freqs/pos = docs
    assertEquals(IndexOptions.DOCS, fi.fieldInfo("f3").getIndexOptions());
    // docs/freqs + docs = docs
    assertEquals(IndexOptions.DOCS, fi.fieldInfo("f4").getIndexOptions());
    // docs/freqs + docs/freqs = docs/freqs
    assertEquals(IndexOptions.DOCS_AND_FREQS, fi.fieldInfo("f5").getIndexOptions());
    // docs/freqs + docs/freqs/pos = docs/freqs
    assertEquals(IndexOptions.DOCS_AND_FREQS, fi.fieldInfo("f6").getIndexOptions());
    // docs/freqs/pos + docs = docs
    assertEquals(IndexOptions.DOCS, fi.fieldInfo("f7").getIndexOptions());
    // docs/freqs/pos + docs/freqs = docs/freqs
    assertEquals(IndexOptions.DOCS_AND_FREQS, fi.fieldInfo("f8").getIndexOptions());
    // docs/freqs/pos + docs/freqs/pos = docs/freqs/pos
    assertEquals(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, fi.fieldInfo("f9").getIndexOptions());

    reader.close();
    ram.close();
  }
Example #17
  public static List<Element> handlePropertyName(
      String[] propertyNames,
      ServiceContext context,
      boolean freq,
      int maxRecords,
      String cswServiceSpecificConstraint,
      LuceneConfig luceneConfig)
      throws Exception {

    List<Element> domainValuesList = null;

    if (Log.isDebugEnabled(Geonet.CSW))
      Log.debug(
          Geonet.CSW,
          "Handling property names '"
              + Arrays.toString(propertyNames)
              + "' with max records of "
              + maxRecords);

    for (int i = 0; i < propertyNames.length; i++) {

      if (i == 0) domainValuesList = new ArrayList<Element>();

      // Initialize list of values element.
      Element listOfValues = null;

      // Generate DomainValues element
      Element domainValues = new Element("DomainValues", Csw.NAMESPACE_CSW);

      // FIXME what should be the type ???
      domainValues.setAttribute("type", "csw:Record");

      String property = propertyNames[i].trim();

      // Set propertyName in any case.
      Element pn = new Element("PropertyName", Csw.NAMESPACE_CSW);
      domainValues.addContent(pn.setText(property));

      GeonetContext gc = (GeonetContext) context.getHandlerContext(Geonet.CONTEXT_NAME);
      SearchManager sm = gc.getSearchmanager();

      IndexAndTaxonomy indexAndTaxonomy = sm.getNewIndexReader(null);
      try {
        GeonetworkMultiReader reader = indexAndTaxonomy.indexReader;
        BooleanQuery groupsQuery = (BooleanQuery) CatalogSearcher.getGroupsQuery(context);
        BooleanQuery query = null;

        // Apply CSW service specific constraint
        if (StringUtils.isNotEmpty(cswServiceSpecificConstraint)) {
          Query constraintQuery =
              CatalogSearcher.getCswServiceSpecificConstraintQuery(
                  cswServiceSpecificConstraint, luceneConfig);

          query = new BooleanQuery();

          BooleanClause.Occur occur = LuceneUtils.convertRequiredAndProhibitedToOccur(true, false);

          query.add(groupsQuery, occur);
          query.add(constraintQuery, occur);

        } else {
          query = groupsQuery;
        }

        List<Pair<String, Boolean>> sortFields =
            Collections.singletonList(Pair.read(Geonet.SearchResult.SortBy.RELEVANCE, true));
        Sort sort = LuceneSearcher.makeSort(sortFields, context.getLanguage(), false);
        CachingWrapperFilter filter = null;

        Pair<TopDocs, Element> searchResults =
            LuceneSearcher.doSearchAndMakeSummary(
                maxRecords,
                0,
                maxRecords,
                context.getLanguage(),
                null,
                reader,
                query,
                filter,
                sort,
                null,
                false,
                false,
                false,
                false // Scoring is useless for GetDomain operation
                );
        TopDocs hits = searchResults.one();

        try {
          // Get mapped lucene field in CSW configuration
          String indexField = CatalogConfiguration.getFieldMapping().get(property.toLowerCase());
          if (indexField != null) property = indexField;

          // check whether the requested property is present in the index via the field infos
          FieldInfos fi = new SlowCompositeReaderWrapper(reader).getFieldInfos();
          if (fi.fieldInfo(property) == null) continue;

          boolean isRange = false;
          if (CatalogConfiguration.getGetRecordsRangeFields().contains(property)) isRange = true;

          if (isRange) listOfValues = new Element("RangeOfValues", Csw.NAMESPACE_CSW);
          else listOfValues = new Element("ListOfValues", Csw.NAMESPACE_CSW);

          Set<String> fields = new HashSet<String>();
          fields.add(property);
          fields.add("_isTemplate");

          // parse each document in the index
          String[] fieldValues;
          SortedSet<String> sortedValues = new TreeSet<String>();
          HashMap<String, Integer> duplicateValues = new HashMap<String, Integer>();
          for (int j = 0; j < hits.scoreDocs.length; j++) {
            DocumentStoredFieldVisitor selector = new DocumentStoredFieldVisitor(fields);
            reader.document(hits.scoreDocs[j].doc, selector);
            Document doc = selector.getDocument();

            // Skip templates and subTemplates
            String[] isTemplate = doc.getValues("_isTemplate");
            if (isTemplate.length > 0 && !isTemplate[0].equals("n")) continue;

            // Get doc values for specified property
            fieldValues = doc.getValues(property);
            if (fieldValues == null) continue;

            addtoSortedSet(sortedValues, fieldValues, duplicateValues);
          }

          SummaryComparator valuesComparator =
              new SummaryComparator(SortOption.FREQUENCY, Type.STRING, context.getLanguage(), null);
          TreeSet<Map.Entry<String, Integer>> sortedValuesFrequency =
              new TreeSet<Map.Entry<String, Integer>>(valuesComparator);
          sortedValuesFrequency.addAll(duplicateValues.entrySet());

          if (freq) return createValuesByFrequency(sortedValuesFrequency);
          else listOfValues.addContent(createValuesElement(sortedValues, isRange));

        } finally {
          // no children means that the catalog was unable to determine
          // anything about the specified parameter
          if (listOfValues != null && listOfValues.getChildren().size() != 0)
            domainValues.addContent(listOfValues);

          // Add current DomainValues to the list
          domainValuesList.add(domainValues);
        }
      } finally {
        sm.releaseIndexReader(indexAndTaxonomy);
      }
    }
    return domainValuesList;
  }
Example #18
  /**
   * Safe (but, slowish) default method to write every vector field in the document. This default
   * implementation requires that the vectors implement both Fields.size and Terms.size.
   */
  protected final void addAllDocVectors(Fields vectors, FieldInfos fieldInfos) throws IOException {
    if (vectors == null) {
      startDocument(0);
      return;
    }

    final int numFields = vectors.size();
    if (numFields == -1) {
      throw new IllegalStateException("vectors.size() must be implemented (it returned -1)");
    }
    startDocument(numFields);

    final FieldsEnum fieldsEnum = vectors.iterator();
    String fieldName;
    String lastFieldName = null;

    while ((fieldName = fieldsEnum.next()) != null) {
      final FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldName);

      assert lastFieldName == null || fieldName.compareTo(lastFieldName) > 0
          : "lastFieldName=" + lastFieldName + " fieldName=" + fieldName;
      lastFieldName = fieldName;

      final Terms terms = fieldsEnum.terms();
      if (terms == null) {
        // FieldsEnum shouldn't lie...
        continue;
      }
      final int numTerms = (int) terms.size();
      if (numTerms == -1) {
        throw new IllegalStateException("terms.size() must be implemented (it returned -1)");
      }
      final TermsEnum termsEnum = terms.iterator(null);

      DocsAndPositionsEnum docsAndPositionsEnum = null;

      boolean startedField = false;

      // NOTE: this is tricky, because TermVectors allow
      // indexing offsets but NOT positions.  So we must
      // lazily init the field by checking whether first
      // position we see is -1 or not.

      int termCount = 0;
      while (termsEnum.next() != null) {
        termCount++;

        final int freq = (int) termsEnum.totalTermFreq();

        if (startedField) {
          startTerm(termsEnum.term(), freq);
        }

        // TODO: we need a "query" API where we can ask (via
        // flex API) what this term was indexed with...
        // Both positions & offsets:
        docsAndPositionsEnum = termsEnum.docsAndPositions(null, null, true);
        final boolean hasOffsets;
        boolean hasPositions = false;
        if (docsAndPositionsEnum == null) {
          // Fallback: no offsets
          docsAndPositionsEnum = termsEnum.docsAndPositions(null, null, false);
          hasOffsets = false;
        } else {
          hasOffsets = true;
        }

        if (docsAndPositionsEnum != null) {
          final int docID = docsAndPositionsEnum.nextDoc();
          assert docID != DocIdSetIterator.NO_MORE_DOCS;
          assert docsAndPositionsEnum.freq() == freq;

          for (int posUpto = 0; posUpto < freq; posUpto++) {
            final int pos = docsAndPositionsEnum.nextPosition();
            if (!startedField) {
              assert numTerms > 0;
              hasPositions = pos != -1;
              startField(fieldInfo, numTerms, hasPositions, hasOffsets);
              startTerm(termsEnum.term(), freq);
              startedField = true;
            }
            final int startOffset;
            final int endOffset;
            if (hasOffsets) {
              startOffset = docsAndPositionsEnum.startOffset();
              endOffset = docsAndPositionsEnum.endOffset();
              assert startOffset != -1;
              assert endOffset != -1;
            } else {
              startOffset = -1;
              endOffset = -1;
            }
            assert !hasPositions || pos >= 0;
            addPosition(pos, startOffset, endOffset);
          }
        } else {
          if (!startedField) {
            assert numTerms > 0;
            startField(fieldInfo, numTerms, hasPositions, hasOffsets);
            startTerm(termsEnum.term(), freq);
            startedField = true;
          }
        }
      }
      assert termCount == numTerms;
    }
  }