// LUCENE-1727: make sure doc fields are stored in order
  public void testStoredFieldsOrder() throws Throwable {
    Directory d = newDirectory();
    IndexWriter w =
        new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
    Document doc = new Document();

    FieldType customType = new FieldType();
    customType.setStored(true);
    doc.add(newField("zzz", "a b c", customType));
    doc.add(newField("aaa", "a b c", customType));
    doc.add(newField("zzz", "1 2 3", customType));
    w.addDocument(doc);
    IndexReader r = w.getReader();
    Document doc2 = r.document(0);
    Iterator<IndexableField> it = doc2.getFields().iterator();
    assertTrue(it.hasNext());
    Field f = (Field) it.next();
    assertEquals(f.name(), "zzz");
    assertEquals(f.stringValue(), "a b c");

    assertTrue(it.hasNext());
    f = (Field) it.next();
    assertEquals(f.name(), "aaa");
    assertEquals(f.stringValue(), "a b c");

    assertTrue(it.hasNext());
    f = (Field) it.next();
    assertEquals(f.name(), "zzz");
    assertEquals(f.stringValue(), "1 2 3");
    assertFalse(it.hasNext());
    r.close();
    w.close();
    d.close();
  }
Beispiel #2
0
 static {
   customType2 = new FieldType(TextField.TYPE_STORED);
   customType2.setStoreTermVectors(true);
   customType2.setStoreTermVectorPositions(true);
   customType2.setStoreTermVectorOffsets(true);
   textField2 = new Field(TEXT_FIELD_2_KEY, FIELD_2_TEXT, customType2);
 }
 public Builder(
     String index, @Nullable Settings indexSettings, RootObjectMapper.Builder builder) {
   this.index = index;
   this.indexSettings = indexSettings;
   this.builderContext = new Mapper.BuilderContext(indexSettings, new ContentPath(1));
   this.rootObjectMapper = builder.build(builderContext);
   IdFieldMapper idFieldMapper = new IdFieldMapper();
   if (indexSettings != null) {
     String idIndexed = indexSettings.get("index.mapping._id.indexed");
     if (idIndexed != null && Booleans.parseBoolean(idIndexed, false)) {
       FieldType fieldType = new FieldType(IdFieldMapper.Defaults.FIELD_TYPE);
       fieldType.setTokenized(false);
       idFieldMapper = new IdFieldMapper(fieldType);
     }
   }
   this.rootMappers.put(IdFieldMapper.class, idFieldMapper);
   // add default mappers, order is important (for example analyzer should come before the rest
   // to set context.analyzer)
   this.rootMappers.put(SizeFieldMapper.class, new SizeFieldMapper());
   this.rootMappers.put(IndexFieldMapper.class, new IndexFieldMapper());
   this.rootMappers.put(SourceFieldMapper.class, new SourceFieldMapper());
   this.rootMappers.put(TypeFieldMapper.class, new TypeFieldMapper());
   this.rootMappers.put(AnalyzerMapper.class, new AnalyzerMapper());
   this.rootMappers.put(AllFieldMapper.class, new AllFieldMapper());
   this.rootMappers.put(BoostFieldMapper.class, new BoostFieldMapper());
   this.rootMappers.put(RoutingFieldMapper.class, new RoutingFieldMapper());
   this.rootMappers.put(TimestampFieldMapper.class, new TimestampFieldMapper());
   this.rootMappers.put(TTLFieldMapper.class, new TTLFieldMapper());
   this.rootMappers.put(UidFieldMapper.class, new UidFieldMapper());
   // don't add parent field, by default its "null"
 }
  /**
   * Create a new instance configured with the provided FieldType options. See {@link
   * #DEFAULT_FIELDTYPE}. a field type is used to articulate the desired options (namely
   * pointValues, docValues, stored). Legacy numerics is configurable this way too.
   */
  public PointVectorStrategy(SpatialContext ctx, String fieldNamePrefix, FieldType fieldType) {
    super(ctx, fieldNamePrefix);
    this.fieldNameX = fieldNamePrefix + SUFFIX_X;
    this.fieldNameY = fieldNamePrefix + SUFFIX_Y;

    int numPairs = 0;
    if ((this.hasStored = fieldType.stored())) {
      numPairs++;
    }
    if ((this.hasDocVals = fieldType.docValuesType() != DocValuesType.NONE)) {
      numPairs++;
    }
    if ((this.hasPointVals = fieldType.pointDimensionCount() > 0)) {
      numPairs++;
    }
    if (fieldType.indexOptions() != IndexOptions.NONE && fieldType.numericType() != null) {
      if (hasPointVals) {
        throw new IllegalArgumentException(
            "pointValues and LegacyNumericType are mutually exclusive");
      }
      if (fieldType.numericType() != FieldType.LegacyNumericType.DOUBLE) {
        throw new IllegalArgumentException(
            getClass() + " does not support " + fieldType.numericType());
      }
      numPairs++;
      legacyNumericFieldType = new FieldType(LegacyDoubleField.TYPE_NOT_STORED);
      legacyNumericFieldType.setNumericPrecisionStep(fieldType.numericPrecisionStep());
      legacyNumericFieldType.freeze();
    } else {
      legacyNumericFieldType = null;
    }
    this.fieldsLen = numPairs * 2;
  }
  private void indexDocument(IndexWriter iw, ProcessedDocument parsedDoc) throws IOException {

    org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document();

    FieldType customType = new FieldType(TextField.TYPE_STORED);
    customType.setStoreTermVectors(true);
    customType.setStoreTermVectorPositions(true);
    customType.setStoreTermVectorOffsets(false);

    doc.add(new Field(INDEX_FIELD_CONTENT, parsedDoc.getText(), customType));

    doc.add(new StringField(INDEX_FIELD_URL, parsedDoc.getDocumentURL(), Field.Store.YES));

    doc.add(new StringField(INDEX_FIELD_DOC_ID, parsedDoc.getDocumentId(), Field.Store.YES));

    doc.add(new TextField(INDEX_FIELD_TITLE, parsedDoc.getDocumentTitle(), Field.Store.YES));

    doc.add(new StringField(INDEX_FIELD_DOC_TYPE, parsedDoc.getDocumentType(), Field.Store.YES));

    /**
     * TODO: 2.2 -- The effect of boosting (Book Section 2.1.2)
     *
     * <p>Uncomment the lines below to demonstrate the effect of boosting
     */
    // if ( parsedDoc.getDocumentId().equals("g1-d13")) {
    // doc.setBoost(2);
    // }

    iw.addDocument(doc);
  }
  /**
   * Subclass can override this method to change the field type of the text field e.g. to change the
   * index options
   */
  protected FieldType getTextFieldType() {
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.setIndexOptions(IndexOptions.DOCS_ONLY);
    ft.setOmitNorms(true);

    return ft;
  }
  /** make sure we downgrade positions and payloads correctly */
  public void testMixing() throws Exception {
    // no positions
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);

    Directory dir = newDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);

    for (int i = 0; i < 20; i++) {
      Document doc = new Document();
      if (i < 19 && random().nextBoolean()) {
        for (int j = 0; j < 50; j++) {
          doc.add(new TextField("foo", "i have positions", Field.Store.NO));
        }
      } else {
        for (int j = 0; j < 50; j++) {
          doc.add(new Field("foo", "i have no positions", ft));
        }
      }
      iw.addDocument(doc);
      iw.commit();
    }

    if (random().nextBoolean()) {
      iw.forceMerge(1);
    }

    DirectoryReader ir = iw.getReader();
    FieldInfos fis = MultiFields.getMergedFieldInfos(ir);
    assertEquals(IndexOptions.DOCS_AND_FREQS, fis.fieldInfo("foo").getIndexOptions());
    assertFalse(fis.fieldInfo("foo").hasPayloads());
    iw.close();
    ir.close();
    dir.close(); // checkindex
  }
  public void testBasic() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    Document doc = new Document();
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
    Field f = newField("foo", "this is a test test", ft);
    doc.add(f);
    for (int i = 0; i < 100; i++) {
      w.addDocument(doc);
    }

    IndexReader reader = w.getReader();
    w.close();

    assertNotNull(MultiFields.getTermPositionsEnum(reader, "foo", new BytesRef("test")));

    PostingsEnum de =
        TestUtil.docs(random(), reader, "foo", new BytesRef("test"), null, PostingsEnum.FREQS);
    while (de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      assertEquals(2, de.freq());
    }

    reader.close();
    dir.close();
  }
  // Verifies no *.nrm exists when all fields omit norms:
  public void testNoNrmFile() throws Throwable {
    Directory ram = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriter writer =
        new IndexWriter(
            ram,
            newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
                .setMaxBufferedDocs(3)
                .setMergePolicy(newLogMergePolicy()));
    LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy();
    lmp.setMergeFactor(2);
    lmp.setNoCFSRatio(0.0);
    Document d = new Document();

    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.setOmitNorms(true);
    Field f1 = newField("f1", "This field has no norms", customType);
    d.add(f1);

    for (int i = 0; i < 30; i++) {
      writer.addDocument(d);
    }

    writer.commit();

    assertNoNrm(ram);

    // force merge
    writer.forceMerge(1);
    // flush
    writer.close();

    assertNoNrm(ram);
    ram.close();
  }
  @Override
  public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) throws IOException {
    if (!fieldType().indexed()) {
      return null;
    }

    final NumericType numericType = fieldType().numericType();
    if (numericType != null) {
      if (!(reuse instanceof NumericTokenStream
          && ((NumericTokenStream) reuse).getPrecisionStep() == type.numericPrecisionStep())) {
        // lazy init the TokenStream as it is heavy to instantiate
        // (attributes,...) if not needed (stored field loading)
        reuse = new NumericTokenStream(type.numericPrecisionStep());
      }
      final NumericTokenStream nts = (NumericTokenStream) reuse;
      // initialize value in TokenStream
      final Number val = (Number) fieldsData;
      switch (numericType) {
        case INT:
          nts.setIntValue(val.intValue());
          break;
        case LONG:
          nts.setLongValue(val.longValue());
          break;
        case FLOAT:
          nts.setFloatValue(val.floatValue());
          break;
        case DOUBLE:
          nts.setDoubleValue(val.doubleValue());
          break;
        default:
          throw new AssertionError("Should never get here");
      }
      return reuse;
    }

    if (!fieldType().tokenized()) {
      if (stringValue() == null) {
        throw new IllegalArgumentException("Non-Tokenized Fields must have a String value");
      }
      if (!(reuse instanceof StringTokenStream)) {
        // lazy init the TokenStream as it is heavy to instantiate
        // (attributes,...) if not needed (stored field loading)
        reuse = new StringTokenStream();
      }
      ((StringTokenStream) reuse).setValue(stringValue());
      return reuse;
    }

    if (tokenStream != null) {
      return tokenStream;
    } else if (readerValue() != null) {
      return analyzer.tokenStream(name(), readerValue());
    } else if (stringValue() != null) {
      return analyzer.tokenStream(name(), stringValue());
    }

    throw new IllegalArgumentException(
        "Field must have either TokenStream, String, Reader or Number value; got " + this);
  }
 /*
  * Test per field codec support - adding fields with random codecs
  */
 @Test
 public void testStressPerFieldCodec() throws IOException {
   Directory dir = newDirectory(random());
   final int docsPerRound = 97;
   int numRounds = atLeast(1);
   for (int i = 0; i < numRounds; i++) {
     int num = TestUtil.nextInt(random(), 30, 60);
     IndexWriterConfig config =
         newIndexWriterConfig(random(), TEST_VERSION_CURRENT, new MockAnalyzer(random()));
     config.setOpenMode(OpenMode.CREATE_OR_APPEND);
     IndexWriter writer = newWriter(dir, config);
     for (int j = 0; j < docsPerRound; j++) {
       final Document doc = new Document();
       for (int k = 0; k < num; k++) {
         FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
         customType.setTokenized(random().nextBoolean());
         customType.setOmitNorms(random().nextBoolean());
         Field field =
             newField("" + k, TestUtil.randomRealisticUnicodeString(random(), 128), customType);
         doc.add(field);
       }
       writer.addDocument(doc);
     }
     if (random().nextBoolean()) {
       writer.forceMerge(1);
     }
     writer.commit();
     assertEquals((i + 1) * docsPerRound, writer.maxDoc());
     writer.close();
   }
   dir.close();
 }
  @Override
  public void setUp() throws Exception {
    super.setUp();
    dir = newDirectory();
    RandomIndexWriter writer =
        new RandomIndexWriter(
            random(),
            dir,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                .setMaxBufferedDocs(_TestUtil.nextInt(random(), 50, 1000)));

    Document doc = new Document();
    FieldType customType = new FieldType(TextField.TYPE_STORED);
    customType.setOmitNorms(true);
    Field field = newField("field", "", customType);
    doc.add(field);

    NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ROOT));
    for (int i = 0; i < 1000; i++) {
      field.setStringValue(df.format(i));
      writer.addDocument(doc);
    }

    reader = writer.getReader();
    writer.close();
    searcher = newSearcher(reader);
  }
 private static FieldType idFieldType(Settings indexSettings) {
   FieldType fieldType = new FieldType(Defaults.FIELD_TYPE);
   boolean pre2x = Version.indexCreated(indexSettings).before(Version.V_2_0_0);
   if (pre2x && indexSettings.getAsBoolean("index.mapping._id.indexed", true) == false) {
     fieldType.setTokenized(false);
   }
   return fieldType;
 }
 private void addDoc(IndexWriter writer, String text, String count) throws IOException {
   Document doc = new Document();
   doc.add(new Field("ngram", text, StringField.TYPE_NOT_STORED));
   FieldType fieldType = new FieldType();
   fieldType.setStored(true);
   Field countField = new Field("count", count, fieldType);
   doc.add(countField);
   writer.addDocument(doc);
 }
Beispiel #15
0
 @Test
 public void testName() throws Exception {
   FieldType TYPE_STORED = new FieldType();
   TYPE_STORED.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
   TYPE_STORED.setTokenized(false);
   TYPE_STORED.setStored(true);
   TYPE_STORED.freeze();
   System.out.println(TYPE_STORED);
 }
 /**
  * Sets the boost factor on this field.
  *
  * @throws IllegalArgumentException if this field is not indexed, or if it omits norms.
  * @see #boost()
  */
 public void setBoost(float boost) {
   if (boost != 1.0f) {
     if (type.indexed() == false || type.omitNorms()) {
       throw new IllegalArgumentException(
           "You cannot set an index-time boost on an unindexed field, or one that omits norms");
     }
   }
   this.boost = boost;
 }
 /**
  * Expert: sets the token stream to be used for indexing and causes isIndexed() and isTokenized()
  * to return true. May be combined with stored values from stringValue() or getBinaryValue()
  */
 public void setTokenStream(TokenStream tokenStream) {
   if (!type.indexed() || !type.tokenized()) {
     throw new IllegalArgumentException("TokenStream fields must be indexed and tokenized");
   }
   if (type.numericType() != null) {
     throw new IllegalArgumentException("cannot set private TokenStream on numeric fields");
   }
   this.tokenStream = tokenStream;
 }
  public void test10kPulsed() throws Exception {
    // we always run this test with pulsing codec.
    Codec cp = _TestUtil.alwaysPostingsFormat(new Pulsing41PostingsFormat(1));

    File f = _TestUtil.getTempDir("10kpulsed");
    BaseDirectoryWrapper dir = newFSDirectory(f);
    dir.setCheckIndexOnClose(false); // we do this ourselves explicitly
    RandomIndexWriter iw =
        new RandomIndexWriter(
            random(),
            dir,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setCodec(cp));

    Document document = new Document();
    FieldType ft = new FieldType(TextField.TYPE_STORED);

    switch (_TestUtil.nextInt(random(), 0, 2)) {
      case 0:
        ft.setIndexOptions(IndexOptions.DOCS_ONLY);
        break;
      case 1:
        ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
        break;
      default:
        ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
        break;
    }

    Field field = newField("field", "", ft);
    document.add(field);

    NumberFormat df = new DecimalFormat("00000", new DecimalFormatSymbols(Locale.ROOT));

    for (int i = 0; i < 10050; i++) {
      field.setStringValue(df.format(i));
      iw.addDocument(document);
    }

    IndexReader ir = iw.getReader();
    iw.close();

    TermsEnum te = MultiFields.getTerms(ir, "field").iterator(null);
    DocsEnum de = null;

    for (int i = 0; i < 10050; i++) {
      String expected = df.format(i);
      assertEquals(expected, te.next().utf8ToString());
      de = _TestUtil.docs(random(), te, null, de, 0);
      assertTrue(de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
      assertEquals(DocIdSetIterator.NO_MORE_DOCS, de.nextDoc());
    }
    ir.close();

    _TestUtil.checkIndex(dir);
    dir.close();
  }
Beispiel #19
0
 private void addDoc(IndexWriter writer, DBObject document) throws IOException {
   Document luceneDoc = new Document();
   FieldType type = new FieldType();
   type.setIndexed(true);
   luceneDoc.add(
       new Field("name", (String) document.get("name"), Field.Store.YES, Field.Index.ANALYZED));
   luceneDoc.add(
       new Field("body", (String) document.get("body"), Field.Store.YES, Field.Index.ANALYZED));
   writer.addDocument(luceneDoc);
 }
  /**
   * Used for adding a document when a field needs to be created from a type and a string.
   *
   * <p>By default, the indexed value is the same as the stored value (taken from toInternal()).
   * Having a different representation for external, internal, and indexed would present quite a few
   * problems given the current Lucene architecture. An analyzer for adding docs would need to
   * translate internal-&gt;indexed while an analyzer for querying would need to translate
   * external-&gt;indexed.
   *
   * <p>The only other alternative to having internal==indexed would be to have internal==external.
   * In this case, toInternal should convert to the indexed representation, toExternal() should do
   * nothing, and createField() should *not* call toInternal, but use the external value and set
   * tokenized=true to get Lucene to convert to the internal(indexed) form. :TODO: clean up and
   * clarify this explanation.
   *
   * @see #toInternal
   */
  public StorableField createField(SchemaField field, Object value, float boost) {
    if (!field.indexed() && !field.stored()) {
      if (log.isTraceEnabled()) log.trace("Ignoring unindexed/unstored field: " + field);
      return null;
    }

    String val;
    try {
      val = toInternal(value.toString());
    } catch (RuntimeException e) {
      throw new SolrException(
          SolrException.ErrorCode.SERVER_ERROR,
          "Error while creating field '" + field + "' from value '" + value + "'",
          e);
    }
    if (val == null) return null;

    org.apache.lucene.document.FieldType newType = new org.apache.lucene.document.FieldType();
    newType.setTokenized(field.isTokenized());
    newType.setStored(field.stored());
    newType.setOmitNorms(field.omitNorms());
    newType.setIndexOptions(field.indexed() ? getIndexOptions(field, val) : IndexOptions.NONE);
    newType.setStoreTermVectors(field.storeTermVector());
    newType.setStoreTermVectorOffsets(field.storeTermOffsets());
    newType.setStoreTermVectorPositions(field.storeTermPositions());
    newType.setStoreTermVectorPayloads(field.storeTermPayloads());

    return createField(field.getName(), val, newType, boost);
  }
Beispiel #21
0
  /*
   * If you write your own analyzer please register it here
   */
  static {
    FileAnalyzerFactory[] analyzers = {
      DEFAULT_ANALYZER_FACTORY,
      new IgnorantAnalyzerFactory(),
      new BZip2AnalyzerFactory(),
      new XMLAnalyzerFactory(),
      new TroffAnalyzerFactory(),
      new ELFAnalyzerFactory(),
      new JavaClassAnalyzerFactory(),
      new ImageAnalyzerFactory(),
      JarAnalyzerFactory.DEFAULT_INSTANCE,
      ZipAnalyzerFactory.DEFAULT_INSTANCE,
      new TarAnalyzerFactory(),
      new CAnalyzerFactory(),
      new CSharpAnalyzerFactory(),
      new VBAnalyzerFactory(),
      new CxxAnalyzerFactory(),
      new ErlangAnalyzerFactory(),
      new ShAnalyzerFactory(),
      PlainAnalyzerFactory.DEFAULT_INSTANCE,
      new UuencodeAnalyzerFactory(),
      new GZIPAnalyzerFactory(),
      new JavaAnalyzerFactory(),
      new JavaScriptAnalyzerFactory(),
      new PythonAnalyzerFactory(),
      new RustAnalyzerFactory(),
      new PerlAnalyzerFactory(),
      new PhpAnalyzerFactory(),
      new LispAnalyzerFactory(),
      new TclAnalyzerFactory(),
      new ScalaAnalyzerFactory(),
      new ClojureAnalyzerFactory(),
      new SQLAnalyzerFactory(),
      new PLSQLAnalyzerFactory(),
      new FortranAnalyzerFactory(),
      new HaskellAnalyzerFactory(),
      new GolangAnalyzerFactory(),
      new LuaAnalyzerFactory(),
      new PascalAnalyzerFactory()
    };

    for (FileAnalyzerFactory analyzer : analyzers) {
      registerAnalyzer(analyzer);
    }

    for (FileAnalyzerFactory analyzer : analyzers) {
      if (analyzer.getName() != null && !analyzer.getName().isEmpty()) {
        fileTypeDescriptions.put(analyzer.getAnalyzer().getFileTypeName(), analyzer.getName());
      }
    }

    string_ft_stored_nanalyzed_norms.setOmitNorms(false);
    string_ft_nstored_nanalyzed_norms.setOmitNorms(false);
  }
Beispiel #22
0
 /** Expert API */
 public BinaryPoint(String name, byte[] packedPoint, FieldType type) {
   super(name, packedPoint, type);
   if (packedPoint.length != type.pointDimensionCount() * type.pointNumBytes()) {
     throw new IllegalArgumentException(
         "packedPoint is length="
             + packedPoint.length
             + " but type.pointDimensionCount()="
             + type.pointDimensionCount()
             + " and type.pointNumBytes()="
             + type.pointNumBytes());
   }
 }
  public void testReadSkip() throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig iwConf =
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    iwConf.setMaxBufferedDocs(RandomInts.randomIntBetween(random(), 2, 30));
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);

    FieldType ft = new FieldType();
    ft.setStored(true);
    ft.freeze();

    final String string = _TestUtil.randomSimpleString(random(), 50);
    final byte[] bytes = string.getBytes("UTF-8");
    final long l = random().nextBoolean() ? random().nextInt(42) : random().nextLong();
    final int i = random().nextBoolean() ? random().nextInt(42) : random().nextInt();
    final float f = random().nextFloat();
    final double d = random().nextDouble();

    List<Field> fields =
        Arrays.asList(
            new Field("bytes", bytes, ft),
            new Field("string", string, ft),
            new LongField("long", l, Store.YES),
            new IntField("int", i, Store.YES),
            new FloatField("float", f, Store.YES),
            new DoubleField("double", d, Store.YES));

    for (int k = 0; k < 100; ++k) {
      Document doc = new Document();
      for (Field fld : fields) {
        doc.add(fld);
      }
      iw.w.addDocument(doc);
    }
    iw.commit();

    final DirectoryReader reader = DirectoryReader.open(dir);
    final int docID = random().nextInt(100);
    for (Field fld : fields) {
      String fldName = fld.name();
      final Document sDoc = reader.document(docID, Collections.singleton(fldName));
      final IndexableField sField = sDoc.getField(fldName);
      if (Field.class.equals(fld.getClass())) {
        assertEquals(fld.binaryValue(), sField.binaryValue());
        assertEquals(fld.stringValue(), sField.stringValue());
      } else {
        assertEquals(fld.numericValue(), sField.numericValue());
      }
    }
    reader.close();
    iw.close();
    dir.close();
  }
  // Tests whether merging of docs that have different
  // omitNorms for the same field works
  public void testMixedMerge() throws Exception {
    Directory ram = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriter writer =
        new IndexWriter(
            ram,
            newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
                .setMaxBufferedDocs(3)
                .setMergePolicy(newLogMergePolicy(2)));
    Document d = new Document();

    // this field will have norms
    Field f1 = newTextField("f1", "This field has norms", Field.Store.NO);
    d.add(f1);

    // this field will NOT have norms
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.setOmitNorms(true);
    Field f2 = newField("f2", "This field has NO norms in all docs", customType);
    d.add(f2);

    for (int i = 0; i < 30; i++) {
      writer.addDocument(d);
    }

    // now we add another document which has norms for field f2 and not for f1 and verify if the
    // SegmentMerger
    // keep things constant
    d = new Document();

    // Reverese
    d.add(newField("f1", "This field has norms", customType));

    d.add(newTextField("f2", "This field has NO norms in all docs", Field.Store.NO));

    for (int i = 0; i < 30; i++) {
      writer.addDocument(d);
    }

    // force merge
    writer.forceMerge(1);
    // flush
    writer.close();

    SegmentReader reader = getOnlySegmentReader(DirectoryReader.open(ram));
    FieldInfos fi = reader.getFieldInfos();
    assertTrue("OmitNorms field bit should be set.", fi.fieldInfo("f1").omitsNorms());
    assertTrue("OmitNorms field bit should be set.", fi.fieldInfo("f2").omitsNorms());

    reader.close();
    ram.close();
  }
  public NTriplesFileLuceneSyntacticIndexCreator(
      InputStream nTriplesStream, String indexPath, String searchField) throws IOException {
    // setup the index
    Directory directory = FSDirectory.open(new File(indexPath));

    // setup the index analyzer
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43);
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_43, analyzer);
    indexWriterConfig.setRAMBufferSizeMB(1024.0);
    indexWriterConfig.setOpenMode(OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(directory, indexWriterConfig);

    System.out.println("Creating index ...");

    // setup the index fields, here two fields, for URI and text
    FieldType stringType = new FieldType(StringField.TYPE_STORED);
    stringType.setStoreTermVectors(false);
    FieldType textType = new FieldType(TextField.TYPE_STORED);
    textType.setStoreTermVectors(false);

    Set<Document> documents = new HashSet<Document>();

    Iterator<Triple> iterator =
        RiotReader.createIteratorTriples(nTriplesStream, Lang.NTRIPLES, null);

    Triple triple;
    String text;
    String uri;
    Document doc;
    int i = 0;
    while (iterator.hasNext()) {
      triple = iterator.next();

      uri = triple.getSubject().getURI();
      text = triple.getObject().getLiteralLexicalForm();

      doc = new Document();
      doc.add(new Field("uri", uri, stringType));
      doc.add(new Field(searchField, text, textType));

      writer.addDocument(doc);
      if (i++ % 10000 == 0) {
        //				writer.commit();
        System.out.println(i);
      }
    }

    writer.commit();
    writer.close();
  }
Beispiel #26
0
  public static Document createDocument(int n, String indexName, int numFields) {
    StringBuilder sb = new StringBuilder();
    FieldType customType = new FieldType(TextField.TYPE_STORED);
    customType.setStoreTermVectors(true);
    customType.setStoreTermVectorPositions(true);
    customType.setStoreTermVectorOffsets(true);

    FieldType customType1 = new FieldType(StringField.TYPE_STORED);
    customType1.setStoreTermVectors(true);
    customType1.setStoreTermVectorPositions(true);
    customType1.setStoreTermVectorOffsets(true);

    final Document doc = new Document();
    doc.add(new Field("id", Integer.toString(n), customType1));
    doc.add(new Field("indexname", indexName, customType1));
    sb.append("a");
    sb.append(n);
    doc.add(new Field("field1", sb.toString(), customType));
    sb.append(" b");
    sb.append(n);
    for (int i = 1; i < numFields; i++) {
      doc.add(new Field("field" + (i + 1), sb.toString(), customType));
    }
    return doc;
  }
  @Test
  public void testLotsOfPhrases() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer =
        new IndexWriter(
            dir,
            newIndexWriterConfig(
                TEST_VERSION_CURRENT,
                new MockAnalyzer(
                    random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)));
    FieldType type = new FieldType(TextField.TYPE_STORED);
    type.setStoreTermVectorOffsets(true);
    type.setStoreTermVectorPositions(true);
    type.setStoreTermVectors(true);
    type.freeze();
    String[] terms = {"org", "apache", "lucene"};
    int iters = atLeast(1000);
    StringBuilder builder = new StringBuilder();
    for (int i = 0; i < iters; i++) {
      builder.append(terms[random().nextInt(terms.length)]).append(" ");
      if (random().nextInt(6) == 3) {
        builder.append("elasticsearch").append(" ");
      }
    }
    Document doc = new Document();
    Field field = new Field("field", builder.toString(), type);
    doc.add(field);
    writer.addDocument(doc);
    PhraseQuery query = new PhraseQuery();
    query.add(new Term("field", "org"));
    query.add(new Term("field", "apache"));
    query.add(new Term("field", "lucene"));

    XFastVectorHighlighter highlighter = new XFastVectorHighlighter();
    IndexReader reader = DirectoryReader.open(writer, true);
    IndexSearcher searcher = newSearcher(reader);
    TopDocs hits = searcher.search(query, 10);
    assertEquals(1, hits.totalHits);
    XFieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
    String[] bestFragments =
        highlighter.getBestFragments(fieldQuery, reader, hits.scoreDocs[0].doc, "field", 1000, 1);
    for (int i = 0; i < bestFragments.length; i++) {
      String result = bestFragments[i].replaceAll("<b>org apache lucene</b>", "FOOBAR");
      assertFalse(result.contains("org apache lucene"));
    }
    reader.close();
    writer.close();
    dir.close();
  }
 /**
  * Persists all snapshots information. If the given id and segment are not null, it persists their
  * information as well.
  */
 private void persistSnapshotInfos(String id, String segment) throws IOException {
   writer.deleteAll();
   Document d = new Document();
   FieldType ft = new FieldType();
   ft.setStored(true);
   d.add(new Field(SNAPSHOTS_ID, "", ft));
   for (Entry<String, String> e : super.getSnapshots().entrySet()) {
     d.add(new Field(e.getKey(), e.getValue(), ft));
   }
   if (id != null) {
     d.add(new Field(id, segment, ft));
   }
   writer.addDocument(d);
   writer.commit();
 }
 /**
  * Create field with String value.
  *
  * @param name field name
  * @param value string value
  * @param type field type
  * @throws IllegalArgumentException if either the name or value is null, or if the field's type is
  *     neither indexed() nor stored(), or if indexed() is false but storeTermVectors() is true.
  * @throws NullPointerException if the type is null
  */
 public Field(String name, String value, FieldType type) {
   if (name == null) {
     throw new IllegalArgumentException("name cannot be null");
   }
   if (value == null) {
     throw new IllegalArgumentException("value cannot be null");
   }
   if (!type.stored() && !type.indexed()) {
     throw new IllegalArgumentException(
         "it doesn't make sense to have a field that " + "is neither indexed nor stored");
   }
   this.type = type;
   this.name = name;
   this.fieldsData = value;
 }
  static {
    TYPE_NOT_STORED.setOmitNorms(true);
    TYPE_NOT_STORED.setIndexOptions(IndexOptions.DOCS);
    TYPE_NOT_STORED.setTokenized(false);
    TYPE_NOT_STORED.freeze();

    TYPE_STORED.setOmitNorms(true);
    TYPE_STORED.setIndexOptions(IndexOptions.DOCS);
    TYPE_STORED.setStored(true);
    TYPE_STORED.setTokenized(false);
    TYPE_STORED.freeze();
  }