Example #1
  public static Document createDocument(int n, String indexName, int numFields) {
    StringBuilder sb = new StringBuilder();
    FieldType customType = new FieldType(TextField.TYPE_STORED);
    customType.setStoreTermVectors(true);
    customType.setStoreTermVectorPositions(true);
    customType.setStoreTermVectorOffsets(true);

    FieldType customType1 = new FieldType(StringField.TYPE_STORED);
    customType1.setStoreTermVectors(true);
    customType1.setStoreTermVectorPositions(true);
    customType1.setStoreTermVectorOffsets(true);

    final Document doc = new Document();
    doc.add(new Field("id", Integer.toString(n), customType1));
    doc.add(new Field("indexname", indexName, customType1));
    sb.append("a");
    sb.append(n);
    doc.add(new Field("field1", sb.toString(), customType));
    sb.append(" b");
    sb.append(n);
    for (int i = 1; i < numFields; i++) {
      doc.add(new Field("field" + (i + 1), sb.toString(), customType));
    }
    return doc;
  }
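A minimal usage sketch for the helper above, assuming an already-open IndexWriter named writer (the index name and counts are illustrative):

// Hypothetical driver: index ten documents, each with three term-vector body fields.
for (int n = 0; n < 10; n++) {
  writer.addDocument(createDocument(n, "test-index", 3));
}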
  /**
   * Translates the pre-4.0 enums for specifying how a field should be indexed into the 4.0 {@link
   * FieldType} approach.
   *
   * @deprecated This is here only to ease transition from the pre-4.0 APIs.
   */
  @Deprecated
  public static final FieldType translateFieldType(
      Store store, Index index, TermVector termVector) {
    final FieldType ft = new FieldType();

    ft.setStored(store == Store.YES);

    switch (index) {
      case ANALYZED:
        ft.setIndexed(true);
        ft.setTokenized(true);
        break;
      case ANALYZED_NO_NORMS:
        ft.setIndexed(true);
        ft.setTokenized(true);
        ft.setOmitNorms(true);
        break;
      case NOT_ANALYZED:
        ft.setIndexed(true);
        ft.setTokenized(false);
        break;
      case NOT_ANALYZED_NO_NORMS:
        ft.setIndexed(true);
        ft.setTokenized(false);
        ft.setOmitNorms(true);
        break;
      case NO:
        break;
    }

    switch (termVector) {
      case NO:
        break;
      case YES:
        ft.setStoreTermVectors(true);
        break;
      case WITH_POSITIONS:
        ft.setStoreTermVectors(true);
        ft.setStoreTermVectorPositions(true);
        break;
      case WITH_OFFSETS:
        ft.setStoreTermVectors(true);
        ft.setStoreTermVectorOffsets(true);
        break;
      case WITH_POSITIONS_OFFSETS:
        ft.setStoreTermVectors(true);
        ft.setStoreTermVectorPositions(true);
        ft.setStoreTermVectorOffsets(true);
        break;
    }
    ft.freeze();
    return ft;
  }
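A hedged usage sketch of the deprecated helper above; under the pre-4.0 semantics, this triple should yield the equivalent of TextField.TYPE_STORED plus term vectors with positions and offsets (doc is an assumed Document):

// Maps the pre-4.0 (Store, Index, TermVector) triple onto a frozen FieldType.
FieldType ft = translateFieldType(Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS);
doc.add(new Field("body", "some analyzed text", ft));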
  private void indexDocument(IndexWriter iw, ProcessedDocument parsedDoc) throws IOException {

    org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document();

    FieldType customType = new FieldType(TextField.TYPE_STORED);
    customType.setStoreTermVectors(true);
    customType.setStoreTermVectorPositions(true);
    customType.setStoreTermVectorOffsets(false);

    doc.add(new Field(INDEX_FIELD_CONTENT, parsedDoc.getText(), customType));

    doc.add(new StringField(INDEX_FIELD_URL, parsedDoc.getDocumentURL(), Field.Store.YES));

    doc.add(new StringField(INDEX_FIELD_DOC_ID, parsedDoc.getDocumentId(), Field.Store.YES));

    doc.add(new TextField(INDEX_FIELD_TITLE, parsedDoc.getDocumentTitle(), Field.Store.YES));

    doc.add(new StringField(INDEX_FIELD_DOC_TYPE, parsedDoc.getDocumentType(), Field.Store.YES));

    /**
     * TODO: 2.2 -- The effect of boosting (Book Section 2.1.2)
     *
     * <p>Uncomment the lines below to demonstrate the effect of boosting
     */
    // if ( parsedDoc.getDocumentId().equals("g1-d13")) {
    // doc.setBoost(2);
    // }

    iw.addDocument(doc);
  }
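If the boosting demo above is re-enabled on a Lucene 4.x codebase, note that index-time boosts are set per field (Field.setBoost) rather than per document; a hedged, illustrative variant:

// Illustrative only: boost the content field of one specific document.
Field content = new Field(INDEX_FIELD_CONTENT, parsedDoc.getText(), customType);
if (parsedDoc.getDocumentId().equals("g1-d13")) {
  content.setBoost(2f); // per-field boost; Document.setBoost no longer exists in 4.x
}
doc.add(content);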
  // LUCENE-325: test forceMergeDeletes without waiting, when
  // many adjacent merges are required
  public void testForceMergeDeletes3() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer =
        new IndexWriter(
            dir,
            newIndexWriterConfig(new MockAnalyzer(random()))
                .setMaxBufferedDocs(2)
                .setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                .setMergePolicy(newLogMergePolicy(50)));

    FieldType customType = new FieldType();
    customType.setStored(true);

    FieldType customType1 = new FieldType(TextField.TYPE_NOT_STORED);
    customType1.setTokenized(false);
    customType1.setStoreTermVectors(true);
    customType1.setStoreTermVectorPositions(true);
    customType1.setStoreTermVectorOffsets(true);

    Document document = new Document();
    Field storedField = newField("stored", "stored", customType);
    document.add(storedField);
    Field termVectorField = newField("termVector", "termVector", customType1);
    document.add(termVectorField);
    Field idField = newStringField("id", "", Field.Store.NO);
    document.add(idField);
    for (int i = 0; i < 98; i++) {
      idField.setStringValue("" + i);
      writer.addDocument(document);
    }
    writer.close();

    IndexReader ir = DirectoryReader.open(dir);
    assertEquals(98, ir.maxDoc());
    assertEquals(98, ir.numDocs());
    ir.close();

    IndexWriterConfig dontMergeConfig =
        new IndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE);
    writer = new IndexWriter(dir, dontMergeConfig);
    for (int i = 0; i < 98; i += 2) {
      writer.deleteDocuments(new Term("id", "" + i));
    }
    writer.close();
    ir = DirectoryReader.open(dir);
    assertEquals(49, ir.numDocs());
    ir.close();

    writer =
        new IndexWriter(
            dir,
            newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy(3)));
    writer.forceMergeDeletes(false);
    writer.close();
    ir = DirectoryReader.open(dir);
    assertEquals(49, ir.maxDoc());
    assertEquals(49, ir.numDocs());
    ir.close();
    dir.close();
  }
  /**
   * Used for adding a document when a field needs to be created from a type and a string.
   *
   * <p>By default, the indexed value is the same as the stored value (taken from toInternal()).
   * Having a different representation for external, internal, and indexed would present quite a few
   * problems given the current Lucene architecture. An analyzer for adding docs would need to
   * translate internal-&gt;indexed while an analyzer for querying would need to translate
   * external-&gt;indexed.
   *
   * <p>The only other alternative to having internal==indexed would be to have internal==external.
   * In this case, toInternal should convert to the indexed representation, toExternal() should do
   * nothing, and createField() should *not* call toInternal, but use the external value and set
   * tokenized=true to get Lucene to convert to the internal(indexed) form. :TODO: clean up and
   * clarify this explanation.
   *
   * @see #toInternal
   */
  public StorableField createField(SchemaField field, Object value, float boost) {
    if (!field.indexed() && !field.stored()) {
      if (log.isTraceEnabled()) log.trace("Ignoring unindexed/unstored field: " + field);
      return null;
    }

    String val;
    try {
      val = toInternal(value.toString());
    } catch (RuntimeException e) {
      throw new SolrException(
          SolrException.ErrorCode.SERVER_ERROR,
          "Error while creating field '" + field + "' from value '" + value + "'",
          e);
    }
    if (val == null) return null;

    org.apache.lucene.document.FieldType newType = new org.apache.lucene.document.FieldType();
    newType.setTokenized(field.isTokenized());
    newType.setStored(field.stored());
    newType.setOmitNorms(field.omitNorms());
    newType.setIndexOptions(field.indexed() ? getIndexOptions(field, val) : IndexOptions.NONE);
    newType.setStoreTermVectors(field.storeTermVector());
    newType.setStoreTermVectorOffsets(field.storeTermOffsets());
    newType.setStoreTermVectorPositions(field.storeTermPositions());
    newType.setStoreTermVectorPayloads(field.storeTermPayloads());

    return createField(field.getName(), val, newType, boost);
  }
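A minimal, hypothetical call site for the method above (the SchemaField and value are illustrative); note the null contract:

StorableField f = createField(schemaField, "Lucene in Action", 1.0f);
if (f == null) {
  // the field was neither indexed nor stored, or toInternal() returned null
} else {
  luceneDoc.add((Field) f); // Field implements both StorableField and IndexableField
}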
Example #6
 static {
   customType2 = new FieldType(TextField.TYPE_STORED);
   customType2.setStoreTermVectors(true);
   customType2.setStoreTermVectorPositions(true);
   customType2.setStoreTermVectorOffsets(true);
   textField2 = new Field(TEXT_FIELD_2_KEY, FIELD_2_TEXT, customType2);
 }
  public void testChangeGaps() throws Exception {
    // LUCENE-5324: check that it is possible to change the wrapper's gaps
    final int positionGap = random().nextInt(1000);
    final int offsetGap = random().nextInt(1000);
    final Analyzer delegate = new MockAnalyzer(random());
    final Analyzer a =
        new DelegatingAnalyzerWrapper(delegate.getReuseStrategy()) {
          @Override
          protected Analyzer getWrappedAnalyzer(String fieldName) {
            return delegate;
          }

          @Override
          public int getPositionIncrementGap(String fieldName) {
            return positionGap;
          }

          @Override
          public int getOffsetGap(String fieldName) {
            return offsetGap;
          }
        };

    final RandomIndexWriter writer = new RandomIndexWriter(random(), newDirectory(), a);
    final Document doc = new Document();
    final FieldType ft = new FieldType();
    ft.setIndexOptions(IndexOptions.DOCS);
    ft.setTokenized(true);
    ft.setStoreTermVectors(true);
    ft.setStoreTermVectorPositions(true);
    ft.setStoreTermVectorOffsets(true);
    doc.add(new Field("f", "a", ft));
    doc.add(new Field("f", "a", ft));
    writer.addDocument(doc);
    final LeafReader reader = getOnlySegmentReader(writer.getReader());
    final Fields fields = reader.getTermVectors(0);
    final Terms terms = fields.terms("f");
    final TermsEnum te = terms.iterator();
    assertEquals(new BytesRef("a"), te.next());
    final PostingsEnum dpe = te.postings(null, PostingsEnum.ALL);
    assertEquals(0, dpe.nextDoc());
    assertEquals(2, dpe.freq());
    assertEquals(0, dpe.nextPosition());
    assertEquals(0, dpe.startOffset());
    final int endOffset = dpe.endOffset();
    assertEquals(1 + positionGap, dpe.nextPosition());
    assertEquals(1 + endOffset + offsetGap, dpe.endOffset());
    assertEquals(null, te.next());
    reader.close();
    writer.close();
    writer.w.getDirectory().close();
  }
  protected DirectoryReader indexDocsWithLucene(TestDoc[] testDocs) throws IOException {

    Map<String, Analyzer> mapping = new HashMap<String, Analyzer>();
    for (TestFieldSetting field : testDocs[0].fieldSettings) {
      if (field.storedPayloads) {
        mapping.put(
            field.name,
            new Analyzer() {
              @Override
              protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
                Tokenizer tokenizer = new StandardTokenizer(Version.CURRENT.luceneVersion, reader);
                TokenFilter filter = new LowerCaseFilter(Version.CURRENT.luceneVersion, tokenizer);
                filter = new TypeAsPayloadTokenFilter(filter);
                return new TokenStreamComponents(tokenizer, filter);
              }
            });
      }
    }
    PerFieldAnalyzerWrapper wrapper =
        new PerFieldAnalyzerWrapper(
            new StandardAnalyzer(Version.CURRENT.luceneVersion, CharArraySet.EMPTY_SET), mapping);

    Directory dir = new RAMDirectory();
    IndexWriterConfig conf = new IndexWriterConfig(Version.CURRENT.luceneVersion, wrapper);

    conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(dir, conf);

    for (TestDoc doc : testDocs) {
      Document d = new Document();
      d.add(new Field("id", doc.id, StringField.TYPE_STORED));
      for (int i = 0; i < doc.fieldContent.length; i++) {
        FieldType type = new FieldType(TextField.TYPE_STORED);
        TestFieldSetting fieldSetting = doc.fieldSettings[i];

        type.setStoreTermVectorOffsets(fieldSetting.storedOffset);
        type.setStoreTermVectorPayloads(fieldSetting.storedPayloads);
        type.setStoreTermVectorPositions(
            fieldSetting.storedPositions
                || fieldSetting.storedPayloads
                || fieldSetting.storedOffset);
        type.setStoreTermVectors(true);
        type.freeze();
        d.add(new Field(fieldSetting.name, doc.fieldContent[i], type));
      }
      writer.updateDocument(new Term("id", doc.id), d);
      writer.commit();
    }
    writer.close();

    return DirectoryReader.open(dir);
  }
  @Test
  public void testLotsOfPhrases() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer =
        new IndexWriter(
            dir,
            newIndexWriterConfig(
                TEST_VERSION_CURRENT,
                new MockAnalyzer(
                    random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)));
    FieldType type = new FieldType(TextField.TYPE_STORED);
    type.setStoreTermVectorOffsets(true);
    type.setStoreTermVectorPositions(true);
    type.setStoreTermVectors(true);
    type.freeze();
    String[] terms = {"org", "apache", "lucene"};
    int iters = atLeast(1000);
    StringBuilder builder = new StringBuilder();
    for (int i = 0; i < iters; i++) {
      builder.append(terms[random().nextInt(terms.length)]).append(" ");
      if (random().nextInt(6) == 3) {
        builder.append("elasticsearch").append(" ");
      }
    }
    Document doc = new Document();
    Field field = new Field("field", builder.toString(), type);
    doc.add(field);
    writer.addDocument(doc);
    PhraseQuery query = new PhraseQuery();
    query.add(new Term("field", "org"));
    query.add(new Term("field", "apache"));
    query.add(new Term("field", "lucene"));

    XFastVectorHighlighter highlighter = new XFastVectorHighlighter();
    IndexReader reader = DirectoryReader.open(writer, true);
    IndexSearcher searcher = newSearcher(reader);
    TopDocs hits = searcher.search(query, 10);
    assertEquals(1, hits.totalHits);
    XFieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
    String[] bestFragments =
        highlighter.getBestFragments(fieldQuery, reader, hits.scoreDocs[0].doc, "field", 1000, 1);
    for (int i = 0; i < bestFragments.length; i++) {
      String result = bestFragments[i].replaceAll("<b>org apache lucene</b>", "FOOBAR");
      assertFalse(result.contains("org apache lucene"));
    }
    reader.close();
    writer.close();
    dir.close();
  }
Example #10
  // LUCENE-1270
  public void testHangOnClose() throws IOException {

    Directory dir = newDirectory();
    LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy();
    lmp.setNoCFSRatio(0.0);
    lmp.setMergeFactor(100);
    IndexWriter writer =
        new IndexWriter(
            dir,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                .setMaxBufferedDocs(5)
                .setMergePolicy(lmp));

    Document doc = new Document();
    FieldType customType = new FieldType(TextField.TYPE_STORED);
    customType.setStoreTermVectors(true);
    customType.setStoreTermVectorPositions(true);
    customType.setStoreTermVectorOffsets(true);
    doc.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", customType));
    for (int i = 0; i < 60; i++) writer.addDocument(doc);

    Document doc2 = new Document();
    FieldType customType2 = new FieldType();
    customType2.setStored(true);
    doc2.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", customType2));
    doc2.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", customType2));
    doc2.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", customType2));
    doc2.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", customType2));
    for (int i = 0; i < 10; i++) writer.addDocument(doc2);
    writer.close();

    Directory dir2 = newDirectory();
    lmp = new LogByteSizeMergePolicy();
    lmp.setMinMergeMB(0.0001);
    lmp.setNoCFSRatio(0.0);
    lmp.setMergeFactor(4);
    writer =
        new IndexWriter(
            dir2,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                .setMergeScheduler(new SerialMergeScheduler())
                .setMergePolicy(lmp));
    writer.addIndexes(dir);
    writer.close();
    dir.close();
    dir2.close();
  }
 // make 1 doc with multi valued field
 protected void make1dmfIndex(Analyzer analyzer, String... values) throws Exception {
   IndexWriter writer =
       new IndexWriter(
           dir,
           new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer).setOpenMode(OpenMode.CREATE));
   Document doc = new Document();
   FieldType customType = new FieldType(TextField.TYPE_STORED);
   customType.setStoreTermVectors(true);
   customType.setStoreTermVectorOffsets(true);
   customType.setStoreTermVectorPositions(true);
   for (String value : values) {
     doc.add(new Field(F, value, customType));
   }
   writer.addDocument(doc);
   writer.close();
   if (reader != null) reader.close();
   reader = DirectoryReader.open(dir);
 }
 protected void makeUnstoredIndex() throws Exception {
   IndexWriter writer =
       new IndexWriter(
           dir,
           new IndexWriterConfig(TEST_VERSION_CURRENT, analyzerW).setOpenMode(OpenMode.CREATE));
   Document doc = new Document();
   FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
   customType.setStoreTermVectors(true);
   customType.setStoreTermVectorOffsets(true);
   customType.setStoreTermVectorPositions(true);
   doc.add(new Field(F, "aaa", customType));
   // doc.add( new Field( F, "aaa", Store.NO, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS )
   // );
   writer.addDocument(doc);
   writer.close();
   if (reader != null) reader.close();
   reader = DirectoryReader.open(dir);
 }
  /**
   * Indexes a single PDF file.
   *
   * @param f the PDF file
   * @param writer the IndexWriter
   * @throws IOException
   */
  public static void indexFile(File f, IndexWriter writer) throws IOException {

    // Load the file with PDFBox
    PDDocument pddDocument = PDDocument.load(f.getAbsolutePath());
    PDFTextStripper textStripper = new PDFTextStripper();
    int numPages = pddDocument.getNumberOfPages();
    String pageContent;

    // Declare a custom field type
    FieldType fieldText = new FieldType();
    fieldText.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    fieldText.setStored(false);
    fieldText.setStoreTermVectorOffsets(true);
    fieldText.setStoreTermVectorPositions(true);
    fieldText.setStoreTermVectors(true);

    // Walk and index each page of the file, storing the page number and the
    // file title, and indexing the content
    // PDFTextStripper pages are 1-based, so iterate from page 1 to numPages inclusive
    for (int i = 1; i <= numPages; i++) {
      textStripper.setStartPage(i);
      textStripper.setEndPage(i);
      // grab one page
      pageContent = textStripper.getText(pddDocument);
      if (pageContent != null && !pageContent.isEmpty()) {
        pageContent = pageContent.toLowerCase();
      }

      if (pageContent != null) {
        // Build the document to index for this page (field names are illustrative)
        Document document = new Document();

        // Page number
        document.add(new StringField("pageNumber", String.valueOf(i), Field.Store.YES));
        // Page content
        document.add(new Field("content", pageContent, fieldText));
        // File title
        document.add(new StringField("title", f.getName(), Field.Store.YES));

        // Add the document to the index
        writer.addDocument(document);
      }
    }

    // Close the PDF file
    pddDocument.close();
  }
  private void indexDocument(IndexWriter iw, NewsStory newsStory) throws IOException {

    org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document();

    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.setStoreTermVectors(true);
    customType.setStoreTermVectorPositions(true);
    customType.setStoreTermVectorOffsets(false);

    doc.add(new Field(INDEX_FIELD_CONTENT, newsStory.getContent().getText(), customType));

    doc.add(new StringField(INDEX_FIELD_URL, newsStory.getUrl(), Field.Store.YES));

    doc.add(new StringField(INDEX_FIELD_DOC_ID, newsStory.getId(), Field.Store.YES));

    doc.add(new TextField(INDEX_FIELD_TITLE, newsStory.getTitle(), Field.Store.YES));

    iw.addDocument(doc);
  }
  public void testDuelMemoryIndexCoreDirectoryWithArrayField() throws Exception {

    final String field_name = "text";
    MockAnalyzer mockAnalyzer = new MockAnalyzer(random());
    if (random().nextBoolean()) {
      mockAnalyzer.setOffsetGap(random().nextInt(100));
    }
    // index into a random directory
    FieldType type = new FieldType(TextField.TYPE_STORED);
    type.setStoreTermVectorOffsets(true);
    type.setStoreTermVectorPayloads(false);
    type.setStoreTermVectorPositions(true);
    type.setStoreTermVectors(true);
    type.freeze();

    Document doc = new Document();
    doc.add(new Field(field_name, "la la", type));
    doc.add(new Field(field_name, "foo bar foo bar foo", type));

    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(random(), mockAnalyzer));
    writer.updateDocument(new Term("id", "1"), doc);
    writer.commit();
    writer.close();
    DirectoryReader reader = DirectoryReader.open(dir);

    // Index document in Memory index
    MemoryIndex memIndex = new MemoryIndex(true);
    memIndex.addField(field_name, "la la", mockAnalyzer);
    memIndex.addField(field_name, "foo bar foo bar foo", mockAnalyzer);

    // compare term vectors
    Terms ramTv = reader.getTermVector(0, field_name);
    IndexReader memIndexReader = memIndex.createSearcher().getIndexReader();
    TestUtil.checkReader(memIndexReader);
    Terms memTv = memIndexReader.getTermVector(0, field_name);

    compareTermVectors(ramTv, memTv, field_name);
    memIndexReader.close();
    reader.close();
    dir.close();
  }
 private void doTestMixedPostings(Codec codec) throws Exception {
   Directory dir = newDirectory();
   IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
   iwc.setCodec(codec);
   RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
   Document doc = new Document();
   FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
   // turn on vectors for the checkindex cross-check
   ft.setStoreTermVectors(true);
   ft.setStoreTermVectorOffsets(true);
   ft.setStoreTermVectorPositions(true);
   Field idField = new Field("id", "", ft);
   Field dateField = new Field("date", "", ft);
   doc.add(idField);
   doc.add(dateField);
   for (int i = 0; i < 100; i++) {
     idField.setStringValue(Integer.toString(random().nextInt(50)));
     dateField.setStringValue(Integer.toString(random().nextInt(100)));
     iw.addDocument(doc);
   }
   iw.close();
   dir.close(); // checkindex
 }
Example #17
  /**
   * Construct Indexer
   *
   * @param directory the main BlackLab index directory
   * @param create if true, creates a new index; otherwise, appends to existing index
   * @param docIndexerClass how to index the files, or null to autodetect
   * @param indexTemplateFile JSON file to use as template for index structure / metadata (if
   *     creating new index)
   * @throws DocumentFormatException if no DocIndexer was specified and autodetection failed
   * @throws IOException
   */
  public Indexer(
      File directory,
      boolean create,
      Class<? extends DocIndexer> docIndexerClass,
      File indexTemplateFile)
      throws DocumentFormatException, IOException {
    this.docIndexerClass = docIndexerClass;

    searcher = Searcher.openForWriting(directory, create, indexTemplateFile);
    if (!create) searcher.getIndexStructure().setModified();

    if (this.docIndexerClass == null) {
      // No DocIndexer supplied; try to detect it from the index
      // metadata.
      String formatId = searcher.getIndexStructure().getDocumentFormat();
      if (formatId != null && formatId.length() > 0)
        setDocIndexer(DocumentFormats.getIndexerClass(formatId));
      else {
        throw new DocumentFormatException("Cannot detect document format for index!");
      }
    }

    metadataFieldTypeTokenized = new FieldType();
    metadataFieldTypeTokenized.setStored(true);
    metadataFieldTypeTokenized.setIndexed(true);
    metadataFieldTypeTokenized.setTokenized(true);
    metadataFieldTypeTokenized.setOmitNorms(true); // @@@ <-- depending on setting?
    metadataFieldTypeTokenized.setStoreTermVectors(true);
    metadataFieldTypeTokenized.setStoreTermVectorPositions(true);
    metadataFieldTypeTokenized.setStoreTermVectorOffsets(true);
    metadataFieldTypeTokenized.freeze();

    metadataFieldTypeUntokenized = new FieldType(metadataFieldTypeTokenized);
    metadataFieldTypeUntokenized.setTokenized(false);
    metadataFieldTypeUntokenized.freeze();
  }
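A hypothetical use of the two frozen metadata field types built above (field names are illustrative):

// Tokenized variant for full-text search over metadata values,
// untokenized variant for exact matching on the same value.
luceneDoc.add(new Field("author", authorName, metadataFieldTypeTokenized));
luceneDoc.add(new Field("authorExact", authorName, metadataFieldTypeUntokenized));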
  protected void _add(Topic topic) {
    if (topic == null) return; // unlikely, but guard against it just in case
    // for now, don't index the comments
    dao.fetchLinks(topic, "replies");
    Document document;
    document = new Document();
    Field field;
    FieldType fieldType;

    // add the id first
    fieldType = new FieldType();
    fieldType.setIndexed(true); // indexed
    fieldType.setStored(true); // stored
    fieldType.setStoreTermVectors(true);
    fieldType.setTokenized(true);
    fieldType.setStoreTermVectorPositions(true); // store positions
    fieldType.setStoreTermVectorOffsets(true); // store offsets
    field = new Field("id", topic.getId(), fieldType);
    document.add(field);

    // add the title
    fieldType = new FieldType();
    fieldType.setIndexed(true); // indexed
    fieldType.setStored(true); // stored
    fieldType.setStoreTermVectors(true);
    fieldType.setTokenized(true);
    fieldType.setStoreTermVectorPositions(true); // store positions
    fieldType.setStoreTermVectorOffsets(true); // store offsets
    field = new Field("title", topic.getTitle(), fieldType);
    document.add(field);

    // add the article content
    fieldType = new FieldType();
    fieldType.setIndexed(true); // indexed
    fieldType.setStored(false); // not stored
    fieldType.setStoreTermVectors(true);
    fieldType.setTokenized(true);
    fieldType.setStoreTermVectorPositions(true); // store positions
    fieldType.setStoreTermVectorOffsets(true); // store offsets
    field = new Field("content", topic.getContent(), fieldType);
    document.add(field);

    StringBuilder sb = new StringBuilder();
    if (topic.getReplies() != null) {
      for (TopicReply reply : topic.getReplies()) {
        if (reply == null) continue;
        bigContentService.fill(reply);
        if (reply.getContent() != null) {
          if (sb.length() + reply.getContent().length() > (IndexWriter.MAX_TERM_LENGTH / 3)) {
            break;
          }
          sb.append(reply.getContent());
        }
      }
    }
    fieldType = new FieldType();
    fieldType.setIndexed(true); // indexed
    fieldType.setStored(false); // not stored
    fieldType.setStoreTermVectors(true);
    fieldType.setTokenized(true);
    fieldType.setStoreTermVectorPositions(true); // store positions
    fieldType.setStoreTermVectorOffsets(true); // store offsets

    field = new Field("reply", sb.toString(), fieldType);
    document.add(field);

    try {
      luceneIndex.writer.addDocument(document);
    } catch (IOException e) {
      log.debug("failed to add topic to index: id=" + topic.getId());
    } catch (Error e) {
      log.debug("failed to add topic to index: id=" + topic.getId());
    }
  }
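The four FieldType blocks above differ only in the stored flag; a hedged helper (illustrative, not part of the original class) could remove the duplication:

private static FieldType vectorFieldType(boolean stored) {
  FieldType t = new FieldType();
  t.setIndexed(true);
  t.setTokenized(true);
  t.setStored(stored);
  t.setStoreTermVectors(true);
  t.setStoreTermVectorPositions(true);
  t.setStoreTermVectorOffsets(true);
  t.freeze(); // a frozen type is safe to share across fields and documents
  return t;
}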
    public void indexDoc() throws IOException {
      Document d = new Document();

      FieldType customType1 = new FieldType(TextField.TYPE_STORED);
      customType1.setTokenized(false);
      customType1.setOmitNorms(true);

      ArrayList<Field> fields = new ArrayList<>();
      String idString = getIdString();
      Field idField = newField("id", idString, customType1);
      fields.add(idField);

      int nFields = nextInt(maxFields);
      for (int i = 0; i < nFields; i++) {

        FieldType customType = new FieldType();
        switch (nextInt(4)) {
          case 0:
            break;
          case 1:
            customType.setStoreTermVectors(true);
            break;
          case 2:
            customType.setStoreTermVectors(true);
            customType.setStoreTermVectorPositions(true);
            break;
          case 3:
            customType.setStoreTermVectors(true);
            customType.setStoreTermVectorOffsets(true);
            break;
        }

        switch (nextInt(4)) {
          case 0:
            customType.setStored(true);
            customType.setOmitNorms(true);
            customType.setIndexed(true);
            fields.add(newField("f" + nextInt(100), getString(1), customType));
            break;
          case 1:
            customType.setIndexed(true);
            customType.setTokenized(true);
            fields.add(newField("f" + nextInt(100), getString(0), customType));
            break;
          case 2:
            customType.setStored(true);
            customType.setStoreTermVectors(false);
            customType.setStoreTermVectorOffsets(false);
            customType.setStoreTermVectorPositions(false);
            fields.add(newField("f" + nextInt(100), getString(0), customType));
            break;
          case 3:
            customType.setStored(true);
            customType.setIndexed(true);
            customType.setTokenized(true);
            fields.add(newField("f" + nextInt(100), getString(bigFieldSize), customType));
            break;
        }
      }

      if (sameFieldOrder) {
        Collections.sort(fields, fieldNameComparator);
      } else {
        // random placement of id field also
        Collections.swap(fields, nextInt(fields.size()), 0);
      }

      for (int i = 0; i < fields.size(); i++) {
        d.add(fields.get(i));
      }
      if (VERBOSE) {
        System.out.println(Thread.currentThread().getName() + ": indexing id:" + idString);
      }
      w.updateDocument(new Term("id", idString), d);
      // System.out.println(Thread.currentThread().getName() + ": indexing "+d);
      docs.put(idString, d);
    }
  public void testRandomDiscreteMultiValueHighlighting() throws Exception {
    String[] randomValues = new String[3 + random().nextInt(10 * RANDOM_MULTIPLIER)];
    for (int i = 0; i < randomValues.length; i++) {
      String randomValue;
      do {
        randomValue = _TestUtil.randomSimpleString(random());
      } while ("".equals(randomValue));
      randomValues[i] = randomValue;
    }

    Directory dir = newDirectory();
    RandomIndexWriter writer =
        new RandomIndexWriter(
            random(),
            dir,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                .setMergePolicy(newLogMergePolicy()));

    FieldType customType = new FieldType(TextField.TYPE_STORED);
    customType.setStoreTermVectors(true);
    customType.setStoreTermVectorOffsets(true);
    customType.setStoreTermVectorPositions(true);

    int numDocs = randomValues.length * 5;
    int numFields = 2 + random().nextInt(5);
    int numTerms = 2 + random().nextInt(3);
    List<Doc> docs = new ArrayList<Doc>(numDocs);
    List<Document> documents = new ArrayList<Document>(numDocs);
    Map<String, Set<Integer>> valueToDocId = new HashMap<String, Set<Integer>>();
    for (int i = 0; i < numDocs; i++) {
      Document document = new Document();
      String[][] fields = new String[numFields][numTerms];
      for (int j = 0; j < numFields; j++) {
        String[] fieldValues = new String[numTerms];
        fieldValues[0] = getRandomValue(randomValues, valueToDocId, i);
        StringBuilder builder = new StringBuilder(fieldValues[0]);
        for (int k = 1; k < numTerms; k++) {
          fieldValues[k] = getRandomValue(randomValues, valueToDocId, i);
          builder.append(' ').append(fieldValues[k]);
        }
        document.add(new Field(F, builder.toString(), customType));
        fields[j] = fieldValues;
      }
      docs.add(new Doc(fields));
      documents.add(document);
    }
    writer.addDocuments(documents);
    writer.close();
    IndexReader reader = DirectoryReader.open(dir);

    try {
      int highlightIters = 1 + random().nextInt(120 * RANDOM_MULTIPLIER);
      for (int highlightIter = 0; highlightIter < highlightIters; highlightIter++) {
        String queryTerm = randomValues[random().nextInt(randomValues.length)];
        int randomHit = valueToDocId.get(queryTerm).iterator().next();
        List<StringBuilder> builders = new ArrayList<StringBuilder>();
        for (String[] fieldValues : docs.get(randomHit).fieldValues) {
          StringBuilder builder = new StringBuilder();
          boolean hit = false;
          for (int i = 0; i < fieldValues.length; i++) {
            if (queryTerm.equals(fieldValues[i])) {
              builder.append("<b>").append(queryTerm).append("</b>");
              hit = true;
            } else {
              builder.append(fieldValues[i]);
            }
            if (i != fieldValues.length - 1) {
              builder.append(' ');
            }
          }
          if (hit) {
            builders.add(builder);
          }
        }

        FieldQuery fq = new FieldQuery(tq(queryTerm), true, true);
        FieldTermStack stack = new FieldTermStack(reader, randomHit, F, fq);

        FieldPhraseList fpl = new FieldPhraseList(stack, fq);
        SimpleFragListBuilder sflb = new SimpleFragListBuilder(100);
        FieldFragList ffl = sflb.createFieldFragList(fpl, 300);

        SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
        sfb.setDiscreteMultiValueHighlighting(true);
        String[] actualFragments = sfb.createFragments(reader, randomHit, F, ffl, numFields);
        assertEquals(builders.size(), actualFragments.length);
        for (int i = 0; i < actualFragments.length; i++) {
          assertEquals(builders.get(i).toString(), actualFragments[i]);
        }
      }
    } finally {
      reader.close();
      dir.close();
    }
  }
  // creates 8 fields with different options and does "duels" of fields against each other
  public void test() throws Exception {
    Directory dir = newDirectory();
    Analyzer analyzer =
        new Analyzer(Analyzer.PER_FIELD_REUSE_STRATEGY) {
          @Override
          protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer tokenizer = new MockTokenizer(reader);
            if (fieldName.contains("payloadsFixed")) {
              TokenFilter filter = new MockFixedLengthPayloadFilter(new Random(0), tokenizer, 1);
              return new TokenStreamComponents(tokenizer, filter);
            } else if (fieldName.contains("payloadsVariable")) {
              TokenFilter filter = new MockVariableLengthPayloadFilter(new Random(0), tokenizer);
              return new TokenStreamComponents(tokenizer, filter);
            } else {
              return new TokenStreamComponents(tokenizer);
            }
          }
        };
    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    iwc.setCodec(_TestUtil.alwaysPostingsFormat(new Lucene41PostingsFormat()));
    // TODO we could actually add more fields implemented with different PFs
    // or, just put this test into the usual rotation?
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc.clone());
    Document doc = new Document();
    FieldType docsOnlyType = new FieldType(TextField.TYPE_NOT_STORED);
    // turn this on for a cross-check
    docsOnlyType.setStoreTermVectors(true);
    docsOnlyType.setIndexOptions(IndexOptions.DOCS_ONLY);

    FieldType docsAndFreqsType = new FieldType(TextField.TYPE_NOT_STORED);
    // turn this on for a cross-check
    docsAndFreqsType.setStoreTermVectors(true);
    docsAndFreqsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);

    FieldType positionsType = new FieldType(TextField.TYPE_NOT_STORED);
    // turn these on for a cross-check
    positionsType.setStoreTermVectors(true);
    positionsType.setStoreTermVectorPositions(true);
    positionsType.setStoreTermVectorOffsets(true);
    positionsType.setStoreTermVectorPayloads(true);
    FieldType offsetsType = new FieldType(positionsType);
    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    Field field1 = new Field("field1docs", "", docsOnlyType);
    Field field2 = new Field("field2freqs", "", docsAndFreqsType);
    Field field3 = new Field("field3positions", "", positionsType);
    Field field4 = new Field("field4offsets", "", offsetsType);
    Field field5 = new Field("field5payloadsFixed", "", positionsType);
    Field field6 = new Field("field6payloadsVariable", "", positionsType);
    Field field7 = new Field("field7payloadsFixedOffsets", "", offsetsType);
    Field field8 = new Field("field8payloadsVariableOffsets", "", offsetsType);
    doc.add(field1);
    doc.add(field2);
    doc.add(field3);
    doc.add(field4);
    doc.add(field5);
    doc.add(field6);
    doc.add(field7);
    doc.add(field8);
    for (int i = 0; i < MAXDOC; i++) {
      String stringValue =
          Integer.toString(i)
              + " verycommon "
              + English.intToEnglish(i).replace('-', ' ')
              + " "
              + _TestUtil.randomSimpleString(random());
      field1.setStringValue(stringValue);
      field2.setStringValue(stringValue);
      field3.setStringValue(stringValue);
      field4.setStringValue(stringValue);
      field5.setStringValue(stringValue);
      field6.setStringValue(stringValue);
      field7.setStringValue(stringValue);
      field8.setStringValue(stringValue);
      iw.addDocument(doc);
    }
    iw.close();
    verify(dir);
    _TestUtil.checkIndex(dir); // for some extra coverage, checkIndex before we forceMerge
    iwc.setOpenMode(OpenMode.APPEND);
    IndexWriter iw2 = new IndexWriter(dir, iwc.clone());
    iw2.forceMerge(1);
    iw2.close();
    verify(dir);
    dir.close();
  }