static IndexWriter createWriter(String filename) throws IOException {
  // Lucene 4.8 writer over output/<filename>, tokenizing with StandardAnalyzer.
  IndexWriterConfig config =
      new IndexWriterConfig(Version.LUCENE_48, new StandardAnalyzer(Version.LUCENE_48));
  // 100 MB RAM buffer before flushing segments to disk.
  config.setRAMBufferSizeMB(100);
  // CREATE mode: any pre-existing index at this location is overwritten.
  config.setOpenMode(OpenMode.CREATE);
  File indexDirectory = new File("output/" + filename);
  return new IndexWriter(FSDirectory.open(indexDirectory), config);
}
  @BeforeClass
  public static void beforeClassCountingFacetsAggregatorTest() throws Exception {
    indexDir = newDirectory();
    taxoDir = newDirectory();

    // Build an index containing four distinct segment shapes:
    //   1. a segment with matching results but no categories
    //   2. a segment with categories but no results
    //   3. a segment with both categories and results
    //   4. a segment with categories but only some results
    IndexWriterConfig writerConfig = newIndexWriterConfig(new MockAnalyzer(random()));
    // Disable merging so each indexing step below stays in its own segment.
    writerConfig.setMergePolicy(NoMergePolicy.INSTANCE);
    IndexWriter writer = new IndexWriter(indexDir, writerConfig);
    TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxoDir);

    allExpectedCounts = newCounts();
    termExpectedCounts = newCounts();

    indexDocsNoFacets(writer); // segment 1: no categories
    indexDocsWithFacetsNoTerms(writer, taxonomyWriter, allExpectedCounts); // segment 2
    indexDocsWithFacetsAndTerms(writer, taxonomyWriter, allExpectedCounts); // segment 3
    indexDocsWithFacetsAndSomeTerms(writer, taxonomyWriter, allExpectedCounts); // segment 4

    IOUtils.close(writer, taxonomyWriter);
  }
 /*
  * Test per field codec support - adding fields with random codecs
  */
 @Test
 public void testStressPerFieldCodec() throws IOException {
   Directory directory = newDirectory(random());
   final int docsPerRound = 97;
   int rounds = atLeast(1);
   for (int round = 0; round < rounds; round++) {
     // Random number of fields per document for this round.
     int fieldCount = TestUtil.nextInt(random(), 30, 60);
     IndexWriterConfig iwc =
         newIndexWriterConfig(random(), TEST_VERSION_CURRENT, new MockAnalyzer(random()));
     iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
     IndexWriter writer = newWriter(directory, iwc);
     for (int d = 0; d < docsPerRound; d++) {
       final Document doc = new Document();
       for (int f = 0; f < fieldCount; f++) {
         // Randomize tokenization and norms so different codec paths get exercised.
         FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
         fieldType.setTokenized(random().nextBoolean());
         fieldType.setOmitNorms(random().nextBoolean());
         doc.add(
             newField("" + f, TestUtil.randomRealisticUnicodeString(random(), 128), fieldType));
       }
       writer.addDocument(doc);
     }
     // Occasionally collapse everything into a single segment.
     if (random().nextBoolean()) {
       writer.forceMerge(1);
     }
     writer.commit();
     // The index accumulates across rounds (CREATE_OR_APPEND).
     assertEquals((round + 1) * docsPerRound, writer.maxDoc());
     writer.close();
   }
   directory.close();
 }
 /**
  * Creates (or appends to) a Lucene 3.6 index containing the given documents.
  *
  * <p>The index directory is derived from {@code path}: two parent levels up,
  * in a sub-directory named {@code index}. NOTE(review): the separator is a
  * hard-coded Windows backslash ("\\index"), so the derived path is not
  * portable — confirm the target platform.
  *
  * @param documentList documents to add to the index
  * @param path a file path whose grandparent directory will host the index folder
  * @throws IOException if the index cannot be opened or written
  */
 public static void createIndex(List<Document> documentList, String path) throws IOException {
   // Derive the index directory: <grandparent of path>\index
   File file = new File(path);
   String pathAll = file.getParentFile().getParentFile().toString() + "\\index";
   File indexDir = new File(pathAll);
   // Open the index directory
   Directory directory = FSDirectory.open(indexDir);
   // Analyzer used to tokenize the documents
   Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
   // Index writer configuration
   IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_36, analyzer);
   LogMergePolicy mergePolicy = new LogByteSizeMergePolicy();
   // Segment merge frequency when adding documents:
   // a smaller value slows indexing down, a larger value speeds it up
   // (>10 is suitable for batch indexing)
   mergePolicy.setMergeFactor(50);
   // Maximum number of documents merged into one segment:
   // smaller favors incremental-append speed, larger suits batch indexing
   // and faster searching
   mergePolicy.setMaxMergeDocs(5000);
   // Use the compound index file format, merging multiple segment files
   mergePolicy.setUseCompoundFile(true);
   indexWriterConfig.setMergePolicy(mergePolicy);
   // Append to an existing index, or create it when absent
   indexWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
   // Create the index writer
   IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig);
   for (Document document : documentList) {
     // Add each document to the index
     indexWriter.addDocument(document);
   }
   // Commit the index to disk and close the writer
   indexWriter.close();
 }
Beispiel #5
0
  /**
   * Generate a spelling suggestion index for the definitions stored in defs.
   *
   * <p>IO failures are logged, not propagated. The index reader, the spell
   * checker and the spelling directory are all released in the finally block.
   */
  public void createSpellingSuggestions() {
    IndexReader indexReader = null;
    SpellChecker checker = null;

    try {
      log.info("Generating spelling suggestion index ... ");
      indexReader = DirectoryReader.open(indexDirectory);
      checker = new SpellChecker(spellDirectory);
      // TODO below seems only to index "defs" , possible bug ?
      Analyzer analyzer = AnalyzerGuru.getAnalyzer();
      IndexWriterConfig iwc = new IndexWriterConfig(SearchEngine.LUCENE_VERSION, analyzer);
      iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
      checker.indexDictionary(new LuceneDictionary(indexReader, QueryBuilder.DEFS), iwc, false);
      log.info("done");
    } catch (IOException e) {
      log.log(Level.SEVERE, "ERROR: Generating spelling: {0}", e);
    } finally {
      // FIX: close the SpellChecker too — the original leaked its internal
      // searcher over the spelling index.
      if (checker != null) {
        try {
          checker.close();
        } catch (IOException e) {
          log.log(Level.WARNING, "An error occured while closing spell checker", e);
        }
      }
      if (indexReader != null) {
        try {
          indexReader.close();
        } catch (IOException e) {
          log.log(Level.WARNING, "An error occured while closing reader", e);
        }
      }
      if (spellDirectory != null) {
        spellDirectory.close();
      }
    }
  }
  /**
   * Rebuilds (or appends to) the Lucene index from the given objects and
   * refreshes the searcher.
   *
   * @param aol objects to index via the configured extractors
   * @param append true to append to the existing index, false to recreate it
   */
  protected void indexList(List<AgeObject> aol, boolean append) {
    try {
      // Release the previous searcher (and its reader) before touching the index.
      if (searcher != null) {
        searcher.getIndexReader().close();
        searcher.close();
      }

      IndexWriterConfig idxCfg = new IndexWriterConfig(Version.LUCENE_36, analyzer);
      idxCfg.setRAMBufferSizeMB(50);
      idxCfg.setOpenMode(append ? OpenMode.APPEND : OpenMode.CREATE);

      IndexWriter iWriter = new IndexWriter(index, idxCfg);
      try {
        for (Document d : new DocCollection(aol, extractors)) iWriter.addDocument(d);
      } finally {
        // FIX: always close the writer so the index lock is released even when
        // addDocument throws (the original leaked the writer on failure).
        iWriter.close();
      }

      searcher = new IndexSearcher(IndexReader.open(index));
    } catch (CorruptIndexException e) {
      // Best-effort: indexing failures are reported but not propagated.
      e.printStackTrace();
    } catch (IOException e) {
      e.printStackTrace();
    }
  }
Beispiel #7
0
 /**
  * Deletes every indexed document created by the given user.
  *
  * @param creator user id; matched against the zero-padded creator field
  * @throws Exception if the index cannot be opened or written
  */
 @Override
 public void deleteOnCreator(Integer creator) throws Exception {
   // Use CREATE only when the index directory is empty (or had to be created);
   // otherwise open the existing index.
   boolean create = true;
   File indexDir = new File(getIndexPath());
   if (!indexDir.exists()) {
     indexDir.mkdirs();
   } else if (indexDir.list().length > 0) {
     create = false;
   }
   Directory dir = FSDirectory.open(indexDir);
   IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_10_2, analyzer);
   iwc.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);
   // FIX: try-with-resources replaces the manual null-check/close boilerplate;
   // IndexWriter is Closeable in Lucene 4.10.
   try (IndexWriter writer = new IndexWriter(dir, iwc)) {
     writer.deleteDocuments(
         new Term(
             FIELD_LABEL_CREATE_USER,
             StringUtils.zeroPadding(creator, IndexingValue.ID_ZEROPADDING_DIGIT)));
     writer.commit();
   }
 }
Beispiel #8
0
 /**
  * Adds one item to the Lucene index, creating the index when the directory is
  * new or empty.
  *
  * @param indexingValue the item to index
  * @throws Exception if the index cannot be opened or written
  */
 public void writeIndex(IndexingValue indexingValue) throws Exception {
   // Use CREATE only when the index directory is empty (or had to be created).
   boolean create = true;
   File indexDir = new File(getIndexPath());
   if (!indexDir.exists()) {
     indexDir.mkdirs();
   } else if (indexDir.list().length > 0) {
     create = false;
   }
   Directory dir = FSDirectory.open(indexDir);
   IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_10_2, analyzer);
   iwc.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);
   // FIX: try-with-resources replaces the manual null-check/close boilerplate;
   // close() also commits pending changes.
   try (IndexWriter writer = new IndexWriter(dir, iwc)) {
     addDoc(writer, indexingValue);
   }
 }
 /**
  * Rebuilds the index from scratch (CREATE mode) by delegating to indexDocs.
  *
  * @throws IOException if the index cannot be opened or written
  */
 public void index() throws IOException {
   final IndexWriterConfig indexWriterConfig = new IndexWriterConfig(this.analyzer);
   indexWriterConfig.setOpenMode(OpenMode.CREATE);
   // FIX: try-with-resources closes the writer even when indexDocs throws,
   // releasing the directory write lock (the original leaked it on failure).
   try (IndexWriter indexWriter = new IndexWriter(this.dir, indexWriterConfig)) {
     indexDocs(indexWriter);
   }
 }
Beispiel #10
0
  /**
   * Deletes the document with the given id from the index.
   *
   * @param id value of the id field to delete
   * @throws Exception if the index cannot be opened or written
   */
  @Override
  public void deleteItem(String id) throws Exception {
    // Use CREATE only when the index directory is empty (or had to be created).
    boolean create = true;
    File indexDir = new File(getIndexPath());
    if (!indexDir.exists()) {
      indexDir.mkdirs();
    } else if (indexDir.list().length > 0) {
      create = false;
    }
    Directory dir = FSDirectory.open(indexDir);
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_10_2, analyzer);
    iwc.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);
    // FIX: try-with-resources replaces the manual null-check/close boilerplate.
    try (IndexWriter writer = new IndexWriter(dir, iwc)) {
      writer.deleteDocuments(new Term(FIELD_LABEL_ID, id));
      writer.commit();
    }
  }
  /** Test that core cache key (needed for NRT) is working */
  public void testCoreCacheKey() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig config = new IndexWriterConfig(null);
    config.setMaxBufferedDocs(100);
    config.setMergePolicy(NoMergePolicy.INSTANCE);
    IndexWriter writer = new IndexWriter(dir, config);

    // Index two documents, id:0 and id:1, reusing one document/field instance.
    Document doc = new Document();
    Field idField = new StringField("id", "", Field.Store.NO);
    doc.add(idField);
    idField.setStringValue("0");
    writer.addDocument(doc);
    idField.setStringValue("1");
    writer.addDocument(doc);

    // Open an NRT reader wrapped for the fake shard.
    ShardId shardId = new ShardId("fake", "_na_", 1);
    DirectoryReader reader =
        ElasticsearchDirectoryReader.wrap(DirectoryReader.open(writer, true), shardId);
    assertEquals(2, reader.numDocs());
    assertEquals(1, reader.leaves().size());

    // Delete id:0 and reopen.
    writer.deleteDocuments(new Term("id", "0"));
    DirectoryReader reopened = DirectoryReader.openIfChanged(reader);

    // The reopened reader must share the same segment core as before the delete.
    assertEquals(1, reopened.numDocs());
    assertEquals(1, reopened.leaves().size());
    assertSame(
        reader.leaves().get(0).reader().getCoreCacheKey(),
        reopened.leaves().get(0).reader().getCoreCacheKey());
    IOUtils.close(reader, reopened, writer, dir);
  }
Beispiel #12
0
  /** Initializes the shared IndexWriter; failures are logged, not propagated. */
  private void _initIndexWriter() {
    try {
      // Cap the number of tokens analyzed per field.
      Analyzer limitedAnalyzer =
          new LimitTokenCountAnalyzer(
              LuceneHelperUtil.getAnalyzer(), PropsValues.LUCENE_ANALYZER_MAX_TOKENS);

      IndexWriterConfig config =
          new IndexWriterConfig(LuceneHelperUtil.getVersion(), limitedAnalyzer);
      config.setIndexDeletionPolicy(_dumpIndexDeletionPolicy);
      config.setMergePolicy(_getMergePolicy());
      config.setMergeScheduler(_getMergeScheduler());
      config.setRAMBufferSizeMB(PropsValues.LUCENE_BUFFER_SIZE);

      _indexWriter = new IndexWriter(getLuceneDir(), config);

      if (!IndexReader.indexExists(getLuceneDir())) {

        // Workaround for LUCENE-2386: commit immediately so the index
        // physically exists on disk.

        if (_log.isDebugEnabled()) {
          _log.debug("Creating missing index");
        }

        _indexWriter.commit();
      }
    } catch (Exception e) {
      _log.error("Initializing Lucene writer failed for " + _companyId, e);
    }
  }
 /** Opens a writer on the given directory, creating or appending as requested. */
 IndexWriter createIndexWriter(final boolean create, final Directory directory)
     throws IOException {
   final IndexWriterConfig config = new IndexWriterConfig(ANALYZER);
   if (create) {
     config.setOpenMode(OpenMode.CREATE);
   } else {
     config.setOpenMode(OpenMode.APPEND);
   }
   config.setSimilarity(SIMILARITY);
   return new IndexWriter(directory, config);
 }
 /** Opens (or creates) a file-system index at the given location. */
 private IndexWriter getIndexWriter(File file) throws IOException {
   FSDirectory directory = FSDirectory.open(file);
   IndexWriterConfig writerConfig =
       new IndexWriterConfig(Version.LUCENE_40, new StandardAnalyzer(Version.LUCENE_40));
   writerConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
   writerConfig.setRAMBufferSizeMB(RamBufferSizeMB);
   return new IndexWriter(directory, writerConfig);
 }
  /**
   * Builds a new {@code RowDirectory} using the specified directory path and analyzer.
   *
   * @param keyspace The keyspace name.
   * @param table The table name.
   * @param name The index name.
   * @param path The path of the directory in where the Lucene files will be stored.
   * @param ramBufferMB The index writer buffer size in MB.
   * @param maxMergeMB NRTCachingDirectory max merge size in MB.
   * @param maxCachedMB NRTCachingDirectory max cached MB.
   * @param analyzer The default {@link Analyzer}.
   * @param refreshSeconds The index readers refresh time in seconds. Writings are not visible until
   *     this time.
   * @param refreshCallback A runnable to be run on index refresh.
   * @throws IOException If Lucene throws IO errors.
   */
  public LuceneIndex(
      String keyspace,
      String table,
      String name,
      Path path,
      Integer ramBufferMB,
      Integer maxMergeMB,
      Integer maxCachedMB,
      Analyzer analyzer,
      Double refreshSeconds,
      Runnable refreshCallback)
      throws IOException {
    this.path = path;
    this.refreshCallback = refreshCallback;
    this.logName = String.format("Lucene index %s.%s.%s", keyspace, table, name);

    // Open or create directory. NRTCachingDirectory keeps small flushes/merges
    // cached in RAM so near-real-time readers avoid disk for fresh segments.
    FSDirectory fsDirectory = FSDirectory.open(path);
    directory = new NRTCachingDirectory(fsDirectory, maxMergeMB, maxCachedMB);

    // Setup index writer
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    config.setRAMBufferSizeMB(ramBufferMB);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    config.setUseCompoundFile(true);
    config.setMergePolicy(new TieredMergePolicy());
    indexWriter = new IndexWriter(directory, config);

    // Setup NRT search: each new searcher fires the refresh callback and uses
    // NoIDFSimilarity (presumably to neutralize IDF scoring — confirm intent).
    SearcherFactory searcherFactory =
        new SearcherFactory() {
          public IndexSearcher newSearcher(IndexReader reader) throws IOException {
            LuceneIndex.this.refreshCallBack();
            IndexSearcher searcher = new IndexSearcher(reader);
            searcher.setSimilarity(new NoIDFSimilarity());
            return searcher;
          }
        };
    TrackingIndexWriter trackingIndexWriter = new TrackingIndexWriter(indexWriter);
    searcherManager = new SearcherManager(indexWriter, true, searcherFactory);
    // Periodically reopen searchers so writes become visible after refreshSeconds.
    searcherReopener =
        new ControlledRealTimeReopenThread<>(
            trackingIndexWriter, searcherManager, refreshSeconds, refreshSeconds);
    searcherReopener.start(); // Start the refresher thread

    // Register JMX MBean; registration failure is logged and does not abort startup.
    try {
      objectName =
          new ObjectName(
              String.format(
                  "com.stratio.cassandra.lucene:type=LuceneIndexes,keyspace=%s,table=%s,index=%s",
                  keyspace, table, name));
      ManagementFactory.getPlatformMBeanServer().registerMBean(this, objectName);
    } catch (MBeanException | OperationsException e) {
      Log.error(e, "Error while registering MBean");
    }
  }
Beispiel #16
0
 /** Retrieve an IndexWriter configuration object. */
 private IndexWriterConfig retrieveIndexWriterConfig(boolean create) {
   IndexWriterConfig config =
       new IndexWriterConfig(USE_LUCENE_VERSION, new StandardAnalyzer(USE_LUCENE_VERSION));
   if (create) {
     // Wipe any existing index.
     config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
   } else {
     // Reuse the index when present, create it otherwise.
     config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
   }
   return config;
 }
 public CreateIndex(String pPath) throws IOException {
   analyzer = new StandardAnalyzer();
   iwc = new IndexWriterConfig(analyzer);
   iwc.setOpenMode(OpenMode.CREATE);
   iwc.setRAMBufferSizeMB(ConfigConstant.INDEX_BUFFER_SIZE_IN_MB);
   // iwc.setCommitOnClose(true);
   writer = new IndexWriter(FSDirectory.open(Paths.get(pPath)), iwc);
   LOCAL_CACHE = new HashMap<>();
 }
Beispiel #18
0
  /**
   * index the given catalog (a domain of the corpus: newspaper, literature, etc)
   *
   * @param catalogName name of the catalog (domain)
   * @param create create index (removing any previous index) or just update
   * @return true on success; false when the catalog is unknown, its document
   *     directory is unreadable, or indexing fails with an IOException
   */
  public boolean index(String catalogName, boolean create) {

    try {

      catalog c = catalogs.get(catalogName);
      if (c == null) {
        log("unknown catalog: " + catalogName);
        return false;
      }
      String docsPath = c.docPath;
      String indexPath = c.indexPath;

      final Path docDir = Paths.get(docsPath);
      if (!Files.isReadable(docDir)) {
        log(
            "Document directory '"
                + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        return false;
      }
      Date start = new Date();

      log("Indexing to directory '" + indexPath + "'...");

      Directory dir = FSDirectory.open(Paths.get(indexPath));
      IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

      // CREATE removes any previously indexed documents;
      // CREATE_OR_APPEND adds new documents to an existing index.
      iwc.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);

      // Optional: for better indexing performance, if you
      // are indexing many documents, increase the RAM
      // buffer.  But if you do this, increase the max heap
      // size to the JVM (eg add -Xmx512m or -Xmx1g):
      //
      // iwc.setRAMBufferSizeMB(256.0);

      // FIX: try-with-resources guarantees the writer (and its write lock) is
      // released even when indexDocs throws; the original leaked it on failure.
      try (IndexWriter writer = new IndexWriter(dir, iwc)) {
        indexDocs(writer, docDir);
      }

      Date end = new Date();
      log(" " + (end.getTime() - start.getTime()) + " total milliseconds");

    } catch (IOException e) {
      log(e.getMessage());
      return false;
    }
    return true;
  }
Beispiel #19
0
 /**
  * Merges the in-memory index (ramdir) into the on-disk index at FILE_INDEX.
  *
  * @throws IOException if either index cannot be opened or the merge fails
  */
 public void mergeIndex() throws IOException {
   File indexDir = new File(FILE_INDEX);
   FSDirectory fsdir = FSDirectory.open(indexDir);
   Analyzer luceneAnalyzer = new StandardAnalyzer(Version.LUCENE_47);
   IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47, luceneAnalyzer);
   config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
   // FIX: try-with-resources closes the writer even when addIndexes fails,
   // releasing the on-disk index lock (the original leaked it on error).
   try (IndexWriter fswriter = new IndexWriter(fsdir, config)) {
     fswriter.addIndexes(new Directory[] {ramdir}); // merge the RAM segments in
   }
 }
  /** Opens the index for appending, creating it if it does not yet exist. */
  IndexWriter getIndexWriter(Directory luceneDir)
      throws CorruptIndexException, LockObtainFailedException, IOException, ProviderException {
    IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_36, getLuceneAnalyzer());
    writerConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
    // writer.setInfoStream( System.out );
    return new IndexWriter(luceneDir, writerConfig);
  }
  /**
   * Indexes MAX_DOCS identical high-priority documents, then checks that both a
   * single-term query and a two-clause OR query return all of them, sorted by
   * score then by the integer id field.
   *
   * @param random randomness source for the analyzer
   * @param out sink for human-readable query/hit reports
   * @param useCompoundFiles when true, force compound-file segments (CFS ratio 1.0)
   * @param MAX_DOCS number of documents to index and expect as hits
   */
  private void doTest(Random random, PrintWriter out, boolean useCompoundFiles, int MAX_DOCS)
      throws Exception {
    Directory directory = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random);
    IndexWriterConfig conf = newIndexWriterConfig(analyzer);
    final MergePolicy mp = conf.getMergePolicy();
    // 1.0 => always use compound files, 0.0 => never.
    mp.setNoCFSRatio(useCompoundFiles ? 1.0 : 0.0);
    IndexWriter writer = new IndexWriter(directory, conf);
    if (VERBOSE) {
      System.out.println("TEST: now build index MAX_DOCS=" + MAX_DOCS);
    }

    // Every document is HIGH_PRIORITY with a unique sequential id.
    for (int j = 0; j < MAX_DOCS; j++) {
      Document d = new Document();
      d.add(newTextField(PRIORITY_FIELD, HIGH_PRIORITY, Field.Store.YES));
      d.add(newTextField(ID_FIELD, Integer.toString(j), Field.Store.YES));
      writer.addDocument(d);
    }
    writer.close();

    // try a search without OR
    IndexReader reader = DirectoryReader.open(directory);
    IndexSearcher searcher = newSearcher(reader);

    Query query = new TermQuery(new Term(PRIORITY_FIELD, HIGH_PRIORITY));
    out.println("Query: " + query.toString(PRIORITY_FIELD));
    if (VERBOSE) {
      System.out.println("TEST: search query=" + query);
    }

    // Primary sort by score, ties broken by the numeric id.
    final Sort sort = new Sort(SortField.FIELD_SCORE, new SortField(ID_FIELD, SortField.Type.INT));

    ScoreDoc[] hits = searcher.search(query, null, MAX_DOCS, sort).scoreDocs;
    printHits(out, hits, searcher);
    checkHits(hits, MAX_DOCS, searcher);

    // try a new search with OR; MED_PRIORITY matches nothing, so the hit set
    // must be identical to the single-term query above.
    searcher = newSearcher(reader);
    hits = null;

    BooleanQuery booleanQuery = new BooleanQuery();
    booleanQuery.add(
        new TermQuery(new Term(PRIORITY_FIELD, HIGH_PRIORITY)), BooleanClause.Occur.SHOULD);
    booleanQuery.add(
        new TermQuery(new Term(PRIORITY_FIELD, MED_PRIORITY)), BooleanClause.Occur.SHOULD);
    out.println("Query: " + booleanQuery.toString(PRIORITY_FIELD));

    hits = searcher.search(booleanQuery, null, MAX_DOCS, sort).scoreDocs;
    printHits(out, hits, searcher);
    checkHits(hits, MAX_DOCS, searcher);

    reader.close();
    directory.close();
  }
Beispiel #22
0
 /** Re-opens the writer and reader if this instance was previously closed. */
 public void open() throws IOException {
   // Guard clause: nothing to do when already open.
   if (!_closed) {
     return;
   }
   IndexWriterConfig config =
       new IndexWriterConfig(Version.LUCENE_34, new StandardAnalyzer(Version.LUCENE_34));
   config.setMergePolicy(new ZoieMergePolicy());
   config.setOpenMode(OpenMode.CREATE_OR_APPEND);
   _idxWriter = new IndexWriter(_dir, config);
   updateReader();
   _closed = false;
 }
Beispiel #23
0
 /** Opens a writer whose segments are capped at maxMergeDocs documents. */
 public static IndexWriter openWriter(
     Directory directory, int maxMergeDocs, boolean useSerialMerger)
     throws CorruptIndexException, LockObtainFailedException, IOException {
   IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, analyzer);
   if (useSerialMerger) {
     // Run merges on the shared (serial) scheduler instead of background threads.
     config.setMergeScheduler(mergeScheduler);
   }
   LogMergePolicy policy = new LogByteSizeMergePolicy();
   policy.setMaxMergeDocs(maxMergeDocs);
   config.setMergePolicy(policy);
   return new IndexWriter(directory, config);
 }
Beispiel #24
0
  /** Opens a file-system index with a StandardAnalyzer and a 256 MB RAM buffer. */
  private static IndexWriter create_index_writer(
      String indexPath, IndexWriterConfig.OpenMode openMode) throws IOException {
    Directory directory = FSDirectory.open(new File(indexPath));
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_40, analyzer);
    config.setOpenMode(openMode);
    config.setRAMBufferSizeMB(256.0);
    return new IndexWriter(directory, config);
  }
  /**
   * Builds an in-memory Lucene index over the given test docs and returns a
   * reader for it.
   *
   * <p>Fields flagged with {@code storedPayloads} get a custom analyzer chain
   * (StandardTokenizer -> LowerCaseFilter -> TypeAsPayloadTokenFilter) so the
   * token type is carried as the payload; all other fields fall back to the
   * StandardAnalyzer with an empty stop-word set.
   *
   * @param testDocs docs to index; field settings are taken from testDocs[0]
   * @return a reader over the freshly built RAM index
   * @throws IOException if indexing fails
   */
  protected DirectoryReader indexDocsWithLucene(TestDoc[] testDocs) throws IOException {

    Map<String, Analyzer> mapping = new HashMap<String, Analyzer>();
    for (TestFieldSetting field : testDocs[0].fieldSettings) {
      if (field.storedPayloads) {
        mapping.put(
            field.name,
            new Analyzer() {
              @Override
              protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
                Tokenizer tokenizer = new StandardTokenizer(Version.CURRENT.luceneVersion, reader);
                TokenFilter filter = new LowerCaseFilter(Version.CURRENT.luceneVersion, tokenizer);
                filter = new TypeAsPayloadTokenFilter(filter);
                return new TokenStreamComponents(tokenizer, filter);
              }
            });
      }
    }
    PerFieldAnalyzerWrapper wrapper =
        new PerFieldAnalyzerWrapper(
            new StandardAnalyzer(Version.CURRENT.luceneVersion, CharArraySet.EMPTY_SET), mapping);

    Directory dir = new RAMDirectory();
    IndexWriterConfig conf = new IndexWriterConfig(Version.CURRENT.luceneVersion, wrapper);

    conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(dir, conf);

    for (TestDoc doc : testDocs) {
      Document d = new Document();
      d.add(new Field("id", doc.id, StringField.TYPE_STORED));
      for (int i = 0; i < doc.fieldContent.length; i++) {
        // Term-vector flags mirror the per-field test settings; payloads and
        // offsets both require positions to be stored.
        FieldType type = new FieldType(TextField.TYPE_STORED);
        TestFieldSetting fieldSetting = doc.fieldSettings[i];

        type.setStoreTermVectorOffsets(fieldSetting.storedOffset);
        type.setStoreTermVectorPayloads(fieldSetting.storedPayloads);
        type.setStoreTermVectorPositions(
            fieldSetting.storedPositions
                || fieldSetting.storedPayloads
                || fieldSetting.storedOffset);
        type.setStoreTermVectors(true);
        type.freeze();
        d.add(new Field(fieldSetting.name, doc.fieldContent[i], type));
      }
      writer.updateDocument(new Term("id", doc.id), d);
      // Commit per document — presumably to pin segment layout for the test;
      // NOTE(review): confirm this is intentional, it is slow for large inputs.
      writer.commit();
    }
    writer.close();

    return DirectoryReader.open(dir);
  }
Beispiel #26
0
  /** Builds a writer config tuned for fast indexing (large RAM buffer, high merge factor). */
  public static IndexWriterConfig getIndexWriterConfig(Analyzer analyzer, boolean create) {
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    if (create) {
      config.setOpenMode(OpenMode.CREATE);
    } else {
      config.setOpenMode(OpenMode.CREATE_OR_APPEND);
    }
    // Larger RAM buffer speeds up indexing.
    config.setRAMBufferSizeMB(150);

    // Raise the merge factor for faster indexing when the default merge policy
    // is a LogMergePolicy (the default up to LUCENE_32, so it is here).
    MergePolicy mergePolicy = config.getMergePolicy();
    if (mergePolicy instanceof LogMergePolicy) {
      ((LogMergePolicy) mergePolicy).setMergeFactor(40);
    }
    return config;
  }
  /** Override this to customize index settings, e.g. which codec to use. */
  protected IndexWriterConfig getIndexWriterConfig(
      Version matchVersion, Analyzer indexAnalyzer, IndexWriterConfig.OpenMode openMode) {
    IndexWriterConfig config = new IndexWriterConfig(matchVersion, indexAnalyzer);
    config.setCodec(new Lucene46Codec());
    config.setOpenMode(openMode);
    // Wrap the default merge policy so merged segments come out sorted by SORT,
    // enabling per-segment early termination when those segments are searched.
    config.setMergePolicy(new SortingMergePolicy(config.getMergePolicy(), SORT));
    return config;
  }
 /** Builds an index engine backed by a freshly created, empty in-memory index. */
 private DefaultIndexEngine getRamDirectory() throws IOException {
   final RAMDirectory directory = new RAMDirectory();
   final IndexWriterConfig writerConfig =
       new IndexWriterConfig(
           DefaultIndexManager.LUCENE_VERSION,
           new StandardAnalyzer(DefaultIndexManager.LUCENE_VERSION));
   writerConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
   // Opening and immediately closing a writer materializes the empty index.
   new IndexWriter(directory, writerConfig).close();
   final DefaultConfiguration configuration =
       new DefaultConfiguration(
           directory, new StandardAnalyzer(DefaultIndexManager.LUCENE_VERSION));
   return new DefaultIndexEngine(configuration, FlushPolicy.FLUSH);
 }
  /**
   * Adds one player-message document to the per-player Lucene index.
   *
   * <p>The index lives under a hard-coded Windows path suffixed with the
   * player's hashtag. NOTE(review): the path is machine-specific — consider
   * making it configurable.
   *
   * @throws IOException if the index cannot be opened or written
   */
  private static void adicionaMensagemJogadorAppMIndice(
      String id,
      String hashtag_jogador,
      String utilizador,
      String imagem,
      String mensagem,
      String data_ano_mes_dia,
      String data_hora_minuto,
      String data_amd_formatada,
      String data_hm_formatada,
      String lingua,
      String tipo_mensagem,
      String fonte)
      throws IOException {
    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_34);
    File file = new File("C:\\Workspace\\SocialSports\\Indices_LuceneAppM\\" + hashtag_jogador);
    Directory index = new SimpleFSDirectory(file);
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_34, analyzer);
    // Create a new index when the directory does not exist yet; otherwise append.
    if (!file.exists()) {
      iwc.setOpenMode(OpenMode.CREATE);
    } else {
      iwc.setOpenMode(OpenMode.APPEND);
    }
    IndexWriter index_writer = new IndexWriter(index, iwc);

    // An indexed field (Index.ANALYZED) is searchable; a stored field
    // (Store.YES) can be returned as part of search results.
    try {
      Document doc = new Document();
      doc.add(new Field("id", id, Field.Store.YES, Index.ANALYZED));
      doc.add(new Field("hashtag", hashtag_jogador, Field.Store.YES, Index.ANALYZED));
      doc.add(new Field("utilizador", utilizador, Field.Store.YES, Index.NOT_ANALYZED));
      doc.add(new Field("imagem", imagem, Field.Store.YES, Index.NOT_ANALYZED));
      doc.add(new Field("mensagem", mensagem, Field.Store.YES, Index.NOT_ANALYZED));
      doc.add(new Field("data_ano_mes_dia", data_ano_mes_dia, Field.Store.YES, Index.ANALYZED));
      doc.add(new Field("data_hora_minuto", data_hora_minuto, Field.Store.YES, Index.ANALYZED));
      doc.add(
          new Field("data_amd_formatada", data_amd_formatada, Field.Store.YES, Index.NOT_ANALYZED));
      doc.add(
          new Field("data_hm_formatada", data_hm_formatada, Field.Store.YES, Index.NOT_ANALYZED));
      doc.add(new Field("lingua", lingua, Field.Store.YES, Index.ANALYZED));
      doc.add(new Field("tipo_mensagem", tipo_mensagem, Field.Store.YES, Index.ANALYZED));
      doc.add(new Field("fonte", fonte, Field.Store.YES, Index.ANALYZED));

      index_writer.addDocument(doc);
      index_writer.optimize();
    } finally {
      // FIX: always release the writer (and the index lock); the original
      // leaked it when addDocument/optimize threw.
      index_writer.close();
    }
  }
  /**
   * Builds a Lucene index over an N-Triples stream: one document per triple,
   * storing the subject URI and the object literal text.
   *
   * @param nTriplesStream the N-Triples input
   * @param indexPath directory for the index (recreated from scratch)
   * @param searchField name of the field holding the literal text
   * @throws IOException if the index cannot be opened or written
   */
  public NTriplesFileLuceneSyntacticIndexCreator(
      InputStream nTriplesStream, String indexPath, String searchField) throws IOException {
    // setup the index
    Directory directory = FSDirectory.open(new File(indexPath));

    // setup the index analyzer
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43);
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_43, analyzer);
    indexWriterConfig.setRAMBufferSizeMB(1024.0);
    indexWriterConfig.setOpenMode(OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(directory, indexWriterConfig);

    System.out.println("Creating index ...");

    // setup the index fields, here two fields, for URI and text
    FieldType stringType = new FieldType(StringField.TYPE_STORED);
    stringType.setStoreTermVectors(false);
    FieldType textType = new FieldType(TextField.TYPE_STORED);
    textType.setStoreTermVectors(false);

    // FIX: removed the unused Set<Document> accumulator present in the
    // original, and close the writer in a finally block so a parse/IO
    // failure cannot leave the index lock held.
    try {
      Iterator<Triple> iterator =
          RiotReader.createIteratorTriples(nTriplesStream, Lang.NTRIPLES, null);

      int i = 0;
      while (iterator.hasNext()) {
        Triple triple = iterator.next();

        String uri = triple.getSubject().getURI();
        String text = triple.getObject().getLiteralLexicalForm();

        Document doc = new Document();
        doc.add(new Field("uri", uri, stringType));
        doc.add(new Field(searchField, text, textType));

        writer.addDocument(doc);
        // Progress report every 10000 triples.
        if (i++ % 10000 == 0) {
          System.out.println(i);
        }
      }

      writer.commit();
    } finally {
      writer.close();
    }
  }