Exemplo n.º 1
0
  private void _initIndexWriter() {
    try {
      Analyzer analyzer =
          new LimitTokenCountAnalyzer(
              LuceneHelperUtil.getAnalyzer(), PropsValues.LUCENE_ANALYZER_MAX_TOKENS);

      IndexWriterConfig indexWriterConfig =
          new IndexWriterConfig(LuceneHelperUtil.getVersion(), analyzer);

      indexWriterConfig.setIndexDeletionPolicy(_dumpIndexDeletionPolicy);
      indexWriterConfig.setMergePolicy(_getMergePolicy());
      indexWriterConfig.setMergeScheduler(_getMergeScheduler());
      indexWriterConfig.setRAMBufferSizeMB(PropsValues.LUCENE_BUFFER_SIZE);

      _indexWriter = new IndexWriter(getLuceneDir(), indexWriterConfig);

      if (!IndexReader.indexExists(getLuceneDir())) {

        // Workaround for LUCENE-2386

        if (_log.isDebugEnabled()) {
          _log.debug("Creating missing index");
        }

        _indexWriter.commit();
      }
    } catch (Exception e) {
      _log.error("Initializing Lucene writer failed for " + _companyId, e);
    }
  }
 static IndexWriter createWriter(String filename) throws IOException {
   IndexWriterConfig indexWriterConfig =
       new IndexWriterConfig(Version.LUCENE_48, new StandardAnalyzer(Version.LUCENE_48));
   indexWriterConfig.setRAMBufferSizeMB(100);
   indexWriterConfig.setOpenMode(OpenMode.CREATE);
   return new IndexWriter(FSDirectory.open(new File("output/" + filename)), indexWriterConfig);
 }
Exemplo n.º 3
0
 private IndexWriter createWriter(boolean create) throws IOException {
   try {
     final IndexWriterConfig iwc = new IndexWriterConfig(engineConfig.getAnalyzer());
     iwc.setCommitOnClose(false); // we by default don't commit on close
     iwc.setOpenMode(
         create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND);
     iwc.setIndexDeletionPolicy(deletionPolicy);
     // with tests.verbose, lucene sets this up: plumb to align with filesystem stream
     boolean verbose = false;
     try {
       verbose = Boolean.parseBoolean(System.getProperty("tests.verbose"));
     } catch (Throwable ignore) {
     }
     iwc.setInfoStream(verbose ? InfoStream.getDefault() : new LoggerInfoStream(logger));
     iwc.setMergeScheduler(mergeScheduler);
     MergePolicy mergePolicy = config().getMergePolicy();
     // Give us the opportunity to upgrade old segments while performing
     // background merges
     mergePolicy = new ElasticsearchMergePolicy(mergePolicy);
     iwc.setMergePolicy(mergePolicy);
     iwc.setSimilarity(engineConfig.getSimilarity());
     iwc.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().mbFrac());
     iwc.setCodec(engineConfig.getCodec());
     iwc.setUseCompoundFile(
         true); // always use compound on flush - reduces # of file-handles on refresh
     return new IndexWriter(store.directory(), iwc);
   } catch (LockObtainFailedException ex) {
     logger.warn("could not lock IndexWriter", ex);
     throw ex;
   }
 }
Exemplo n.º 4
0
  protected void indexList(List<AgeObject> aol, boolean append) {
    try {
      if (searcher != null) {
        searcher.getIndexReader().close();
        searcher.close();
      }

      IndexWriterConfig idxCfg = new IndexWriterConfig(Version.LUCENE_36, analyzer);

      idxCfg.setRAMBufferSizeMB(50);
      idxCfg.setOpenMode(append ? OpenMode.APPEND : OpenMode.CREATE);

      IndexWriter iWriter = new IndexWriter(index, idxCfg);

      for (Document d : new DocCollection(aol, extractors)) iWriter.addDocument(d);

      iWriter.close();

      searcher = new IndexSearcher(IndexReader.open(index));
    } catch (CorruptIndexException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    } catch (IOException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    }
  }
Exemplo n.º 5
0
 private IndexWriter getIndexWriter(File file) throws IOException {
   FSDirectory dir = FSDirectory.open(file);
   IndexWriterConfig config =
       new IndexWriterConfig(Version.LUCENE_40, new StandardAnalyzer(Version.LUCENE_40));
   config.setOpenMode(OpenMode.CREATE_OR_APPEND);
   config.setRAMBufferSizeMB(RamBufferSizeMB);
   return new IndexWriter(dir, config);
 }
  /**
   * Builds a new {@code RowDirectory} using the specified directory path and analyzer.
   *
   * @param keyspace The keyspace name.
   * @param table The table name.
   * @param name The index name.
   * @param path The path of the directory in where the Lucene files will be stored.
   * @param ramBufferMB The index writer buffer size in MB.
   * @param maxMergeMB NRTCachingDirectory max merge size in MB.
   * @param maxCachedMB NRTCachingDirectory max cached MB.
   * @param analyzer The default {@link Analyzer}.
   * @param refreshSeconds The index readers refresh time in seconds. Writings are not visible until
   *     this time.
   * @param refreshCallback A runnable to be run on index refresh.
   * @throws IOException If Lucene throws IO errors.
   */
  public LuceneIndex(
      String keyspace,
      String table,
      String name,
      Path path,
      Integer ramBufferMB,
      Integer maxMergeMB,
      Integer maxCachedMB,
      Analyzer analyzer,
      Double refreshSeconds,
      Runnable refreshCallback)
      throws IOException {
    this.path = path;
    this.refreshCallback = refreshCallback;
    this.logName = String.format("Lucene index %s.%s.%s", keyspace, table, name);

    // Open or create directory
    FSDirectory fsDirectory = FSDirectory.open(path);
    directory = new NRTCachingDirectory(fsDirectory, maxMergeMB, maxCachedMB);

    // Setup index writer
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    config.setRAMBufferSizeMB(ramBufferMB);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    config.setUseCompoundFile(true);
    config.setMergePolicy(new TieredMergePolicy());
    indexWriter = new IndexWriter(directory, config);

    // Setup NRT search
    SearcherFactory searcherFactory =
        new SearcherFactory() {
          public IndexSearcher newSearcher(IndexReader reader) throws IOException {
            LuceneIndex.this.refreshCallBack();
            IndexSearcher searcher = new IndexSearcher(reader);
            searcher.setSimilarity(new NoIDFSimilarity());
            return searcher;
          }
        };
    TrackingIndexWriter trackingIndexWriter = new TrackingIndexWriter(indexWriter);
    searcherManager = new SearcherManager(indexWriter, true, searcherFactory);
    searcherReopener =
        new ControlledRealTimeReopenThread<>(
            trackingIndexWriter, searcherManager, refreshSeconds, refreshSeconds);
    searcherReopener.start(); // Start the refresher thread

    // Register JMX MBean
    try {
      objectName =
          new ObjectName(
              String.format(
                  "com.stratio.cassandra.lucene:type=LuceneIndexes,keyspace=%s,table=%s,index=%s",
                  keyspace, table, name));
      ManagementFactory.getPlatformMBeanServer().registerMBean(this, objectName);
    } catch (MBeanException | OperationsException e) {
      Log.error(e, "Error while registering MBean");
    }
  }
Exemplo n.º 7
0
 public CreateIndex(String pPath) throws IOException {
   analyzer = new StandardAnalyzer();
   iwc = new IndexWriterConfig(analyzer);
   iwc.setOpenMode(OpenMode.CREATE);
   iwc.setRAMBufferSizeMB(ConfigConstant.INDEX_BUFFER_SIZE_IN_MB);
   // iwc.setCommitOnClose(true);
   writer = new IndexWriter(FSDirectory.open(Paths.get(pPath)), iwc);
   LOCAL_CACHE = new HashMap<>();
 }
Exemplo n.º 8
0
  private static IndexWriter create_index_writer(
      String indexPath, IndexWriterConfig.OpenMode openMode) throws IOException {
    Directory dir = FSDirectory.open(new File(indexPath));
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer);
    iwc.setOpenMode(openMode);
    iwc.setRAMBufferSizeMB(256.0);
    IndexWriter writer = new IndexWriter(dir, iwc);

    return writer;
    // indexDocs(writer, docDir);
  }
Exemplo n.º 9
0
  public static IndexWriterConfig getIndexWriterConfig(Analyzer analyzer, boolean create) {
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    config.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);
    config.setRAMBufferSizeMB(150); // faster indexing

    // Set merge factor (if using LogMergePolicy, which is the default up to version LUCENE_32,
    // so yes)
    MergePolicy mp = config.getMergePolicy();
    if (mp instanceof LogMergePolicy) {
      ((LogMergePolicy) mp).setMergeFactor(40); // faster indexing
    }
    return config;
  }
  public NTriplesFileLuceneSyntacticIndexCreator(
      InputStream nTriplesStream, String indexPath, String searchField) throws IOException {
    // setup the index
    Directory directory = FSDirectory.open(new File(indexPath));

    // setup the index analyzer
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43);
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_43, analyzer);
    indexWriterConfig.setRAMBufferSizeMB(1024.0);
    indexWriterConfig.setOpenMode(OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(directory, indexWriterConfig);

    System.out.println("Creating index ...");

    // setup the index fields, here two fields, for URI and text
    FieldType stringType = new FieldType(StringField.TYPE_STORED);
    stringType.setStoreTermVectors(false);
    FieldType textType = new FieldType(TextField.TYPE_STORED);
    textType.setStoreTermVectors(false);

    Set<Document> documents = new HashSet<Document>();

    Iterator<Triple> iterator =
        RiotReader.createIteratorTriples(nTriplesStream, Lang.NTRIPLES, null);

    Triple triple;
    String text;
    String uri;
    Document doc;
    int i = 0;
    while (iterator.hasNext()) {
      triple = iterator.next();

      uri = triple.getSubject().getURI();
      text = triple.getObject().getLiteralLexicalForm();

      doc = new Document();
      doc.add(new Field("uri", uri, stringType));
      doc.add(new Field(searchField, text, textType));

      writer.addDocument(doc);
      if (i++ % 10000 == 0) {
        //				writer.commit();
        System.out.println(i);
      }
    }

    writer.commit();
    writer.close();
  }
Exemplo n.º 11
0
 public void applySetting(IndexWriterConfig writerConfig, int value) {
   writerConfig.setRAMBufferSizeMB(value);
 }
Exemplo n.º 12
0
  /**
   * Update the content of this index database
   *
   * @throws IOException if an error occurs
   * @throws HistoryException if an error occurs when accessing the history
   */
  public void update() throws IOException, HistoryException {
    synchronized (lock) {
      if (running) {
        throw new IOException("Indexer already running!");
      }
      running = true;
      interrupted = false;
    }

    String ctgs = RuntimeEnvironment.getInstance().getCtags();
    if (ctgs != null) {
      ctags = new Ctags();
      ctags.setBinary(ctgs);
    }
    if (ctags == null) {
      LOGGER.severe("Unable to run ctags! searching definitions will not work!");
    }

    if (ctags != null) {
      String filename = RuntimeEnvironment.getInstance().getCTagsExtraOptionsFile();
      if (filename != null) {
        ctags.setCTagsExtraOptionsFile(filename);
      }
    }

    try {
      Analyzer analyzer = AnalyzerGuru.getAnalyzer();
      IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
      iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
      iwc.setRAMBufferSizeMB(RuntimeEnvironment.getInstance().getRamBufferSize());
      writer = new IndexWriter(indexDirectory, iwc);
      writer.commit(); // to make sure index exists on the disk

      if (directories.isEmpty()) {
        if (project == null) {
          directories.add("");
        } else {
          directories.add(project.getPath());
        }
      }

      for (String dir : directories) {
        File sourceRoot;
        if ("".equals(dir)) {
          sourceRoot = RuntimeEnvironment.getInstance().getSourceRootFile();
        } else {
          sourceRoot = new File(RuntimeEnvironment.getInstance().getSourceRootFile(), dir);
        }

        HistoryGuru.getInstance().ensureHistoryCacheExists(sourceRoot);

        String startuid = Util.path2uid(dir, "");
        IndexReader reader = DirectoryReader.open(indexDirectory); // open existing index
        Terms terms = null;
        int numDocs = reader.numDocs();
        if (numDocs > 0) {
          Fields uFields = MultiFields.getFields(reader); // reader.getTermVectors(0);
          terms = uFields.terms(QueryBuilder.U);
        }

        try {
          if (numDocs > 0) {
            uidIter = terms.iterator();
            TermsEnum.SeekStatus stat =
                uidIter.seekCeil(new BytesRef(startuid)); // init uid
            if (stat == TermsEnum.SeekStatus.END) {
              uidIter = null;
              LOGGER.log(
                  Level.WARNING, "Couldn't find a start term for {0}, empty u field?", startuid);
            }
          }
          // The code below traverses the tree to get total count.
          int file_cnt = 0;
          if (RuntimeEnvironment.getInstance().isPrintProgress()) {
            LOGGER.log(Level.INFO, "Counting files in {0} ...", dir);
            file_cnt = indexDown(sourceRoot, dir, true, 0, 0);
            LOGGER.log(
                Level.INFO, "Need to process: {0} files for {1}", new Object[] {file_cnt, dir});
          }

          indexDown(sourceRoot, dir, false, 0, file_cnt);

          while (uidIter != null
              && uidIter.term() != null
              && uidIter.term().utf8ToString().startsWith(startuid)) {

            removeFile();
            BytesRef next = uidIter.next();
            if (next == null) {
              uidIter = null;
            }
          }
        } finally {
          reader.close();
        }
      }
    } finally {
      if (writer != null) {
        try {
          writer.prepareCommit();
          writer.commit();
          writer.close();
        } catch (IOException e) {
          LOGGER.log(Level.WARNING, "An error occured while closing writer", e);
        }
      }

      if (ctags != null) {
        try {
          ctags.close();
        } catch (IOException e) {
          LOGGER.log(Level.WARNING, "An error occured while closing ctags process", e);
        }
      }

      synchronized (lock) {
        running = false;
      }
    }

    if (!isInterrupted() && isDirty()) {
      if (RuntimeEnvironment.getInstance().isOptimizeDatabase()) {
        optimize();
      }
      RuntimeEnvironment env = RuntimeEnvironment.getInstance();
      File timestamp = new File(env.getDataRootFile(), "timestamp");
      String purpose = "used for timestamping the index database.";
      if (timestamp.exists()) {
        if (!timestamp.setLastModified(System.currentTimeMillis())) {
          LOGGER.log(
              Level.WARNING,
              "Failed to set last modified time on ''{0}'', {1}",
              new Object[] {timestamp.getAbsolutePath(), purpose});
        }
      } else {
        if (!timestamp.createNewFile()) {
          LOGGER.log(
              Level.WARNING,
              "Failed to create file ''{0}'', {1}",
              new Object[] {timestamp.getAbsolutePath(), purpose});
        }
      }
    }
  }
Exemplo n.º 13
0
  public static void main(String[] args) {
    String usage =
        "java org.apache.lucene.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index"
            + "in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = "index";
    String docsPath = null;
    boolean create = true;
    for (int i = 0; i < args.length; i++) {
      if ("-index".equals(args[i])) {
        indexPath = args[i + 1];
        i++;
      } else if ("-docs".equals(args[i])) {
        docsPath = args[i + 1];
        i++;
      } else if ("-update".equals(args[i])) {
        create = false;
      }
    }

    if (docsPath == null) {
      System.err.println("Usage: " + usage);
      System.exit(1);
    }

    final File docDir = new File(docsPath);
    if (!docDir.exists() || !docDir.canRead()) {
      System.out.println(
          "Document directory '"
              + docDir.getAbsolutePath()
              + "' does not exist or is not readable, please check the path");
      System.exit(1);
    }

    Date start = new Date();
    try {
      System.out.println("Indexing to directory '" + indexPath + "'...");

      Directory dir = FSDirectory.open(new File(indexPath));
      Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_41);
      IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_41, analyzer);

      if (create) {
        // Create a new index in the directory, removing any
        // previously indexed documents:
        iwc.setOpenMode(OpenMode.CREATE);
      } else {
        // Add new documents to an existing index:
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
      }

      // Optional: for better indexing performance, if you
      // are indexing many documents, increase the RAM
      // buffer.  But if you do this, increase the max heap
      // size to the JVM (eg add -Xmx512m or -Xmx1g):
      //
      iwc.setRAMBufferSizeMB(512.0);

      IndexWriter writer = new IndexWriter(dir, iwc);
      indexDocs(writer, docDir);

      // NOTE: if you want to maximize search performance,
      // you can optionally call forceMerge here.  This can be
      // a terribly costly operation, so generally it's only
      // worth it when your index is relatively static (ie
      // you're done adding documents to it):
      //
      // writer.forceMerge(1);

      writer.close();
      Date end = new Date();
      IndexReader reader = IndexReader.open(FSDirectory.open(new File(indexPath)));
      int num = reader.numDocs();
      System.out.println(end.getTime() - start.getTime() + " total milliseconds");
      System.out.println("Contains " + num + " Documents");

    } catch (IOException e) {
      System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
  }