Example 1
  /**
   * Creates a Lucene index from the given documents.
   *
   * @param documentList the documents to add to the index
   * @param path a file path used to derive the index location (an "index" directory two levels above it)
   * @throws IOException if the index cannot be created or written
   */
 public static void createIndex(List<Document> documentList, String path) throws IOException {
    // Build the path to an "index" directory two levels above the given path
    File file = new File(path);
    String pathAll = file.getParentFile().getParentFile().toString() + "\\index";
    File indexDir = new File(pathAll);
    // Open the index directory
    Directory directory = FSDirectory.open(indexDir);
    // Create an analyzer
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
    // Create the index writer configuration
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_36, analyzer);
    LogMergePolicy mergePolicy = new LogByteSizeMergePolicy();
    // Set how often segments are merged as documents are added.
    // A smaller value slows indexing down;
    // a larger value speeds it up, and values > 10 suit batch indexing.
    mergePolicy.setMergeFactor(50);
    // Set the maximum number of documents merged into one segment.
    // A smaller value favors incremental (append) indexing speed;
    // a larger value suits batch indexing and faster searches.
    mergePolicy.setMaxMergeDocs(5000);
    // Enable the compound index file format (a segment's files are combined into one)
    mergePolicy.setUseCompoundFile(true);
    indexWriterConfig.setMergePolicy(mergePolicy);
    // Set how the index is opened
    indexWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
    // Create the index writer
    IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig);
    for (Document document : documentList) {
      // Add each document to the index
      indexWriter.addDocument(document);
    }
    // Commit the index to disk and close the writer
   indexWriter.close();
 }
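
A minimal usage sketch, not part of the original example: assuming the Lucene 3.6 field API shown above, it builds documentList and calls createIndex. The field names, the source path, and the holder class name IndexUtil are hypothetical.

  import java.util.ArrayList;
  import java.util.List;

  import org.apache.lucene.document.Document;
  import org.apache.lucene.document.Field;

  public class CreateIndexDemo {
    public static void main(String[] args) throws Exception {
      List<Document> documentList = new ArrayList<Document>();

      // Hypothetical document: a stored, untokenized id plus an analyzed content field
      Document document = new Document();
      document.add(new Field("id", "doc-1", Field.Store.YES, Field.Index.NOT_ANALYZED));
      document.add(
          new Field("content", "lucene merge policy example", Field.Store.YES, Field.Index.ANALYZED));
      documentList.add(document);

      // The path is only used to derive the index directory (two levels up, plus "\\index");
      // IndexUtil is a hypothetical name for the class holding createIndex above
      IndexUtil.createIndex(documentList, "C:\\data\\docs\\source.txt");
    }
  }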
Example 2
  public static IndexWriterConfig getIndexWriterConfig(Analyzer analyzer, boolean create) {
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    config.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);
    config.setRAMBufferSizeMB(150); // faster indexing

    // Set the merge factor if the merge policy is log-based. LogMergePolicy was the default
    // only up to LUCENE_32; later versions default to TieredMergePolicy, hence the instanceof check.
    MergePolicy mp = config.getMergePolicy();
    if (mp instanceof LogMergePolicy) {
      ((LogMergePolicy) mp).setMergeFactor(40); // faster indexing
    }
    return config;
  }
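
A brief usage sketch, added here rather than taken from the original snippet: assuming a Lucene version whose Version-less constructors exist (the 5.x-era API this helper appears to target), the returned config is handed to an IndexWriter over an FSDirectory. The index path and the holder class name ConfigUtil are hypothetical.

  import java.nio.file.Paths;

  import org.apache.lucene.analysis.standard.StandardAnalyzer;
  import org.apache.lucene.index.IndexWriter;
  import org.apache.lucene.index.IndexWriterConfig;
  import org.apache.lucene.store.Directory;
  import org.apache.lucene.store.FSDirectory;

  public class WriterConfigDemo {
    public static void main(String[] args) throws Exception {
      // Hypothetical index location
      Directory directory = FSDirectory.open(Paths.get("/tmp/index"));
      // ConfigUtil is a hypothetical name for the class holding getIndexWriterConfig above
      IndexWriterConfig config = ConfigUtil.getIndexWriterConfig(new StandardAnalyzer(), true);
      // try-with-resources commits pending changes and closes the writer
      try (IndexWriter writer = new IndexWriter(directory, config)) {
        writer.commit();
      }
    }
  }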
Example 3
  private MergePolicy _getMergePolicy() throws Exception {
    if (PropsValues.LUCENE_MERGE_POLICY.equals(NoMergePolicy.class.getName())) {

      return NoMergePolicy.NO_COMPOUND_FILES;
    }

    ClassLoader classLoader = ClassLoaderUtil.getPortalClassLoader();

    MergePolicy mergePolicy =
        (MergePolicy) InstanceFactory.newInstance(classLoader, PropsValues.LUCENE_MERGE_POLICY);

    if (mergePolicy instanceof LogMergePolicy) {
      LogMergePolicy logMergePolicy = (LogMergePolicy) mergePolicy;

      logMergePolicy.setMergeFactor(PropsValues.LUCENE_MERGE_FACTOR);
    }

    return mergePolicy;
  }
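
For orientation only, since the original depends on Liferay's PropsValues and InstanceFactory: stripped of the portal class-loader plumbing, the same pattern reduces to roughly the sketch below. The constants MERGE_POLICY_CLASS and MERGE_FACTOR are hypothetical stand-ins for the portal properties.

  import org.apache.lucene.index.LogByteSizeMergePolicy;
  import org.apache.lucene.index.LogMergePolicy;
  import org.apache.lucene.index.MergePolicy;
  import org.apache.lucene.index.NoMergePolicy;

  public class MergePolicyFactory {
    // Hypothetical stand-ins for PropsValues.LUCENE_MERGE_POLICY and PropsValues.LUCENE_MERGE_FACTOR
    private static final String MERGE_POLICY_CLASS = LogByteSizeMergePolicy.class.getName();
    private static final int MERGE_FACTOR = 10;

    public static MergePolicy getMergePolicy() throws Exception {
      if (MERGE_POLICY_CLASS.equals(NoMergePolicy.class.getName())) {
        // Disable segment merging entirely
        return NoMergePolicy.NO_COMPOUND_FILES;
      }

      // Instantiate the configured policy reflectively, as the original does via InstanceFactory
      MergePolicy mergePolicy = (MergePolicy) Class.forName(MERGE_POLICY_CLASS).newInstance();

      if (mergePolicy instanceof LogMergePolicy) {
        ((LogMergePolicy) mergePolicy).setMergeFactor(MERGE_FACTOR);
      }

      return mergePolicy;
    }
  }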
Example 4
  /**
   * Index the fileset.
   *
   * @exception IOException if a Lucene I/O error occurs. TODO: refactor!!!!!
   */
  private void indexDocs() throws IOException {
    Date start = new Date();

    boolean create = overwrite;
    // If the index directory doesn't exist,
    // create it and force create mode
    if (indexDir.mkdirs() && !overwrite) {
      create = true;
    }

    FSDirectory dir = FSDirectory.open(indexDir);
    try {
      Searcher searcher = null;
      boolean checkLastModified = false;
      if (!create) {
        try {
          searcher = new IndexSearcher(dir, true);
          checkLastModified = true;
        } catch (IOException ioe) {
          log("IOException: " + ioe.getMessage());
          // Ignore: a missing or unreadable index means all documents get indexed
        }
      }

      log("checkLastModified = " + checkLastModified, Project.MSG_VERBOSE);

      IndexWriterConfig conf =
          new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
              .setOpenMode(create ? OpenMode.CREATE : OpenMode.APPEND);
      LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy();
      lmp.setUseCompoundFile(useCompoundIndex);
      lmp.setMergeFactor(mergeFactor);
      IndexWriter writer = new IndexWriter(dir, conf);
      int totalFiles = 0;
      int totalIndexed = 0;
      int totalIgnored = 0;
      try {

        for (int i = 0; i < rcs.size(); i++) {
          ResourceCollection rc = rcs.elementAt(i);
          if (rc.isFilesystemOnly()) {
            Iterator resources = rc.iterator();
            while (resources.hasNext()) {
              Resource r = (Resource) resources.next();
              if (!r.isExists() || !(r instanceof FileResource)) {
                continue;
              }

              totalFiles++;

              File file = ((FileResource) r).getFile();

              if (!file.exists() || !file.canRead()) {
                throw new BuildException(
                    "File \"" + file.getAbsolutePath() + "\" does not exist or is not readable.");
              }

              boolean indexIt = true;

              if (checkLastModified) {
                Term pathTerm = new Term("path", file.getPath());
                TermQuery query = new TermQuery(pathTerm);
                ScoreDoc[] hits = searcher.search(query, null, 1).scoreDocs;

                // if document is found, compare the
                // indexed last modified time with the
                // current file
                // - don't index if up to date
                if (hits.length > 0) {
                  Document doc = searcher.doc(hits[0].doc);
                  String indexModified = doc.get("modified");
                  if (indexModified != null) {
                    indexModified = indexModified.trim();
                    long lastModified = 0;
                    try {
                      lastModified = DateTools.stringToTime(indexModified);
                    } catch (ParseException e) {
                      // if modified time is not parsable, skip
                    }
                    if (lastModified == file.lastModified()) {
                      // TODO: remove existing document
                      indexIt = false;
                    }
                  }
                }
              }

              if (indexIt) {
                try {
                  log("Indexing " + file.getPath(), Project.MSG_VERBOSE);
                  Document doc = handler.getDocument(file);

                  if (doc == null) {
                    totalIgnored++;
                  } else {
                    // Add the path of the file as a field named "path".  Use a Keyword field, so
                    // that the index stores the path, and so that the path is searchable
                    doc.add(
                        new Field(
                            "path", file.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED));

                    // Add the last modified date of the file a field named "modified".  Use a
                    // Keyword field, so that it's searchable, but so that no attempt is made
                    // to tokenize the field into words.
                    doc.add(
                        new Field(
                            "modified",
                            DateTools.timeToString(
                                file.lastModified(), DateTools.Resolution.MILLISECOND),
                            Field.Store.YES,
                            Field.Index.NOT_ANALYZED));

                    writer.addDocument(doc);
                    totalIndexed++;
                  }
                } catch (DocumentHandlerException e) {
                  throw new BuildException(e);
                }
              }
            }
            // end while (resources)
          }
          // end if (rc.isFilesystemOnly())
        }
        // end for (rcs)

        writer.optimize();
      }
      // end try
      finally {
        // always make sure everything gets closed,
        // no matter how we exit.
        writer.close();
        if (searcher != null) {
          searcher.close();
        }
      }

      Date end = new Date();

      log(
          totalIndexed
              + " out of "
              + totalFiles
              + " indexed ("
              + totalIgnored
              + " ignored) in "
              + (end.getTime() - start.getTime())
              + " milliseconds");
    } finally {
      dir.close();
    }
  }