Пример #1
0
 /**
  * Opens the main Lucene index and the suggester index and wires up the writer, reader, searcher,
  * query parser and suggester fields of this component.
  *
  * <p>Invoked after construction ({@code @PostConstruct}). If the suggest folder is absent on disk
  * once the suggester has been created, the suggester is built from an empty iterator and
  * refreshed.
  *
  * @throws Exception if a directory, the writer, the reader or the suggester cannot be set up
  */
 @PostConstruct
 public void createOrVerifyIndex() throws Exception {
   LOGGER.info("Initializing Index..........................please Wait..0%");

   // Resolve both index folders; each directory is opened with the no-op lock factory.
   index = new File(appproperties.getLuceneIndexPath());
   suggest = new File(appproperties.getLiceneSuggestIndexPath());
   directory = FSDirectory.open(index, NoLockFactory.getNoLockFactory());
   suggestDirectory = FSDirectory.open(suggest, NoLockFactory.getNoLockFactory());

   // Commit immediately after opening the writer, before a reader is opened on the directory.
   iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
   writer = new IndexWriter(directory, iwc);
   writer.commit();

   indexReader = DirectoryReader.open(directory);
   indexSearcher = new IndexSearcher(indexReader, executorService);

   final String[] searchedFields = {TITLE_FIELD, CONTENTS_FIELD};
   parser = new MultiFieldQueryParser(searchedFields, analyzer);

   final int minPrefixChars = AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS;
   suggester =
       new AnalyzingInfixSuggester(
           Version.LATEST, suggestDirectory, analyzer, analyzer, minPrefixChars);

   final boolean suggestFolderMissing = !(suggest.exists() || suggest.isDirectory());
   if (suggestFolderMissing) {
     LOGGER.info(
         "Lucene Suggest did not exist.........................................Building Please wait.........0%");
     final ArrayList<IndexFile> noFiles = new ArrayList<IndexFile>();
     suggester.build(new IndexFileIterator(noFiles.iterator()));
     suggester.refresh();
     LOGGER.info(
         "Lucene Suggest Build Complete...................................................................100%");
   }

   LOGGER.info("Lucene Ready............................................100%");
 }
Пример #2
0
  /**
   * Points this instance at a file-system index, creating the index first when none exists yet.
   *
   * <p>The directory is opened exactly once and reused for both the existence check and the
   * {@code directory} field, so no throw-away directory handle is left open.
   *
   * @param indexDirectory path of the folder that holds (or will hold) the index
   * @throws Exception if the directory cannot be opened or the index cannot be created
   */
  public void useFSDirectory(final String indexDirectory) throws Exception {
    final FSDirectory fsDirectory = FSDirectory.open(new File(indexDirectory));
    try {
      if (!IndexReader.indexExists(fsDirectory)) {
        this.createIndexDirectory(indexDirectory);
      }
    } catch (Exception e) {
      // Do not leak the handle when the index could not be verified or created.
      fsDirectory.close();
      throw e;
    }

    this.directory = fsDirectory;
  }
Пример #3
0
  /**
   * Compares indexes at different locations based on the key field, which should contain a unique
   * value per document.
   *
   * <p>Both readers and both directories opened here are closed before this method returns,
   * whether the comparison succeeds or fails.
   *
   * @param indexDir1 file-system path of the first index
   * @param indexDir2 file-system path of the second index
   * @param keyFieldName name of the field holding the unique key
   * @return result of the compare
   * @throws IOException problems accessing indexes
   * @throws ParseException problems parsing query
   */
  public Diff<Document, Diff<Fieldable, DocumentDiff>> compare(
      String indexDir1, String indexDir2, String keyFieldName) throws IOException, ParseException {
    FSDirectory dir1 = FSDirectory.open(new File(indexDir1));
    try {
      IndexReader reader1 = IndexReader.open(dir1);
      try {
        FSDirectory dir2 = FSDirectory.open(new File(indexDir2));
        try {
          IndexReader reader2 = IndexReader.open(dir2);
          try {
            // NOTE(review): assumes the returned Diff holds fully loaded documents and does not
            // read from the readers lazily after this call - confirm against the reader overload.
            return compare(reader1, reader2, keyFieldName);
          } finally {
            reader2.close();
          }
        } finally {
          dir2.close();
        }
      } finally {
        reader1.close();
      }
    } finally {
      dir1.close();
    }
  }
 /**
  * Searches the index for an image in two steps: the image is first matched against the
  * reference-object index ({@code indexPath + "-ro"}), and the ids of the best reference objects
  * are then used as a text query against the main index.
  *
  * <p>All readers and directories opened here are closed before the method returns.
  *
  * @param img the query image
  * @param indexPath path of the main index; the reference-object index is expected at
  *     {@code indexPath + "-ro"}
  * @return the documents scored against the ranked reference-object ids
  * @throws IOException if either index cannot be opened or read
  */
 public TopDocs search(BufferedImage img, String indexPath) throws IOException {
   ImageSearcher searcher =
       new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);

   // Step 1: rank the reference objects for the query image.
   final String referenceObjectOrder;
   try (FSDirectory roDirectory = FSDirectory.open(new File(indexPath + "-ro"));
       DirectoryReader roReader = DirectoryReader.open(roDirectory)) {
     ImageSearchHits hits = searcher.search(img, roReader);
     StringBuilder sb = new StringBuilder(numReferenceObjectsUsed * 4);
     for (int j = 0; j < numReferenceObjectsUsed; j++) {
       sb.append(hits.doc(j).getValues("ro-id")[0]);
       sb.append(' ');
     }
     referenceObjectOrder = sb.toString();
   }

   // Step 2: score the main index against that ranked list of ids.
   try (FSDirectory directory = FSDirectory.open(new File(indexPath));
       DirectoryReader reader = DirectoryReader.open(directory)) {
     return scoreDocs(referenceObjectOrder, reader);
   }
 }
  /**
   * We assume that the initial indexing has been done and a set of reference objects has been found
   * and indexed in the separate directory ({@code indexPath + "-ro"}). However further documents
   * were added and they now need to get a ranked list of reference objects. So we (i) get all these
   * new documents missing the field "ro-order" and (ii) add this field.
   *
   * <p>Document ids are walked up to {@code maxDoc()} (not {@code numDocs()}), because ids of
   * deleted documents still occupy slots; deleted documents are skipped via the live-docs bits.
   * On failure the writer is rolled back so no half-written state is committed; readers and
   * directories are always closed.
   *
   * @param indexPath the index to update
   * @throws IOException if an index cannot be opened, read or written
   */
  public void updateIndex(String indexPath) throws IOException {
    int countUpdated = 0;

    ImageSearcher searcher =
        new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);
    Map<String, Analyzer> perField = new HashMap<String, Analyzer>(1);
    perField.put("ro-order", new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
    PerFieldAnalyzerWrapper aWrapper =
        new PerFieldAnalyzerWrapper(new SimpleAnalyzer(LuceneUtils.LUCENE_VERSION), perField);
    StringBuilder sb = new StringBuilder(256);

    // The readers are opened before the writer, exactly as before.
    try (FSDirectory indexDir = FSDirectory.open(new File(indexPath));
        FSDirectory roDir = FSDirectory.open(new File(indexPath + "-ro"));
        IndexReader reader = DirectoryReader.open(indexDir);
        IndexReader readerRo = DirectoryReader.open(roDir)) {

      // NOTE(review): OpenMode.CREATE replaces the existing index on commit, and only documents
      // that were missing "ro-order" are written below - confirm that dropping the others is
      // intended.
      IndexWriter iw =
          new IndexWriter(
              indexDir,
              new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, aWrapper)
                  .setOpenMode(IndexWriterConfig.OpenMode.CREATE));
      boolean committed = false;
      try {
        // Needed for check whether the document is deleted; null when there are no deletions.
        Bits liveDocs = MultiFields.getLiveDocs(reader);
        int maxDoc = reader.maxDoc();

        for (int i = 0; i < maxDoc; i++) {
          if (liveDocs != null && !liveDocs.get(i)) {
            continue; // if it is deleted, just ignore it.
          }
          Document document = reader.document(i);
          if (document.getField("ro-order") == null) { // if the field is not here we create it.
            ImageSearchHits hits = searcher.search(document, readerRo);
            sb.delete(0, sb.length());
            for (int j = 0; j < numReferenceObjectsUsed; j++) {
              sb.append(hits.doc(j).getValues("ro-id")[0]);
              sb.append(' ');
            }
            document.add(new TextField("ro-order", sb.toString(), Field.Store.YES));
            iw.updateDocument(
                new Term(
                    DocumentBuilder.FIELD_NAME_IDENTIFIER,
                    document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]),
                document);
            countUpdated++;
          }

          // progress report
          progress.setNumDocsProcessed(progress.getNumDocsProcessed() + 1);

          // debug:
          System.out.println("countUpdated = " + countUpdated);
        }
        iw.commit();
        committed = true;
      } finally {
        if (committed) {
          iw.close();
        } else {
          // Discard pending changes and release the write lock without committing.
          iw.rollback();
        }
      }
    }
  }
  /**
   * Merges the existing "minor" index of one partition with all newer per-user segment indexes
   * that belong to that partition, and writes the result as a fresh index under
   * {@code outputPath}.
   *
   * <p>Every source reader opened here is closed again once the merge has finished or failed,
   * and the writer is closed even when the merge throws.
   *
   * @param minorPath root of the previously merged indexes; may be {@code null}
   * @param inputPath root that contains one sub-folder per index version
   * @param outputPath root under which the merged partition index is created
   * @param partId id of the partition being merged
   * @param localIndexVer last version already merged; versions after it are processed
   * @param maxVersion highest version to include (inclusive)
   * @param partNo total number of partitions, passed to {@code LSUtils.genPartId}
   * @throws CorruptIndexException if one of the source indexes is corrupt
   * @throws IOException if an index cannot be read or written
   */
  private void mergeIndexByPart(
      Path minorPath,
      Path inputPath,
      Path outputPath,
      int partId,
      int localIndexVer,
      int maxVersion,
      int partNo)
      throws CorruptIndexException, IOException {
    List<IndexReader> mergeIndexArray = new ArrayList<IndexReader>();
    try {
      // NOTE(review): existence is tested for "<partId>" while the reader is opened on
      // PART_PRE + partId - confirm both refer to the same folder.
      if (minorPath != null
          && PathUtil.exists(minorPath)
          && PathUtil.exists(minorPath.cat(partId + ""))) {
        mergeIndexArray.add(
            IndexReader.open(
                FSDirectory.open(
                    minorPath
                        .cat(MailConstants.PART_PRE + partId)
                        .cat(IndexBuilder.LUCENE_INDEX_DIR)
                        .asFile())));
      }

      for (int i = localIndexVer + 1; i <= maxVersion; i++) {
        Path segPath = inputPath.cat(i + "");
        // Check once per version, before listing, instead of once per user folder.
        if (!PathUtil.exists(segPath)) {
          continue;
        }
        for (Path userPath : segPath.listPathes()) {
          if (userPath.getName().equals("built")) {
            continue;
          }
          if (LSUtils.genPartId(userPath.getName(), partNo) == partId) {
            mergeIndexArray.add(
                IndexReader.open(
                    FSDirectory.open(userPath.cat(IndexBuilder.LUCENE_INDEX_DIR).asFile())));
          }
        }
      }

      IndexWriter indexWriter =
          new IndexWriter(
              FSDirectory.open(
                  outputPath
                      .cat(MailConstants.PART_PRE + partId)
                      .cat(IndexBuilder.LUCENE_INDEX_DIR)
                      .asFile()),
              new IKAnalyzer(true),
              true,
              IndexWriter.MaxFieldLength.LIMITED);
      try {
        indexWriter.setMaxMergeDocs(1024);
        indexWriter.setMergeFactor(100);
        indexWriter.addIndexes(mergeIndexArray.toArray(new IndexReader[0]));
      } finally {
        indexWriter.close();
      }
    } finally {
      // The source readers are only needed for the duration of the merge.
      for (IndexReader source : mergeIndexArray) {
        source.close();
      }
    }
  }
Пример #7
0
  /**
   * Runs a test search against the "contents" field and returns the "path" value of every hit
   * with its last four characters (the ".txt" extension) removed.
   *
   * <p>This function is only for test search. I/O and parse errors are printed to standard output
   * and result in the hits collected so far being returned. The reader and directory are always
   * closed.
   *
   * @param indexDir file-system path of the index
   * @param queryString raw query text; it is escaped before parsing
   * @param numResults maximum number of hits to return
   * @param stopwords stop words handed to the analyzer
   * @return the extension-less paths of the matching documents, possibly empty
   */
  public static List<String> searchQuery(
      String indexDir, String queryString, int numResults, CharArraySet stopwords) {
    String field = "contents";
    List<String> hitPaths = new ArrayList<String>();

    try (FSDirectory directory = FSDirectory.open(new File(indexDir));
        IndexReader reader = DirectoryReader.open(directory)) {
      IndexSearcher searcher = new IndexSearcher(reader);

      Analyzer analyzer = new MyAnalyzer(Version.LUCENE_44, stopwords);

      QueryParser parser = new QueryParser(Version.LUCENE_44, field, analyzer);
      Query query = parser.parse(QueryParser.escape(queryString));

      TopDocs results = searcher.search(query, null, numResults);
      for (ScoreDoc hit : results.scoreDocs) {
        String path = searcher.doc(hit.doc).get("path");
        hitPaths.add(path.substring(0, path.length() - 4)); // chop off the file extension (".txt")
      }
    } catch (IOException | ParseException e) {
      System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }

    return hitPaths;
  }
 /**
  * Creates a writer for a brand-new index located at {@code "output/" + filename}, using a
  * {@code StandardAnalyzer} and a 100 MB RAM buffer. {@code OpenMode.CREATE} is set on the
  * writer configuration.
  *
  * @param filename name of the index folder below {@code output/}
  * @return the opened writer; the caller is responsible for closing it
  * @throws IOException if the directory or the writer cannot be opened
  */
 static IndexWriter createWriter(String filename) throws IOException {
   final StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_48);
   final IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_48, analyzer);
   config.setRAMBufferSizeMB(100);
   config.setOpenMode(OpenMode.CREATE);

   final File indexFolder = new File("output/" + filename);
   return new IndexWriter(FSDirectory.open(indexFolder), config);
 }
Пример #9
0
  /**
   * Demo entry point: searches "contents" for "java in action", boosts recent publications via
   * {@code RecencyBoostCustomScoreQuery}, sorts by score and then by "title2", and prints every
   * hit.
   *
   * <p>The reader and directory are closed even when parsing or searching fails.
   *
   * @param args unused
   * @throws IOException if the index cannot be opened or read
   * @throws ParseException if the query text cannot be parsed
   */
  public static void main(String[] args) throws IOException, ParseException {
    String indexDir = "C:/lucenedir";
    try (Directory directory = FSDirectory.open(Paths.get(indexDir));
        IndexReader reader = DirectoryReader.open(directory)) {
      IndexSearcher searcher = new IndexSearcher(reader);

      // Current time expressed in whole days, the unit the recency boost works in.
      int day = (int) (System.currentTimeMillis() / Constans.DAY_MILLIS);
      QueryParser parser = new QueryParser("contents", new StandardAnalyzer());
      Query query = parser.parse("java in action");
      Query customScoreQuery =
          new RecencyBoostCustomScoreQuery(query, 2.0, day, 6 * 365, "pubmonthAsDay");
      Sort sort =
          new Sort(
              new SortField[] {
                SortField.FIELD_SCORE, new SortField("title2", SortField.Type.STRING)
              });
      TopDocs hits =
          searcher.search(customScoreQuery, null, Integer.MAX_VALUE, sort, true, false);

      for (int i = 0; i < hits.scoreDocs.length; i++) {
        // Fetching through the searcher or through the reader both work; searcher.doc
        // essentially delegates to reader.document.
        Document doc = searcher.doc(hits.scoreDocs[i].doc);
        System.out.println(
            (1 + i)
                + ": "
                + doc.get("title")
                + ": pubmonth="
                + doc.get("pubmonth")
                + " score="
                + hits.scoreDocs[i].score);
      }
    }
  }
  /**
   * Reads the tab-separated query file ({@code id<TAB>query} per line), runs every query against
   * the "modernJewishOnly" index and collects up to 1000 hits per target term.
   *
   * <p>Blank lines in the input file are skipped; a non-blank line without a tab is reported as an
   * {@link IOException}. The input file, the index reader and the directory are always closed.
   *
   * @return a map from target-term description to the documents retrieved for it
   * @throws IOException if the input file or the index cannot be read, or a line is malformed
   * @throws ParseException if a query cannot be generated
   */
  @Override
  public HashMap<String, ArrayList<ScoreDoc>> extractDocsByRepresentation()
      throws IOException, ParseException {
    String indexName = "modernJewishOnly";
    m_qg.setType(InputType.Query);
    String inputFileName = "hozOrigQueryAll.txt";

    // read the suitable input file
    LinkedList<Pair<String, String>> queries = new LinkedList<Pair<String, String>>();
    BufferedReader reader = new BufferedReader(new FileReader(m_inputDir + inputFileName));
    try {
      for (String line = reader.readLine(); line != null; line = reader.readLine()) {
        if (line.trim().length() == 0) {
          continue; // tolerate empty lines, e.g. at the end of the file
        }
        int index = line.indexOf('\t');
        if (index < 0) {
          throw new IOException(
              "Malformed line in " + inputFileName + " (expected id<TAB>query): " + line);
        }
        queries.add(new Pair<String, String>(line.substring(0, index), line.substring(index + 1)));
      }
    } finally {
      reader.close();
    }

    // search for the queries in the index
    FSDirectory directory = FSDirectory.open(new File(m_indexDir + indexName));
    try {
      IndexReader indexReader = IndexReader.open(directory);
      try {
        IndexSearcher searcher = new IndexSearcher(indexReader);
        HashMap<String, ArrayList<ScoreDoc>> termDocs = new HashMap<String, ArrayList<ScoreDoc>>();
        for (Pair<String, String> term : queries) {
          Query q = m_qg.generate(term.value());
          termDocs.put(
              TargetTerm2Id.getStrDesc(Integer.parseInt(term.key())),
              new ArrayList<ScoreDoc>(Arrays.asList(searcher.search(q, 1000).scoreDocs)));
        }
        return termDocs;
      } finally {
        indexReader.close();
      }
    } finally {
      directory.close();
    }
  }
Пример #11
0
 /**
  * Adds the given documents to the index stored in the folder {@code index} that sits next to
  * the parent folder of {@code path} (that is, {@code <grandparent of path>/index}).
  *
  * <p>The index is opened in create-or-append mode. The writer and the directory are closed even
  * when adding a document fails.
  *
  * @param documentList documents to add
  * @param path a file path whose grandparent folder hosts the {@code index} folder
  * @throws IOException if the index cannot be opened or written
  * @throws IllegalArgumentException if {@code path} has no grandparent folder
  */
 public static void createIndex(List<Document> documentList, String path) throws IOException {
   // Resolve <grandparent>/index in a platform-independent way.
   File parent = new File(path).getParentFile();
   File grandParent = parent == null ? null : parent.getParentFile();
   if (grandParent == null) {
     throw new IllegalArgumentException("Path has no grandparent directory: " + path);
   }
   File indexDir = new File(grandParent, "index");

   Directory directory = FSDirectory.open(indexDir);
   try {
     Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
     IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_36, analyzer);

     LogMergePolicy mergePolicy = new LogByteSizeMergePolicy();
     // Merge factor: how often segments are merged while documents are added. Smaller values
     // index more slowly; values above 10 suit batch indexing.
     mergePolicy.setMergeFactor(50);
     // Maximum number of documents in a merged segment. Smaller values favour incremental
     // additions; larger values favour batch indexing and faster searches.
     mergePolicy.setMaxMergeDocs(5000);
     // Use the compound file format.
     mergePolicy.setUseCompoundFile(true);
     indexWriterConfig.setMergePolicy(mergePolicy);
     indexWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);

     IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig);
     try {
       for (Document document : documentList) {
         indexWriter.addDocument(document);
       }
     } finally {
       // Closing flushes the added documents to disk and releases the write lock.
       indexWriter.close();
     }
   } finally {
     directory.close();
   }
 }
Пример #12
0
  /**
   * Writes the given entries to the index at {@code indexLocation}.
   *
   * <p>If indexing fails, the writer is rolled back before it is closed. Both the writer and the
   * directory are closed in every case.
   *
   * @param geoEntryList entries to index
   * @param create passed to {@code createIndexWriter} to select how the writer is opened
   * @param progressCallback receives progress updates from {@code indexGeoEntries}
   * @throws GeoEntryIndexingException if the directory cannot be opened or writing fails
   */
  private void buildIndex(
      final List<GeoEntry> geoEntryList,
      final boolean create,
      final ProgressCallback progressCallback) {
    final Directory openedDirectory;

    try {
      openedDirectory = FSDirectory.open(Paths.get(indexLocation));
    } catch (IOException e) {
      throw new GeoEntryIndexingException(
          "Couldn't open the directory for the index, " + indexLocation, e);
    }

    // Try-with-resources to ensure the IndexWriter and the Directory always get closed.
    try (final Directory directory = openedDirectory;
        final IndexWriter indexWriter = createIndexWriter(create, directory)) {
      try {
        indexGeoEntries(indexWriter, geoEntryList, progressCallback);
      } catch (IOException e) {
        // Need to roll back here before the IndexWriter is closed at the end of the try
        // block. A failing rollback must not hide the original error.
        try {
          indexWriter.rollback();
        } catch (IOException rollbackFailure) {
          e.addSuppressed(rollbackFailure);
        }
        throw e;
      }
    } catch (IOException e) {
      throw new GeoEntryIndexingException("Error writing to the index.", e);
    }
  }
 /**
  * Get the index writer/searcher wrapper for the given connection.
  *
  * <p>Wrappers are cached in {@code INDEX_ACCESS}, keyed by the index path returned by
  * {@code getIndexPath(conn)}; lookup and creation both happen while holding the monitor of
  * {@code INDEX_ACCESS}. On a cache miss the writer, reader and searcher are opened and stored
  * before the wrapper is returned.
  *
  * @param conn the connection
  * @return the index access wrapper
  * @throws SQLException if opening the index fails; the {@code IOException} is translated by
  *     {@code convertException}
  */
 protected static IndexAccess getIndexAccess(Connection conn) throws SQLException {
   String path = getIndexPath(conn);
   synchronized (INDEX_ACCESS) {
     IndexAccess access = INDEX_ACCESS.get(path);
     if (access == null) {
       try {
         // The "## LUCENE2 ##" / "## LUCENE3 ##" markers below delimit two alternative
         // implementations; the LUCENE2 variant is commented out. NOTE(review): presumably
         // toggled by a build step - leave the markers untouched.
         /*## LUCENE2 ##
         boolean recreate = !IndexReader.indexExists(path);
         Analyzer analyzer = new StandardAnalyzer();
         access = new IndexAccess();
         access.modifier = new IndexModifier(path, analyzer, recreate);
         //*/
         // ## LUCENE3 ##
         File f = new File(path);
         Directory indexDir = FSDirectory.open(f);
         // Only start from scratch when no index exists at the path yet.
         boolean recreate = !IndexReader.indexExists(indexDir);
         Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
         IndexWriter writer =
             new IndexWriter(indexDir, analyzer, recreate, IndexWriter.MaxFieldLength.UNLIMITED);
         // see http://wiki.apache.org/lucene-java/NearRealtimeSearch
         // The reader is obtained from the writer rather than opened on the directory.
         IndexReader reader = writer.getReader();
         access = new IndexAccess();
         access.writer = writer;
         access.reader = reader;
         access.searcher = new IndexSearcher(reader);
         // */
       } catch (IOException e) {
         throw convertException(e);
       }
       // Cache only after everything opened successfully.
       INDEX_ACCESS.put(path, access);
     }
     return access;
   }
 }
Пример #14
0
 /**
  * Runs every test case image as a query against the test index and prints the rank at which
  * the expected result (matched by the suffix of "descriptorImageIdentifier") is found.
  *
  * <p>When {@code cutImages} is set, the query document is built from the cropped image (the
  * top-left 200x69 region) instead of the full file. Image streams, the reader and the directory
  * are closed when no longer needed.
  *
  * @param searcher the searcher under test
  * @param prefix label appended to the image name in the printed output
  * @throws IOException if the index or a query image cannot be read
  * @throws InstantiationException declared for callers; not raised directly here
  * @throws IllegalAccessException declared for callers; not raised directly here
  */
 public void computeErrorRate(ImageSearcher searcher, String prefix)
     throws IOException, InstantiationException, IllegalAccessException {
   try (FSDirectory directory = FSDirectory.open(new File(testIndex));
       IndexReader reader = DirectoryReader.open(directory)) {
     for (String testCase : testcases.keySet()) {
       queryImage = testCase;
       Document query;
       try (FileInputStream imageStream = new FileInputStream(queryImage)) {
         if (cutImages) {
           // Build the query from the cropped image; previously the crop was computed and then
           // discarded, so both branches indexed the uncropped file.
           BufferedImage bimg = ImageUtils.cropImage(ImageIO.read(imageStream), 0, 0, 200, 69);
           query = builder.createDocument(bimg, queryImage);
         } else {
           query = builder.createDocument(imageStream, queryImage);
         }
       }
       ImageSearchHits hits = searcher.search(query, reader);
       for (int i = 0; i < hits.length(); i++) {
         if (hits.doc(i)
             .get("descriptorImageIdentifier")
             .toLowerCase()
             .endsWith(testcases.get(queryImage))) {
           System.out.println(
               queryImage.substring(queryImage.lastIndexOf('\\') + 1)
                   + "-"
                   + prefix
                   + " -> Found at rank "
                   + i
                   + " ("
                   + hits.length()
                   + ")");
         }
       }
     }
   }
 }
Пример #15
0
  /**
   * Opens the index stored at {@code C:\lucenedata\indexdata} before each test and wraps its
   * reader in the shared {@code indexSearcher}.
   *
   * @throws IOException if the index cannot be opened
   */
  @Before
  public void init() throws IOException {
    final File indexFolder = new File("C:\\lucenedata\\indexdata");
    final FSDirectory indexDirectory = FSDirectory.open(indexFolder);
    final DirectoryReader indexReader = DirectoryReader.open(indexDirectory);
    indexSearcher = new IndexSearcher(indexReader);
  }
Пример #16
0
 /**
  * Adds one entry to the index at {@code getIndexPath()}, creating the index folder and the
  * index itself when they do not exist yet.
  *
  * <p>The writer and the directory are both closed before the method returns.
  *
  * @param indexingValue the value to index; handed to {@code addDoc}
  * @throws Exception if the index cannot be opened or written
  */
 public void writeIndex(IndexingValue indexingValue) throws Exception {
   File indexDir = new File(getIndexPath());
   if (!indexDir.exists()) {
     indexDir.mkdirs();
   }
   // list() returns null when the path cannot be listed; treat that like an empty folder
   // instead of failing with a NullPointerException.
   String[] existingFiles = indexDir.list();
   boolean create = existingFiles == null || existingFiles.length == 0;

   IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_10_2, analyzer);
   iwc.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);

   try (Directory dir = FSDirectory.open(indexDir);
       IndexWriter writer = new IndexWriter(dir, iwc)) {
     addDoc(writer, indexingValue);
   }
 }
Пример #17
0
 /**
  * Deletes every indexed document whose creator field matches the given user id. The id is
  * zero-padded to {@code IndexingValue.ID_ZEROPADDING_DIGIT} digits before it is matched.
  *
  * <p>The index folder and the index are created when missing. The writer and the directory are
  * both closed before the method returns.
  *
  * @param creator id of the creating user
  * @throws Exception if the index cannot be opened or written
  */
 @Override
 public void deleteOnCreator(Integer creator) throws Exception {
   File indexDir = new File(getIndexPath());
   if (!indexDir.exists()) {
     indexDir.mkdirs();
   }
   // list() returns null when the path cannot be listed; treat that like an empty folder
   // instead of failing with a NullPointerException.
   String[] existingFiles = indexDir.list();
   boolean create = existingFiles == null || existingFiles.length == 0;

   IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_10_2, analyzer);
   iwc.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);

   try (Directory dir = FSDirectory.open(indexDir);
       IndexWriter writer = new IndexWriter(dir, iwc)) {
     writer.deleteDocuments(
         new Term(
             FIELD_LABEL_CREATE_USER,
             StringUtils.zeroPadding(creator, IndexingValue.ID_ZEROPADDING_DIGIT)));
     writer.commit();
   }
 }
Пример #18
0
  /**
   * Deletes the indexed document(s) whose id field equals {@code id}.
   *
   * <p>The index folder and the index are created when missing. The writer and the directory are
   * both closed before the method returns.
   *
   * @param id value of the id field of the document to remove
   * @throws Exception if the index cannot be opened or written
   */
  @Override
  public void deleteItem(String id) throws Exception {
    File indexDir = new File(getIndexPath());
    if (!indexDir.exists()) {
      indexDir.mkdirs();
    }
    // list() returns null when the path cannot be listed; treat that like an empty folder
    // instead of failing with a NullPointerException.
    String[] existingFiles = indexDir.list();
    boolean create = existingFiles == null || existingFiles.length == 0;

    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_10_2, analyzer);
    iwc.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);

    try (Directory dir = FSDirectory.open(indexDir);
        IndexWriter writer = new IndexWriter(dir, iwc)) {
      writer.deleteDocuments(new Term(FIELD_LABEL_ID, id));
      writer.commit();
    }
  }
Пример #19
0
 public Searcher(String indexDirectoryPath) throws IOException {
   Directory indexDirectory = FSDirectory.open(new File(indexDirectoryPath));
   indexSearcher = new IndexSearcher(indexDirectory);
   queryParser =
       new QueryParser(
           Version.LUCENE_36, Constants.CONTENTS, new StandardAnalyzer(Version.LUCENE_36));
 }
Пример #20
0
 /**
  * Opens an {@code IndexWriter} on the given folder. If the first attempt fails because the
  * write lock cannot be obtained, a forced unlock is attempted and the writer is opened once
  * more.
  *
  * @param searchIndexPath folder that holds the search index
  * @param create forwarded to {@code retrieveIndexWriterConfig}
  * @return the opened writer
  * @throws IOException if the directory or (after the retry) the writer cannot be opened
  */
 private IndexWriter openIndexWriter(File searchIndexPath, boolean create) throws IOException {
   // NFS doesn't work with Lucene default locking as of Lucene 3.3, so use
   // SimpleFSLockFactory instead.
   final FSDirectory fsDirectory = FSDirectory.open(searchIndexPath, new SimpleFSLockFactory());
   try {
     return new IndexWriter(fsDirectory, this.retrieveIndexWriterConfig(create));
   } catch (LockObtainFailedException e) {
     logger.warn(
         "Unable to obtain lock for "
             + searchIndexPath.getAbsolutePath()
             + ".  Attempting to forcibly unlock the index.");
     if (IndexWriter.isLocked(fsDirectory)) {
       try {
         IndexWriter.unlock(fsDirectory);
         logger.info(
             "Successfully unlocked search directory " + searchIndexPath.getAbsolutePath());
       } catch (IOException unlockFailure) {
         logger.warn(
             "Unable to unlock search directory "
                 + searchIndexPath.getAbsolutePath()
                 + " "
                 + unlockFailure.toString());
       }
     }
   }
   // Second attempt: the lock seen above may have been stale.
   return new IndexWriter(fsDirectory, this.retrieveIndexWriterConfig(create));
 }
Пример #21
0
  /**
   * Resolves the index location, opens it as a Lucene directory and delegates to
   * {@code init(Directory, XWikiContext)}.
   *
   * <p>The location is read from the {@code PROP_INDEX_DIR} wiki parameter; when that is empty the
   * Lucene work directory is used. Only the first entry of the comma-separated list is opened,
   * and its folder is created if absent.
   *
   * @param context the current XWiki context
   * @throws RuntimeException wrapping the {@code IOException} when the directory cannot be opened
   */
  @Override
  public synchronized void init(XWikiContext context) {
    LOGGER.debug("Lucene plugin: in init");

    this.indexDirs = context.getWiki().Param(PROP_INDEX_DIR);
    if (StringUtils.isEmpty(this.indexDirs)) {
      // Nothing configured: fall back to the plugin's work directory.
      this.indexDirs = getLuceneWorkDirectory().getAbsolutePath();
    }

    final File primaryIndexFolder = new File(StringUtils.split(this.indexDirs, ",")[0]);
    final Directory directory;
    try {
      if (!primaryIndexFolder.exists()) {
        primaryIndexFolder.mkdirs();
      }
      directory = FSDirectory.open(primaryIndexFolder);
    } catch (IOException openFailure) {
      LOGGER.error("Failed to open the index directory: ", openFailure);
      throw new RuntimeException(openFailure);
    }

    init(directory, context);
  }
Пример #22
0
  /**
   * Searches the "contents" field of the index for the given query, reports the hit count and
   * timing on standard error and prints the "fullpath" of the top ten hits on standard output.
   *
   * <p>The reader and the directory are closed even when parsing or searching fails.
   *
   * @param indexDir file-system path of the index
   * @param q query text in query-parser syntax
   * @throws IOException if the index cannot be opened or read
   * @throws ParseException if the query cannot be parsed
   */
  public static void search(String indexDir, String q) throws IOException, ParseException {
    // 3) Open index
    try (FSDirectory directory = FSDirectory.open(new File(indexDir));
        IndexReader reader = DirectoryReader.open(directory)) {
      IndexSearcher is = new IndexSearcher(reader);

      // 4) Parse query
      QueryParser parser = new QueryParser(VER, "contents", new StandardAnalyzer(VER));
      Query query = parser.parse(q);

      // 5) Search index
      long start = System.currentTimeMillis();
      TopDocs hits = is.search(query, 10);
      long end = System.currentTimeMillis();

      // 6) Write search stat
      System.err.println(
          "Found "
              + hits.totalHits
              + " document(s) (in "
              + (end - start)
              + " milliseconds) that matched query '"
              + q
              + "':");

      // 7) Retrieve matching docs
      for (ScoreDoc scoreDoc : hits.scoreDocs) {
        Document doc = is.doc(scoreDoc.doc);
        System.out.println(doc.get("fullpath"));
      }
    } // 8) Reader and directory are closed here
  }
Пример #23
0
 /**
  * Picks a random document from the "index-large" index, searches with it, and saves the result
  * list as a PNG three times: as returned by the searcher, after the rerank filter, and after
  * the LSA filter.
  *
  * <p>The random document id is limited to the size of the index (at most 10000) and the reader
  * is closed when the test finishes.
  *
  * @throws IOException if the index cannot be read or a result image cannot be written
  */
 public void testRerankFilters() throws IOException {
   try (FSDirectory directory = FSDirectory.open(new File("index-large"));
       IndexReader reader = DirectoryReader.open(directory)) {
     // Never ask for a document id beyond what the index actually holds.
     int queryDocID = (int) (Math.random() * Math.min(10000, reader.maxDoc()));
     // select one feature for the large index:
     int featureIndex = 4;
     ImageSearchHits hits = searchers[featureIndex].search(reader.document(queryDocID), reader);
     RerankFilter rerank = new RerankFilter(featureClasses[0], DocumentBuilder.FIELD_NAME_CEDD);
     LsaFilter lsa = new LsaFilter(featureClasses[0], DocumentBuilder.FIELD_NAME_CEDD);
     String queryIdentifier =
         reader.document(queryDocID).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];

     FileUtils.saveImageResultsToPng("GeneralTest_rerank_0_old", hits, queryIdentifier);
     hits = rerank.filter(hits, reader.document(queryDocID));
     FileUtils.saveImageResultsToPng("GeneralTest_rerank_1_new", hits, queryIdentifier);
     hits = lsa.filter(hits, reader.document(queryDocID));
     FileUtils.saveImageResultsToPng("GeneralTest_rerank_2_lsa", hits, queryIdentifier);
   }
 }
  /**
   * Reads the aligned Bible texts and the key list from a fixed local folder, renders the
   * alignment as a table with the help of the definitions index, and writes the table to
   * {@code positionalTagging-table.txt} in the same folder.
   *
   * <p>The searcher and the directory are closed once the table has been written or an error
   * occurred.
   *
   * @param args unused
   * @throws IOException if an input file or the index cannot be read, or the output cannot be
   *     written
   */
  public static void main(String[] args) throws IOException {

    // David's local setup.
    final String root = "C:\\Users\\David IB\\Dropbox\\STEP-Tagging\\autoTag\\Bibles\\";
    final String strongs = FileUtils.readFileToString(new File(root + "bible.s"));
    final String other = FileUtils.readFileToString(new File(root + "bible.o"));
    final String results = FileUtils.readFileToString(new File(root + "training.align"));
    final String keyFile = FileUtils.readFileToString(new File(root + "keyList.txt"));
    /*
     * Chris' local setup instead uses root = "C:\\temp\\berkeley\\berkeleyBibles\\output\\" with
     * the same file names, except for the key list, which is "keyList-nt.txt".
     */
    List<String[]> strongSentences = splitByWord(strongs);
    List<String[]> otherSentences = splitByWord(other);
    List<String[]> resultSentences = splitByWord(results);
    List<String[]> keyList = splitByWord(keyFile);

    final File path =
        new File("C:\\Users\\David IB\\AppData\\Roaming\\JSword\\step\\entities\\definition");
    // Chris' setup: "C:\\Users\\Chris\\AppData\\Roaming\\JSword\\step\\entities\\definition"
    FSDirectory directory = FSDirectory.open(path);
    try {
      final IndexSearcher indexSearcher = new IndexSearcher(directory);
      try {
        String resultTagging =
            parseResultsAsTable(
                resultSentences, strongSentences, otherSentences, indexSearcher, keyList);
        FileUtils.writeStringToFile(new File(root + "positionalTagging-table.txt"), resultTagging);
      } finally {
        indexSearcher.close();
      }
    } finally {
      directory.close();
    }
  }
Пример #25
0
  /**
   * Returns the "filename" values of the sentences matching the input, looked up in the Lucene
   * index of the given catalog.
   *
   * <p>The input is escaped before parsing. Parse and I/O errors are logged and yield the results
   * collected so far; an unknown catalog is logged and yields an empty list. The reader and the
   * directory are always closed.
   *
   * @param input input word
   * @param catalogName catalog (domain) name which we'd like to search in
   * @param limit how many hits are needed (0 or any non-positive value means all)
   * @return the file names of the matching documents, best hit first; never {@code null}
   */
  public List<String> query(String input, String catalogName, int limit) {

    List<String> res = new ArrayList<String>();

    catalog c = catalogs.get(catalogName);
    if (c == null) {
      log("Unknown catalog: " + catalogName);
      return res;
    }

    try (FSDirectory directory = FSDirectory.open(Paths.get(c.indexPath));
        IndexReader reader = DirectoryReader.open(directory)) {
      IndexSearcher searcher = new IndexSearcher(reader);

      QueryParser parser = new QueryParser("contents", analyzer);
      Query query = parser.parse(QueryParser.escape(input));

      // The search API needs a positive hit count, even when nothing matches.
      int n = limit > 0 ? limit : searcher.count(query);
      if (n == 0) {
        n = 1;
      }
      TopDocs results = searcher.search(query, n);

      // scoreDocs already holds at most n entries, so its length is the safe upper bound.
      for (int i = 0; i < results.scoreDocs.length; i++) {
        int id = results.scoreDocs[i].doc;
        Document doc = searcher.doc(id);
        res.add(doc.get("filename"));
      }
    } catch (ParseException e) {
      log(e.getMessage());
    } catch (IOException e) {
      log(e.getMessage());
    }
    return res;
  }
Пример #26
0
  /**
   * Searches the "title" and "content" fields of the article index with a fixed term and prints
   * the id and title of the top 20 hits.
   *
   * <p>The reader and the directory are closed when the test finishes, also on failure.
   *
   * @throws IOException if the index cannot be opened or read
   */
  @Test
  public void search() throws IOException {
    String[] q = {"title", "content"};

    String filePath = "e:/elewordIndex/LuceneArticle";
    try (Directory dir = FSDirectory.open(new File(filePath));
        IndexReader reader = DirectoryReader.open(dir)) {
      IndexSearcher search = new IndexSearcher(reader);

      Query query = SearchHelper.makeMultiQueryFiled(q, "顶顶", 0.8f);
      TopDocs topDocs = search.search(query, 20);
      ScoreDoc[] scoreDocs = topDocs.scoreDocs;
      System.out.println("共:" + topDocs.totalHits + "条结果");

      for (ScoreDoc doc : scoreDocs) {
        int docId = doc.doc;
        Document document = search.doc(docId);

        String id = document.get("id");
        String title = document.get("title");
        System.out.println("------------------------------------------------------------------");
        System.out.println("id=" + id + "    title=" + title);
      }
    }
  }
  /**
   * Command-line demo: prints up to five spelling suggestions for a word, using the spell-checker
   * index at the given path and Levenshtein distance as the similarity measure.
   *
   * <p>Exits with status 1 when the argument count is wrong or no index exists at the path.
   *
   * @param args {@code args[0]} is the spell-checker index folder, {@code args[1]} the word
   * @throws IOException if the index cannot be opened or read
   */
  public static void main(String[] args) throws IOException {

    if (args.length != 2) {
      System.out.println(
          "Usage: java lia.tools.SpellCheckerTest SpellCheckerIndexDir wordToRespell");
      System.exit(1);
    }

    final String indexPath = args[0];
    final String misspelled = args[1];

    final Directory spellIndex = FSDirectory.open(new File(indexPath));
    final boolean indexPresent = IndexReader.indexExists(spellIndex);
    if (!indexPresent) {
      System.out.println(
          "\nERROR: No spellchecker index at path \""
              + indexPath
              + "\"; please run CreateSpellCheckerIndex first\n");
      System.exit(1);
    }

    // Open the checker on the existing index and select the distance metric.
    final SpellChecker checker = new SpellChecker(spellIndex);
    checker.setStringDistance(new LevensteinDistance());

    // Ask for at most five candidates.
    final String[] candidates = checker.suggestSimilar(misspelled, 5);
    System.out.println(candidates.length + " suggestions for '" + misspelled + "':");
    for (int i = 0; i < candidates.length; i++) {
      System.out.println("  " + candidates[i]);
    }
  }
Пример #28
0
  /**
   * Prepares this instance for use: resolves (and if needed creates) the index folder, opens the
   * Lucene directory on it and initialises the bookkeeping fields.
   *
   * <p>The index folder is {@code <data root>/INDEX_DIR}, extended by the project path when a
   * project is set. The whole body runs while holding this object's monitor.
   *
   * @throws IOException if the index folder cannot be created ({@code FileNotFoundException}) or
   *     the directory cannot be opened
   */
  @SuppressWarnings("PMD.CollapsibleIfStatements")
  private void initialize() throws IOException {
    synchronized (this) {
      final RuntimeEnvironment env = RuntimeEnvironment.getInstance();
      final File baseIndexDir = new File(env.getDataRootFile(), INDEX_DIR);
      final File indexDir =
          project == null ? baseIndexDir : new File(baseIndexDir, project.getPath());

      final boolean available = indexDir.exists() || indexDir.mkdirs();
      if (!available) {
        // to avoid race conditions, just recheck..
        if (!indexDir.exists()) {
          throw new FileNotFoundException(
              "Failed to create root directory [" + indexDir.getAbsolutePath() + "]");
        }
      }

      final boolean luceneLocking = env.isUsingLuceneLocking();
      if (!luceneLocking) {
        lockfact = NoLockFactory.INSTANCE;
      }
      indexDirectory = FSDirectory.open(indexDir.toPath(), lockfact);

      ignoredNames = env.getIgnoredNames();
      includedNames = env.getIncludedNames();
      analyzerGuru = new AnalyzerGuru();
      if (env.isGenerateHtml()) {
        xrefDir = new File(env.getDataRootFile(), "xref");
      }
      listeners = new ArrayList<>();

      // A "dirty" marker file inside the index folder determines the initial dirty flag.
      dirtyFile = new File(indexDir, "dirty");
      dirty = dirtyFile.exists();
      directories = new ArrayList<>();
    }
  }
  /**
   * Searches the "name" field of the local index for the given text, and adds the proposals of
   * every product found (top 10 hits) to the proposal controller's list.
   *
   * <p>Parse and I/O errors are printed and otherwise ignored; the completion message is added in
   * every case. The searcher and the directory are closed on success and on failure.
   *
   * @param searchString query text in query-parser syntax
   * @throws IOException declared for callers; I/O errors raised inside are caught and printed
   */
  public void startSearch(String searchString) throws IOException {
    try {
      Directory directory = FSDirectory.open(new File(".//Index")); // where the index lives
      try {
        IndexSearcher is = new IndexSearcher(directory);
        try {
          // Search field plus the analyzer used to parse the query text.
          QueryParser parser =
              new QueryParser(
                  Version.LUCENE_31, "name", new RussianAnalyzer(Version.LUCENE_31));
          Query query = parser.parse(searchString);
          // Run the search, limited to the 10 best documents.
          TopDocs results = is.search(query, null, 10);
          // getMaxScore() is the best score; totalHits is the number of matching documents.
          System.out.println(
              "getMaxScore()=" + results.getMaxScore() + " totalHits=" + results.totalHits);

          for (ScoreDoc hits : results.scoreDocs) {
            Document doc = is.doc(hits.doc); // load the stored document for this hit

            for (Proposal proposal :
                proposalFacade.findPropolsalsByProduct(Long.valueOf(doc.get("recid")))) {

              proposalController.getProposalList().add(proposal);
              _log.info(
                  "Предложение найдено:"
                      + proposal.getRecid().toString()
                      + ",Товар: "
                      + doc.get("recid")
                      + ", "
                      + doc.get("name"));
            }
          }
        } finally {
          is.close();
        }
      } finally {
        directory.close();
      }
    } catch (ParseException e) {
      e.printStackTrace();
    } catch (IOException e) {
      e.printStackTrace();
    }
    addMessage("Поиск выполнен");
  }
 private LibrarySearchOperationsImpl() {
   try {
     ramDir = FSDirectory.open(new File(INDEX_LOCATION));
   } catch (IOException ioe) {
     log.error("Could not initialize index location! ", ioe);
   }
 }