/**
 * Initializes the Lucene index, searcher, query parser and suggester at application startup.
 *
 * <p>Opens (or creates) the main index and the suggest index from the paths configured in
 * {@code appproperties}, commits once so a segments file exists, then wires up the reader,
 * searcher, multi-field parser and infix suggester.
 *
 * @throws Exception if any of the Lucene resources cannot be opened
 */
@PostConstruct
public void createOrVerifyIndex() throws Exception {
  LOGGER.info("Initializing Index..........................please Wait..0%");
  index = new File(appproperties.getLuceneIndexPath());
  suggest = new File(appproperties.getLiceneSuggestIndexPath());
  // NoLockFactory: assumes this process is the index's only writer — TODO confirm.
  directory = FSDirectory.open(index, NoLockFactory.getNoLockFactory());
  suggestDirectory = FSDirectory.open(suggest, NoLockFactory.getNoLockFactory());
  // NOTE(review): iwc is a field initialized elsewhere; its analyzer/config is not visible here.
  iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
  writer = new IndexWriter(directory, iwc);
  // Commit immediately so DirectoryReader.open succeeds on a brand-new, empty index.
  writer.commit();
  indexReader = DirectoryReader.open(directory);
  indexSearcher = new IndexSearcher(indexReader, executorService);
  parser = new MultiFieldQueryParser(new String[] {TITLE_FIELD, CONTENTS_FIELD}, analyzer);
  suggester =
      new AnalyzingInfixSuggester(
          Version.LATEST,
          suggestDirectory,
          analyzer,
          analyzer,
          AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS);
  // NOTE(review): FSDirectory.open above normally creates the directory, so this
  // exists()/isDirectory() check may never be true after the very first run — verify.
  if (!suggest.exists() && !suggest.isDirectory()) {
    LOGGER.info(
        "Lucene Suggest did not exist.........................................Building Please wait.........0%");
    // Build from an empty iterator just to materialize the suggester's index structure.
    suggester.build(new IndexFileIterator(new ArrayList<IndexFile>().iterator()));
    suggester.refresh();
    LOGGER.info(
        "Lucene Suggest Build Complete...................................................................100%");
  }
  LOGGER.info("Lucene Ready............................................100%");
}
/**
 * Points this instance at an on-disk Lucene index, creating the index first if the
 * directory does not yet contain one.
 *
 * @param indexDirectory filesystem path of the index directory
 * @throws Exception if the directory cannot be opened or the index cannot be created
 */
public void useFSDirectory(final String indexDirectory) throws Exception {
  // Open the directory once and reuse the handle; the original opened the same path
  // twice and leaked the first FSDirectory instance.
  final FSDirectory fsDirectory = FSDirectory.open(new File(indexDirectory));
  if (!IndexReader.indexExists(fsDirectory)) {
    this.createIndexDirectory(indexDirectory);
  }
  this.directory = fsDirectory;
}
/**
 * Compares indexes at different location based on the key field which should contain the unique
 * value.
 *
 * <p>Both readers and directories are closed before returning; the {@code Diff} result holds
 * fully loaded (detached) {@code Document} instances, so it stays valid after close.
 *
 * @param indexDir1 indexDir1
 * @param indexDir2 indexDir2
 * @param keyFieldName keyFieldName
 * @return result of the compare
 * @throws IOException problems accessing indexes
 * @throws ParseException problems parsing query
 */
public Diff<Document, Diff<Fieldable, DocumentDiff>> compare(
    String indexDir1, String indexDir2, String keyFieldName) throws IOException, ParseException {
  FSDirectory dir1 = FSDirectory.open(new File(indexDir1));
  IndexReader reader1 = IndexReader.open(dir1);
  try {
    FSDirectory dir2 = FSDirectory.open(new File(indexDir2));
    IndexReader reader2 = IndexReader.open(dir2);
    try {
      return compare(reader1, reader2, keyFieldName);
    } finally {
      // The original leaked both readers and directories.
      reader2.close();
      dir2.close();
    }
  } finally {
    reader1.close();
    dir1.close();
  }
}
/**
 * Provides basic search functions: ranks the reference objects for the query image using
 * the "-ro" side index, then scores documents in the main index by that ranking.
 *
 * @param img the query image
 * @param indexPath path of the main index; the reference-object index is at indexPath + "-ro"
 * @return the scored documents for the query
 * @throws IOException if either index cannot be opened
 */
public TopDocs search(BufferedImage img, String indexPath) throws IOException {
  ImageSearcher searcher =
      new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);
  StringBuilder sb = new StringBuilder(numReferenceObjectsUsed * 4);
  // The reference-object reader is fully consumed here, so close it before moving on
  // (the original leaked it).
  IndexReader roReader = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-ro")));
  try {
    ImageSearchHits hits = searcher.search(img, roReader);
    // Build the whitespace-separated reference-object id list for scoring.
    for (int j = 0; j < numReferenceObjectsUsed; j++) {
      sb.append(hits.doc(j).getValues("ro-id")[0]);
      sb.append(' ');
    }
  } finally {
    roReader.close();
  }
  // NOTE(review): this reader's lifecycle depends on scoreDocs(); it is intentionally
  // left open here as in the original — verify whether the caller expects to close it.
  return scoreDocs(sb.toString(), DirectoryReader.open(FSDirectory.open(new File(indexPath))));
}
/**
 * We assume that the initial indexing has been done and a set of reference objects has been found
 * and indexed in the separate directory. However further documents were added and they now need
 * to get a ranked list of reference objects. So we (i) get all these new documents missing the
 * field "ro-order" and (ii) add this field.
 *
 * @param indexPath the index to update
 * @throws IOException
 */
public void updateIndex(String indexPath) throws IOException {
  IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
  int numDocs = reader.numDocs();
  // NOTE(review): hasDeletions is never used; the loop re-queries reader.hasDeletions().
  boolean hasDeletions = reader.hasDeletions();
  int countUpdated = 0;
  // Reader over the separate "-ro" index that holds the reference objects.
  IndexReader readerRo = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-ro")));
  ImageSearcher searcher =
      new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);
  // "ro-order" is a whitespace-separated id list, so it gets a whitespace analyzer.
  Map<String, Analyzer> perField = new HashMap<String, Analyzer>(1);
  perField.put("ro-order", new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
  PerFieldAnalyzerWrapper aWrapper =
      new PerFieldAnalyzerWrapper(new SimpleAnalyzer(LuceneUtils.LUCENE_VERSION), perField);
  // NOTE(review): OpenMode.CREATE truncates the index that `reader` is iterating. The
  // already-open reader still sees the old commit, and live documents are re-written
  // below via updateDocument — confirm this rebuild-in-place is intended, since docs
  // that already have "ro-order" are NOT re-added and would be dropped.
  IndexWriter iw =
      new IndexWriter(
          FSDirectory.open(new File(indexPath)),
          new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, aWrapper)
              .setOpenMode(IndexWriterConfig.OpenMode.CREATE));
  StringBuilder sb = new StringBuilder(256);
  // Needed for check whether the document is deleted.
  Bits liveDocs = MultiFields.getLiveDocs(reader);
  for (int i = 0; i < numDocs; i++) {
    if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it.
    Document document = reader.document(i);
    if (document.getField("ro-order") == null) { // if the field is not here we create it.
      // Rank the reference objects for this document and store their ids in order.
      ImageSearchHits hits = searcher.search(document, readerRo);
      sb.delete(0, sb.length());
      for (int j = 0; j < numReferenceObjectsUsed; j++) {
        sb.append(hits.doc(j).getValues("ro-id")[0]);
        sb.append(' ');
      }
      // System.out.println(sb.toString());
      document.add(new TextField("ro-order", sb.toString(), Field.Store.YES));
      iw.updateDocument(
          new Term(
              DocumentBuilder.FIELD_NAME_IDENTIFIER,
              document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]),
          document);
      countUpdated++;
    }
    // progress report
    progress.setNumDocsProcessed(progress.getNumDocsProcessed() + 1);
    // debug: System.out.println("countUpdated = " + countUpdated);
  }
  // NOTE(review): reader and readerRo are never closed — verify.
  iw.commit();
  iw.close();
}
/**
 * Merges the per-user segment indexes (and an optional existing minor index) for one
 * partition into a single output index for that partition.
 *
 * @param minorPath optional existing partial index to include in the merge (may be null)
 * @param inputPath root of versioned segment directories (one subdirectory per version)
 * @param outputPath destination root for the merged partition index
 * @param partId the partition being built
 * @param localIndexVer last version already merged; versions after it are picked up
 * @param maxVersion newest version to include
 * @param partNo total number of partitions (used to hash user names to partitions)
 * @throws CorruptIndexException if any source index is corrupt
 * @throws IOException on any other index I/O failure
 */
private void mergeIndexByPart(
    Path minorPath,
    Path inputPath,
    Path outputPath,
    int partId,
    int localIndexVer,
    int maxVersion,
    int partNo)
    throws CorruptIndexException, IOException {
  List<IndexReader> mergeIndexArray = new ArrayList<IndexReader>();
  if (minorPath != null && PathUtil.exists(minorPath)) {
    // NOTE(review): existence is checked on minorPath.cat(partId) but the reader is
    // opened on minorPath.cat(PART_PRE + partId) — confirm these refer to the same dir.
    if (PathUtil.exists(minorPath.cat(partId + ""))) {
      mergeIndexArray.add(
          IndexReader.open(
              FSDirectory.open(
                  minorPath
                      .cat(MailConstants.PART_PRE + partId)
                      .cat(IndexBuilder.LUCENE_INDEX_DIR)
                      .asFile())));
    }
  }
  // Collect every user index, in versions newer than localIndexVer, that hashes to
  // this partition.
  for (int i = localIndexVer + 1; i <= maxVersion; i++) {
    Path segPath = inputPath.cat(i + "");
    Path[] userPathes = segPath.listPathes();
    for (Path userPath : userPathes) {
      if (!userPath.getName().equals("built")) { // "built" is a marker entry, not a user dir
        int shouldInPart = LSUtils.genPartId(userPath.getName(), partNo);
        if (PathUtil.exists(segPath) && shouldInPart == partId) {
          mergeIndexArray.add(
              IndexReader.open(
                  FSDirectory.open(userPath.cat(IndexBuilder.LUCENE_INDEX_DIR).asFile())));
        }
      }
    }
  }
  // Merge everything into the partition's output index (create flag true: overwrite).
  IndexWriter indexWriter =
      new IndexWriter(
          FSDirectory.open(
              outputPath
                  .cat(MailConstants.PART_PRE + partId)
                  .cat(IndexBuilder.LUCENE_INDEX_DIR)
                  .asFile()),
          new IKAnalyzer(true),
          true,
          IndexWriter.MaxFieldLength.LIMITED);
  indexWriter.setMaxMergeDocs(1024);
  indexWriter.setMergeFactor(100);
  indexWriter.addIndexes(mergeIndexArray.toArray(new IndexReader[0]));
  // NOTE(review): the source readers in mergeIndexArray are never closed — verify.
  indexWriter.close();
}
/** This function is only for test search. */ public static List<String> searchQuery( String indexDir, String queryString, int numResults, CharArraySet stopwords) { String field = "contents"; List<String> hitPaths = new ArrayList<String>(); try { IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexDir))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new MyAnalyzer(Version.LUCENE_44, stopwords); QueryParser parser = new QueryParser(Version.LUCENE_44, field, analyzer); Query query; query = parser.parse(QueryParser.escape(queryString)); TopDocs results = searcher.search(query, null, numResults); for (ScoreDoc hit : results.scoreDocs) { String path = searcher.doc(hit.doc).get("path"); hitPaths.add(path.substring(0, path.length() - 4)); // chop off the file extension (".txt") } } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } catch (ParseException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } return hitPaths; }
/**
 * Builds an IndexWriter that (re)creates an index under the local "output" directory.
 *
 * @param filename name of the index directory below "output/"
 * @return a writer in CREATE mode with a 100 MB RAM buffer
 * @throws IOException if the directory cannot be opened
 */
static IndexWriter createWriter(String filename) throws IOException {
  final StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_48);
  final IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_48, analyzer);
  config.setOpenMode(OpenMode.CREATE);
  config.setRAMBufferSizeMB(100);
  final File indexDir = new File("output/" + filename);
  return new IndexWriter(FSDirectory.open(indexDir), config);
}
/**
 * Demo: runs a query against the index at C:/lucenedir, boosting recently published
 * documents via RecencyBoostCustomScoreQuery and sorting by score then title.
 */
public static void main(String[] args) throws IOException, ParseException {
  String indexDir = "C:/lucenedir";
  Directory directory = FSDirectory.open(Paths.get(indexDir));
  IndexReader reader = DirectoryReader.open(directory);
  IndexSearcher searcher = new IndexSearcher(reader);
  // Today expressed in whole days since the epoch, matching the "pubmonthAsDay" field.
  int day = (int) (new Date().getTime() / Constans.DAY_MILLIS);
  QueryParser parser = new QueryParser("contents", new StandardAnalyzer());
  Query query = parser.parse("java in action");
  // Boost factor 2.0, decaying over a 6-year window.
  Query customScoreQuery =
      new RecencyBoostCustomScoreQuery(query, 2.0, day, 6 * 365, "pubmonthAsDay");
  Sort sort =
      new Sort(
          new SortField[] {
            SortField.FIELD_SCORE, new SortField("title2", SortField.Type.STRING)
          });
  TopDocs hits = searcher.search(customScoreQuery, null, Integer.MAX_VALUE, sort, true, false);
  for (int i = 0; i < hits.scoreDocs.length; i++) {
    // Either way works; searcher.doc internally delegates to reader.document.
    // Document doc = reader.document(hits.scoreDocs[i].doc);
    Document doc = searcher.doc(hits.scoreDocs[i].doc);
    System.out.println(
        (1 + i)
            + ": "
            + doc.get("title")
            + ": pubmonth="
            + doc.get("pubmonth")
            + " score="
            + hits.scoreDocs[i].score);
  }
  reader.close();
  directory.close();
}
/**
 * Get the type of the target term representation, query with the suitable input formatted file
 * and the corresponding index.
 *
 * <p>Reads "&lt;termId&gt;\t&lt;queryText&gt;" pairs from the input file, runs each query
 * against the hard-coded index and keeps the top 1000 hits per term.
 *
 * @return HashMap<String,ArrayList<ScoreDoc>> A set of target terms with their extracted
 *     documents
 * @throws IOException
 * @throws ParseException
 */
@Override
public HashMap<String, ArrayList<ScoreDoc>> extractDocsByRepresentation()
    throws IOException, ParseException {
  // NOTE(review): names are hard-coded; the null initializations are dead assignments.
  String indexName = null, inputFileName = null;
  indexName = "modernJewishOnly";
  m_qg.setType(InputType.Query);
  inputFileName = "hozOrigQueryAll.txt";
  // read the suitable input file: one tab-separated (id, query) pair per line
  LinkedList<Pair<String, String>> queries = new LinkedList<Pair<String, String>>();
  BufferedReader reader = new BufferedReader(new FileReader(m_inputDir + inputFileName));
  String line = reader.readLine();
  while (line != null) {
    int index = line.indexOf("\t");
    queries.add(new Pair<String, String>(line.substring(0, index), line.substring(index + 1)));
    line = reader.readLine();
  }
  reader.close();
  // search for the queries in the index
  // NOTE(review): the IndexReader opened here is never closed — verify its lifecycle.
  IndexSearcher searcher =
      new IndexSearcher(IndexReader.open(FSDirectory.open(new File(m_indexDir + indexName))));
  HashMap<String, ArrayList<ScoreDoc>> termDocs = new HashMap<String, ArrayList<ScoreDoc>>();
  for (Pair<String, String> term : queries) {
    Query q = m_qg.generate(term.value());
    // Key the top-1000 hits by the term's human-readable description.
    termDocs.put(
        TargetTerm2Id.getStrDesc(Integer.parseInt(term.key())),
        new ArrayList<ScoreDoc>(Arrays.asList(searcher.search(q, 1000).scoreDocs)));
  }
  return termDocs;
}
/** * @Title: createIndex @Description: 建立索引 * * @param @param documentList * @param @throws IOException * @return void * @throws */ public static void createIndex(List<Document> documentList, String path) throws IOException { // 在当前路径下创建一个叫indexDir的目录 File file = new File(path); String pathAll = file.getParentFile().getParentFile().toString() + "\\index"; File indexDir = new File(pathAll); // 创建索引目录 Directory directory = FSDirectory.open(indexDir); // 创建一个分词器 Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36); // 创建索引配置器 IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_36, analyzer); LogMergePolicy mergePolicy = new LogByteSizeMergePolicy(); // 设置segment添加文档(Document)时的合并频率 // 值较小,建立索引的速度就较慢 // 值较大,建立索引的速度就较快,>10适合批量建立索引 mergePolicy.setMergeFactor(50); // 设置segment最大合并文档(Document)数 // 值较小有利于追加索引的速度 // 值较大,适合批量建立索引和更快的搜索 mergePolicy.setMaxMergeDocs(5000); // 启用复合式索引文件格式,合并多个segment mergePolicy.setUseCompoundFile(true); indexWriterConfig.setMergePolicy(mergePolicy); // 设置索引的打开模式 indexWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND); // 创建索引器 IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig); for (Document document : documentList) { // 把文档添加到索引库 indexWriter.addDocument(document); } // 提交索引到磁盘上的索引库,关闭索引器 indexWriter.close(); }
private void buildIndex( final List<GeoEntry> geoEntryList, final boolean create, final ProgressCallback progressCallback) { Directory directory; try { directory = FSDirectory.open(Paths.get(indexLocation)); } catch (IOException e) { throw new GeoEntryIndexingException( "Couldn't open the directory for the index, " + indexLocation, e); } // Try-with-resources to ensure the IndexWriter always gets closed. try (final IndexWriter indexWriter = createIndexWriter(create, directory)) { try { indexGeoEntries(indexWriter, geoEntryList, progressCallback); } catch (IOException e) { // Need to roll back here before the IndexWriter is closed at the end of the try // block. indexWriter.rollback(); throw e; } } catch (IOException e) { throw new GeoEntryIndexingException("Error writing to the index.", e); } }
/**
 * Get the index writer/searcher wrapper for the given connection.
 *
 * <p>Wrappers are cached per index path in INDEX_ACCESS; the first caller for a path
 * builds the writer, a near-real-time reader and a searcher. All access is serialized
 * on the INDEX_ACCESS map.
 *
 * @param conn the connection
 * @return the index access wrapper
 * @throws SQLException if the Lucene index cannot be opened
 */
protected static IndexAccess getIndexAccess(Connection conn) throws SQLException {
  String path = getIndexPath(conn);
  synchronized (INDEX_ACCESS) {
    IndexAccess access = INDEX_ACCESS.get(path);
    if (access == null) {
      try {
        /*## LUCENE2 ##
        boolean recreate = !IndexReader.indexExists(path);
        Analyzer analyzer = new StandardAnalyzer();
        access = new IndexAccess();
        access.modifier = new IndexModifier(path, analyzer, recreate);
        //*/
        // ## LUCENE3 ##
        File f = new File(path);
        Directory indexDir = FSDirectory.open(f);
        // Only recreate when no index exists yet at this path.
        boolean recreate = !IndexReader.indexExists(indexDir);
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
        IndexWriter writer =
            new IndexWriter(indexDir, analyzer, recreate, IndexWriter.MaxFieldLength.UNLIMITED);
        // see http://wiki.apache.org/lucene-java/NearRealtimeSearch
        IndexReader reader = writer.getReader();
        access = new IndexAccess();
        access.writer = writer;
        access.reader = reader;
        access.searcher = new IndexSearcher(reader);
        // */
      } catch (IOException e) {
        throw convertException(e);
      }
      INDEX_ACCESS.put(path, access);
    }
    return access;
  }
}
/**
 * Runs every test-case image as a query against the test index and prints the rank at
 * which the expected match (from {@code testcases}) was found.
 *
 * @param searcher the image searcher to evaluate
 * @param prefix label appended to the query image name in the report output
 * @throws IOException if the index or an image file cannot be read
 */
public void computeErrorRate(ImageSearcher searcher, String prefix)
    throws IOException, InstantiationException, IllegalAccessException {
  // int maxHits = 10;
  // NOTE(review): this reader is never closed — verify.
  IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(testIndex)));
  for (Iterator<String> testIterator = testcases.keySet().iterator(); testIterator.hasNext(); ) {
    queryImage = testIterator.next();
    Document query;
    if (cutImages) {
      // NOTE(review): bimg is computed but never used — both branches build the query
      // from the raw file stream, so cropping currently has no effect; verify intent.
      BufferedImage bimg =
          ImageUtils.cropImage(ImageIO.read(new FileInputStream(queryImage)), 0, 0, 200, 69);
      query = builder.createDocument(new FileInputStream(queryImage), queryImage);
    } else query = builder.createDocument(new FileInputStream(queryImage), queryImage);
    ImageSearchHits hits = searcher.search(query, reader);
    // hits = rerank(hits, query, ColorLayout.class, DocumentBuilder.FIELD_NAME_COLORLAYOUT);
    // Report every rank where the expected image identifier matches.
    for (int i = 0; i < hits.length(); i++) {
      if (hits.doc(i)
          .get("descriptorImageIdentifier")
          .toLowerCase()
          .endsWith(testcases.get(queryImage))) {
        System.out.println(
            queryImage.substring(queryImage.lastIndexOf('\\') + 1)
                + "-"
                + prefix
                + " -> Found at rank "
                + i
                + " ("
                + hits.length()
                + ")");
      }
    }
    // saveToHtml(queryImage.substring(queryImage.lastIndexOf('\\') + 1) + "-" + prefix, hits,
    // queryImage);
  }
}
/** Opens the searcher over the on-disk test index before each test. */
@Before
public void init() throws IOException {
  final File indexDir = new File("C:\\lucenedata\\indexdata");
  final DirectoryReader reader = DirectoryReader.open(FSDirectory.open(indexDir));
  indexSearcher = new IndexSearcher(reader);
}
/**
 * Adds one document to the index, creating the index directory (and the index itself)
 * when it does not already exist.
 *
 * @param indexingValue the value to index
 * @throws Exception on any indexing failure
 */
public void writeIndex(IndexingValue indexingValue) throws Exception {
  boolean create = true;
  File indexDir = new File(getIndexPath());
  if (!indexDir.exists()) {
    indexDir.mkdirs();
  } else if (indexDir.list().length > 0) {
    // Existing segment files: append instead of recreating.
    create = false;
  }
  Directory dir = FSDirectory.open(indexDir);
  try {
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_10_2, analyzer);
    iwc.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);
    IndexWriter writer = null;
    try {
      writer = new IndexWriter(dir, iwc);
      addDoc(writer, indexingValue);
    } finally {
      if (writer != null) {
        writer.close();
      }
    }
  } finally {
    // Release the Directory handle (the original leaked it).
    dir.close();
  }
}
/**
 * Deletes all indexed documents created by the given user.
 *
 * @param creator id of the creating user whose documents are removed
 * @throws Exception on any index access failure
 */
@Override
public void deleteOnCreator(Integer creator) throws Exception {
  boolean create = true;
  File indexDir = new File(getIndexPath());
  if (!indexDir.exists()) {
    indexDir.mkdirs();
  } else if (indexDir.list().length > 0) {
    // Existing segment files: open in append mode instead of recreating.
    create = false;
  }
  Directory dir = FSDirectory.open(indexDir);
  try {
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_10_2, analyzer);
    iwc.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);
    IndexWriter writer = null;
    try {
      writer = new IndexWriter(dir, iwc);
      // The creator id is stored zero-padded, so pad the lookup term the same way.
      writer.deleteDocuments(
          new Term(
              FIELD_LABEL_CREATE_USER,
              StringUtils.zeroPadding(creator, IndexingValue.ID_ZEROPADDING_DIGIT)));
      writer.commit();
    } finally {
      if (writer != null) {
        writer.close();
      }
    }
  } finally {
    // Release the Directory handle (the original leaked it).
    dir.close();
  }
}
/**
 * Deletes the indexed document with the given id.
 *
 * @param id value of the id field identifying the document to remove
 * @throws Exception on any index access failure
 */
@Override
public void deleteItem(String id) throws Exception {
  boolean create = true;
  File indexDir = new File(getIndexPath());
  if (!indexDir.exists()) {
    indexDir.mkdirs();
  } else if (indexDir.list().length > 0) {
    // Existing segment files: open in append mode instead of recreating.
    create = false;
  }
  Directory dir = FSDirectory.open(indexDir);
  try {
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_10_2, analyzer);
    iwc.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);
    IndexWriter writer = null;
    try {
      writer = new IndexWriter(dir, iwc);
      writer.deleteDocuments(new Term(FIELD_LABEL_ID, id));
      writer.commit();
    } finally {
      if (writer != null) {
        writer.close();
      }
    }
  } finally {
    // Release the Directory handle (the original leaked it).
    dir.close();
  }
}
public Searcher(String indexDirectoryPath) throws IOException { Directory indexDirectory = FSDirectory.open(new File(indexDirectoryPath)); indexSearcher = new IndexSearcher(indexDirectory); queryParser = new QueryParser( Version.LUCENE_36, Constants.CONTENTS, new StandardAnalyzer(Version.LUCENE_36)); }
/**
 * Open an IndexWriter, executing error handling as needed.
 *
 * <p>If the first open fails because the index lock cannot be obtained, the lock is
 * forcibly released and the open is retried once; the retry's exception (if any)
 * propagates to the caller.
 *
 * @param searchIndexPath directory holding the search index
 * @param create whether the writer should (re)create the index
 * @return an open IndexWriter
 * @throws IOException if the directory or writer cannot be opened
 */
private IndexWriter openIndexWriter(File searchIndexPath, boolean create) throws IOException {
  // NFS doesn't work with Lucene default locking as of Lucene 3.3, so use
  // SimpleFSLockFactory instead.
  LockFactory lockFactory = new SimpleFSLockFactory();
  FSDirectory fsDirectory = FSDirectory.open(searchIndexPath, lockFactory);
  IndexWriter indexWriter = null;
  try {
    indexWriter = new IndexWriter(fsDirectory, this.retrieveIndexWriterConfig(create));
  } catch (LockObtainFailedException e) {
    logger.warn(
        "Unable to obtain lock for "
            + searchIndexPath.getAbsolutePath()
            + ". Attempting to forcibly unlock the index.");
    if (IndexWriter.isLocked(fsDirectory)) {
      try {
        IndexWriter.unlock(fsDirectory);
        logger.info(
            "Successfully unlocked search directory " + searchIndexPath.getAbsolutePath());
      } catch (IOException ex) {
        // Unlock failure is only logged; the retry below surfaces the hard error.
        logger.warn(
            "Unable to unlock search directory "
                + searchIndexPath.getAbsolutePath()
                + " "
                + ex.toString());
      }
    }
  }
  if (indexWriter == null) {
    // try again, there could have been a stale lock
    indexWriter = new IndexWriter(fsDirectory, this.retrieveIndexWriterConfig(create));
  }
  return indexWriter;
}
/**
 * Initializes the plugin: resolves the configured index directories (falling back to the
 * Lucene work directory), ensures the primary directory exists, opens it, and delegates
 * to {@code init(Directory, XWikiContext)}.
 */
@Override
public synchronized void init(XWikiContext context) {
  LOGGER.debug("Lucene plugin: in init");
  this.indexDirs = context.getWiki().Param(PROP_INDEX_DIR);
  if (StringUtils.isEmpty(this.indexDirs)) {
    // No configured value: fall back to the plugin's work directory.
    this.indexDirs = getLuceneWorkDirectory().getAbsolutePath();
  }
  // Only the first (comma-separated) entry is opened here.
  final String primaryIndexDir = StringUtils.split(this.indexDirs, ",")[0];
  final File indexDirFile = new File(primaryIndexDir);
  final Directory directory;
  try {
    if (!indexDirFile.exists()) {
      indexDirFile.mkdirs();
    }
    directory = FSDirectory.open(indexDirFile);
  } catch (IOException e) {
    LOGGER.error("Failed to open the index directory: ", e);
    throw new RuntimeException(e);
  }
  init(directory, context);
}
public static void search(String indexDir, String q) throws IOException, ParseException { // 3) Open index IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexDir))); IndexSearcher is = new IndexSearcher(reader); // 4) Parser query QueryParser parser = new QueryParser(VER, "contents", new StandardAnalyzer(VER)); Query query; query = parser.parse(q); // 5) Search index long start = System.currentTimeMillis(); TopDocs hits = is.search(query, 10); long end = System.currentTimeMillis(); // 6) Write search stat System.err.println( "Found " + hits.totalHits + " document(s) (in " + (end - start) + " milliseconds) that matched query '" + q + "':"); // 7) Retrieve matching docs for (ScoreDoc scoreDoc : hits.scoreDocs) { Document doc = is.doc(scoreDoc.doc); System.out.println(doc.get("fullpath")); } // 8) Close IndexSearcher reader.close(); }
public void testRerankFilters() throws IOException { int queryDocID = (int) (Math.random() * 10000); IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("index-large"))); // select one feature for the large index: int featureIndex = 4; int count = 0; long ms = System.currentTimeMillis(); ImageSearchHits hits = searchers[featureIndex].search(reader.document(queryDocID), reader); RerankFilter rerank = new RerankFilter(featureClasses[0], DocumentBuilder.FIELD_NAME_CEDD); LsaFilter lsa = new LsaFilter(featureClasses[0], DocumentBuilder.FIELD_NAME_CEDD); FileUtils.saveImageResultsToPng( "GeneralTest_rerank_0_old", hits, reader.document(queryDocID).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]); hits = rerank.filter(hits, reader.document(queryDocID)); FileUtils.saveImageResultsToPng( "GeneralTest_rerank_1_new", hits, reader.document(queryDocID).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]); hits = lsa.filter(hits, reader.document(queryDocID)); FileUtils.saveImageResultsToPng( "GeneralTest_rerank_2_lsa", hits, reader.document(queryDocID).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]); }
/**
 * Command-line entry point: loads the aligned Strong's/other-translation corpora and the
 * aligner output, tags the results against the JSword definition index, and writes the
 * positional tagging table to disk.
 *
 * <p>Paths are hard-coded for David's machine; the commented block below holds the
 * equivalent setup for Chris' machine.
 */
public static void main(String[] args) throws IOException {
  // David'
  final String root = "C:\\Users\\David IB\\Dropbox\\STEP-Tagging\\autoTag\\Bibles\\";
  final String strongs = FileUtils.readFileToString(new File(root + "bible.s"));
  final String other = FileUtils.readFileToString(new File(root + "bible.o"));
  final String results = FileUtils.readFileToString(new File(root + "training.align"));
  final String keyFile = FileUtils.readFileToString(new File(root + "keyList.txt"));
  /**
   * Chris' final String root = "C:\\temp\\berkeley\\berkeleyBibles\\output\\"; final String
   * strongs = FileUtils.readFileToString(new File(root + "bible.s")); final String other =
   * FileUtils.readFileToString(new File(root + "bible.o")); final String results =
   * FileUtils.readFileToString(new File(root + "training.align")); final String keyFile =
   * FileUtils.readFileToString(new File(root + "keyList-nt.txt"));
   */
  List<String[]> strongSentences = splitByWord(strongs);
  List<String[]> otherSentences = splitByWord(other);
  List<String[]> resultSentences = splitByWord(results);
  List<String[]> keyList = splitByWord(keyFile);
  final File path =
      new File("C:\\Users\\David IB\\AppData\\Roaming\\JSword\\step\\entities\\definition");
  // final File path = new
  // File("C:\\Users\\Chris\\AppData\\Roaming\\JSword\\step\\entities\\definition");
  FSDirectory directory = FSDirectory.open(path);
  final IndexSearcher indexSearcher = new IndexSearcher(directory);
  String resultTagging =
      parseResultsAsTable(
          resultSentences, strongSentences, otherSentences, indexSearcher, keyList);
  FileUtils.writeStringToFile(new File(root + "positionalTagging-table.txt"), resultTagging);
}
/** * give the id list of sentences, from Lucene index * * @param input input word * @param catalogName catalog (domain) name which we'd like to search in * @param limit how many hits are needed (0 means all) */ public List<String> query(String input, String catalogName, int limit) { List<String> res = new ArrayList<String>(); try { catalog c = catalogs.get(catalogName); IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(c.indexPath))); IndexSearcher searcher = new IndexSearcher(reader); QueryParser parser = new QueryParser("contents", analyzer); Query query = parser.parse(QueryParser.escape(input)); int n = limit > 0 ? limit : searcher.count(query); if (n == 0) n = 1; TopDocs results = searcher.search(query, n); int endPos = limit; if (limit != 0) endPos = Math.min(results.totalHits, limit); // 1st n hits else endPos = results.totalHits; // all hits for (int i = 0; i < endPos; i++) { int id = results.scoreDocs[i].doc; Document doc = searcher.doc(id); res.add(doc.get("filename")); } reader.close(); return res; } catch (ParseException e) { log(e.getMessage()); } catch (IOException e) { log(e.getMessage()); } return res; }
/** Smoke test: multi-field search over title/content against a local on-disk index. */
@Test
public void search() throws IOException {
  String[] q = {"title", "content"};
  String filePath = "e:/elewordIndex/LuceneArticle";
  Directory dir = FSDirectory.open(new File(filePath));
  // NOTE(review): reader/dir are never closed — acceptable in a test, but verify.
  IndexReader reader = DirectoryReader.open(dir);
  IndexSearcher search = new IndexSearcher(reader);
  // Query query =SearchHelper.makeQuery("content", "网络", 0.3f);
  Query query = SearchHelper.makeMultiQueryFiled(q, "顶顶", 0.8f);
  TopDocs topDocs = search.search(query, 20);
  ScoreDoc[] scoreDocs = topDocs.scoreDocs;
  System.out.println("共:" + topDocs.totalHits + "条结果");
  // Print id/title of every hit.
  for (ScoreDoc doc : scoreDocs) {
    int docId = doc.doc;
    Document document = search.doc(docId);
    String id = document.get("id");
    String title = document.get("title");
    System.out.println("------------------------------------------------------------------");
    System.out.println("id=" + id + " title=" + title);
  }
}
/**
 * Command-line demo: prints up to five spelling suggestions for a word using a
 * previously built spell-checker index.
 *
 * <p>Usage: {@code java lia.tools.SpellCheckerTest SpellCheckerIndexDir wordToRespell}
 */
public static void main(String[] args) throws IOException {
  if (args.length != 2) {
    System.out.println(
        "Usage: java lia.tools.SpellCheckerTest SpellCheckerIndexDir wordToRespell");
    System.exit(1);
  }
  String spellCheckDir = args[0];
  String wordToRespell = args[1];
  Directory dir = FSDirectory.open(new File(spellCheckDir));
  if (!IndexReader.indexExists(dir)) {
    System.out.println(
        "\nERROR: No spellchecker index at path \""
            + spellCheckDir
            + "\"; please run CreateSpellCheckerIndex first\n");
    System.exit(1);
  }
  SpellChecker spell = new SpellChecker(dir); // #A
  spell.setStringDistance(new LevensteinDistance()); // #B
  // spell.setStringDistance(new JaroWinklerDistance());
  String[] suggestions = spell.suggestSimilar(wordToRespell, 5); // #C
  System.out.println(suggestions.length + " suggestions for '" + wordToRespell + "':");
  for (String suggestion : suggestions) System.out.println(" " + suggestion);
}
/**
 * One-time setup of this indexer's on-disk state: resolves the index directory (per
 * project when one is set), opens the Lucene directory, and initializes analyzer,
 * listener and bookkeeping structures.
 *
 * @throws IOException if the index root cannot be created or opened
 */
@SuppressWarnings("PMD.CollapsibleIfStatements")
private void initialize() throws IOException {
  synchronized (this) {
    RuntimeEnvironment env = RuntimeEnvironment.getInstance();
    File indexDir = new File(env.getDataRootFile(), INDEX_DIR);
    if (project != null) {
      indexDir = new File(indexDir, project.getPath());
    }
    if (!indexDir.exists() && !indexDir.mkdirs()) {
      // to avoid race conditions, just recheck..
      if (!indexDir.exists()) {
        throw new FileNotFoundException(
            "Failed to create root directory [" + indexDir.getAbsolutePath() + "]");
      }
    }
    // Skip Lucene's file locking when the configuration says it is not needed.
    if (!env.isUsingLuceneLocking()) {
      lockfact = NoLockFactory.INSTANCE;
    }
    indexDirectory = FSDirectory.open(indexDir.toPath(), lockfact);
    ignoredNames = env.getIgnoredNames();
    includedNames = env.getIncludedNames();
    analyzerGuru = new AnalyzerGuru();
    if (env.isGenerateHtml()) {
      xrefDir = new File(env.getDataRootFile(), "xref");
    }
    listeners = new ArrayList<>();
    // The "dirty" marker file records whether the index has uncommitted changes.
    dirtyFile = new File(indexDir, "dirty");
    dirty = dirtyFile.exists();
    directories = new ArrayList<>();
  }
}
/**
 * Runs a product-name search against the local index and loads the proposals that
 * reference each matching product into the proposal controller.
 *
 * @param searchString the user's Lucene query string
 * @throws IOException if the index cannot be opened
 */
public void startSearch(String searchString) throws IOException {
  /*analyze(searchString);*/
  try {
    Directory directory = FSDirectory.open(new File(".//Index")); // where the index lives
    IndexSearcher is = new IndexSearcher(directory); // search object
    QueryParser parser =
        new QueryParser(
            Version.LUCENE_31,
            "name",
            new RussianAnalyzer(Version.LUCENE_31)); // search field + analyzer
    /* String str1 = "фотоаппарат"; String str2 = "телевизор"; String str3 = "SONY"; String total = "(" + str1 + " OR " + str2 + ")" + " AND " + str3; System.out.println(total);*/
    Query query = parser.parse(searchString); // what we are searching for
    TopDocs results =
        is.search(
            query,
            null,
            10); // run the search limited to 10 documents; results holds the hits
    System.out.println(
        "getMaxScore()="
            + results.getMaxScore()
            + " totalHits="
            + results
                .totalHits); // MaxScore - best (highest-priority) result, totalHits - number of
    // documents found
    /*proposalController.getProposalList().clear();*/
    for (ScoreDoc hits : results.scoreDocs) { // walk the hits
      Document doc = is.doc(hits.doc); // fetch the document by its doc reference
      for (Proposal proposal :
          proposalFacade.findPropolsalsByProduct(Long.valueOf(doc.get("recid")))) {
        proposalController.getProposalList().add(proposal);
        _log.info(
            "Предложение найдено:"
                + proposal.getRecid().toString()
                + ",Товар: "
                + doc.get("recid")
                + ", "
                + doc.get("name"));
      }
      /*System.out.println("doc="+hits.doc+" score="+hits.score);//выводим спец сылку doc + приоритет
      addMessage(doc.get("id") + " | " + doc.get("recid") + " | " + doc.get("name"));//выводим поля найденного документа*/
    }
    directory.close();
  } catch (ParseException e) {
    e.printStackTrace();
  } catch (IOException e) {
    e.printStackTrace();
  }
  addMessage("Поиск выполнен");
}
/**
 * Private singleton constructor: opens the on-disk Lucene index used for library search.
 *
 * <p>NOTE(review): despite the name, {@code ramDir} holds an FSDirectory here, not a
 * RAMDirectory. On failure the error is only logged, leaving the field null — verify
 * that callers tolerate that.
 */
private LibrarySearchOperationsImpl() {
  try {
    ramDir = FSDirectory.open(new File(INDEX_LOCATION));
  } catch (IOException ioe) {
    log.error("Could not initialize index location! ", ioe);
  }
}