/**
 * Points this instance at the file-system index stored in {@code indexDirectory}, creating the
 * index first when none exists there yet.
 *
 * <p>The existence check uses its own short-lived directory handle, which is closed before the
 * handle that ends up in {@code this.directory} is opened. Previously the probing handle was
 * simply dropped without being closed.
 *
 * @param indexDirectory path of the index directory
 * @throws Exception if the directory cannot be opened or the index cannot be created
 */
public void useFSDirectory(final String indexDirectory) throws Exception {
  final File indexLocation = new File(indexDirectory);

  final boolean indexPresent;
  final FSDirectory probe = FSDirectory.open(indexLocation);
  try {
    indexPresent = IndexReader.indexExists(probe);
  } finally {
    probe.close();
  }

  if (!indexPresent) {
    this.createIndexDirectory(indexDirectory);
  }
  this.directory = FSDirectory.open(indexLocation);
}
/**
 * Opens the main index and the suggestion index after construction and initializes the writer,
 * reader, searcher, query parser and suggester fields from them.
 *
 * @throws Exception if an index cannot be opened, committed or built
 */
@PostConstruct
public void createOrVerifyIndex() throws Exception {
  LOGGER.info("Initializing Index..........................please Wait..0%");

  // Resolve both index locations from the application properties and open them without locking.
  index = new File(appproperties.getLuceneIndexPath());
  suggest = new File(appproperties.getLiceneSuggestIndexPath());
  directory = FSDirectory.open(index, NoLockFactory.getNoLockFactory());
  suggestDirectory = FSDirectory.open(suggest, NoLockFactory.getNoLockFactory());

  // The writer commits immediately after being opened, before the reader is opened.
  iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
  writer = new IndexWriter(directory, iwc);
  writer.commit();

  indexReader = DirectoryReader.open(directory);
  indexSearcher = new IndexSearcher(indexReader, executorService);

  final String[] searchedFields = new String[] {TITLE_FIELD, CONTENTS_FIELD};
  parser = new MultiFieldQueryParser(searchedFields, analyzer);

  suggester =
      new AnalyzingInfixSuggester(
          Version.LATEST,
          suggestDirectory,
          analyzer,
          analyzer,
          AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS);

  // Build an (empty) suggestion index only when nothing exists at the suggest location.
  final boolean suggestMissing = !(suggest.exists() || suggest.isDirectory());
  if (suggestMissing) {
    LOGGER.info(
        "Lucene Suggest did not exist.........................................Building Please wait.........0%");
    suggester.build(new IndexFileIterator(new ArrayList<IndexFile>().iterator()));
    suggester.refresh();
    LOGGER.info(
        "Lucene Suggest Build Complete...................................................................100%");
  }

  LOGGER.info("Lucene Ready............................................100%");
}
/**
 * Opens the indexes stored at two locations and compares them, pairing documents through a key
 * field that is expected to hold a unique value per document.
 *
 * @param indexDir1 path of the first index
 * @param indexDir2 path of the second index
 * @param keyFieldName name of the field holding the unique key
 * @return result of the compare
 * @throws IOException problems accessing indexes
 * @throws ParseException problems parsing query
 */
public Diff<Document, Diff<Fieldable, DocumentDiff>> compare(
    String indexDir1, String indexDir2, String keyFieldName) throws IOException, ParseException {
  // Open the first index completely before touching the second one.
  IndexReader firstReader = IndexReader.open(FSDirectory.open(new File(indexDir1)));
  IndexReader secondReader = IndexReader.open(FSDirectory.open(new File(indexDir2)));

  // Delegate to the reader-based overload.
  return compare(firstReader, secondReader, keyFieldName);
}
private RAMDirectory(FSDirectory dir, boolean closeDir, IOContext context) throws IOException { this(); for (String file : dir.listAll()) { if (!Files.isDirectory(dir.getDirectory().resolve(file))) { copyFrom(dir, file, file, context); } } if (closeDir) { dir.close(); } }
/** * Creates an FSDirectory in provided directory and initializes an index if not already existing. * * @param indexDir the directory where to write a new index * @param properties the configuration properties * @return the created {@code FSDirectory} instance * @throws IOException if an error */ public static FSDirectory createFSIndex(File indexDir, Properties properties) throws IOException { LockFactory lockFactory = createLockFactory(indexDir, properties); FSDirectoryType fsDirectoryType = FSDirectoryType.getType(properties); FSDirectory fsDirectory = fsDirectoryType.getDirectory(indexDir, null); // must use the setter (instead of using the constructor) to set the lockFactory, or Lucene will // throw an exception if it's different than a previous setting. fsDirectory.setLockFactory(lockFactory); log.debug("Initialize index: '{}'", indexDir.getAbsolutePath()); initializeIndexIfNeeded(fsDirectory); return fsDirectory; }
/**
 * Searches the index for the given image.
 *
 * <p>The image is first searched in the reference-object index ({@code indexPath + "-ro"}); the
 * {@code ro-id} values of the returned hits are joined into a space-separated string, which is
 * then scored against the main index at {@code indexPath}.
 *
 * <p>Both index readers are closed before this method returns, and at most as many hits as were
 * actually returned are read.
 *
 * @param img the query image
 * @param indexPath path of the main index; the reference-object index is expected next to it
 *     with the suffix {@code -ro}
 * @return the scored documents of the main index
 * @throws IOException if one of the indexes cannot be read
 */
public TopDocs search(BufferedImage img, String indexPath) throws IOException {
  ImageSearcher searcher =
      new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);
  StringBuilder sb = new StringBuilder(numReferenceObjectsUsed * 4);

  DirectoryReader roReader = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-ro")));
  try {
    ImageSearchHits hits = searcher.search(img, roReader);
    // The reference index may hold fewer documents than numReferenceObjectsUsed.
    int available = Math.min(numReferenceObjectsUsed, hits.length());
    for (int j = 0; j < available; j++) {
      sb.append(hits.doc(j).getValues("ro-id")[0]);
      sb.append(' ');
    }
  } finally {
    roReader.close();
  }

  DirectoryReader mainReader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
  try {
    return scoreDocs(sb.toString(), mainReader);
  } finally {
    mainReader.close();
  }
}
/** * We assume that the initial indexing has been done and a set of reference objects has been found * and indexed in the separate directory. However further documents were added and they now need * to get a ranked list of reference objects. So we (i) get all these new documents missing the * field "ro-order" and (ii) add this field. * * @param indexPath the index to update * @throws IOException */ public void updateIndex(String indexPath) throws IOException { IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath))); int numDocs = reader.numDocs(); boolean hasDeletions = reader.hasDeletions(); int countUpdated = 0; IndexReader readerRo = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-ro"))); ImageSearcher searcher = new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName); Map<String, Analyzer> perField = new HashMap<String, Analyzer>(1); perField.put("ro-order", new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION)); PerFieldAnalyzerWrapper aWrapper = new PerFieldAnalyzerWrapper(new SimpleAnalyzer(LuceneUtils.LUCENE_VERSION), perField); IndexWriter iw = new IndexWriter( FSDirectory.open(new File(indexPath)), new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, aWrapper) .setOpenMode(IndexWriterConfig.OpenMode.CREATE)); StringBuilder sb = new StringBuilder(256); // Needed for check whether the document is deleted. Bits liveDocs = MultiFields.getLiveDocs(reader); for (int i = 0; i < numDocs; i++) { if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it. Document document = reader.document(i); if (document.getField("ro-order") == null) { // if the field is not here we create it. 
ImageSearchHits hits = searcher.search(document, readerRo); sb.delete(0, sb.length()); for (int j = 0; j < numReferenceObjectsUsed; j++) { sb.append(hits.doc(j).getValues("ro-id")[0]); sb.append(' '); } // System.out.println(sb.toString()); document.add(new TextField("ro-order", sb.toString(), Field.Store.YES)); iw.updateDocument( new Term( DocumentBuilder.FIELD_NAME_IDENTIFIER, document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]), document); countUpdated++; } // progress report progress.setNumDocsProcessed(progress.getNumDocsProcessed() + 1); // debug: System.out.println("countUpdated = " + countUpdated); } iw.commit(); iw.close(); }
public void stop() { @SuppressWarnings("unused") int readCurrentState = current; // Another unneeded value, to ensure visibility of state protected by memory // barrier timer.cancel(); task.stop(); try { directory.close(); } catch (Exception e) { log.unableToCloseLuceneDirectory(directory.getDirectory(), e); } }
/**
 * Merges all source indexes belonging to partition {@code partId} into a freshly created index
 * under {@code outputPath}.
 *
 * <p>Sources are the partition's index below {@code minorPath} (when present) plus, for every
 * version in {@code (localIndexVer, maxVersion]}, the per-user indexes below {@code inputPath}
 * whose name maps to {@code partId}. Entries named {@code "built"} are ignored.
 *
 * <p>All source readers are closed when the method returns. The writer is closed after a
 * successful merge and rolled back otherwise, so a failed merge neither commits a partial index
 * nor leaves the writer open.
 *
 * @param minorPath root of the minor index, may be {@code null}
 * @param inputPath root holding one sub-path per index version
 * @param outputPath root below which the merged partition index is written
 * @param partId the partition being merged
 * @param localIndexVer last version already merged locally (exclusive lower bound)
 * @param maxVersion newest version to merge (inclusive upper bound)
 * @param partNo total number of partitions, used to map a user to a partition
 * @throws CorruptIndexException if a source index is corrupt
 * @throws IOException if an index cannot be read or written
 */
private void mergeIndexByPart(
    Path minorPath,
    Path inputPath,
    Path outputPath,
    int partId,
    int localIndexVer,
    int maxVersion,
    int partNo)
    throws CorruptIndexException, IOException {
  List<IndexReader> mergeIndexArray = new ArrayList<IndexReader>();
  boolean merged = false;
  try {
    // NOTE(review): existence is checked for "<partId>" but the index is opened from
    // "<PART_PRE><partId>"; confirm that both names are meant to differ.
    if (minorPath != null
        && PathUtil.exists(minorPath)
        && PathUtil.exists(minorPath.cat(partId + ""))) {
      mergeIndexArray.add(
          IndexReader.open(
              FSDirectory.open(
                  minorPath
                      .cat(MailConstants.PART_PRE + partId)
                      .cat(IndexBuilder.LUCENE_INDEX_DIR)
                      .asFile())));
    }

    for (int i = localIndexVer + 1; i <= maxVersion; i++) {
      Path segPath = inputPath.cat(i + "");
      // Check existence before listing instead of once per listed entry afterwards.
      if (!PathUtil.exists(segPath)) {
        continue;
      }
      for (Path userPath : segPath.listPathes()) {
        if (userPath.getName().equals("built")) {
          continue;
        }
        if (LSUtils.genPartId(userPath.getName(), partNo) == partId) {
          mergeIndexArray.add(
              IndexReader.open(
                  FSDirectory.open(userPath.cat(IndexBuilder.LUCENE_INDEX_DIR).asFile())));
        }
      }
    }

    IndexWriter indexWriter =
        new IndexWriter(
            FSDirectory.open(
                outputPath
                    .cat(MailConstants.PART_PRE + partId)
                    .cat(IndexBuilder.LUCENE_INDEX_DIR)
                    .asFile()),
            new IKAnalyzer(true),
            true,
            IndexWriter.MaxFieldLength.LIMITED);
    boolean added = false;
    try {
      indexWriter.setMaxMergeDocs(1024);
      indexWriter.setMergeFactor(100);
      indexWriter.addIndexes(mergeIndexArray.toArray(new IndexReader[0]));
      added = true;
    } finally {
      if (added) {
        indexWriter.close();
      } else {
        indexWriter.rollback();
      }
    }
    merged = true;
  } finally {
    // Close every source reader; report a close failure only when it would not hide the
    // exception that is already propagating.
    IOException closeFailure = null;
    for (IndexReader source : mergeIndexArray) {
      try {
        source.close();
      } catch (IOException e) {
        if (closeFailure == null) {
          closeFailure = e;
        }
      }
    }
    if (merged && closeFailure != null) {
      throw closeFailure;
    }
  }
}
/**
 * Deletes every document whose {@code FIELD_LABEL_ID} term equals {@code id} and commits.
 *
 * <p>The index directory is created when missing. The writer starts a new index when the
 * directory is empty and appends otherwise. Writer and directory are both closed on every path;
 * previously the directory was never closed.
 *
 * @param id value of the id term to delete
 * @throws Exception if the index cannot be opened or modified
 */
@Override
public void deleteItem(String id) throws Exception {
  File indexDir = new File(getIndexPath());
  boolean create = true;
  if (!indexDir.exists()) {
    indexDir.mkdirs();
  } else {
    // list() returns null when the path cannot be listed (e.g. it is not a directory).
    String[] entries = indexDir.list();
    create = entries == null || entries.length == 0;
  }

  IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_10_2, analyzer);
  iwc.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);

  try (Directory dir = FSDirectory.open(indexDir);
      IndexWriter writer = new IndexWriter(dir, iwc)) {
    writer.deleteDocuments(new Term(FIELD_LABEL_ID, id));
    writer.commit();
  }
}
public void testRerankFilters() throws IOException { int queryDocID = (int) (Math.random() * 10000); IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("index-large"))); // select one feature for the large index: int featureIndex = 4; int count = 0; long ms = System.currentTimeMillis(); ImageSearchHits hits = searchers[featureIndex].search(reader.document(queryDocID), reader); RerankFilter rerank = new RerankFilter(featureClasses[0], DocumentBuilder.FIELD_NAME_CEDD); LsaFilter lsa = new LsaFilter(featureClasses[0], DocumentBuilder.FIELD_NAME_CEDD); FileUtils.saveImageResultsToPng( "GeneralTest_rerank_0_old", hits, reader.document(queryDocID).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]); hits = rerank.filter(hits, reader.document(queryDocID)); FileUtils.saveImageResultsToPng( "GeneralTest_rerank_1_new", hits, reader.document(queryDocID).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]); hits = lsa.filter(hits, reader.document(queryDocID)); FileUtils.saveImageResultsToPng( "GeneralTest_rerank_2_lsa", hits, reader.document(queryDocID).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]); }
/**
 * Initializes the plugin: determines the index directories (falling back to the Lucene work
 * directory when none are configured), opens the first of them as a file-system directory and
 * continues with the directory-based initialization.
 *
 * @param context the current XWiki context
 * @throws RuntimeException if the index directory cannot be opened
 */
@Override
public synchronized void init(XWikiContext context) {
  LOGGER.debug("Lucene plugin: in init");

  this.indexDirs = context.getWiki().Param(PROP_INDEX_DIR);
  if (StringUtils.isEmpty(this.indexDirs)) {
    this.indexDirs = getLuceneWorkDirectory().getAbsolutePath();
  }

  // Only the first entry of the comma-separated list is opened here.
  File primaryIndexDir = new File(StringUtils.split(this.indexDirs, ",")[0]);
  if (!primaryIndexDir.exists()) {
    primaryIndexDir.mkdirs();
  }

  Directory directory;
  try {
    directory = FSDirectory.open(primaryIndexDir);
  } catch (IOException e) {
    LOGGER.error("Failed to open the index directory: ", e);
    throw new RuntimeException(e);
  }

  init(directory, context);
}
private void buildIndex( final List<GeoEntry> geoEntryList, final boolean create, final ProgressCallback progressCallback) { Directory directory; try { directory = FSDirectory.open(Paths.get(indexLocation)); } catch (IOException e) { throw new GeoEntryIndexingException( "Couldn't open the directory for the index, " + indexLocation, e); } // Try-with-resources to ensure the IndexWriter always gets closed. try (final IndexWriter indexWriter = createIndexWriter(create, directory)) { try { indexGeoEntries(indexWriter, geoEntryList, progressCallback); } catch (IOException e) { // Need to roll back here before the IndexWriter is closed at the end of the try // block. indexWriter.rollback(); throw e; } } catch (IOException e) { throw new GeoEntryIndexingException("Error writing to the index.", e); } }
/** Open an IndexWriter, executing error handling as needed. */ private IndexWriter openIndexWriter(File searchIndexPath, boolean create) throws IOException { // NFS doesn't work with Lucene default locking as of Lucene 3.3, so use // SimpleFSLockFactory instead. LockFactory lockFactory = new SimpleFSLockFactory(); FSDirectory fsDirectory = FSDirectory.open(searchIndexPath, lockFactory); IndexWriter indexWriter = null; try { indexWriter = new IndexWriter(fsDirectory, this.retrieveIndexWriterConfig(create)); } catch (LockObtainFailedException e) { logger.warn( "Unable to obtain lock for " + searchIndexPath.getAbsolutePath() + ". Attempting to forcibly unlock the index."); if (IndexWriter.isLocked(fsDirectory)) { try { IndexWriter.unlock(fsDirectory); logger.info( "Successfully unlocked search directory " + searchIndexPath.getAbsolutePath()); } catch (IOException ex) { logger.warn( "Unable to unlock search directory " + searchIndexPath.getAbsolutePath() + " " + ex.toString()); } } } if (indexWriter == null) { // try again, there could have been a stale lock indexWriter = new IndexWriter(fsDirectory, this.retrieveIndexWriterConfig(create)); } return indexWriter; }
/* * index all child directories(only first level directories) in parent directory * and indexed data is stored in the same name source directory */ private long indexDirectories(String parent, String[] dirs, String index, SetupParameters Pa) throws FileHandlerException, IOException { long sumDocs = 0; // index each directory in parent directory for (int i = 0; i < dirs.length; i++) { System.out.println("\t-----FOLDER----- :" + dirs[i].toUpperCase()); String dir_index = index + "/" + dirs[i]; if ((index.endsWith("\\")) || (index.endsWith("/"))) { dir_index = index + dirs[i]; } Directory di = FSDirectory.getDirectory(new File(dir_index), true); Pa.setDir(di); Pa.setWriter(new IndexWriter(Pa.getDir(), Pa.getAnalyzer(), true)); // //get name of directory contains website to index // int begin=dirs[i].lastIndexOf("\\"); // if(begin==-1) begin=dirs[i].lastIndexOf("/"); // int end=dirs[i].length()-1; // String dir_site=dirs[i].substring(begin, end); this.index(dirs[i].toLowerCase(), Pa.getWriter(), new File(parent + "\\" + dirs[i])); Pa.getWriter().optimize(); Pa.getWriter().close(); IndexReader reader = Pa.getReader().open(Pa.getDir()); sumDocs += reader.numDocs(); reader.close(); } return sumDocs; }
/**
 * Creates a writer for a new index stored at {@code "output/" + filename}. The writer uses a
 * standard analyzer, a 100 MB RAM buffer and open mode {@code CREATE}.
 *
 * @param filename name of the index directory below {@code output/}
 * @return the opened writer
 * @throws IOException if the directory or the writer cannot be opened
 */
static IndexWriter createWriter(String filename) throws IOException {
  final IndexWriterConfig config =
      new IndexWriterConfig(Version.LUCENE_48, new StandardAnalyzer(Version.LUCENE_48));
  config.setRAMBufferSizeMB(100);
  config.setOpenMode(OpenMode.CREATE);

  final File target = new File("output/" + filename);
  return new IndexWriter(FSDirectory.open(target), config);
}
/** This function is only for test search. */ public static List<String> searchQuery( String indexDir, String queryString, int numResults, CharArraySet stopwords) { String field = "contents"; List<String> hitPaths = new ArrayList<String>(); try { IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexDir))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new MyAnalyzer(Version.LUCENE_44, stopwords); QueryParser parser = new QueryParser(Version.LUCENE_44, field, analyzer); Query query; query = parser.parse(QueryParser.escape(queryString)); TopDocs results = searcher.search(query, null, numResults); for (ScoreDoc hit : results.scoreDocs) { String path = searcher.doc(hit.doc).get("path"); hitPaths.add(path.substring(0, path.length() - 4)); // chop off the file extension (".txt") } } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } catch (ParseException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } return hitPaths; }
/** * give the id list of sentences, from Lucene index * * @param input input word * @param catalogName catalog (domain) name which we'd like to search in * @param limit how many hits are needed (0 means all) */ public List<String> query(String input, String catalogName, int limit) { List<String> res = new ArrayList<String>(); try { catalog c = catalogs.get(catalogName); IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(c.indexPath))); IndexSearcher searcher = new IndexSearcher(reader); QueryParser parser = new QueryParser("contents", analyzer); Query query = parser.parse(QueryParser.escape(input)); int n = limit > 0 ? limit : searcher.count(query); if (n == 0) n = 1; TopDocs results = searcher.search(query, n); int endPos = limit; if (limit != 0) endPos = Math.min(results.totalHits, limit); // 1st n hits else endPos = results.totalHits; // all hits for (int i = 0; i < endPos; i++) { int id = results.scoreDocs[i].doc; Document doc = searcher.doc(id); res.add(doc.get("filename")); } reader.close(); return res; } catch (ParseException e) { log(e.getMessage()); } catch (IOException e) { log(e.getMessage()); } return res; }
/**
 * Adds the given value to the index through {@code addDoc}.
 *
 * <p>The index directory is created when missing. The writer starts a new index when the
 * directory is empty and appends otherwise. Writer and directory are both closed on every path;
 * previously the directory was never closed.
 *
 * @param indexingValue the value to index
 * @throws Exception if the index cannot be opened or written
 */
public void writeIndex(IndexingValue indexingValue) throws Exception {
  File indexDir = new File(getIndexPath());
  boolean create = true;
  if (!indexDir.exists()) {
    indexDir.mkdirs();
  } else {
    // list() returns null when the path cannot be listed (e.g. it is not a directory).
    String[] entries = indexDir.list();
    create = entries == null || entries.length == 0;
  }

  IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_10_2, analyzer);
  iwc.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);

  try (Directory dir = FSDirectory.open(indexDir);
      IndexWriter writer = new IndexWriter(dir, iwc)) {
    addDoc(writer, indexingValue);
  }
}
public LuceneSearch() { String dvnIndexLocation = System.getProperty("dvn.index.location"); // System.out.println("INDEX LOCATION " + dvnIndexLocation); File locationDirectory = null; if (dvnIndexLocation != null) { locationDirectory = new File(dvnIndexLocation); if (locationDirectory.exists() && locationDirectory.isDirectory()) { indexDir = dvnIndexLocation + "/index-dir"; // System.out.println("INDEX " + indexDir); } } // System.out.println("INDEX DEFAULT " + indexDir); String dvnMaxClauseCountStr = System.getProperty("dvn.search.maxclausecount"); if (dvnMaxClauseCountStr != null) { try { dvnMaxClauseCount = Integer.parseInt(dvnMaxClauseCountStr); } catch (Exception e) { e.printStackTrace(); dvnMaxClauseCount = 1024; } } try { dir = FSDirectory.getDirectory(indexDir, false); r = IndexReader.open(dir); searcher = new IndexSearcher(r); } catch (IOException ex) { ex.printStackTrace(); } }
@SuppressWarnings("PMD.CollapsibleIfStatements") private void initialize() throws IOException { synchronized (this) { RuntimeEnvironment env = RuntimeEnvironment.getInstance(); File indexDir = new File(env.getDataRootFile(), INDEX_DIR); if (project != null) { indexDir = new File(indexDir, project.getPath()); } if (!indexDir.exists() && !indexDir.mkdirs()) { // to avoid race conditions, just recheck.. if (!indexDir.exists()) { throw new FileNotFoundException( "Failed to create root directory [" + indexDir.getAbsolutePath() + "]"); } } if (!env.isUsingLuceneLocking()) { lockfact = NoLockFactory.INSTANCE; } indexDirectory = FSDirectory.open(indexDir.toPath(), lockfact); ignoredNames = env.getIgnoredNames(); includedNames = env.getIncludedNames(); analyzerGuru = new AnalyzerGuru(); if (env.isGenerateHtml()) { xrefDir = new File(env.getDataRootFile(), "xref"); } listeners = new ArrayList<>(); dirtyFile = new File(indexDir, "dirty"); dirty = dirtyFile.exists(); directories = new ArrayList<>(); } }
@Test public void search() throws IOException { String[] q = {"title", "content"}; String filePath = "e:/elewordIndex/LuceneArticle"; Directory dir = FSDirectory.open(new File(filePath)); IndexReader reader = DirectoryReader.open(dir); IndexSearcher search = new IndexSearcher(reader); // Query query =SearchHelper.makeQuery("content", "网络", 0.3f); Query query = SearchHelper.makeMultiQueryFiled(q, "顶顶", 0.8f); TopDocs topDocs = search.search(query, 20); ScoreDoc[] scoreDocs = topDocs.scoreDocs; System.out.println("共:" + topDocs.totalHits + "条结果"); for (ScoreDoc doc : scoreDocs) { int docId = doc.doc; Document document = search.doc(docId); String id = document.get("id"); String title = document.get("title"); System.out.println("------------------------------------------------------------------"); System.out.println("id=" + id + " title=" + title); } }
/**
 * Opens the index at {@code C:\lucenedata\indexdata} and stores a searcher over it in
 * {@code indexSearcher} before each test.
 *
 * @throws IOException if the index cannot be opened
 */
@Before
public void init() throws IOException {
  File indexLocation = new File("C:\\lucenedata\\indexdata");
  indexSearcher = new IndexSearcher(DirectoryReader.open(FSDirectory.open(indexLocation)));
}
public void computeErrorRate(ImageSearcher searcher, String prefix) throws IOException, InstantiationException, IllegalAccessException { // int maxHits = 10; IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(testIndex))); for (Iterator<String> testIterator = testcases.keySet().iterator(); testIterator.hasNext(); ) { queryImage = testIterator.next(); Document query; if (cutImages) { BufferedImage bimg = ImageUtils.cropImage(ImageIO.read(new FileInputStream(queryImage)), 0, 0, 200, 69); query = builder.createDocument(new FileInputStream(queryImage), queryImage); } else query = builder.createDocument(new FileInputStream(queryImage), queryImage); ImageSearchHits hits = searcher.search(query, reader); // hits = rerank(hits, query, ColorLayout.class, DocumentBuilder.FIELD_NAME_COLORLAYOUT); for (int i = 0; i < hits.length(); i++) { if (hits.doc(i) .get("descriptorImageIdentifier") .toLowerCase() .endsWith(testcases.get(queryImage))) { System.out.println( queryImage.substring(queryImage.lastIndexOf('\\') + 1) + "-" + prefix + " -> Found at rank " + i + " (" + hits.length() + ")"); } } // saveToHtml(queryImage.substring(queryImage.lastIndexOf('\\') + 1) + "-" + prefix, hits, // queryImage); } }
/** * Get the type of the target term representation, query with the suitable input formatted file * and the corresponding index * * @param type * @return HashMap<String,ArrayList<ScoreDoc>> A set of target terms with their extracted * documents * @throws IOException * @throws ParseException */ @Override public HashMap<String, ArrayList<ScoreDoc>> extractDocsByRepresentation() throws IOException, ParseException { String indexName = null, inputFileName = null; indexName = "modernJewishOnly"; m_qg.setType(InputType.Query); inputFileName = "hozOrigQueryAll.txt"; // read the suitable input file LinkedList<Pair<String, String>> queries = new LinkedList<Pair<String, String>>(); BufferedReader reader = new BufferedReader(new FileReader(m_inputDir + inputFileName)); String line = reader.readLine(); while (line != null) { int index = line.indexOf("\t"); queries.add(new Pair<String, String>(line.substring(0, index), line.substring(index + 1))); line = reader.readLine(); } reader.close(); // search for the queries in the index IndexSearcher searcher = new IndexSearcher(IndexReader.open(FSDirectory.open(new File(m_indexDir + indexName)))); HashMap<String, ArrayList<ScoreDoc>> termDocs = new HashMap<String, ArrayList<ScoreDoc>>(); for (Pair<String, String> term : queries) { Query q = m_qg.generate(term.value()); termDocs.put( TargetTerm2Id.getStrDesc(Integer.parseInt(term.key())), new ArrayList<ScoreDoc>(Arrays.asList(searcher.search(q, 1000).scoreDocs))); } return termDocs; }
/**
 * Deletes every document whose {@code FIELD_LABEL_CREATE_USER} term equals the zero-padded
 * creator id and commits.
 *
 * <p>The index directory is created when missing. The writer starts a new index when the
 * directory is empty and appends otherwise. Writer and directory are both closed on every path;
 * previously the directory was never closed.
 *
 * @param creator id of the creating user whose documents are removed
 * @throws Exception if the index cannot be opened or modified
 */
@Override
public void deleteOnCreator(Integer creator) throws Exception {
  File indexDir = new File(getIndexPath());
  boolean create = true;
  if (!indexDir.exists()) {
    indexDir.mkdirs();
  } else {
    // list() returns null when the path cannot be listed (e.g. it is not a directory).
    String[] entries = indexDir.list();
    create = entries == null || entries.length == 0;
  }

  IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_10_2, analyzer);
  iwc.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);

  try (Directory dir = FSDirectory.open(indexDir);
      IndexWriter writer = new IndexWriter(dir, iwc)) {
    writer.deleteDocuments(
        new Term(
            FIELD_LABEL_CREATE_USER,
            StringUtils.zeroPadding(creator, IndexingValue.ID_ZEROPADDING_DIGIT)));
    writer.commit();
  }
}
public static void main(String[] args) throws IOException { if (args.length != 2) { System.out.println( "Usage: java lia.tools.SpellCheckerTest SpellCheckerIndexDir wordToRespell"); System.exit(1); } String spellCheckDir = args[0]; String wordToRespell = args[1]; Directory dir = FSDirectory.open(new File(spellCheckDir)); if (!IndexReader.indexExists(dir)) { System.out.println( "\nERROR: No spellchecker index at path \"" + spellCheckDir + "\"; please run CreateSpellCheckerIndex first\n"); System.exit(1); } SpellChecker spell = new SpellChecker(dir); // #A spell.setStringDistance(new LevensteinDistance()); // #B // spell.setStringDistance(new JaroWinklerDistance()); String[] suggestions = spell.suggestSimilar(wordToRespell, 5); // #C System.out.println(suggestions.length + " suggestions for '" + wordToRespell + "':"); for (String suggestion : suggestions) System.out.println(" " + suggestion); }
/** Generate a spelling suggestion for the definitions stored in defs */ public void createSpellingSuggestions() { IndexReader indexReader = null; SpellChecker checker; try { log.info("Generating spelling suggestion index ... "); indexReader = DirectoryReader.open(indexDirectory); checker = new SpellChecker(spellDirectory); // TODO below seems only to index "defs" , possible bug ? Analyzer analyzer = AnalyzerGuru.getAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(SearchEngine.LUCENE_VERSION, analyzer); iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); checker.indexDictionary(new LuceneDictionary(indexReader, QueryBuilder.DEFS), iwc, false); log.info("done"); } catch (IOException e) { log.log(Level.SEVERE, "ERROR: Generating spelling: {0}", e); } finally { if (indexReader != null) { try { indexReader.close(); } catch (IOException e) { log.log(Level.WARNING, "An error occured while closing reader", e); } } if (spellDirectory != null) { spellDirectory.close(); } } }
public void startSearch(String searchString) throws IOException { /*analyze(searchString);*/ try { Directory directory = FSDirectory.open(new File(".//Index")); // где находится индекс IndexSearcher is = new IndexSearcher(directory); // объект поиска QueryParser parser = new QueryParser( Version.LUCENE_31, "name", new RussianAnalyzer(Version.LUCENE_31)); // поле поиска + анализатор /* String str1 = "фотоаппарат"; String str2 = "телевизор"; String str3 = "SONY"; String total = "(" + str1 + " OR " + str2 + ")" + " AND " + str3; System.out.println(total);*/ Query query = parser.parse(searchString); // что ищем TopDocs results = is.search( query, null, 10); // включаем поиск ограничиваемся 10 документами, results содержит ... System.out.println( "getMaxScore()=" + results.getMaxScore() + " totalHits=" + results .totalHits); // MaxScore - наилучший результат(приоритет), totalHits - количество // найденных документов /*proposalController.getProposalList().clear();*/ for (ScoreDoc hits : results.scoreDocs) { // получаем подсказки Document doc = is.doc(hits.doc); // получаем документ по спец сылке doc for (Proposal proposal : proposalFacade.findPropolsalsByProduct(Long.valueOf(doc.get("recid")))) { proposalController.getProposalList().add(proposal); _log.info( "Предложение найдено:" + proposal.getRecid().toString() + ",Товар: " + doc.get("recid") + ", " + doc.get("name")); } /*System.out.println("doc="+hits.doc+" score="+hits.score);//выводим спец сылку doc + приоритет addMessage(doc.get("id") + " | " + doc.get("recid") + " | " + doc.get("name"));//выводим поля найденного документа*/ } directory.close(); } catch (ParseException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } addMessage("Поиск выполнен"); }
public static void main(String[] args) throws IOException, ParseException { String indexDir = "C:/lucenedir"; Directory directory = FSDirectory.open(Paths.get(indexDir)); IndexReader reader = DirectoryReader.open(directory); IndexSearcher searcher = new IndexSearcher(reader); int day = (int) (new Date().getTime() / Constans.DAY_MILLIS); QueryParser parser = new QueryParser("contents", new StandardAnalyzer()); Query query = parser.parse("java in action"); Query customScoreQuery = new RecencyBoostCustomScoreQuery(query, 2.0, day, 6 * 365, "pubmonthAsDay"); Sort sort = new Sort( new SortField[] { SortField.FIELD_SCORE, new SortField("title2", SortField.Type.STRING) }); TopDocs hits = searcher.search(customScoreQuery, null, Integer.MAX_VALUE, sort, true, false); for (int i = 0; i < hits.scoreDocs.length; i++) { // 两种方式取Document都行,其实searcher.doc内部本质还是调用reader.document // Document doc = reader.document(hits.scoreDocs[i].doc); Document doc = searcher.doc(hits.scoreDocs[i].doc); System.out.println( (1 + i) + ": " + doc.get("title") + ": pubmonth=" + doc.get("pubmonth") + " score=" + hits.scoreDocs[i].score); } reader.close(); directory.close(); }