/** * 在所有的字段中搜索,如果是坐标的话,需要加上*,才能匹配到,这里采用标准方式获取的集合范围太大了,分词不是很准确 * * @param queryString 关键字 * @throws Exception */ @Deprecated public static void searchList(String queryString) throws Exception { // 查询的字符串:输入不存在的字符串是查询不到的,如:中国 // 查询字段集合 String[] queryFileds = {"name", "city", "address", "eastNew", "northNew", "geom", "phone"}; IndexSearcher searcher = LuceneUtils.createIndexSearcher(); Query query = LuceneUtils.createQuery(queryFileds, queryString); // 在搜索器中进行查询 // 对查询内容进行过滤 Filter filter = null; // 一次在索引器查询多少条数据 int queryCount = 100; TopDocs results = searcher.search(query, filter, queryCount); System.out.println("总符合: " + results.totalHits + "条数!"); // 显示记录 for (ScoreDoc sr : results.scoreDocs) { // 文档编号 int docID = sr.doc; // 真正的内容 Document doc = searcher.doc(docID); System.out.println("name = " + doc.get("name")); System.out.println("address = " + doc.get("address")); System.out.println("eastNew = " + doc.get("eastNew")); System.out.println("northNew = " + doc.get("northNew")); System.out.println("geom = " + doc.get("geom")); System.out.println("phone = " + doc.get("phone")); } }
/**
 * Searches for documents by word content.
 *
 * <p>A phrase query is tried first. Only when that result is empty according to
 * {@code CollectionUtils.isEmpty} is a fuzzy query for the same content executed instead.
 *
 * @param content the content used to search.
 * @return the documents found by the phrase query, or otherwise by the fuzzy query.
 * @throws java.io.IOException if the path to the lucene index is incorrect.
 */
public static List<Document> performSearchByContent(String content) throws IOException {
    List<Document> phraseMatches =
            performSearch(QueryUtils.buildPhraseQuery(content), LuceneUtils.getLuceneSearcher());
    if (!CollectionUtils.isEmpty(phraseMatches)) {
        return phraseMatches;
    }
    // Nothing matched the exact phrase: fall back to a fuzzy query.
    return performSearch(QueryUtils.buildFuzzyQuery(content), LuceneUtils.getLuceneSearcher());
}
/* * 复合条件查询,即 and or 等 BooleanClause.Occur.MUST表示and * BooleanClause.Occur.MUST_NOT表示not BooleanClause.Occur.SHOULD表示or. */ public static void searchQuery(String[] queries, String[] fields) throws Exception { IndexSearcher searcher = LuceneUtils.createIndexSearcher(); // String[] queries = { "南城","网吧"}; // String[] fields = { "name","city"}; BooleanClause.Occur[] clauses = {BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD}; Query query = MultiFieldQueryParser.parse( Version.LUCENE_36, queries, fields, clauses, new StandardAnalyzer(Version.LUCENE_36)); TopDocs results = searcher.search(query, null, 100); System.out.println("总符合: " + results.totalHits + "条数!"); // 显示记录 for (ScoreDoc sr : results.scoreDocs) { // 文档编号 int docID = sr.doc; // 真正的内容 Document doc = searcher.doc(docID); System.out.println("name = " + doc.get("name")); System.out.println("address = " + doc.get("address")); System.out.println("city = " + doc.get("city")); System.out.println("lnglat = " + doc.get("lnglat")); } }
/** * 在所有的字段中搜索,如果是坐标的话,需要加上*,才能匹配到 * * @param queryString 关键字 * @throws Exception */ public static void searchListIKWithExp(String queryString) throws Exception { // 查询的字符串:输入不存在的字符串是查询不到的,如:中国 // 查询字段集合 IndexSearcher searcher = LuceneUtils.createIndexSearcher(); // 这里使用的是IK的 Query query = IKQueryParser.parse(queryString); System.out.println(query); // 在搜索器中进行查询 // 对查询内容进行过滤 Filter filter = null; // 一次在索引器查询多少条数据 int queryCount = 100; TopDocs results = searcher.search(query, filter, queryCount); System.out.println("总符合: " + results.totalHits + "条数!"); // 显示记录 for (ScoreDoc sr : results.scoreDocs) { // 文档编号 int docID = sr.doc; // 真正的内容 Document doc = searcher.doc(docID); System.out.println("name = " + doc.get("name")); System.out.println("address = " + doc.get("address")); System.out.println("eastNew = " + doc.get("eastNew")); System.out.println("northNew = " + doc.get("northNew")); System.out.println("geom = " + doc.get("geom")); System.out.println("phone = " + doc.get("phone")); System.out.println("id = " + doc.get("id")); System.out.println("datatype = " + doc.get("datatype")); System.out.println("dataTypeKey = " + doc.get("dataTypeKey")); } }
/**
 * Builds the analysis chain for a field: whitespace tokenization, punctuation trimming,
 * {@code 's} rewriting, stop-word removal, lower-casing and ASCII folding, in that order.
 *
 * @param fieldName the field being analyzed (not used by this chain)
 * @param reader the source of the text to tokenize
 * @return the fully wrapped token stream
 */
@SuppressWarnings("deprecation")
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
    Tokenizer whitespaceTokens = new WhitespaceTokenizer(LuceneUtils.getVersion(), reader);

    // Keep only the middle capture group, i.e. drop punctuation at both ends of a token.
    TokenStream trimmed =
            new PatternReplaceFilter(
                    whitespaceTokens,
                    Pattern.compile("^([\\.!\\?,:;\"'\\(\\)]*)(.*?)([\\.!\\?,:;\"'\\(\\)]*)$"),
                    "$2",
                    true);

    // Rewrite "'s" to "s" inside tokens.
    TokenStream possessivesRewritten =
            new PatternReplaceFilter(trimmed, Pattern.compile("'s"), "s", true);

    // Stop words are removed before lower-casing, against the case-sensitive stop set.
    TokenStream withoutStopWords =
            new StopFilter(false, possessivesRewritten, LuceneUtils.caseSensitiveStopSet);

    TokenStream lowerCased = new LowerCaseFilter(LuceneUtils.getVersion(), withoutStopWords);
    return new ASCIIFoldingFilter(lowerCased);
}
/**
 * Perform search for word by id. (word description)
 *
 * <p>The id is trimmed and lower-cased before it is used as the term for the
 * {@code LuceneSearchFields.ID} field.
 *
 * @param id the id to search; must not be {@code null}.
 * @return the returned document.
 * @throws java.io.IOException the IOException.
 */
public static List<Document> performSearchById(String id) throws IOException {
    // Locale.ROOT keeps the normalization independent of the JVM default locale; a plain
    // toLowerCase() maps 'I' to a dotless 'ı' under e.g. a Turkish locale and would then
    // produce a term that does not match.
    String normalizedId = id.trim().toLowerCase(java.util.Locale.ROOT);
    TermQuery query = new TermQuery(new Term(LuceneSearchFields.ID, normalizedId));
    return performSearch(query, LuceneUtils.getLuceneSearcher());
}
/**
 * Searches for documents by word content using a phrase query against the index located
 * under the given base directory.
 *
 * <p>The index location is {@code luceneDirPath} with {@code PACKAGE_PATH} appended.
 *
 * @param content the content used to search.
 * @param luceneDirPath the lucene index path.
 * @return a list of found documents.
 * @throws java.io.IOException if the path to the lucene index is incorrect.
 */
public static List<Document> performSearchByContent(String content, String luceneDirPath)
        throws IOException {
    return performSearch(
            QueryUtils.buildPhraseQuery(content),
            LuceneUtils.getLuceneSearcher(luceneDirPath + PACKAGE_PATH));
}
/**
 * Runs the given query against the searcher returned by
 * {@code LuceneUtils.getLuceneSearcher()}.
 *
 * @param query the query used to perform the search.
 * @return a list of found documents.
 * @throws java.io.IOException if the default path to the index is wrong.
 */
public static List<Document> performSearch(Query query) throws IOException {
    // Delegate to the two-argument overload with the default searcher.
    return performSearch(
            query,
            LuceneUtils.getLuceneSearcher());
}