/** * 在所有的字段中搜索,如果是坐标的话,需要加上*,才能匹配到 * * @param queryString 关键字 * @throws Exception */ public static void searchListIKWithExp(String queryString) throws Exception { // 查询的字符串:输入不存在的字符串是查询不到的,如:中国 // 查询字段集合 IndexSearcher searcher = LuceneUtils.createIndexSearcher(); // 这里使用的是IK的 Query query = IKQueryParser.parse(queryString); System.out.println(query); // 在搜索器中进行查询 // 对查询内容进行过滤 Filter filter = null; // 一次在索引器查询多少条数据 int queryCount = 100; TopDocs results = searcher.search(query, filter, queryCount); System.out.println("总符合: " + results.totalHits + "条数!"); // 显示记录 for (ScoreDoc sr : results.scoreDocs) { // 文档编号 int docID = sr.doc; // 真正的内容 Document doc = searcher.doc(docID); System.out.println("name = " + doc.get("name")); System.out.println("address = " + doc.get("address")); System.out.println("eastNew = " + doc.get("eastNew")); System.out.println("northNew = " + doc.get("northNew")); System.out.println("geom = " + doc.get("geom")); System.out.println("phone = " + doc.get("phone")); System.out.println("id = " + doc.get("id")); System.out.println("datatype = " + doc.get("datatype")); System.out.println("dataTypeKey = " + doc.get("dataTypeKey")); } }
public static void main(String[] args) { String ikQueryExp = "(id='ABcdRf' && date:{'20010101','20110101'} && keyword:'^魔兽中国$') || (content:'魔兽 中国' || ulr='www.ik.com') - name:'林良益'"; // String ikQueryExp = "content:'----' || title:'----' - name:'林良益'"; Query result = IKQueryParser.parse(ikQueryExp); // Query result = IKQueryParser.parse("(newsKeyword='---' || newsTitle:'---' || // newsContent:'---') && newsClass='1'"); System.out.println(result); }
public String getContentByTitle(String title) { if (!(new File(indexDir).exists()) || (new File(indexDir).listFiles().length == 0)) { System.out.println("索引文件夹不存在或文件夹为空~!"); return null; } String content = ""; try { Query query = IKQueryParser.parse("title", title); // System.out.println(query.toString()); TopDocCollector topCollector = new TopDocCollector(1000); IndexSearcher searcher = new IndexSearcher(indexDir); searcher.setSimilarity(new IKSimilarity()); searcher.search(query, topCollector); ScoreDoc[] scoreDocs = topCollector.topDocs().scoreDocs; // System.out.println(scoreDocs.length); // System.out.println(query.toString()); Document doc = null; for (ScoreDoc scoreDoc : scoreDocs) { doc = searcher.doc(scoreDoc.doc); content = doc.get("content"); System.out.println(doc.get("title")); if (doc.get("title").equals(title)) break; } } catch (Exception e) { e.printStackTrace(); } return content; }
@Override public void hooker(String token, Searcher searcher, int cateGory, int dfOfToken) throws IOException { // TODO Auto-generated method stub Query query = IKQueryParser.parse(IndexFields.CONTENTS, token); TreeSet<Term> terms = new TreeSet<Term>(); query.extractTerms(terms); double df = searcher.docFreq(terms.first()); double pt = df / totalTextNumber; double max = Double.MIN_VALUE; double avg = 0; for (int i = 0; i < Constants.CATEGORYS.length; ++i) { double dfInc = searchers[i].docFreq(terms.first()); double ptIfC = dfInc / CategoryTextCount[i]; double MI = log2n(ptIfC / pt); if (MI > max) { max = MI; } avg += MI * CategoryProbabilities[i]; } maxFeatureWeight.add(new FeatureWeight(token, max, dfOfToken)); }
/** * @param key 搜索内容所在的字段名称 * @param value 所要搜索的内容 * @throws CorruptIndexException * @throws IOException * @throws ParseException */ public static void search(String key, String value) throws CorruptIndexException, IOException, ParseException { IndexSearcher searcher; // 创建QueryParser对象,第一个参数表示Lucene的版本,第二个表示搜索Field的字段,第三个表示搜索使用分词器 // Analyzer analyzer = new IKAnalyzer(); //new StandardAnalyzer(Version.LUCENE_36) // QueryParser qp = new QueryParser(Version.LUCENE_36, key,analyzer); searcher = new IndexSearcher(IndexReader.open(FSDirectory.open(new File(PATH)))); searcher.setSimilarity(new IKSimilarity()); Query query = IKQueryParser.parse(key, value); // Query tq = qp.parse(value); TopDocs topDocs = searcher.search(query, 10000); // searcher.search(query, results); // new function ScoreDoc[] scoreDocs = topDocs.scoreDocs; System.out.println("命中:" + topDocs.totalHits); Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>"); Scorer fragmentScorer = new QueryScorer(query); Highlighter highlighter = new Highlighter(formatter, fragmentScorer); Fragmenter fragmenter = new SimpleFragmenter(50); highlighter.setTextFragmenter(fragmenter); Analyzer analyzer = new IKAnalyzer(); for (int i = 0; i < scoreDocs.length; i++) { ScoreDoc scoreDoc = topDocs.scoreDocs[i]; int docSn = scoreDoc.doc; // 文档内部编号 Document doc = searcher.doc(docSn); // 根据文档编号取出文档 hightlightFeild(highlighter, analyzer, doc, "name"); hightlightFeild(highlighter, analyzer, doc, "address"); hightlightFeild(highlighter, analyzer, doc, "datatype"); // scoreDocs[i]. System.out.println("name:" + doc.get("name")); // new function System.out.println("address:" + doc.get("address")); System.out.println("datatype:" + doc.get("datatype")); System.out.println("geom:" + doc.get("geom")); } }
/** 语法解析 */ private void parseSyntax() { for (int i = 0; i < this.elements.size(); i++) { Element e = this.elements.get(i); if ('F' == e.type) { Element e2 = this.elements.get(i + 1); if ('=' != e2.type && ':' != e2.type) { throw new IllegalStateException("表达式异常: = 或 : 号丢失"); } Element e3 = this.elements.get(i + 2); // 处理 = 和 : 运算 if ('\'' == e3.type) { i += 2; if ('=' == e2.type) { TermQuery tQuery = new TermQuery(new Term(e.toString(), e3.toString())); this.querys.push(tQuery); } else if (':' == e2.type) { String keyword = e3.toString(); if (keyword.startsWith("^") && keyword.endsWith("$")) { Query pQuery = this.luceneQueryParse(e.toString(), keyword); this.querys.push(pQuery); } else { Query tQuery = IKQueryParser.parse(e.toString(), e3.toString()); this.querys.push(tQuery); } } } else if ('[' == e3.type || '{' == e3.type) { i += 2; // 处理 [] 和 {} LinkedList<Element> eQueue = new LinkedList<Element>(); eQueue.add(e3); for (i++; i < this.elements.size(); i++) { Element eN = this.elements.get(i); eQueue.add(eN); if (']' == eN.type || '}' == eN.type) { break; } } // 翻译RangeQuery Query rangeQuery = this.toTermRangeQuery(e, eQueue); this.querys.push(rangeQuery); } else { throw new IllegalStateException("表达式异常:匹配值丢失"); } } else if ('(' == e.type) { this.operates.push(e); } else if (')' == e.type) { boolean doPop = true; while (doPop && !this.operates.empty()) { Element op = this.operates.pop(); if ('(' == op.type) { doPop = false; } else { Query q = toQuery(op); this.querys.push(q); } } } else { if (this.operates.isEmpty()) { this.operates.push(e); } else { boolean doPeek = true; while (doPeek && !this.operates.isEmpty()) { Element eleOnTop = this.operates.peek(); if ('(' == eleOnTop.type) { doPeek = false; this.operates.push(e); } else if (compare(e, eleOnTop) == 1) { this.operates.push(e); doPeek = false; } else if (compare(e, eleOnTop) == 0) { Query q = toQuery(eleOnTop); this.operates.pop(); this.querys.push(q); } else { Query q = toQuery(eleOnTop); this.operates.pop(); this.querys.push(q); } } if (doPeek && this.operates.empty()) { this.operates.push(e); } } } } while (!this.operates.isEmpty()) { Element eleOnTop = this.operates.pop(); Query q = toQuery(eleOnTop); this.querys.push(q); } }