/** * 将分支数据转成Query逻辑 * * @return */ List<Query> toQueries(String fieldName) { List<Query> queries = new ArrayList<Query>(1); // 生成当前branch 的query if (lexeme != null) { queries.add(new TermQuery(new Term(fieldName, lexeme.getLexemeText()))); } // 生成child branch 的query if (acceptedBranchs != null && acceptedBranchs.size() > 0) { if (acceptedBranchs.size() == 1) { Query onlyOneQuery = optimizeQueries(acceptedBranchs.get(0).toQueries(fieldName)); if (onlyOneQuery != null) { queries.add(onlyOneQuery); } } else { BooleanQuery orQuery = new BooleanQuery(); for (TokenBranch childBranch : acceptedBranchs) { Query childQuery = optimizeQueries(childBranch.toQueries(fieldName)); if (childQuery != null) { orQuery.add(childQuery, Occur.SHOULD); } } if (orQuery.getClauses().length > 0) { queries.add(orQuery); } } } // 生成nextBranch的query if (nextBranch != null) { queries.addAll(nextBranch.toQueries(fieldName)); } return queries; }
/** * 单连续字窜(不带空格符)单Field查询分析 * * @param field * @param query * @return */ public static Query parse(String field, String query) { if (field == null) { throw new IllegalArgumentException("parameter \"field\" is null"); } if (query == null || "".equals(query.trim())) { return new TermQuery(new Term(field)); } // 从缓存中取出已经解析的query生产的TokenBranch TokenBranch root = getCachedTokenBranch(query); if (root != null) { return optimizeQueries(root.toQueries(field)); } else { // System.out.println(System.currentTimeMillis()); root = new TokenBranch(null); // 对查询条件q进行分词 StringReader input = new StringReader(query.trim()); IKSegmentation ikSeg = new IKSegmentation(input, isMaxWordLength); try { for (Lexeme lexeme = ikSeg.next(); lexeme != null; lexeme = ikSeg.next()) { // 处理词元分支 root.accept(lexeme); } } catch (IOException e) { e.printStackTrace(); } // 缓存解析结果的博弈树 cachedTokenBranch(query, root); return optimizeQueries(root.toQueries(field)); } }
/** * 组合词元分支 * * @param _lexeme * @return 返回当前branch能否接收词元对象 */ boolean accept(Lexeme _lexeme) { /* * 检查新的lexeme 对当前的branch 的可接受类型 * acceptType : REFUSED 不能接受 * acceptType : ACCEPTED 接受 * acceptType : TONEXT 由相邻分支接受 */ int acceptType = checkAccept(_lexeme); switch (acceptType) { case REFUSED: // REFUSE 情况 return false; case ACCEPTED: if (acceptedBranchs == null) { // 当前branch没有子branch,则添加到当前branch下 acceptedBranchs = new ArrayList<TokenBranch>(2); acceptedBranchs.add(new TokenBranch(_lexeme)); } else { boolean acceptedByChild = false; // 当前branch拥有子branch,则优先由子branch接纳 for (TokenBranch childBranch : acceptedBranchs) { acceptedByChild = childBranch.accept(_lexeme) || acceptedByChild; } // 如果所有的子branch不能接纳,则由当前branch接纳 if (!acceptedByChild) { acceptedBranchs.add(new TokenBranch(_lexeme)); } } // 设置branch的最大右边界 if (_lexeme.getEndPosition() > this.rightBorder) { this.rightBorder = _lexeme.getEndPosition(); } break; case TONEXT: // 把lexeme放入当前branch的相邻分支 if (this.nextBranch == null) { // 如果还没有相邻分支,则建立一个不交叠的分支 this.nextBranch = new TokenBranch(null); } this.nextBranch.accept(_lexeme); break; } return true; }
/**
 * Compares this branch with another object by lexeme.
 *
 * <p>Two distinct branches are equal only when both hold a non-null lexeme
 * and those lexemes are equal; a branch without a lexeme is equal only to
 * itself.
 *
 * @param o the object to compare with
 * @return {@code true} if {@code o} is this instance, or a
 *         {@code TokenBranch} whose non-null lexeme equals this one's
 */
@Override
public boolean equals(Object o) {
    if (this == o) {
        return true;
    }
    // instanceof is false for null, so this also covers o == null.
    if (!(o instanceof TokenBranch)) {
        return false;
    }
    TokenBranch that = (TokenBranch) o;
    return this.lexeme != null
            && that.getLexeme() != null
            && this.lexeme.equals(that.getLexeme());
}