/** * give the id list of sentences, from Lucene index * * @param input input word * @param catalogName catalog (domain) name which we'd like to search in * @param limit how many hits are needed (0 means all) */ public List<String> query(String input, String catalogName, int limit) { List<String> res = new ArrayList<String>(); try { catalog c = catalogs.get(catalogName); IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(c.indexPath))); IndexSearcher searcher = new IndexSearcher(reader); QueryParser parser = new QueryParser("contents", analyzer); Query query = parser.parse(QueryParser.escape(input)); int n = limit > 0 ? limit : searcher.count(query); if (n == 0) n = 1; TopDocs results = searcher.search(query, n); int endPos = limit; if (limit != 0) endPos = Math.min(results.totalHits, limit); // 1st n hits else endPos = results.totalHits; // all hits for (int i = 0; i < endPos; i++) { int id = results.scoreDocs[i].doc; Document doc = searcher.doc(id); res.add(doc.get("filename")); } reader.close(); return res; } catch (ParseException e) { log(e.getMessage()); } catch (IOException e) { log(e.getMessage()); } return res; }
/** This function is only for test search. */ public static List<String> searchQuery( String indexDir, String queryString, int numResults, CharArraySet stopwords) { String field = "contents"; List<String> hitPaths = new ArrayList<String>(); try { IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexDir))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new MyAnalyzer(Version.LUCENE_44, stopwords); QueryParser parser = new QueryParser(Version.LUCENE_44, field, analyzer); Query query; query = parser.parse(QueryParser.escape(queryString)); TopDocs results = searcher.search(query, null, numResults); for (ScoreDoc hit : results.scoreDocs) { String path = searcher.doc(hit.doc).get("path"); hitPaths.add(path.substring(0, path.length() - 4)); // chop off the file extension (".txt") } } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } catch (ParseException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } return hitPaths; }
@Override public List<Document> customRanking(List<Query> queryList) throws Exception { List<Document> result = new ArrayList<Document>(); Map<String, ResultRep> docMap = new LinkedHashMap<String, ResultRep>(); BooleanQuery bq = new BooleanQuery(); bq.setMinimumNumberShouldMatch(1); Set<String> tgtset = tstSet == null ? new HashSet<String>() : tstSet; for (String clsStr : tgtset) bq.add( new TermQuery(new Term(Configuration.IDX_FIELD_PKG_NAME, QueryParser.escape(clsStr))), Occur.SHOULD); Filter idFilter = new QueryWrapperFilter(bq); try { Float avg = new Float(queryList.size()); Float rank; // BooleanQuery mainQuery; for (Query query : queryList) { // mainQuery = new BooleanQuery(); // mainQuery.add(query, Occur.SHOULD); // mainQuery.add(idFilter, Occur.MUST); result = searcher(query, idFilter); for (int i = 0; i < result.size(); i++) { Document doc = result.get(i); String key = doc.get(Configuration.IDX_FIELD_METHOD_DESCRIPTION_CATCHALL); rank = Float.parseFloat(doc.get("SCORE")); rank = rank / avg; if (!docMap.containsKey(key)) docMap.put(key, new ResultRep(doc, rank)); else { ResultRep rep = docMap.get(key); rep.setRank(rep.getRank() + rank); } } } } catch (Exception e) { e.printStackTrace(); } TreeSet<ResultRep> set = new TreeSet<ResultRep>(new CustomComparator()); for (String key : docMap.keySet()) set.add(docMap.get(key)); result = new ArrayList<Document>(); Iterator<ResultRep> iter = set.descendingIterator(); ResultRep rep; while (iter.hasNext()) { rep = iter.next(); if (tgtset.contains(rep.getDoc().get(Configuration.IDX_FIELD_CLASS_NAME).trim())) result.add(rep.getDoc()); } if (result.size() > 10) result = result.subList(0, 10); return result; }
private FullTextQuery buildFullTextQuery( UserSearchRequest request, Pageable pageable, Criteria criteria) { FullTextEntityManager fullTextEntityManager = Search.getFullTextEntityManager(entityManager); QueryBuilder qb = fullTextEntityManager.getSearchFactory().buildQueryBuilder().forEntity(User.class).get(); @SuppressWarnings("rawtypes") BooleanJunction<BooleanJunction> junction = qb.bool(); junction.must(qb.all().createQuery()); if (StringUtils.hasText(request.getKeyword())) { Analyzer analyzer = fullTextEntityManager.getSearchFactory().getAnalyzer("synonyms"); String[] fields = new String[] { "loginId", "name.firstName", "name.lastName", }; MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, analyzer); parser.setDefaultOperator(QueryParser.Operator.AND); Query query = null; try { query = parser.parse(request.getKeyword()); } catch (ParseException e1) { try { query = parser.parse(QueryParser.escape(request.getKeyword())); } catch (ParseException e2) { throw new RuntimeException(e2); } } junction.must(query); } if (!CollectionUtils.isEmpty(request.getRoles())) { for (User.Role role : request.getRoles()) { junction.must(qb.keyword().onField("roles").matching(role).createQuery()); } } Query searchQuery = junction.createQuery(); Sort sort = new Sort(new SortField("id", SortField.Type.STRING, false)); FullTextQuery persistenceQuery = fullTextEntityManager .createFullTextQuery(searchQuery, User.class) .setCriteriaQuery(criteria) // .setProjection("id") .setSort(sort); if (pageable != null) { persistenceQuery.setFirstResult(pageable.getOffset()); persistenceQuery.setMaxResults(pageable.getPageSize()); } return persistenceQuery; }
/** This function is only for test search. */ public static List<String> searchQuery( String indexDir, String queryString, int numResults, CharArraySet stopwords) { String field = "contents"; List<String> hitPaths = new ArrayList<String>(); try { IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexDir))); IndexSearcher searcher = new IndexSearcher(reader); // Used with mySimilarity.java for part E mySimilarity sim = new mySimilarity(); searcher.setSimilarity(sim); // Used in part F // String field="Search_Field"; // the second parameter calls the getAvgLength method and automatically calculates the // Average Length value // BM25Parameters.setAverageLength(field,getAvgLen(reader,field)); // BM25Parameters.setB(0.75f); // BM25Parameters.setK1(2f); // BM25BooleanQuery query = new BM25BooleanQuery( "Cystic hydroma" ,field,analyzer); // System.out.println ("Searching for: " + query.toString(field)); // TopDocs top=searcher.search(query, 10); // ScoreDoc[] docs = top.scoreDocs; // for (int i = 0; i < 10; i++){ // System.out.println("the document with id= " + docs[i].doc + " has score ="+docs[i].score); // } Analyzer analyzer = new MyAnalyzer(Version.LUCENE_44, stopwords); QueryParser parser = new QueryParser(Version.LUCENE_44, field, analyzer); Query query; query = parser.parse(QueryParser.escape(queryString)); TopDocs results = searcher.search(query, null, numResults); for (ScoreDoc hit : results.scoreDocs) { String path = searcher.doc(hit.doc).get("path"); hitPaths.add(path.substring(0, path.length() - 4)); // chop off the file extension (".txt") } } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } catch (ParseException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } return hitPaths; }
@Override public Page<Comment> search(CommentSearchRequest request, Pageable pageable) { FullTextEntityManager fullTextEntityManager = Search.getFullTextEntityManager(entityManager); QueryBuilder qb = fullTextEntityManager.getSearchFactory().buildQueryBuilder().forEntity(Comment.class).get(); @SuppressWarnings("rawtypes") BooleanJunction<BooleanJunction> junction = qb.bool(); junction.must(qb.all().createQuery()); if (StringUtils.hasText(request.getKeyword())) { Analyzer analyzer = fullTextEntityManager.getSearchFactory().getAnalyzer("synonyms"); String[] fields = new String[] {"authorName", "content"}; MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, analyzer); parser.setDefaultOperator(QueryParser.Operator.AND); Query query = null; try { query = parser.parse(request.getKeyword()); } catch (ParseException e1) { try { query = parser.parse(QueryParser.escape(request.getKeyword())); } catch (ParseException e2) { throw new RuntimeException(e2); } } junction.must(query); } if (StringUtils.hasText(request.getLanguage())) { junction.must( qb.keyword().onField("post.language").matching(request.getLanguage()).createQuery()); } if (request.getPostId() != null) { junction.must(qb.keyword().onField("post.id").matching(request.getPostId()).createQuery()); } if (request.getApproved() != null) { junction.must(qb.keyword().onField("approved").matching(request.getApproved()).createQuery()); } Query searchQuery = junction.createQuery(); Session session = (Session) entityManager.getDelegate(); Criteria criteria = session .createCriteria(Comment.class) .setFetchMode("post", FetchMode.JOIN) .setFetchMode("author", FetchMode.JOIN); Sort sort = null; if (pageable.getSort() != null) { if (pageable.getSort().getOrderFor("date") != null) { Order order = pageable.getSort().getOrderFor("date"); sort = new Sort( new SortField( "date", SortField.Type.STRING, order.getDirection().equals(Direction.DESC)), new SortField( "id", SortField.Type.LONG, order.getDirection().equals(Direction.DESC))); } } if (sort == null) { sort = new Sort( new SortField("date", SortField.Type.STRING), new SortField("id", SortField.Type.LONG)); } FullTextQuery persistenceQuery = fullTextEntityManager .createFullTextQuery(searchQuery, Comment.class) .setCriteriaQuery(criteria) .setSort(sort); persistenceQuery.setFirstResult(pageable.getOffset()); persistenceQuery.setMaxResults(pageable.getPageSize()); int resultSize = persistenceQuery.getResultSize(); @SuppressWarnings("unchecked") List<Comment> results = persistenceQuery.getResultList(); return new PageImpl<>(results, pageable, resultSize); }
private FullTextQuery buildFullTextQuery( ArticleSearchRequest request, Pageable pageable, Criteria criteria) { FullTextEntityManager fullTextEntityManager = Search.getFullTextEntityManager(entityManager); QueryBuilder qb = fullTextEntityManager.getSearchFactory().buildQueryBuilder().forEntity(Article.class).get(); @SuppressWarnings("rawtypes") BooleanJunction<BooleanJunction> junction = qb.bool(); junction.must(qb.all().createQuery()); junction.must( qb.keyword().onField("drafted").ignoreAnalyzer().matching("_null_").createQuery()); if (StringUtils.hasText(request.getKeyword())) { Analyzer analyzer = fullTextEntityManager.getSearchFactory().getAnalyzer("synonyms"); String[] fields = new String[] { "title", "body", "categories.name", "tags.name", }; MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, analyzer); parser.setDefaultOperator(QueryParser.Operator.AND); Query query = null; try { query = parser.parse(request.getKeyword()); } catch (ParseException e1) { try { query = parser.parse(QueryParser.escape(request.getKeyword())); } catch (ParseException e2) { throw new RuntimeException(e2); } } junction.must(query); } if (request.getStatus() != null) { junction.must(qb.keyword().onField("status").matching(request.getStatus()).createQuery()); } if (StringUtils.hasText(request.getLanguage())) { junction.must(qb.keyword().onField("language").matching(request.getLanguage()).createQuery()); } if (request.getDateFrom() != null) { junction.must(qb.range().onField("date").above(request.getDateFrom()).createQuery()); } if (request.getDateTo() != null) { junction.must(qb.range().onField("date").below(request.getDateTo()).createQuery()); } if (!CollectionUtils.isEmpty(request.getCategoryIds())) { BooleanJunction<BooleanJunction> subJunction = qb.bool(); for (long categoryId : request.getCategoryIds()) { subJunction.should( qb.keyword().onField("categories.id").matching(categoryId).createQuery()); } junction.must(subJunction.createQuery()); } if (!CollectionUtils.isEmpty(request.getCategoryCodes())) { BooleanJunction<BooleanJunction> subJunction = qb.bool(); for (String categoryCode : request.getCategoryCodes()) { subJunction.should( qb.keyword().onField("categories.code").matching(categoryCode).createQuery()); } junction.must(subJunction.createQuery()); } if (!CollectionUtils.isEmpty(request.getTagIds())) { BooleanJunction<BooleanJunction> subJunction = qb.bool(); for (long tagId : request.getTagIds()) { subJunction.should(qb.keyword().onField("tags.id").matching(tagId).createQuery()); } junction.must(subJunction.createQuery()); } if (!CollectionUtils.isEmpty(request.getTagNames())) { BooleanJunction<BooleanJunction> subJunction = qb.bool(); for (String tagName : request.getTagNames()) { subJunction.should(qb.phrase().onField("tags.name").sentence(tagName).createQuery()); } junction.must(subJunction.createQuery()); } if (request.getAuthorId() != null) { junction.must( qb.keyword().onField("author.id").matching(request.getAuthorId()).createQuery()); } Query searchQuery = junction.createQuery(); Sort sort = new Sort( new SortField("date", SortField.Type.STRING, true), new SortField("id", SortField.Type.LONG, true)); FullTextQuery persistenceQuery = fullTextEntityManager .createFullTextQuery(searchQuery, Article.class) .setCriteriaQuery(criteria) .setSort(sort); if (pageable != null) { persistenceQuery.setFirstResult(pageable.getOffset()); persistenceQuery.setMaxResults(pageable.getPageSize()); } return persistenceQuery; }