/**
 * Builds a Lucene query from the user's query string, remembering whether the
 * parsed query was a phrase query.
 *
 * @param queryString raw query text entered by the user
 * @return the parsed query plus its phrase-query flag
 * @throws SearchException when the query string cannot be parsed
 */
@NotNull
@ThreadSafe
private static QueryWrapper createQuery(@NotNull String queryString) throws SearchException {
  PhraseDetectingQueryParser parser = new PhraseDetectingQueryParser(
      IndexRegistry.LUCENE_VERSION, Fields.CONTENT.key(), IndexRegistry.getAnalyzer());
  parser.setAllowLeadingWildcard(true);
  parser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);

  // The user setting decides whether terms are implicitly AND-ed or OR-ed.
  if (!SettingsConf.Bool.UseOrOperator.get())
    parser.setDefaultOperator(QueryParser.AND_OPERATOR);

  try {
    Query parsed = parser.parse(queryString);
    return new QueryWrapper(parsed, parser.isPhraseQuery());
  }
  catch (IllegalArgumentException e) {
    /*
     * This happens for example when you enter a fuzzy search with
     * similarity >= 1, e.g. "fuzzy~1".
     */
    throw new SearchException(Msg.invalid_query.get() + "\n\n" + e.getMessage());
  }
  catch (ParseException e) {
    throw new SearchException(Msg.invalid_query.get() + "\n\n" + e.getMessage());
  }
}
public void queryIndex() { Query q; try { q = new MultiFieldQueryParser(new String[] {"title", "name"}, analyzer).parse("s*"); // searching ... int hitsPerPage = 10; IndexSearcher searcher = new IndexSearcher(indexDirectory); TopDocCollector collector = new TopDocCollector(hitsPerPage); searcher.search(q, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; // output results System.out.println("Found " + hits.length + " hits."); for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; Document d = searcher.doc(docId); System.out.println((i + 1) + ". " + d.get("name") + ": " + d.get("title")); } } catch (ParseException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (CorruptIndexException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } }
public TopDocCollectorSearchResult searchByScore( String queryStr, int startFrom, String operator) { try { queryStr = queryStr.trim(); QueryParser parser = new QueryParser("contents", analyzer); if (QueryParser.AND_OPERATOR.toString().equalsIgnoreCase(operator)) { parser.setDefaultOperator(QueryParser.AND_OPERATOR); } else { parser.setDefaultOperator(QueryParser.OR_OPERATOR); } Query query; query = parser.parse(queryStr); TopDocCollector collector = doPagingSearch(query, startFrom); TopDocCollectorSearchResult result = new TopDocCollectorSearchResult(collector, searcher); return result; } catch (ParseException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } return null; // System.out.println("Searching for: " + query.toString("contents")); // doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == // null); // } // reader.close(); }
/**
 * Adds a field/value term to the wrapped boolean query, converting the Lucene
 * parse exception into the portal's own {@link ParseException} type.
 *
 * @param field index field to add the term under
 * @param value term value
 * @throws ParseException when Lucene rejects the term
 */
public void addTerm(String field, String value) throws ParseException {
  try {
    LuceneHelperUtil.addTerm(_booleanQuery, field, value);
  }
  catch (org.apache.lucene.queryParser.ParseException cause) {
    throw new ParseException(cause.getMessage());
  }
}
public TopFieldDocsSearchResult searchBySession(String queryStr, int startFrom, String operator) { try { queryStr = queryStr.trim(); QueryParser parser = new QueryParser("contents", analyzer); Operator op = QueryParser.AND_OPERATOR; if (QueryParser.AND_OPERATOR.toString().equalsIgnoreCase(operator)) { parser.setDefaultOperator(QueryParser.AND_OPERATOR); } else { parser.setDefaultOperator(QueryParser.OR_OPERATOR); } Query query; query = parser.parse(queryStr); Sort sort = new Sort("summary", true); TopFieldDocs tfd = searcher.search(query, null, startFrom + 10, sort); TopFieldDocsSearchResult result = new TopFieldDocsSearchResult(tfd, searcher); return result; } catch (ParseException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } return null; }
/**
 * REST endpoint returning volumes matching the given category/page/keyword as
 * JSON. Each returned volume is stamped with the requested page and query so
 * the client can render pagination links.
 *
 * @param categoryId optional category filter
 * @param page optional page number used for pagination
 * @param q optional free-text query
 * @return 200 with the volumes, or an empty JSON body when nothing matched
 */
@SuppressWarnings("unchecked")
@GET
@Produces(MediaType.APPLICATION_JSON)
public Response volumes(
    @QueryParam("categoryId") Long categoryId,
    @QueryParam("page") Integer page,
    @QueryParam("q") String q) {
  Volumes volumes = new Volumes();
  VolumeDAO volumeDAO = DAOFactory.getInstance().getVolumeDAO();
  FullTextSession fts =
      org.hibernate.search.Search.getFullTextSession(volumeDAO.getSession());
  try {
    Query query = buildLuceneQuery(categoryId, page, q);
    FullTextQuery fullTextQuery =
        fts.createFullTextQuery(query, com.book.identification.model.Volume.class);
    addPaginationToQuery(fullTextQuery, page);
    List<Volume> list = fullTextQuery.list();
    for (Volume volume : list) {
      volume.setPage(page);
      volume.setQ(q);
    }
    // FIX: reuse the already-fetched list instead of calling list() a second
    // time, which re-executed the full-text query.
    volumes.setVolumes(list);
  } catch (ParseException e1) {
    e1.printStackTrace();
  }
  if (volumes.getVolumes().isEmpty()) {
    return Response.ok("volumes : {}").build();
  }
  return Response.ok(volumes).build();
}
public void startSearch(String searchString) throws IOException { /*analyze(searchString);*/ try { Directory directory = FSDirectory.open(new File(".//Index")); // где находится индекс IndexSearcher is = new IndexSearcher(directory); // объект поиска QueryParser parser = new QueryParser( Version.LUCENE_31, "name", new RussianAnalyzer(Version.LUCENE_31)); // поле поиска + анализатор /* String str1 = "фотоаппарат"; String str2 = "телевизор"; String str3 = "SONY"; String total = "(" + str1 + " OR " + str2 + ")" + " AND " + str3; System.out.println(total);*/ Query query = parser.parse(searchString); // что ищем TopDocs results = is.search( query, null, 10); // включаем поиск ограничиваемся 10 документами, results содержит ... System.out.println( "getMaxScore()=" + results.getMaxScore() + " totalHits=" + results .totalHits); // MaxScore - наилучший результат(приоритет), totalHits - количество // найденных документов /*proposalController.getProposalList().clear();*/ for (ScoreDoc hits : results.scoreDocs) { // получаем подсказки Document doc = is.doc(hits.doc); // получаем документ по спец сылке doc for (Proposal proposal : proposalFacade.findPropolsalsByProduct(Long.valueOf(doc.get("recid")))) { proposalController.getProposalList().add(proposal); _log.info( "Предложение найдено:" + proposal.getRecid().toString() + ",Товар: " + doc.get("recid") + ", " + doc.get("name")); } /*System.out.println("doc="+hits.doc+" score="+hits.score);//выводим спец сылку doc + приоритет addMessage(doc.get("id") + " | " + doc.get("recid") + " | " + doc.get("name"));//выводим поля найденного документа*/ } directory.close(); } catch (ParseException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } addMessage("Поиск выполнен"); }
/** {@inheritDoc} */ @SuppressWarnings("unchecked") @Override public List<AbstractPermissionsOwner> search( String queryString, boolean withUsers, boolean withGroups) { List<AbstractPermissionsOwner> results = new ArrayList<AbstractPermissionsOwner>(); // No query should be realized while re-indexing resources. if (!inhibitSearch) { // Gets the Hibernate search object to performs queries. FullTextEntityManager fullTextEntityManager = Search.getFullTextEntityManager(entityManager); // Parse the the queryString. MultiFieldQueryParser parser = new MultiFieldQueryParser( Version.LUCENE_30, new String[] {"name", "firstName", "lastName", "email", "login"}, new StandardAnalyzer(Version.LUCENE_31)); parser.setDefaultOperator(Operator.OR); try { Query luceneQuery = parser.parse(queryString); FullTextQuery query = null; // Because of the poor design of the Hibernate Search API and the usage of varagrs, we must // have this // if-else algorihm. TODO refactor with reflection. if (withUsers && withGroups) { query = fullTextEntityManager.createFullTextQuery(luceneQuery, User.class, Group.class); } else if (withUsers) { query = fullTextEntityManager.createFullTextQuery(luceneQuery, User.class); } else if (withGroups) { query = fullTextEntityManager.createFullTextQuery(luceneQuery, Group.class); } // Finally execute the query. if (query != null) { List<AbstractPermissionsOwner> found = query.getResultList(); // Keeps only distinct results. for (AbstractPermissionsOwner foundObject : found) { if (!results.contains(foundObject)) { // TODO Remove this Hibernate specific block. // Sometimes hibernate Search returns Javassist proxies, which can't be properly // deserialize by Jackson. 
if (foundObject instanceof HibernateProxy) { HibernateProxy h = (HibernateProxy) foundObject; foundObject = (AbstractPermissionsOwner) h.getHibernateLazyInitializer().getImplementation(); } results.add(foundObject); } } } } catch (ParseException exc) { // Handle parsing failure String error = "Misformatted queryString '" + queryString + "': " + exc.getMessage(); logger.debug("[search] " + error); throw new IllegalArgumentException(error, exc); } } return results; } // search().
/**
 * Translates the portal query and clause-occur value into their Lucene
 * equivalents and adds them to the wrapped boolean query, converting the
 * Lucene parse exception into the portal's own {@link ParseException}.
 *
 * @param query the portal-level query to add
 * @param booleanClauseOccur how the clause participates (MUST/SHOULD/MUST_NOT)
 * @throws ParseException when translation fails at the Lucene layer
 */
public void add(Query query, BooleanClauseOccur booleanClauseOccur) throws ParseException {
  try {
    _booleanQuery.add(
        QueryTranslator.translate(query),
        BooleanClauseOccurTranslator.translate(booleanClauseOccur));
  }
  catch (org.apache.lucene.queryParser.ParseException cause) {
    throw new ParseException(cause.getMessage());
  }
}
public List<AgeObject> select(String query) { final List<AgeObject> res = new ArrayList<AgeObject>(); Query q; try { q = new QueryParser(Version.LUCENE_30, defaultFieldName, analyzer).parse(query); final IndexSearcher searcher = new IndexSearcher(index, true); // TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true); searcher.search( q, new Collector() { int base; @Override public void setScorer(Scorer arg0) throws IOException {} @Override public void setNextReader(IndexReader arg0, int arg1) throws IOException { // System.out.println("Next Reader: "+arg1); base = arg1; } @Override public void collect(int docId) throws IOException { int ind = docId + base; // System.out.println("Found doc: "+ind+". Object: // "+objectList.get(ind).getId()+". Class: // "+objectList.get(ind).getAgeElClass().getName() ); res.add(objectList.get(ind)); } @Override public boolean acceptsDocsOutOfOrder() { return false; } }); } catch (ParseException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } // ScoreDoc[] hits = collector.topDocs().scoreDocs; return res; }
/**
 * Full-text search over articles by title.
 *
 * @param search URL-encoded search keyword
 * @return matching articles; empty when the keyword is blank or parsing fails
 */
@SuppressWarnings("unchecked")
public List<Articles> searchAtl(String search) {
  // FIX: use the type-safe factory instead of the raw Collections.EMPTY_LIST
  // constant (avoids an unchecked raw-type assignment).
  List<Articles> list = Collections.emptyList();
  try {
    search = urlDecode(search);
    logger.info("全文检索内容:" + search);
    if (search != null && !search.equals("")) {
      list = atlFullTextManager.queryContent("title", getSearchParam(search));
    }
  } catch (ParseException e) {
    logger.error("全文检索失败:" + e.toString());
  }
  return list;
}
/**
 * Builds a full-text boolean query for the given keyword.
 *
 * @param q the query keyword
 * @param fields the fields to search in
 * @return a BooleanQuery OR-combining (SHOULD) one parsed sub-query per field;
 *     empty when the keyword is blank
 */
public BooleanQuery getFullTextQuery(String q, String... fields) {
  Analyzer analyzer = new IKAnalyzer();
  BooleanQuery query = new BooleanQuery();
  try {
    if (StringUtils.isNotBlank(q)) {
      for (String field : fields) {
        QueryParser parser = new QueryParser(Version.LUCENE_36, field, analyzer);
        // SHOULD: a match in any single field qualifies the document.
        query.add(parser.parse(q), Occur.SHOULD);
      }
    }
  } catch (ParseException e) {
    // NOTE(review): parse failures are swallowed here, so callers may receive
    // a query missing some field clauses — consider propagating instead.
    e.printStackTrace();
  }
  return query;
}
/**
 * Creates a new instance; Passes the query directly on to the Lucene parser.
 *
 * @param values search settings; must target exactly one type
 * @param query raw Lucene query string; must be non-empty and not just "*"
 * @param analyzer analyzer class, instantiated reflectively for parsing
 */
public FullText(SearchValues values, String query, Class<? extends Analyzer> analyzer) {
  super(values);
  Assert.notNull(analyzer, "Analyzer required");
  this.analyzer = analyzer;
  // Full-text search supports exactly one target type at a time.
  if (values.onlyTypes == null || values.onlyTypes.size() != 1) {
    throw new ApiUsageException(
        "Searches by full text are currently limited to a single type.\n"
            + "Plese use Search.onlyType()");
  }
  if (query == null || query.length() < 1) {
    throw new IllegalArgumentException("Query string must be non-empty");
  }
  // Leading wildcards are expensive; require explicit opt-in via the setting.
  if ((query.startsWith("*") || query.startsWith("?")) && !values.leadingWildcard) {
    throw new ApiUsageException(
        "Searches starting with a leading "
            + "wildcard (*,?) can be slow.\nPlease use "
            + "setAllowLeadingWildcard() to permit this usage.");
  }
  if (query.equals("*")) {
    throw new ApiUsageException(
        "Wildcard searches (*) must contain more than a single wildcard. ");
  }
  this.queryStr = query;
  try {
    final Analyzer a = analyzer.newInstance();
    final QueryParser parser = new /*Analyzing*/ QueryParser("combined_fields", a);
    parser.setAllowLeadingWildcard(values.leadingWildcard);
    q = parser.parse(queryStr);
  } catch (ParseException pe) {
    final String msg = queryStr + " caused a parse exception: " + pe.getMessage();
    // No longer logging these, since it's a simple user error
    ApiUsageException aue = new ApiUsageException(msg);
    throw aue;
  } catch (InstantiationException e) {
    ApiUsageException aue = new ApiUsageException(analyzer.getName() + " cannot be instantiated.");
    throw aue;
  } catch (IllegalAccessException e) {
    ApiUsageException aue = new ApiUsageException(analyzer.getName() + " cannot be instantiated.");
    throw aue;
  }
}
/**
 * Prompts repeatedly on stdin until a syntactically valid query is entered,
 * then prints the stored values matching it.
 *
 * @param scanner input source; the remainder of the current line is consumed
 *     before prompting
 */
private void doQuery(Scanner scanner) {
  scanner.nextLine(); // discard the rest of the current input line
  Query query = null;
  do {
    System.out.println("Enter a query:");
    String line = scanner.nextLine();
    try {
      query = actions.parseQuery(line);
    } catch (ParseException e) {
      System.out.println("Wrong syntax in query: " + e.getMessage());
      System.out.println("type it again: ");
    }
  } while (query == null);
  printResult(actions.listStoredValuesMatchingQuery(query));
}
/**
 * Exercises SynonymRecordSearcher over one synonym index per record field:
 * registers a searcher per field, runs the combined record search, and checks
 * the number and shape of the returned records.
 *
 * @param record one search term per field/index
 * @param topDocLimit maximum docs each per-field searcher may return
 * @param resultLimit maximum number of combined output records requested
 */
public void testSearch(String[] record, int topDocLimit, int resultLimit) {
  SynonymRecordSearcher recSearcher = new SynonymRecordSearcher(record.length);
  // One synonym index per record component, located at data/idx1..idxN.
  for (int i = 0; i < record.length; i++) {
    initIdx(
        PluginUtil.getPluginInstallPath(HandLuceneImplTest.PLUGIN_ID).concat("data/idx") + (i + 1));
    SynonymIndexSearcher searcher =
        new SynonymIndexSearcher(
            PluginUtil.getPluginInstallPath(HandLuceneImplTest.PLUGIN_ID).concat("data/idx")
                + (i + 1));
    searcher.setTopDocLimit(topDocLimit);
    recSearcher.addSearcher(searcher, i);
  }
  try {
    TopDocs topDocs;
    // The expected combined hit count is the product of per-field hits.
    int hits = 1;
    for (int i = 0; i < record.length; i++) {
      topDocs = recSearcher.getSearcher(i).searchDocumentBySynonym(record[i]);
      hits *= topDocs.totalHits;
    }
    List<OutputRecord> results = recSearcher.search(resultLimit, record);
    Assert.assertNotNull(results);
    Assert.assertFalse(results.isEmpty());
    for (OutputRecord outputRecord : results) {
      Assert.assertNotNull(outputRecord);
      // Every output record must carry one value per input field.
      String[] resultingRecord = outputRecord.getRecord();
      Assert.assertEquals(record.length, resultingRecord.length);
      System.out.println(StringUtils.join(resultingRecord, '|'));
      System.out.println("\t" + outputRecord.getScore());
    }
    // The searcher caps the combined results at resultLimit.
    Assert.assertEquals(Math.min(hits, resultLimit), results.size());
    for (int i = 0; i < record.length; i++) {
      recSearcher.getSearcher(i).close();
    }
  } catch (ParseException e) {
    e.printStackTrace();
    fail("should not get an exception here");
  } catch (IOException e) {
    e.printStackTrace();
    fail("should not get an exception here");
  }
  System.out.println("");
}
/**
 * Deletes every document matching {@code queryString} from the index and
 * commits the change. Failures are logged at SEVERE level, never propagated.
 *
 * @param queryString query selecting the documents to delete
 */
private void del(String queryString) {
  jlog.info("Deleting " + queryString);
  try {
    this.writer.deleteDocuments(this.queryParser().parse(queryString));
    this.writer.commit();
    jlog.info("Several docs deleted");
  } catch (CorruptIndexException cie) {
    jlog.severe("Corrupt index!");
    cie.printStackTrace();
  } catch (IOException ioe) {
    jlog.severe("Couldn't del values docs:\n\t" + ioe);
    ioe.printStackTrace();
  } catch (ParseException pe) {
    jlog.severe("Couldn't parse del query:\n\t" + pe);
    pe.printStackTrace();
  }
}
/**
 * Creates (or extends) the Lucene index for the given site/channel/date range
 * and renders a JSON response: {success:true, lastId:…} on success, or
 * {success:false, msg:…} when indexing fails.
 *
 * @throws JSONException if building the JSON response fails
 */
@RequestMapping(value = "/lucene/o_create.do")
public void create(
    Integer siteId,
    Integer channelId,
    Date startDate,
    Date endDate,
    Integer startId,
    Integer max,
    HttpServletRequest request,
    HttpServletResponse response,
    ModelMap model)
    throws JSONException {
  try {
    Integer lastId =
        luceneContentSvc.createIndex(siteId, channelId, startDate, endDate, startId, max);
    JSONObject json = new JSONObject();
    json.put("success", true);
    if (lastId != null) {
      json.put("lastId", lastId);
    }
    ResponseUtils.renderJson(response, json.toString());
  } catch (CorruptIndexException e) {
    renderIndexError(response, e);
  } catch (LockObtainFailedException e) {
    renderIndexError(response, e);
  } catch (IOException e) {
    renderIndexError(response, e);
  } catch (ParseException e) {
    renderIndexError(response, e);
  }
}

/**
 * Renders a {success:false, msg:…} JSON body and logs the failure.
 * Extracted because the four catch blocks above were byte-identical.
 */
private void renderIndexError(HttpServletResponse response, Exception e) throws JSONException {
  JSONObject json = new JSONObject();
  json.put("success", false).put("msg", e.getMessage());
  ResponseUtils.renderJson(response, json.toString());
  log.error("", e);
}
/**
 * Full-text search for Book entities over the given fields, with optional
 * sorting and pagination.
 *
 * @param searchWord the Lucene search expression (leading wildcards allowed)
 * @param targetFields index fields to search in
 * @param orderBy optional sort field (string-valued); {@code null} for none
 * @param reverseOrder whether to reverse the sort order
 * @param startIndex first result index; pagination applies only when both
 *     startIndex and maxResult are positive
 * @param maxResult maximum number of results
 * @return matching books; empty when the expression cannot be parsed
 */
@SuppressWarnings("unchecked")
public List<Book> freeTextSeachEntities(
    String searchWord,
    String[] targetFields,
    String orderBy,
    boolean reverseOrder,
    int startIndex,
    int maxResult) {
  FullTextEntityManager fullTextEntityManager = Search.getFullTextEntityManager(entityManager);
  QueryParser parser =
      new MultiFieldQueryParser(
          org.apache.lucene.util.Version.LUCENE_29,
          targetFields,
          new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_29));
  parser.setAllowLeadingWildcard(true);
  org.apache.lucene.search.Query luceneQuery;
  try {
    luceneQuery = parser.parse(searchWord);
    System.out.println("@@@luceneQuery : " + luceneQuery.toString());
  } catch (ParseException e) {
    System.out.println("@@@ParseEcxetpion : " + e.getMessage());
    // FIX: previously execution fell through with a null query, causing a
    // NullPointerException in createFullTextQuery. An unparsable expression
    // now simply yields no results.
    return java.util.Collections.emptyList();
  }
  FullTextQuery fullTextQuery =
      fullTextEntityManager.createFullTextQuery(luceneQuery, Book.class);
  if (orderBy != null) {
    Sort sort = new Sort(new SortField(orderBy, SortField.STRING_VAL, reverseOrder));
    fullTextQuery.setSort(sort);
  }
  if (startIndex > 0 && maxResult > 0) {
    fullTextQuery.setFirstResult(startIndex);
    fullTextQuery.setMaxResults(maxResult);
  }
  Query jpaQuery = fullTextQuery;
  List<Book> resultEntities = jpaQuery.getResultList();
  return resultEntities;
}
// find indexed items by search label public List<IndexItem> findBySearchLabel(String queryString, int numOfResults) throws ParseException, IOException { // create query from incoming query string Query query = null; try { query = searchLabelQueryParser.parse(queryString); } catch (org.apache.lucene.queryParser.ParseException e) { // TODO Auto-generated catch block e.printStackTrace(); } // execute the query and get results ScoreDoc[] queryResults = searcher.search(query, numOfResults).scoreDocs; List<IndexItem> results = new ArrayList<IndexItem>(); // process the results for (ScoreDoc scoreDoc : queryResults) { Document doc = searcher.doc(scoreDoc.doc); results.add( new IndexItem( doc.get(IndexItem.TERMID), doc.get(IndexItem.LABEL), doc.get(IndexItem.SEARCHLABEL))); } return results; }
public boolean checkPDF(File pdfFile, String text) { int result = 0; try { IndexItem pdfIndexItem = index(pdfFile); // creating an instance of the indexer class and indexing the items Indexer indexer = new Indexer(INDEX_DIR); indexer.index(pdfIndexItem); indexer.close(); // creating an instance of the Searcher class to the query the index Searcher searcher = new Searcher(INDEX_DIR); result = searcher.findByContent(text, DEFAULT_RESULT_SIZE); searcher.close(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (ParseException e) { // TODO Auto-generated catch block e.printStackTrace(); } return containsWord(result); }
/**
 * Searches the author index for {@code queryString} and returns the authors
 * of the top ten hits. The searcher is always closed before returning.
 *
 * @param queryString query parsed against the "author.name" field
 * @return matched authors; empty when parsing or searching fails
 */
public Set<Author> search(String queryString) {
  Set<Author> authors = new HashSet<Author>();
  IndexSearcher searcher = getSearcher();
  Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_29);
  QueryParser parser = new QueryParser(Version.LUCENE_29, "author.name", analyzer);
  TopDocs hits = null;
  try {
    Query query = parser.parse(queryString);
    hits = searcher.search(query, 10);
  } catch (ParseException e) {
    e.printStackTrace();
  } catch (IOException ioe) {
    ioe.printStackTrace();
  }
  try {
    // FIX: guard against a failed search instead of relying on the broad
    // catch below to swallow the resulting NullPointerException.
    if (hits != null) {
      for (ScoreDoc scoreDoc : hits.scoreDocs) {
        Document doc = searcher.doc(scoreDoc.doc);
        Author author = new Author(doc.getField("author.name").stringValue());
        authors.add(author);
      }
    }
  } catch (Exception e) {
    log.error("Error while creating results from query");
  } finally {
    if (searcher != null) {
      try {
        searcher.close();
      } catch (IOException ioe) {
        log.error("Error while trying to close the index searcher. ", ioe);
      }
    }
  }
  return authors;
}
public List<String> complete(String query) throws IOException { _log.info("Автозаполнение началось:" + query); List<String> suggestions = new ArrayList<String>(); /*productList = productFacade.findAll(); for(Product p : productList) { if(p.getName().startsWith(query)) suggestions.add(p.getName()); }*/ try { Directory directory = FSDirectory.open(new File(".//Index")); // где находится индекс IndexSearcher is = new IndexSearcher(directory); // объект поиска QueryParser parser = new QueryParser( Version.LUCENE_31, "name", new RussianAnalyzer(Version.LUCENE_31)); // поле поиска + анализатор Query q = parser.parse(query); // что ищем _log.info("Запрос состоит из: " + q.toString()); TopDocs results = is.search( q, null, 10); // включаем поиск ограничиваемся 10 документами, results содержит ... for (ScoreDoc hits : results.scoreDocs) { // получаем подсказки Document doc = is.doc(hits.doc); // получаем документ по спец сылке doc suggestions.add(doc.get("name")); } directory.close(); } catch (ParseException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } return suggestions; }
@Override public int count(String query) { if (searcher == null) return 0; Query q; try { q = queryParser.parse(query); CountCollector cc = new CountCollector(); searcher.search(q, cc); return cc.getCount(); } catch (ParseException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } // ScoreDoc[] hits = collector.topDocs().scoreDocs; return -1; }
/**
 * Dispatches the query to the Solr or Sparse backend based on its type.
 *
 * @param request the originating request
 * @param query the query to execute
 * @param asAnon whether to execute anonymously
 * @return the result set, or {@code null} for an unrecognized query type
 * @throws SolrSearchException 500 on backend/parse/storage errors,
 *     403 when access is denied
 */
public SolrSearchResultSet getSearchResultSet(
    SlingHttpServletRequest request, Query query, boolean asAnon) throws SolrSearchException {
  try {
    if (query.getType() == Type.SOLR) {
      return processSolrQuery(request, query, asAnon);
    }
    if (query.getType() == Type.SPARSE) {
      return processSparseQuery(request, query, asAnon);
    }
    return null;
  } catch (SolrServerException e) {
    LOGGER.warn(e.getMessage(), e);
    throw new SolrSearchException(500, e.getMessage());
  } catch (ParseException e) {
    LOGGER.warn(e.getMessage(), e);
    throw new SolrSearchException(500, e.getMessage());
  } catch (StorageClientException e) {
    LOGGER.warn(e.getMessage(), e);
    throw new SolrSearchException(500, e.getMessage());
  } catch (AccessDeniedException e) {
    LOGGER.warn(e.getMessage(), e);
    throw new SolrSearchException(403, e.getMessage());
  }
}
public int count(String query) { Query q; try { q = new QueryParser(Version.LUCENE_30, defaultFieldName, analyzer).parse(query); final IndexSearcher searcher = new IndexSearcher(index, true); CountCollector cc = new CountCollector(); searcher.search(q, cc); return cc.getCount(); } catch (ParseException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } // ScoreDoc[] hits = collector.topDocs().scoreDocs; return -1; }
/**
 * Builds the Lucene query for a search request: the parsed user query string,
 * AND-ed with the optional alpha / classgroup / rdf:type / portal-flag filters
 * present on the request.
 *
 * @param request the incoming request carrying the filter parameters
 * @param portalState portal flag state used to build the flag filter
 * @param analyzer analyzer for the query parser
 * @param querystr the user's query string; may be null or overlong
 * @return the combined query, or {@code null} when the query string is missing
 *     or exceeds MAX_QUERY_LENGTH
 * @throws ParseException when the query string cannot be parsed
 * @throws SearchException on any other failure while building the query
 */
private Query getQuery(
    VitroRequest request, PortalFlag portalState, Analyzer analyzer, String querystr)
    throws SearchException, ParseException {
  Query query = null;
  try {
    if (querystr == null) {
      log.error(
          "There was no Parameter '" + VitroQuery.QUERY_PARAMETER_NAME + "' in the request.");
      return null;
    } else if (querystr.length() > MAX_QUERY_LENGTH) {
      log.debug("The search was too long. The maximum " + "query length is " + MAX_QUERY_LENGTH);
      return null;
    }

    QueryParser parser = getQueryParser(analyzer);
    query = parser.parse(querystr);

    // Optional filter: entities whose name starts with the given letter.
    String alpha = request.getParameter("alpha");
    if (alpha != null && !"".equals(alpha) && alpha.length() == 1) {
      query = andClause(query, new WildcardQuery(new Term(Entity2LuceneDoc.term.NAME, alpha + '*')));
    }

    // Optional filter: restrict to a class group.
    Object param = request.getParameter("classgroup");
    if (param != null && !"".equals(param)) {
      query =
          andClause(
              query, new TermQuery(new Term(Entity2LuceneDoc.term.CLASSGROUP_URI, (String) param)));
    }

    // Optional filter: restrict to an rdf:type.
    param = request.getParameter("type");
    if (param != null && !"".equals(param)) {
      query =
          andClause(query, new TermQuery(new Term(Entity2LuceneDoc.term.RDFTYPE, (String) param)));
    }

    // Optional filter: portal flag query.
    Query flagQuery = makeFlagQuery(portalState);
    if (flagQuery != null) {
      query = andClause(query, flagQuery);
    }

    log.debug("Query: " + query);
  } catch (ParseException e) {
    // FIX: rethrow the original exception instead of wrapping it in a new
    // ParseException built from the message alone, which discarded the
    // stack trace and error offset.
    throw e;
  } catch (Exception ex) {
    throw new SearchException(ex.getMessage());
  }
  return query;
}

/** AND-combines two queries into a BooleanQuery with two MUST clauses. */
private static Query andClause(Query base, Query filter) {
  BooleanQuery boolQuery = new BooleanQuery();
  boolQuery.add(base, BooleanClause.Occur.MUST);
  boolQuery.add(filter, BooleanClause.Occur.MUST);
  return boolQuery;
}
/** * Constructs a new instance; Builds a Lucence query with the provided arguments and passes it on * the Lucene parser * * @param values * @param fields Comma separated field names (name, description, etc.) * @param from Date range from in form YYYYMMDD * @param to Date range to in form YYYYMMDD * @param dateType Type of date {@link ome.api.Search#DATE_TYPE_ACQUISITION} or {@link * ome.api.Search#DATE_TYPE_IMPORT} * @param query The terms to search for * @param analyzer */ public FullText( SearchValues values, String fields, String from, String to, String dateType, String query, Class<? extends Analyzer> analyzer) { super(values); Assert.notNull(analyzer, "Analyzer required"); this.analyzer = analyzer; if (values.onlyTypes == null || values.onlyTypes.size() != 1) { throw new ApiUsageException( "Searches by full text are currently limited to a single type.\n" + "Plese use Search.onlyType()"); } if ((query == null || query.length() < 1) && (from == null || from.length() < 1) && (to == null || to.length() < 1)) { throw new IllegalArgumentException( "Query string must be non-empty if no date range is provided"); } if ((query.startsWith("*") || query.startsWith("?")) && !values.leadingWildcard) { throw new ApiUsageException( "Searches starting with a leading " + "wildcard (*,?) can be slow.\nPlease use " + "setAllowLeadingWildcard() to permit this usage."); } if (query.equals("*")) { throw new ApiUsageException( "Wildcard searches (*) must contain more than a single wildcard. "); } List<String> fieldsArray = new ArrayList<String>(); String[] tmp = fields.split("\\,"); for (String t : tmp) { t = t.trim(); if (t.length() > 0) fieldsArray.add(t); } Date dFrom; Date dTo; try { dFrom = (from != null && from.trim().length() > 0) ? DATEFORMAT.parse(from) : null; dTo = (to != null && to.trim().length() > 0) ? 
DATEFORMAT.parse(to) : null; } catch (java.text.ParseException e1) { throw new ApiUsageException("Invalid date format, dates must be in format YYYYMMDD."); } if (LuceneQueryBuilder.DATE_ACQUISITION.equals(dateType) && !values.onlyTypes.contains(Image.class)) { // Use import for non-images dateType = LuceneQueryBuilder.DATE_IMPORT; } try { this.queryStr = LuceneQueryBuilder.buildLuceneQuery(fieldsArray, dFrom, dTo, dateType, query); if (this.queryStr.isEmpty()) { q = null; log.info("Generated empty Lucene query"); return; // EARLY EXIT! } else { log.info("Generated Lucene query: " + this.queryStr); } } catch (InvalidQueryException e1) { throw new ApiUsageException("Invalid query: " + e1.getMessage()); } try { final Analyzer a = analyzer.newInstance(); final QueryParser parser = new /*Analyzing*/ QueryParser("combined_fields", a); parser.setAllowLeadingWildcard(values.leadingWildcard); q = parser.parse(queryStr); } catch (ParseException pe) { final String msg = queryStr + " caused a parse exception: " + pe.getMessage(); // No longer logging these, since it's a simple user error ApiUsageException aue = new ApiUsageException(msg); throw aue; } catch (InstantiationException e) { ApiUsageException aue = new ApiUsageException(analyzer.getName() + " cannot be instantiated."); throw aue; } catch (IllegalAccessException e) { ApiUsageException aue = new ApiUsageException(analyzer.getName() + " cannot be instantiated."); throw aue; } }
/**
 * Checks that searching one merged index and searching two isolated per-field
 * indexes (via {@code LgteIsolatedIndexReader}) produce the same top-three
 * documents with identical scores under the OkapiBM25 model.
 */
public void testRange() throws IOException, InvalidGeoException {
  final double k1 = 2.0d;
  final double b = 0.75d;
  final double epslon = 0.05d;

  // BM25 parameters shared by both searches.
  QueryConfiguration queryConfiguration = new QueryConfiguration();
  queryConfiguration.setProperty("bm25.idf.policy", "floor_epslon");
  queryConfiguration.setProperty("bm25.idf.epslon", "" + epslon);
  queryConfiguration.setProperty("bm25.k1", "" + k1);
  queryConfiguration.setProperty("bm25.b", "" + b);

  // Baseline: a single searcher over the unique index.
  LgteIndexSearcherWrapper searcher =
      new LgteIndexSearcherWrapper(Model.OkapiBM25Model, pathUnique);

  // Under test: two readers, one per field, combined through an isolated reader.
  IndexReader readerMulti1 = LgteIndexManager.openReader(pathMulti1, Model.OkapiBM25Model);
  IndexReader readerMulti2 = LgteIndexManager.openReader(pathMulti2, Model.OkapiBM25Model);
  Map<String, IndexReader> readers = new HashMap<String, IndexReader>();
  readers.put("contents1", readerMulti1);
  readers.put("contents2", readerMulti2);
  LgteIndexSearcherWrapper searcherMulti =
      new LgteIndexSearcherWrapper(
          Model.OkapiBM25Model, new LgteIsolatedIndexReader(readers));

  try {
    LgteQuery lgteQuery =
        LgteQueryParser.parseQuery(
            "contents1:(word2 word67 word1*) contents2:(word1* word2 word67)",
            searcher,
            queryConfiguration);
    LgteHits expectedHits = searcher.search(lgteQuery);

    LgteQuery lgteQueryMulti =
        LgteQueryParser.parseQuery(
            "contents1:(word1* word2 word67) contents2:(word1* word2 word67)",
            searcherMulti,
            queryConfiguration);
    LgteHits actualHits = searcherMulti.search(lgteQueryMulti);

    System.out.println("EXPECTED");
    for (int rank = 0; rank < 3; rank++) {
      System.out.println(
          "doc:"
              + expectedHits.doc(rank).get(Globals.DOCUMENT_ID_FIELD)
              + ":"
              + expectedHits.score(rank));
    }
    System.out.println("RETURN:");
    for (int rank = 0; rank < 3; rank++) {
      System.out.println(
          "doc:"
              + actualHits.doc(rank).get(Globals.DOCUMENT_ID_FIELD)
              + ":"
              + actualHits.score(rank));
    }

    // Same documents in the same order...
    for (int rank = 0; rank < 3; rank++) {
      assertEquals(
          expectedHits.doc(rank).get(Globals.DOCUMENT_ID_FIELD),
          actualHits.doc(rank).get(Globals.DOCUMENT_ID_FIELD));
    }
    // ...with identical scores.
    for (int rank = 0; rank < 3; rank++) {
      assertEquals(expectedHits.score(rank), actualHits.score(rank));
    }
  } catch (ParseException e) {
    fail(e.toString());
  }
  searcher.close();
}
/** * Get matching entries for a query * * @param _word word that * @return map of lists of cluster results from index for this query * @throws Exception */ public List<List<String>> searchIndex(String query) throws Exception { Map<Integer, List<String>> suggestionCls = new HashMap<Integer, List<String>>(); Map<Integer, Integer> clusterOrder = new HashMap<Integer, Integer>(); List<List<String>> suggestionClsLists = new ArrayList<List<String>>(); String searchCriteria = IndexUtils.KEY_QUERY + ":" + "\"" + query + "\""; Query luceneQuery = null; try { luceneQuery = parser.parse(searchCriteria); } catch (ParseException e) { System.err.println("Lucene could not parse query: " + searchCriteria); e.printStackTrace(); } // TopDocs results = idxSearcher.search(query, 10); // TODO sort also by clusterId // sort after refinement counts Sort clRefSort = new Sort( new SortField[] { new SortField(IndexUtils.KEY_REF_COUNT, SortField.INT, true), new SortField(IndexUtils.KEY_CLUSTER_ID, SortField.INT, false) }); int clusterId; String refinement; int refCount; TopDocs docs = idxSearcher.search(luceneQuery, 1000, clRefSort); int clusterNum = 0; for (ScoreDoc match : docs.scoreDocs) { Document d = idxSearcher.doc(match.doc); clusterId = (Integer) ((NumericField) d.getFieldable(IndexUtils.KEY_CLUSTER_ID)).getNumericValue(); refinement = d.get(IndexUtils.KEY_REF); refCount = (Integer) ((NumericField) d.getFieldable(IndexUtils.KEY_REF_COUNT)).getNumericValue(); // add results to right list if (clusterOrder.containsKey(clusterId)) { // add to right list suggestionClsLists.get(clusterOrder.get(clusterId)).add(refinement); } else { // add new list clusterOrder.put(clusterId, clusterNum); suggestionClsLists.add(new ArrayList<String>()); suggestionClsLists.get(clusterOrder.get(clusterId)).add(refinement); clusterNum++; } // add results to map if (suggestionCls.containsKey(clusterId)) { suggestionCls.get(clusterId).add(refinement); } else { // for new cluster add new list List<String> clRefs 
= new ArrayList<String>(); clRefs.add(refinement); suggestionCls.put(clusterId, clRefs); } // System.out.println(clusterId + "\t" + refinement + "\t" + refCount); } // return suggestionCls; return suggestionClsLists; }
/**
 * Process properties to query sparse content directly.
 *
 * <p>Parses the query string with a Lucene {@link QueryParser} only to extract its
 * terms, translates them into a property map for the sparse {@code ContentManager},
 * runs the find, and wraps the results. Boolean OR sub-queries are flattened one
 * level deep into {@code "orset<N>"} sub-maps.
 *
 * @param request used to obtain the JCR session that is adapted to a sparse session
 * @param query supplies the query string and options; options are copied into the
 *     property map with a {@code "_"} prefix
 * @param asAnon not referenced in this method's body
 * @return the result set over the sparse content items found
 * @throws SolrSearchException with status 500 on access, storage, or parse failures.
 *     NOTE(review): only {@code e.getMessage()} is kept — the original cause/stack
 *     trace is dropped; confirm whether SolrSearchException can carry a cause.
 */
public SolrSearchResultSet processQuery(
    SlingHttpServletRequest request, Query query, boolean asAnon) throws SolrSearchException {
  try {
    // use solr parsing to get the terms from the query string
    QueryParser parser =
        new QueryParser(Version.LUCENE_40, "id", new TextField().getQueryAnalyzer());
    org.apache.lucene.search.Query luceneQuery = parser.parse(query.getQueryString());

    Map<String, Object> props = Maps.newHashMap();
    if (luceneQuery instanceof BooleanQuery) {
      BooleanQuery boolLucQuery = (BooleanQuery) luceneQuery;

      int orCount = 0;
      List<BooleanClause> clauses = boolLucQuery.clauses();
      for (BooleanClause clause : clauses) {
        org.apache.lucene.search.Query clauseQuery = clause.getQuery();
        Map<String, Object> subOrs = Maps.newHashMap();
        // we support 1 level of nesting for OR clauses
        if (clauseQuery instanceof BooleanQuery) {
          // Nested boolean: extract each sub-clause into the same subOrs map,
          // then store the whole group under one numbered "orset" key.
          for (BooleanClause subclause : ((BooleanQuery) clauseQuery).clauses()) {
            org.apache.lucene.search.Query subclauseQuery = subclause.getQuery();
            extractTerms(subclause, subclauseQuery, props, subOrs);
          }
          props.put("orset" + orCount, subOrs);
          orCount++;
        } else {
          // Flat clause: extractTerms decides whether it lands in props or subOrs;
          // only register an orset if something was actually added to subOrs.
          extractTerms(clause, clauseQuery, props, subOrs);
          if (!subOrs.isEmpty()) {
            props.put("orset" + orCount, subOrs);
            orCount++;
          }
        }
      }
    } else {
      // Single-term (non-boolean) query.
      extractTerms(null, luceneQuery, props, null);
    }

    // add the options to the parameters but prepend _ to avoid collision
    for (Entry<String, String> option : query.getOptions().entrySet()) {
      props.put("_" + option.getKey(), option.getValue());
    }

    Session session =
        StorageClientUtils.adaptToSession(
            request.getResourceResolver().adaptTo(javax.jcr.Session.class));
    ContentManager cm = session.getContentManager();

    // Time the sparse find so slow queries can be logged below.
    long tquery = System.currentTimeMillis();
    Iterable<Content> items = cm.find(props);
    tquery = System.currentTimeMillis() - tquery;
    try {
      if (tquery > verySlowQueryThreshold) {
        SLOW_QUERY_LOGGER.error(
            "Very slow sparse query {} ms {} ",
            tquery,
            URLDecoder.decode(query.toString(), "UTF-8"));
      } else if (tquery > slowQueryThreshold) {
        SLOW_QUERY_LOGGER.warn(
            "Slow sparse query {} ms {} ",
            tquery,
            URLDecoder.decode(query.toString(), "UTF-8"));
      }
    } catch (UnsupportedEncodingException e) {
      // quietly swallow this exception; it only affects slow-query logging
      LOGGER.debug(e.getLocalizedMessage(), e);
    }
    SolrSearchResultSet rs = new SparseSearchResultSet(items, defaultMaxResults);
    return rs;
  } catch (AccessDeniedException e) {
    throw new SolrSearchException(500, e.getMessage());
  } catch (StorageClientException e) {
    throw new SolrSearchException(500, e.getMessage());
  } catch (ParseException e) {
    throw new SolrSearchException(500, e.getMessage());
  }
}