/**
 * Compares two {@link Sort} specifications for equivalence.
 *
 * <p>Two sorts match when they are the same instance, or when they have the same number of
 * {@link SortField}s and each pair agrees on field name (per the natural matcher), type, and
 * reverse flag. A {@code null} on only one side never matches.
 *
 * <p>Fix: the second parameter was misspelled {@code acutal}; renamed to {@code actual}
 * (source-compatible — Java callers do not bind by parameter name).
 *
 * @param expected the expected sort, may be null
 * @param actual the actual sort, may be null
 * @return true when the two sorts are equivalent field by field
 */
public boolean match(final Sort expected, final Sort actual) {
  if (expected == actual) {
    return true;
  }
  if ((expected == null) || (actual == null)) {
    return false;
  }
  final SortField[] expectedFields = expected.getSort();
  final SortField[] actualFields = actual.getSort();
  if (expectedFields.length != actualFields.length) {
    return false;
  }
  final ArgumentMatcher<String> matcher = ArgumentMatchers.naturalMatcher();
  for (int i = 0; i < actualFields.length; i++) {
    final SortField actualField = actualFields[i];
    final SortField expectedField = expectedFields[i];
    // Field name, sort type and direction must all agree for each position.
    if (!matcher.match(expectedField.getField(), actualField.getField())) {
      return false;
    }
    if (actualField.getType() != expectedField.getType()) {
      return false;
    }
    if (expectedField.getReverse() != actualField.getReverse()) {
      return false;
    }
  }
  return true;
}
@Test
public void testSearch() throws IOException {
  final String keyword = "tomcat";
  // Sort by relevance score on the "name" field.
  final Sort nameSort = new Sort();
  nameSort.setSort(new SortField("name", SortField.SCORE));
  // Query the first two hits and map each document back to its stored "name" value.
  final List<String> hits =
      SearchUtil.searchUseIK(
          keyword,
          path,
          new String[] {"name"},
          nameSort,
          0,
          2,
          new ResultMapper<String>() {
            @Override
            public String mapRow(Document doc) {
              return doc.get("name");
            }
          });
  for (final String hit : hits) {
    P.println(hit);
  }
}
/**
 * Creates a collector that gathers the top {@code max(reRankDocs, length)} hits using the
 * original query's sort, so the leading {@code reRankDocs} hits can later be re-scored with
 * {@code reRankQuery} weighted by {@code reRankWeight}.
 *
 * <p>When the command carries no explicit sort, a score-ordered collector is used; otherwise
 * the sort is rewritten against the searcher before building a field collector.
 */
public ReRankCollector(
    int reRankDocs,
    int length,
    Query reRankQuery,
    double reRankWeight,
    SolrIndexSearcher.QueryCommand cmd,
    IndexSearcher searcher,
    Map<BytesRef, Integer> boostedPriority,
    boolean scale)
    throws IOException {
  super(null);
  this.reRankQuery = reRankQuery;
  this.reRankDocs = reRankDocs;
  this.length = length;
  this.boostedPriority = boostedPriority;
  this.scale = scale;
  Sort sort = cmd.getSort();
  if (sort == null) {
    // Relevance ordering: collect enough docs to cover both the page and the re-rank set.
    this.mainCollector = TopScoreDocCollector.create(Math.max(this.reRankDocs, length), true);
  } else {
    // Field sorts must be rewritten (resolved) against this searcher before use.
    sort = sort.rewrite(searcher);
    this.mainCollector =
        TopFieldCollector.create(
            sort, Math.max(this.reRankDocs, length), false, true, true, true);
  }
  this.searcher = searcher;
  this.reRankWeight = reRankWeight;
}
/** Finds all pending push requests, ordered by push time ascending (oldest first). */
public CObjList getPushesToConnect() {
  // Only documents explicitly flagged as push requests qualify.
  BooleanQuery query = new BooleanQuery();
  Term flagTerm = new Term(CObj.docPrivate(CObj.PRV_PUSH_REQ), "true");
  query.add(new TermQuery(flagTerm), BooleanClause.Occur.MUST);
  // Ascending long sort on the push time.
  Sort byPushTime = new Sort();
  byPushTime.setSort(
      new SortField(CObj.docPrivateNumber(CObj.PRV_PUSH_TIME), SortField.Type.LONG, false));
  return search(query, Integer.MAX_VALUE, byPushTime);
}
/**
 * Serializes a {@link QueryCommandResult} into a {@link NamedList} for a shard response:
 * match/hit counts, optional max score, and one entry per document carrying its unique key,
 * optional score, and (for field sorts only) marshalled sort values.
 */
protected NamedList serializeTopDocs(QueryCommandResult result) throws IOException {
  NamedList<Object> queryResult = new NamedList<>();
  queryResult.add("matches", result.getMatches());
  queryResult.add("totalHits", result.getTopDocs().totalHits);
  // debug: assert !Float.isNaN(result.getTopDocs().getMaxScore()) ==
  // rb.getGroupingSpec().isNeedScore();
  if (!Float.isNaN(result.getTopDocs().getMaxScore())) {
    queryResult.add("maxScore", result.getTopDocs().getMaxScore());
  }
  List<NamedList> documents = new ArrayList<>();
  queryResult.add("documents", documents);
  final IndexSchema schema = rb.req.getSearcher().getSchema();
  SchemaField uniqueField = schema.getUniqueKeyField();
  for (ScoreDoc scoreDoc : result.getTopDocs().scoreDocs) {
    NamedList<Object> document = new NamedList<>();
    documents.add(document);
    Document doc = retrieveDocument(uniqueField, scoreDoc.doc);
    document.add("id", uniqueField.getType().toExternal(doc.getField(uniqueField.getName())));
    if (!Float.isNaN(scoreDoc.score)) {
      document.add("score", scoreDoc.score);
    }
    if (!FieldDoc.class.isInstance(scoreDoc)) {
      continue; // thus don't add sortValues below
    }
    FieldDoc fieldDoc = (FieldDoc) scoreDoc;
    Object[] convertedSortValues = new Object[fieldDoc.fields.length];
    for (int j = 0; j < fieldDoc.fields.length; j++) {
      Object sortValue = fieldDoc.fields[j];
      Sort groupSort = rb.getGroupingSpec().getGroupSort();
      SchemaField field =
          groupSort.getSort()[j].getField() != null
              ? schema.getFieldOrNull(groupSort.getSort()[j].getField())
              : null;
      if (field != null) {
        FieldType fieldType = field.getType();
        if (sortValue != null) {
          // Convert to a transport-friendly form; the merging node unmarshals it.
          sortValue = fieldType.marshalSortValue(sortValue);
        }
      }
      convertedSortValues[j] = sortValue;
    }
    document.add("sortValues", convertedSortValues);
  }
  return queryResult;
}
/**
 * Returns a mapping from the old document ID to its new location in the sorted index.
 * Implementations can use the auxiliary {@link #sort(int, DocComparator)} to compute the
 * old-to-new permutation given a list of documents and their corresponding values.
 *
 * <p>A return value of <tt>null</tt> is allowed and means that <code>reader</code> is already
 * sorted.
 *
 * <p><b>NOTE:</b> deleted documents are expected to appear in the mapping as well, they will
 * however be marked as deleted in the sorted view.
 */
DocMap sort(LeafReader reader) throws IOException {
  SortField fields[] = sort.getSort();
  final int reverseMul[] = new int[fields.length];
  final LeafFieldComparator comparators[] = new LeafFieldComparator[fields.length];
  for (int i = 0; i < fields.length; i++) {
    reverseMul[i] = fields[i].getReverse() ? -1 : 1;
    // Single-slot comparators: slot 0 is reused for each left-hand doc below.
    comparators[i] = fields[i].getComparator(1, i).getLeafComparator(reader.getContext());
    comparators[i].setScorer(FAKESCORER);
  }
  final DocComparator comparator =
      new DocComparator() {
        @Override
        public int compare(int docID1, int docID2) {
          try {
            for (int i = 0; i < comparators.length; i++) {
              // TODO: would be better if copy() didnt cause a term lookup in TermOrdVal & co,
              // the segments are always the same here...
              // Load docID1's value into slot 0, mark it as "bottom", then compare docID2
              // against it; reverseMul flips the result for descending fields.
              comparators[i].copy(0, docID1);
              comparators[i].setBottom(0);
              int comp = reverseMul[i] * comparators[i].compareBottom(docID2);
              if (comp != 0) {
                return comp;
              }
            }
            return Integer.compare(docID1, docID2); // docid order tiebreak
          } catch (IOException e) {
            // DocComparator.compare cannot declare IOException; surface it unchecked.
            throw new RuntimeException(e);
          }
        }
      };
  return sort(reader.maxDoc(), comparator);
}
public CObjList getPushesToSend() { BooleanQuery bq = new BooleanQuery(); // For a BooleanQuery with no MUST clauses one or more SHOULD clauses // must match a document for the BooleanQuery to match. Term decterm = new Term(CObj.docPrivate(CObj.PRV_PUSH_REQ), "true"); bq.add(new TermQuery(decterm), BooleanClause.Occur.SHOULD); Term nocterm = new Term(CObj.docPrivate(CObj.PRV_PUSH_REQ), "nocon"); bq.add(new TermQuery(nocterm), BooleanClause.Occur.SHOULD); Sort s = new Sort(); s.setSort(new SortField(CObj.docPrivateNumber(CObj.PRV_PUSH_TIME), SortField.Type.LONG, false)); return search(bq, Integer.MAX_VALUE, s); }
/**
 * Creates a new Sorter that orders the index by {@code indexSort}.
 *
 * @throws IllegalArgumentException if the sort depends on the relevance score, which is not
 *     available at index-sorting time
 */
Sorter(Sort indexSort) {
  if (indexSort.needsScores()) {
    throw new IllegalArgumentException(
        "Cannot sort an index with a Sort that refers to the relevance score");
  }
  this.sort = indexSort;
}
/**
 * Reports whether the given sort involves the relevance score.
 *
 * @param sort the sort to inspect; {@code null} means the default relevance sort
 * @return true for a null sort or when any sort field is of type SCORE
 */
public static boolean includesScore(Sort sort) {
  if (sort == null) {
    return true;
  }
  for (SortField field : sort.getSort()) {
    if (field.getType() == SortField.Type.SCORE) {
      return true;
    }
  }
  return false;
}
/**
 * Translates querydsl order specifiers into a Lucene {@link Sort}.
 *
 * <p>Numeric targets use the type-specific sort from {@code sortFields}; everything else is
 * sorted with the configured locale.
 *
 * @throws IllegalArgumentException if any order target is not a {@link Path}
 */
public Sort toSort(List<? extends OrderSpecifier<?>> orderBys) {
  List<SortField> fields = new ArrayList<SortField>(orderBys.size());
  for (OrderSpecifier<?> spec : orderBys) {
    if (!(spec.getTarget() instanceof Path<?>)) {
      throw new IllegalArgumentException("argument was not of type Path.");
    }
    Class<?> targetType = spec.getTarget().getType();
    boolean descending = !spec.isAscending();
    Path<?> path = getPath(spec.getTarget());
    if (Number.class.isAssignableFrom(targetType)) {
      fields.add(new SortField(toField(path), sortFields.get(targetType), descending));
    } else {
      fields.add(new SortField(toField(path), sortLocale, descending));
    }
  }
  Sort sort = new Sort();
  sort.setSort(fields.toArray(new SortField[fields.size()]));
  return sort;
}
/**
 * Produces a deep copy of a {@link Sort} by cloning each {@link SortField} through the
 * marshaller. Returns {@code null} for a {@code null} input.
 */
private Sort deepCopy(Sort sort) {
  if (sort == null) {
    return null;
  }
  SortField[] originals = sort.getSort();
  SortField[] clones = new SortField[originals.length];
  for (int i = 0; i < clones.length; i++) {
    clones[i] = Util.cloneWithMarshaller(marshaller, originals[i]);
  }
  return new Sort(clones);
}
/**
 * Stores the per-node top docs and seeds the merge priority queue with the first hit from
 * each node's result stream.
 */
private void setTopDocs(HashMap<UUID, ClusteredTopDocs> topDocsResponses) {
  this.topDocsResponses = topDocsResponses;
  if (sort != null) {
    // reversing sort fields to FieldDocSortedHitQueue work properly
    // NOTE(review): SortField exposes no setter for "reverse", hence the reflection hack;
    // this mutates the shared Sort instance in place — confirm no other reader holds it.
    for (SortField sf : sort.getSort()) {
      boolean reverse = (Boolean) ReflectionUtil.getValue(sf, "reverse");
      ReflectionUtil.setValue(sf, "reverse", !reverse);
    }
    hq =
        ISPNPriorityQueueFactory.getFieldDocSortedHitQueue(
            topDocsResponses.size(), sort.getSort());
  } else hq = ISPNPriorityQueueFactory.getHitQueue(topDocsResponses.size());
  // taking the first value of each queue
  for (ClusteredTopDocs ctp : topDocsResponses.values()) {
    if (ctp.hasNext()) hq.add(ctp.getNext());
  }
}
/**
 * Builds the Lucene {@link Sort} for a search query. Falls back to relevance order when the
 * query specifies no field sorts; otherwise one {@link SortField} is created per requested
 * field, descending when the order is {@code FieldSort.DESC}.
 */
private Sort getSort(SearchQuery searchQuery) {
  List<FieldSort> fieldSorts = searchQuery.getFieldSorts();
  if (CollectionUtil.isEmpty(fieldSorts)) {
    return Sort.RELEVANCE;
  }
  SortField[] sortFields = new SortField[fieldSorts.size()];
  int idx = 0;
  for (FieldSort fieldSort : fieldSorts) {
    String fieldName = fieldSort.getName();
    FieldInfo info = schema.getFieldInfos().get(fieldName);
    boolean descending = (fieldSort.getOrder() == FieldSort.DESC);
    sortFields[idx++] =
        new SortField(fieldName, info.getFieldType().getSortType(), descending);
  }
  Sort sort = new Sort();
  sort.setSort(sortFields);
  return sort;
}
/**
 * Rebuilds native {@link ShardDoc}s from the serialized per-document entries of a shard
 * response: id (stringified), optional score (NaN when absent), and unmarshalled sort values.
 */
protected ScoreDoc[] transformToNativeShardDoc(
    List<NamedList<Object>> documents, Sort groupSort, String shard, IndexSchema schema) {
  ScoreDoc[] scoreDocs = new ScoreDoc[documents.size()];
  int j = 0;
  for (NamedList<Object> document : documents) {
    Object docId = document.get("id");
    if (docId != null) {
      docId = docId.toString();
    } else {
      log.error("doc {} has null 'id'", document);
    }
    Float score = (Float) document.get("score");
    if (score == null) {
      score = Float.NaN;
    }
    Object[] sortValues = null;
    Object sortValuesVal = document.get("sortValues");
    if (sortValuesVal != null) {
      sortValues = ((List) sortValuesVal).toArray();
      for (int k = 0; k < sortValues.length; k++) {
        SchemaField field =
            groupSort.getSort()[k].getField() != null
                ? schema.getFieldOrNull(groupSort.getSort()[k].getField())
                : null;
        if (field != null) {
          FieldType fieldType = field.getType();
          if (sortValues[k] != null) {
            // Reverse the marshalling done by the serializing shard.
            sortValues[k] = fieldType.unmarshalSortValue(sortValues[k]);
          }
        }
      }
    } else {
      log.debug("doc {} has null 'sortValues'", document);
    }
    scoreDocs[j++] = new ShardDoc(score, sortValues, docId, shard);
  }
  return scoreDocs;
}
/**
 * Creates a ToParentBlockJoinCollector. The provided sort must not be null. If you pass true
 * trackScores, all ToParentBlockQuery instances must not use ScoreMode.None.
 */
public ToParentBlockJoinCollector(
    Sort sort, int numParentHits, boolean trackScores, boolean trackMaxScore)
    throws IOException {
  // TODO: allow null sort to be specialized to relevance
  // only collector
  this.sort = sort;
  this.trackMaxScore = trackMaxScore;
  if (trackMaxScore) {
    // Seed low so the first real score replaces it.
    maxScore = Float.MIN_VALUE;
  }
  // System.out.println("numParentHits=" + numParentHits);
  this.trackScores = trackScores;
  this.numParentHits = numParentHits;
  // The hit queue and its comparators are built once from the sort's fields.
  queue = FieldValueHitQueue.create(sort.getSort(), numParentHits);
  comparators = queue.getComparators();
}
/**
 * Serializes {@link TopGroups} for a shard response: overall counts, then one entry per group
 * keyed by its human-readable group value, each carrying its hits with id, optional score and
 * marshalled within-group sort values.
 */
protected NamedList serializeTopGroups(TopGroups<BytesRef> data, SchemaField groupField)
    throws IOException {
  NamedList<Object> result = new NamedList<>();
  result.add("totalGroupedHitCount", data.totalGroupedHitCount);
  result.add("totalHitCount", data.totalHitCount);
  if (data.totalGroupCount != null) {
    result.add("totalGroupCount", data.totalGroupCount);
  }
  CharsRef spare = new CharsRef();
  final IndexSchema schema = rb.req.getSearcher().getSchema();
  SchemaField uniqueField = schema.getUniqueKeyField();
  for (GroupDocs<BytesRef> searchGroup : data.groups) {
    NamedList<Object> groupResult = new NamedList<>();
    groupResult.add("totalHits", searchGroup.totalHits);
    if (!Float.isNaN(searchGroup.maxScore)) {
      groupResult.add("maxScore", searchGroup.maxScore);
    }
    List<NamedList<Object>> documents = new ArrayList<>();
    for (int i = 0; i < searchGroup.scoreDocs.length; i++) {
      NamedList<Object> document = new NamedList<>();
      documents.add(document);
      StoredDocument doc = retrieveDocument(uniqueField, searchGroup.scoreDocs[i].doc);
      document.add("id", uniqueField.getType().toExternal(doc.getField(uniqueField.getName())));
      if (!Float.isNaN(searchGroup.scoreDocs[i].score)) {
        document.add("score", searchGroup.scoreDocs[i].score);
      }
      if (!(searchGroup.scoreDocs[i] instanceof FieldDoc)) {
        continue; // not a field sort, so there are no sortValues to serialize
      }
      FieldDoc fieldDoc = (FieldDoc) searchGroup.scoreDocs[i];
      Object[] convertedSortValues = new Object[fieldDoc.fields.length];
      for (int j = 0; j < fieldDoc.fields.length; j++) {
        Object sortValue = fieldDoc.fields[j];
        Sort sortWithinGroup = rb.getGroupingSpec().getSortWithinGroup();
        SchemaField field =
            sortWithinGroup.getSort()[j].getField() != null
                ? schema.getFieldOrNull(sortWithinGroup.getSort()[j].getField())
                : null;
        if (field != null) {
          FieldType fieldType = field.getType();
          if (sortValue != null) {
            // Convert to a transport-friendly form; the merging node unmarshals it.
            sortValue = fieldType.marshalSortValue(sortValue);
          }
        }
        convertedSortValues[j] = sortValue;
      }
      document.add("sortValues", convertedSortValues);
    }
    groupResult.add("documents", documents);
    String groupValue =
        searchGroup.groupValue != null
            ? groupField.getType().indexedToReadable(searchGroup.groupValue.utf8ToString())
            : null;
    result.add(groupValue, groupResult);
  }
  return result;
}
/**
 * Runs a rule-based search over the index directory, restricted to the whole days from
 * {@code beginTime} to {@code endTime}, ordered by time descending (at most 1000 hits kept).
 * Returns {@code null} when the index directory is missing or empty; otherwise a
 * {@code SearchResult} wrapping the elapsed time, the rule and the matching results.
 */
public synchronized SearchResult search(Rule rule, String beginTime, String endTime) {
  Result result;
  LinkedList<Result> search_results = new LinkedList<Result>();
  SearchResult searchResult = null;
  if (!(new File(indexDir).exists()) || (new File(indexDir).listFiles().length == 0)) {
    System.out.println("索引文件夹不存在或文件夹为空~!");
    return searchResult;
  }
  type = rule.getFileType();
  search_results.clear();
  try {
    searcher = new IndexSearcher(indexDir);
    // Result collector, capped at 10000 records.
    topCollector = new TopDocCollector(10000);
    // Use IKAnalyzer's IKSimilarity relevance scorer.
    searcher.setSimilarity(new IKSimilarity());
    BooleanQuery bQuery = (BooleanQuery) parseRuleToQuery(rule);
    /*
     * RangeQuery is limited by the BooleanClause count and no workaround was found,
     * so a RangeFilter is used instead of:
     * BooleanQuery bQuery = new BooleanQuery();
     * bQuery.add(parseRuleToQuery(rule), BooleanClause.Occur.MUST);
     * Term t1 = new Term("time", bTime);
     * Term t2 = new Term("time", eTime);
     * RangeQuery timeRangeQuery = new RangeQuery(t1, t2, true);
     * bQuery.add(timeRangeQuery, BooleanClause.Occur.MUST);
     */
    // Time-range filter covering the whole start and end days, inclusive.
    RangeFilter filter =
        new RangeFilter("time", beginTime + " 00:00:00", endTime + " 23:59:59", true, true);
    CachingWrapperFilter cwFilter = new CachingWrapperFilter(filter);
    // Order results by time, descending.
    Sort sort = new Sort();
    sort.setSort(new SortField("time", SortField.STRING, true));
    begin = new Date();
    // Execute the search.
    topDocs = searcher.search(bQuery, cwFilter, 1000, sort);
    scoreDocs = topDocs.scoreDocs;
    end = new Date();
    sTime = (end.getTime() - begin.getTime());
    for (ScoreDoc scoreDoc : scoreDocs) {
      doc = searcher.doc(scoreDoc.doc);
      typeDoc = doc.get("filetype");
      // Skip docs whose file type doesn't match, unless "any format" ("任何格式") was requested.
      if (!type.equals("任何格式") && !type.equalsIgnoreCase(typeDoc)) continue;
      titleDoc = doc.get("title");
      contentDoc = doc.get("content");
      urlDoc = doc.get("url");
      timeDoc = doc.get("time");
      pageMirrorPath = doc.get("pageMirrorPath");
      source = doc.get("source");
      result = new Result();
      // File type
      result.setFiletype(typeDoc);
      // Title
      result.setTitle(titleDoc);
      // Content
      result.setContent(contentDoc);
      // Link
      result.setUrl(urlDoc);
      // Time, parsed into a Date; parse failures fall back to "now".
      Date datetime = new Date();
      try {
        datetime = format.parse(timeDoc);
      } catch (ParseException e) {
        // System.out.println(timeDoc + "转换失败!");
      }
      result.setTime(datetime);
      // Path to the cached page snapshot
      result.setPageMirrorPath(pageMirrorPath);
      // Source
      result.setSource(source);
      search_results.add(result);
    }
    System.out.println("共获得" + search_results.size() + "条查询结果!");
  } catch (IOException e) {
    e.printStackTrace();
  } catch (Exception e) {
    e.printStackTrace();
  }
  searchResult = new SearchResult(sTime, rule, search_results);
  return searchResult;
}
/** * @Method Description :绑定前台展会数据 * * @author : 林俊钦 * @date : Nov 1, 2011 3:06:11 PM */ public String list() throws Exception { // 设置网页位置 super.setPage_position(this.module_type); // 构造list列表搜索条件 List shList = new ArrayList(); Sort sort = new Sort(); if (!reqKeyword("title", module_type, shList)) { // 按分类列表选择列表 shList = this.normalSearch("cat_attr", cat_id, shList); // 按分类列表选择列表 shList = this.normalSearch("area_attr", area_id, shList); // 按搜索内容选择列表 shList = this.normalSearch("title", searchText, shList); // 分类属性的搜索 if (attrString != null && !attrString.equals("")) { String attrs[] = attrString.split(","); for (int i = 0; i < attrs.length; i++) { if (attrs[i] != null && !attrs[i].equals("")) { String vals[] = attrs[i].split("\\|"); if (vals != null && vals.length > 1 && !vals[1].equals("none")) { shList = normalSearch("attr_desc", vals[1], shList); } } } } SortField sf = new SortField("lu_in_date", SortField.STRING, true); // 降序 sort.setSort(new SortField[] {sf}); } // 找通过审核的记录 shList = normalSearch("info_state", "1", shList); // 如果搜索内容为空则不搜索 if (is_souch) { SearchIndex si = new SearchIndex(module_type); // 计算符合条件条数 int count = si.getCount(shList); infoCount = count; // lucene的分页插件 lucenePageTool(count); // 分类信息分组 cateList = si.catInfoNum(shList); // 地区信息分组 areaList = si.areaInfoNum(shList); // 搜索符合条件的list exhibitionList = si.search(shList, sort, pages_s, pageSize_s); // 替换list表中字段的处理 exhibitionList = ToolsFuc.replaceList(exhibitionList); if (cateList != null && cateList.size() == 0) { if (cat_id != null && !cat_id.equals("")) { // 分类属性信息开始 SearchIndex attrsi = new SearchIndex("categoryattr"); List aList = new ArrayList(); aList = normalSearch("cat_attr", cat_id, aList); aList = normalSearch("attr_type", "2", aList); aList = normalSearch("is_must", "1", aList); attrList = attrsi.search(aList, null, 0, 0); // 分类属性信息结束 // 分类属性值信息开始 SearchIndex attrvalue = new SearchIndex("attrvalue"); List vList = new ArrayList(); vList = normalSearch("cat_attr", cat_id, vList); 
vList = normalSearch("attr_type", "2", vList); vList = normalSearch("is_must", "1", vList); attrvalueList = attrvalue.search(aList, null, 0, 0); // 分类属性值信息结束 } } } // 绑定展会热门排行 SearchIndex si = new SearchIndex(module_type); List topMapList = new ArrayList(); topMapList = normalSearch("info_state", "1", topMapList); Sort sort1 = new Sort(); SortField sf = new SortField("clicknum", SortField.STRING, true); // 降序 sort1.setSort(new SortField[] {sf}); topList = si.search(topMapList, sort1, 1, 10); return goUrl("showinfoList"); }
/** Resolves the configured sort against the given schema; {@code null} when unset. */
public org.apache.lucene.search.Sort sort(Options schema) {
  if (sort == null) {
    return null;
  }
  return sort.sort(schema);
}
/**
 * Returns the identifier of this {@link Sorter}.
 *
 * <p>Analogous to {@link Object#hashCode()}: two instances that sort documents the same way
 * must share an identifier, while different {@link Sort sorts} must yield different ones.
 * Here the sort's string form serves as that identifier.
 */
public String getID() {
  return sort.toString();
}
// Test using various international locales with accented characters (which
// sort differently depending on locale)
//
// Copied (and slightly modified) from
// org.apache.lucene.search.TestSort.testInternationalSort()
//
// TODO: this test is really fragile. there are already 3 different cases,
// depending upon unicode version.
public void testCollationKeySort(
    Analyzer usAnalyzer,
    Analyzer franceAnalyzer,
    Analyzer swedenAnalyzer,
    Analyzer denmarkAnalyzer,
    String usResult,
    String frResult,
    String svResult,
    String dkResult)
    throws Exception {
  Directory indexStore = newDirectory();
  // Route each locale-specific field to its own collation analyzer.
  PerFieldAnalyzerWrapper analyzer =
      new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
  analyzer.addAnalyzer("US", usAnalyzer);
  analyzer.addAnalyzer("France", franceAnalyzer);
  analyzer.addAnalyzer("Sweden", swedenAnalyzer);
  analyzer.addAnalyzer("Denmark", denmarkAnalyzer);
  IndexWriter writer =
      new IndexWriter(indexStore, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
  // document data:
  // the tracer field is used to determine which document was hit
  String[][] sortData =
      new String[][] {
        // tracer contents US France Sweden (sv_SE) Denmark
        // (da_DK)
        {"A", "x", "p\u00EAche", "p\u00EAche", "p\u00EAche", "p\u00EAche"},
        {"B", "y", "HAT", "HAT", "HAT", "HAT"},
        {"C", "x", "p\u00E9ch\u00E9", "p\u00E9ch\u00E9", "p\u00E9ch\u00E9", "p\u00E9ch\u00E9"},
        {"D", "y", "HUT", "HUT", "HUT", "HUT"},
        {"E", "x", "peach", "peach", "peach", "peach"},
        {"F", "y", "H\u00C5T", "H\u00C5T", "H\u00C5T", "H\u00C5T"},
        {"G", "x", "sin", "sin", "sin", "sin"},
        {"H", "y", "H\u00D8T", "H\u00D8T", "H\u00D8T", "H\u00D8T"},
        {"I", "x", "s\u00EDn", "s\u00EDn", "s\u00EDn", "s\u00EDn"},
        {"J", "y", "HOT", "HOT", "HOT", "HOT"},
      };
  for (int i = 0; i < sortData.length; ++i) {
    Document doc = new Document();
    doc.add(new Field("tracer", sortData[i][0], Field.Store.YES, Field.Index.NO));
    doc.add(new Field("contents", sortData[i][1], Field.Store.NO, Field.Index.ANALYZED));
    if (sortData[i][2] != null)
      doc.add(new Field("US", sortData[i][2], Field.Store.NO, Field.Index.ANALYZED));
    if (sortData[i][3] != null)
      doc.add(new Field("France", sortData[i][3], Field.Store.NO, Field.Index.ANALYZED));
    if (sortData[i][4] != null)
      doc.add(new Field("Sweden", sortData[i][4], Field.Store.NO, Field.Index.ANALYZED));
    if (sortData[i][5] != null)
      doc.add(new Field("Denmark", sortData[i][5], Field.Store.NO, Field.Index.ANALYZED));
    writer.addDocument(doc);
  }
  writer.forceMerge(1);
  writer.close();
  IndexReader reader = IndexReader.open(indexStore);
  IndexSearcher searcher = new IndexSearcher(reader);
  Sort sort = new Sort();
  Query queryX = new TermQuery(new Term("contents", "x"));
  Query queryY = new TermQuery(new Term("contents", "y"));
  // Each assertion sorts by one locale's collated field and checks the tracer order.
  sort.setSort(new SortField("US", SortField.STRING));
  assertMatches(searcher, queryY, sort, usResult);
  sort.setSort(new SortField("France", SortField.STRING));
  assertMatches(searcher, queryX, sort, frResult);
  sort.setSort(new SortField("Sweden", SortField.STRING));
  assertMatches(searcher, queryY, sort, svResult);
  sort.setSort(new SortField("Denmark", SortField.STRING));
  assertMatches(searcher, queryY, sort, dkResult);
  searcher.close();
  reader.close();
  indexStore.close();
}
/** {@inheritDoc} */
@Override
public Map<String, ?> transformToNative(
    NamedList<NamedList> shardResponse, Sort groupSort, Sort sortWithinGroup, String shard) {
  Map<String, Object> result = new HashMap<>();
  final IndexSchema schema = rb.req.getSearcher().getSchema();
  for (Map.Entry<String, NamedList> entry : shardResponse) {
    String key = entry.getKey();
    NamedList commandResult = entry.getValue();
    Integer totalGroupedHitCount = (Integer) commandResult.get("totalGroupedHitCount");
    Integer totalHits = (Integer) commandResult.get("totalHits");
    if (totalHits != null) {
      // Non-grouped query command: rebuild a QueryCommandResult.
      Integer matches = (Integer) commandResult.get("matches");
      Float maxScore = (Float) commandResult.get("maxScore");
      if (maxScore == null) {
        maxScore = Float.NaN;
      }
      @SuppressWarnings("unchecked")
      List<NamedList<Object>> documents =
          (List<NamedList<Object>>) commandResult.get("documents");
      ScoreDoc[] scoreDocs = transformToNativeShardDoc(documents, groupSort, shard, schema);
      final TopDocs topDocs;
      if (sortWithinGroup.equals(Sort.RELEVANCE)) {
        topDocs = new TopDocs(totalHits, scoreDocs, maxScore);
      } else {
        topDocs = new TopFieldDocs(totalHits, scoreDocs, sortWithinGroup.getSort(), maxScore);
      }
      result.put(key, new QueryCommandResult(topDocs, matches));
      continue;
    }
    // Grouped command: entries after the first two count slots are per-group results.
    Integer totalHitCount = (Integer) commandResult.get("totalHitCount");
    List<GroupDocs<BytesRef>> groupDocs = new ArrayList<>();
    for (int i = 2; i < commandResult.size(); i++) {
      String groupValue = commandResult.getName(i);
      @SuppressWarnings("unchecked")
      NamedList<Object> groupResult = (NamedList<Object>) commandResult.getVal(i);
      Integer totalGroupHits = (Integer) groupResult.get("totalHits");
      Float maxScore = (Float) groupResult.get("maxScore");
      if (maxScore == null) {
        maxScore = Float.NaN;
      }
      @SuppressWarnings("unchecked")
      List<NamedList<Object>> documents =
          (List<NamedList<Object>>) groupResult.get("documents");
      ScoreDoc[] scoreDocs = transformToNativeShardDoc(documents, sortWithinGroup, shard, schema);
      BytesRef groupValueRef = groupValue != null ? new BytesRef(groupValue) : null;
      groupDocs.add(
          new GroupDocs<>(Float.NaN, maxScore, totalGroupHits, scoreDocs, groupValueRef, null));
    }
    @SuppressWarnings("unchecked")
    GroupDocs<BytesRef>[] groupDocsArr = groupDocs.toArray(new GroupDocs[groupDocs.size()]);
    TopGroups<BytesRef> topGroups =
        new TopGroups<>(
            groupSort.getSort(),
            sortWithinGroup.getSort(),
            totalHitCount,
            totalGroupedHitCount,
            groupDocsArr,
            Float.NaN);
    result.put(key, topGroups);
  }
  return result;
}
/**
 * Merges per-shard document lists into a single globally ordered result. Documents flow
 * through a sort-aware priority queue sized offset+count, duplicate unique keys across
 * shards are dropped (keeping the first encountered), and the resulting page is stored on
 * {@code rb.resultIds} / {@code rb._responseDocs} for later stored-field retrieval.
 */
private void mergeIds(ResponseBuilder rb, ShardRequest sreq) {
  SortSpec ss = rb.getSortSpec();
  Sort sort = ss.getSort();
  SortField[] sortFields = null;
  if (sort != null) sortFields = sort.getSort();
  else {
    // No explicit sort: merge by relevance score.
    sortFields = new SortField[] {SortField.FIELD_SCORE};
  }
  SchemaField uniqueKeyField = rb.req.getSchema().getUniqueKeyField();
  // id to shard mapping, to eliminate any accidental dups
  HashMap<Object, String> uniqueDoc = new HashMap<Object, String>();
  // Merge the docs via a priority queue so we don't have to sort *all* of the
  // documents... we only need to order the top (rows+start)
  ShardFieldSortedHitQueue queue;
  queue = new ShardFieldSortedHitQueue(sortFields, ss.getOffset() + ss.getCount());
  long numFound = 0;
  Float maxScore = null;
  for (ShardResponse srsp : sreq.responses) {
    SolrDocumentList docs =
        (SolrDocumentList) srsp.getSolrResponse().getResponse().get("response");
    // calculate global maxScore and numDocsFound
    if (docs.getMaxScore() != null) {
      maxScore = maxScore == null ? docs.getMaxScore() : Math.max(maxScore, docs.getMaxScore());
    }
    numFound += docs.getNumFound();
    NamedList sortFieldValues =
        (NamedList) (srsp.getSolrResponse().getResponse().get("sort_values"));
    // go through every doc in this response, construct a ShardDoc, and
    // put it in the priority queue so it can be ordered.
    for (int i = 0; i < docs.size(); i++) {
      SolrDocument doc = docs.get(i);
      Object id = doc.getFieldValue(uniqueKeyField.getName());
      String prevShard = uniqueDoc.put(id, srsp.getShard());
      if (prevShard != null) {
        // duplicate detected
        numFound--;
        // For now, just always use the first encountered since we can't currently
        // remove the previous one added to the priority queue. If we switched
        // to the Java5 PriorityQueue, this would be easier.
        continue;
        // make which duplicate is used deterministic based on shard
        // if (prevShard.compareTo(srsp.shard) >= 0) {
        // TODO: remove previous from priority queue
        // continue;
        // }
      }
      ShardDoc shardDoc = new ShardDoc();
      shardDoc.id = id;
      shardDoc.shard = srsp.getShard();
      shardDoc.orderInShard = i;
      Object scoreObj = doc.getFieldValue("score");
      if (scoreObj != null) {
        // Some response formats transmit the score as a string.
        if (scoreObj instanceof String) {
          shardDoc.score = Float.parseFloat((String) scoreObj);
        } else {
          shardDoc.score = (Float) scoreObj;
        }
      }
      shardDoc.sortFieldValues = sortFieldValues;
      queue.insertWithOverflow(shardDoc);
    } // end for-each-doc-in-response
  } // end for-each-response
  // The queue now has 0 -> queuesize docs, where queuesize <= start + rows
  // So we want to pop the last documents off the queue to get
  // the docs offset -> queuesize
  int resultSize = queue.size() - ss.getOffset();
  resultSize = Math.max(0, resultSize); // there may not be any docs in range
  Map<Object, ShardDoc> resultIds = new HashMap<Object, ShardDoc>();
  for (int i = resultSize - 1; i >= 0; i--) {
    ShardDoc shardDoc = (ShardDoc) queue.pop();
    shardDoc.positionInResponse = i;
    // Need the toString() for correlation with other lists that must
    // be strings (like keys in highlighting, explain, etc)
    resultIds.put(shardDoc.id.toString(), shardDoc);
  }
  SolrDocumentList responseDocs = new SolrDocumentList();
  if (maxScore != null) responseDocs.setMaxScore(maxScore);
  responseDocs.setNumFound(numFound);
  responseDocs.setStart(ss.getOffset());
  // size appropriately
  for (int i = 0; i < resultSize; i++) responseDocs.add(null);
  // save these results in a private area so we can access them
  // again when retrieving stored fields.
  // TODO: use ResponseBuilder (w/ comments) or the request context?
  rb.resultIds = resultIds;
  rb._responseDocs = responseDocs;
}
/**
 * When FIELD_SORT_VALUES is requested, re-derives the per-document sort values for every
 * field of the current sort spec and attaches them to the response under "sort_values".
 * Score and docid sorts are skipped since those values travel separately.
 */
protected void doFieldSortValues(ResponseBuilder rb, SolrIndexSearcher searcher)
    throws IOException {
  SolrQueryRequest req = rb.req;
  SolrQueryResponse rsp = rb.rsp;
  // The query cache doesn't currently store sort field values, and SolrIndexSearcher doesn't
  // currently have an option to return sort field values. Because of this, we
  // take the documents given and re-derive the sort values.
  boolean fsv = req.getParams().getBool(ResponseBuilder.FIELD_SORT_VALUES, false);
  if (fsv) {
    Sort sort = rb.getSortSpec().getSort();
    SortField[] sortFields =
        sort == null ? new SortField[] {SortField.FIELD_SCORE} : sort.getSort();
    NamedList sortVals = new NamedList(); // order is important for the sort fields
    Field field = new Field("dummy", "", Field.Store.YES, Field.Index.NO); // a dummy Field
    SolrIndexReader reader = searcher.getReader();
    SolrIndexReader[] readers = reader.getLeafReaders();
    SolrIndexReader subReader = reader;
    if (readers.length == 1) {
      // if there is a single segment, use that subReader and avoid looking up each time
      subReader = readers[0];
      readers = null;
    }
    int[] offsets = reader.getLeafOffsets();
    for (SortField sortField : sortFields) {
      int type = sortField.getType();
      if (type == SortField.SCORE || type == SortField.DOC) continue;
      FieldComparator comparator = null;
      FieldComparator comparators[] =
          (readers == null) ? null : new FieldComparator[readers.length];
      String fieldname = sortField.getField();
      FieldType ft = fieldname == null ? null : req.getSchema().getFieldTypeNoEx(fieldname);
      DocList docList = rb.getResults().docList;
      ArrayList<Object> vals = new ArrayList<Object>(docList.size());
      DocIterator it = rb.getResults().docList.iterator();
      int offset = 0;
      int idx = 0;
      while (it.hasNext()) {
        int doc = it.nextDoc();
        if (readers != null) {
          // Multi-segment: locate the leaf this doc lives in and its base offset.
          idx = SolrIndexReader.readerIndex(doc, offsets);
          subReader = readers[idx];
          offset = offsets[idx];
          comparator = comparators[idx];
        }
        if (comparator == null) {
          // Lazily create and cache a single-slot comparator per leaf reader.
          comparator = sortField.getComparator(1, 0);
          comparator = comparator.setNextReader(subReader, offset);
          if (comparators != null) comparators[idx] = comparator;
        }
        doc -= offset; // adjust for what segment this is in
        comparator.copy(0, doc);
        Object val = comparator.value(0);
        // Sortable float, double, int, long types all just use a string
        // comparator. For these, we need to put the type into a readable
        // format. One reason for this is that XML can't represent all
        // string values (or even all unicode code points).
        // indexedToReadable() should be a no-op and should
        // thus be harmless anyway (for all current ways anyway)
        if (val instanceof String) {
          field.setValue((String) val);
          val = ft.toObject(field);
        }
        // Must do the same conversion when sorting by a
        // String field in Lucene, which returns the terms
        // data as BytesRef:
        if (val instanceof BytesRef) {
          field.setValue(((BytesRef) val).utf8ToString());
          val = ft.toObject(field);
        }
        vals.add(val);
      }
      sortVals.add(fieldname, vals);
    }
    rsp.add("sort_values", sortVals);
  }
}
/** {@inheritDoc} */
@Override
public Map<String, ?> transformToNative(
    NamedList<NamedList> shardResponse, Sort groupSort, Sort sortWithinGroup, String shard) {
  Map<String, Object> result = new HashMap<>();
  final IndexSchema schema = rb.req.getSearcher().getSchema();
  for (Map.Entry<String, NamedList> entry : shardResponse) {
    String key = entry.getKey();
    NamedList commandResult = entry.getValue();
    Integer totalGroupedHitCount = (Integer) commandResult.get("totalGroupedHitCount");
    Integer totalHits = (Integer) commandResult.get("totalHits");
    if (totalHits != null) {
      // Non-grouped query command: rebuild a QueryCommandResult.
      Integer matches = (Integer) commandResult.get("matches");
      Float maxScore = (Float) commandResult.get("maxScore");
      if (maxScore == null) {
        maxScore = Float.NaN;
      }
      @SuppressWarnings("unchecked")
      List<NamedList<Object>> documents =
          (List<NamedList<Object>>) commandResult.get("documents");
      ScoreDoc[] scoreDocs = new ScoreDoc[documents.size()];
      int j = 0;
      for (NamedList<Object> document : documents) {
        Object docId = document.get("id");
        Object uniqueId = null;
        if (docId != null) uniqueId = docId.toString();
        else log.warn("doc {} has null 'id'", document);
        Float score = (Float) document.get("score");
        if (score == null) {
          score = Float.NaN;
        }
        Object[] sortValues = null;
        Object sortValuesVal = document.get("sortValues");
        if (sortValuesVal != null) {
          sortValues = ((List) sortValuesVal).toArray();
          for (int k = 0; k < sortValues.length; k++) {
            SchemaField field =
                groupSort.getSort()[k].getField() != null
                    ? schema.getFieldOrNull(groupSort.getSort()[k].getField())
                    : null;
            if (field != null) {
              FieldType fieldType = field.getType();
              if (sortValues[k] != null) {
                // Reverse the marshalling done by the serializing shard.
                sortValues[k] = fieldType.unmarshalSortValue(sortValues[k]);
              }
            }
          }
        } else {
          log.warn("doc {} has null 'sortValues'", document);
        }
        scoreDocs[j++] = new ShardDoc(score, sortValues, uniqueId, shard);
      }
      result.put(
          key, new QueryCommandResult(new TopDocs(totalHits, scoreDocs, maxScore), matches));
      continue;
    }
    // Grouped command: entries after the first two count slots are per-group results.
    Integer totalHitCount = (Integer) commandResult.get("totalHitCount");
    List<GroupDocs<BytesRef>> groupDocs = new ArrayList<>();
    for (int i = 2; i < commandResult.size(); i++) {
      String groupValue = commandResult.getName(i);
      @SuppressWarnings("unchecked")
      NamedList<Object> groupResult = (NamedList<Object>) commandResult.getVal(i);
      Integer totalGroupHits = (Integer) groupResult.get("totalHits");
      Float maxScore = (Float) groupResult.get("maxScore");
      if (maxScore == null) {
        maxScore = Float.NaN;
      }
      @SuppressWarnings("unchecked")
      List<NamedList<Object>> documents =
          (List<NamedList<Object>>) groupResult.get("documents");
      ScoreDoc[] scoreDocs = new ScoreDoc[documents.size()];
      int j = 0;
      for (NamedList<Object> document : documents) {
        Object uniqueId = document.get("id").toString();
        Float score = (Float) document.get("score");
        if (score == null) {
          score = Float.NaN;
        }
        Object[] sortValues = ((List) document.get("sortValues")).toArray();
        for (int k = 0; k < sortValues.length; k++) {
          SchemaField field =
              sortWithinGroup.getSort()[k].getField() != null
                  ? schema.getFieldOrNull(sortWithinGroup.getSort()[k].getField())
                  : null;
          if (field != null) {
            FieldType fieldType = field.getType();
            if (sortValues[k] != null) {
              sortValues[k] = fieldType.unmarshalSortValue(sortValues[k]);
            }
          }
        }
        scoreDocs[j++] = new ShardDoc(score, sortValues, uniqueId, shard);
      }
      BytesRef groupValueRef = groupValue != null ? new BytesRef(groupValue) : null;
      groupDocs.add(
          new GroupDocs<>(Float.NaN, maxScore, totalGroupHits, scoreDocs, groupValueRef, null));
    }
    @SuppressWarnings("unchecked")
    GroupDocs<BytesRef>[] groupDocsArr = groupDocs.toArray(new GroupDocs[groupDocs.size()]);
    TopGroups<BytesRef> topGroups =
        new TopGroups<>(
            groupSort.getSort(),
            sortWithinGroup.getSort(),
            totalHitCount,
            totalGroupedHitCount,
            groupDocsArr,
            Float.NaN);
    result.put(key, topGroups);
  }
  return result;
}
public void handleMergeFields(ResponseBuilder rb, SolrIndexSearcher searcher) throws IOException { SolrQueryRequest req = rb.req; SolrQueryResponse rsp = rb.rsp; // The query cache doesn't currently store sort field values, and SolrIndexSearcher doesn't // currently have an option to return sort field values. Because of this, we // take the documents given and re-derive the sort values. // // TODO: See SOLR-5595 boolean fsv = req.getParams().getBool(ResponseBuilder.FIELD_SORT_VALUES, false); if (fsv) { NamedList<Object[]> sortVals = new NamedList<>(); // order is important for the sort fields IndexReaderContext topReaderContext = searcher.getTopReaderContext(); List<LeafReaderContext> leaves = topReaderContext.leaves(); LeafReaderContext currentLeaf = null; if (leaves.size() == 1) { // if there is a single segment, use that subReader and avoid looking up each time currentLeaf = leaves.get(0); leaves = null; } DocList docList = rb.getResults().docList; // sort ids from lowest to highest so we can access them in order int nDocs = docList.size(); final long[] sortedIds = new long[nDocs]; final float[] scores = new float[nDocs]; // doc scores, parallel to sortedIds DocList docs = rb.getResults().docList; DocIterator it = docs.iterator(); for (int i = 0; i < nDocs; i++) { sortedIds[i] = (((long) it.nextDoc()) << 32) | i; scores[i] = docs.hasScores() ? it.score() : Float.NaN; } // sort ids and scores together new InPlaceMergeSorter() { @Override protected void swap(int i, int j) { long tmpId = sortedIds[i]; float tmpScore = scores[i]; sortedIds[i] = sortedIds[j]; scores[i] = scores[j]; sortedIds[j] = tmpId; scores[j] = tmpScore; } @Override protected int compare(int i, int j) { return Long.compare(sortedIds[i], sortedIds[j]); } }.sort(0, sortedIds.length); SortSpec sortSpec = rb.getSortSpec(); Sort sort = searcher.weightSort(sortSpec.getSort()); SortField[] sortFields = sort == null ? 
new SortField[] {SortField.FIELD_SCORE} : sort.getSort(); List<SchemaField> schemaFields = sortSpec.getSchemaFields(); for (int fld = 0; fld < schemaFields.size(); fld++) { SchemaField schemaField = schemaFields.get(fld); FieldType ft = null == schemaField ? null : schemaField.getType(); SortField sortField = sortFields[fld]; SortField.Type type = sortField.getType(); // :TODO: would be simpler to always serialize every position of SortField[] if (type == SortField.Type.SCORE || type == SortField.Type.DOC) continue; FieldComparator<?> comparator = null; LeafFieldComparator leafComparator = null; Object[] vals = new Object[nDocs]; int lastIdx = -1; int idx = 0; for (int i = 0; i < sortedIds.length; ++i) { long idAndPos = sortedIds[i]; float score = scores[i]; int doc = (int) (idAndPos >>> 32); int position = (int) idAndPos; if (leaves != null) { idx = ReaderUtil.subIndex(doc, leaves); currentLeaf = leaves.get(idx); if (idx != lastIdx) { // we switched segments. invalidate comparator. comparator = null; } } if (comparator == null) { comparator = sortField.getComparator(1, 0); leafComparator = comparator.getLeafComparator(currentLeaf); } doc -= currentLeaf.docBase; // adjust for what segment this is in leafComparator.setScorer(new FakeScorer(doc, score)); leafComparator.copy(0, doc); Object val = comparator.value(0); if (null != ft) val = ft.marshalSortValue(val); vals[position] = val; } sortVals.add(sortField.getField(), vals); } rsp.add("merge_values", sortVals); } }
/**
 * Accumulates groups for the BlockJoinQuery specified by its slot.
 *
 * <p>For each parent hit buffered in {@code sortedGroups} (starting at {@code offset}),
 * the child docs recorded for the given query slot are replayed through a fresh
 * {@link TopDocsCollector} so they come back sorted — by relevance when
 * {@code withinGroupSort} is null, otherwise by the given field sort.
 *
 * @param slot Search query's slot; -1 (or a slot beyond the recorded counts) yields
 *     empty groups
 * @param offset Parent docs offset
 * @param maxDocsPerGroup Upper bound of documents per group number
 * @param withinGroupOffset Offset within each group of child docs
 * @param withinGroupSort Sort criteria within groups; null means sort children by score
 * @param fillSortFields Specifies whether to add sort fields or not
 * @return TopGroups for the query specified by slot
 * @throws IOException if there is a low-level I/O error
 */
@SuppressWarnings({"unchecked", "rawtypes"})
private TopGroups<Integer> accumulateGroups(
    int slot,
    int offset,
    int maxDocsPerGroup,
    int withinGroupOffset,
    Sort withinGroupSort,
    boolean fillSortFields)
    throws IOException {
  final GroupDocs<Integer>[] groups = new GroupDocs[sortedGroups.length - offset];
  final FakeScorer fakeScorer = new FakeScorer();
  int totalGroupedHitCount = 0;
  // System.out.println("slot=" + slot);

  for (int groupIDX = offset; groupIDX < sortedGroups.length; groupIDX++) {
    final OneGroup og = sortedGroups[groupIDX];
    final int numChildDocs;
    if (slot == -1 || slot >= og.counts.length) {
      // this parent recorded no child hits for the requested query slot
      numChildDocs = 0;
    } else {
      numChildDocs = og.counts[slot];
    }

    // Number of documents in group should be bounded to prevent redundant memory allocation
    final int numDocsInGroup = Math.max(1, Math.min(numChildDocs, maxDocsPerGroup));
    // System.out.println("parent doc=" + og.doc + " numChildDocs=" + numChildDocs
    //     + " maxDocsPG=" + maxDocsPerGroup);

    // At this point we hold all docs w/ in each group, unsorted; we now sort them:
    final TopDocsCollector<?> collector;
    if (withinGroupSort == null) {
      // Sort by score
      if (!trackScores) {
        throw new IllegalArgumentException(
            "cannot sort by relevance within group: trackScores=false");
      }
      collector = TopScoreDocCollector.create(numDocsInGroup, true);
    } else {
      // Sort by fields
      collector =
          TopFieldCollector.create(
              withinGroupSort, numDocsInGroup, fillSortFields, trackScores, trackMaxScore,
              true);
    }

    collector.setScorer(fakeScorer);
    collector.setNextReader(og.readerContext);
    // replay each buffered child doc (and its cached score, if tracked) through the
    // collector; fakeScorer is mutated before each collect() call
    for (int docIDX = 0; docIDX < numChildDocs; docIDX++) {
      final int doc = og.docs[slot][docIDX];
      fakeScorer.doc = doc;
      if (trackScores) {
        fakeScorer.score = og.scores[slot][docIDX];
      }
      collector.collect(doc);
    }
    totalGroupedHitCount += numChildDocs;

    final Object[] groupSortValues;

    if (fillSortFields) {
      // capture the parent-level sort values from the group comparators
      groupSortValues = new Object[comparators.length];
      for (int sortFieldIDX = 0; sortFieldIDX < comparators.length; sortFieldIDX++) {
        groupSortValues[sortFieldIDX] = comparators[sortFieldIDX].value(og.slot);
      }
    } else {
      groupSortValues = null;
    }

    final TopDocs topDocs = collector.topDocs(withinGroupOffset, numDocsInGroup);

    groups[groupIDX - offset] =
        new GroupDocs<>(
            og.score,
            topDocs.getMaxScore(),
            numChildDocs,
            topDocs.scoreDocs,
            og.doc,
            groupSortValues);
  }

  // wrap the per-slot groups in an outer TopGroups that carries the overall hit count
  return new TopGroups<>(
      new TopGroups<>(
          sort.getSort(),
          withinGroupSort == null ? null : withinGroupSort.getSort(),
          0,
          totalGroupedHitCount,
          groups,
          maxScore),
      totalHitCount);
}