/**
 * Conversion from a SolrQueryResponse (which is a solr-internal data format) to a
 * SolrDocumentList (which is a solrj format). The conversion is done inside the solrj api
 * using the BinaryResponseWriter and a very complex unfolding process via
 * org.apache.solr.common.util.JavaBinCodec.marshal.
 *
 * @param req the Solr query request
 * @param rsp the Solr query response to convert
 * @return the documents of the response as a SolrDocumentList
 */
public SolrDocumentList SolrQueryResponse2SolrDocumentList(
    final SolrQueryRequest req, final SolrQueryResponse rsp) {
  SolrDocumentList sdl = new SolrDocumentList();
  NamedList<?> nl = rsp.getValues();
  ResultContext resultContext = (ResultContext) nl.get("response");
  DocList response =
      resultContext == null
          ? new DocSlice(0, 0, new int[0], new float[0], 0, 0.0f)
          : resultContext.docs;
  sdl.setNumFound(response == null ? 0 : response.matches());
  sdl.setStart(response == null ? 0 : response.offset());

  String originalName = Thread.currentThread().getName();
  if (response != null) {
    try {
      SolrIndexSearcher searcher = req.getSearcher();
      final int responseCount = response.size();
      DocIterator iterator = response.iterator();
      for (int i = 0; i < responseCount; i++) {
        int docid = iterator.nextDoc();
        Thread.currentThread()
            .setName("EmbeddedSolrConnector.SolrQueryResponse2SolrDocumentList: " + docid);
        Document responsedoc = searcher.doc(docid, (Set<String>) null);
        SolrDocument solrdoc = doc2SolrDoc(responsedoc);
        sdl.add(solrdoc);
      }
    } catch (IOException e) {
      ConcurrentLog.logException(e);
    }
  }
  Thread.currentThread().setName(originalName);
  return sdl;
}
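Every example in this listing walks a DocList (or DocSet) with a DocIterator, so a minimal, self-contained sketch of that contract may be useful up front. It uses the same six-argument DocSlice constructor that appears in the method above; newer Solr releases, which add a TotalHits.Relation argument to DocSlice, would need a small adjustment.

import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList;
import org.apache.solr.search.DocSlice;

public class DocIteratorDemo {
  public static void main(String[] args) {
    // A DocSlice wraps an int[] of internal Lucene doc ids plus parallel scores.
    int[] docs = {3, 7, 42};
    float[] scores = {1.0f, 0.5f, 0.25f};
    DocList page =
        new DocSlice(0, docs.length, docs, scores, /* matches */ 3, /* maxScore */ 1.0f);
    DocIterator it = page.iterator();
    while (it.hasNext()) {
      int docid = it.nextDoc(); // internal id, only meaningful for the searcher that produced it
      System.out.println(docid + " score=" + it.score()); // score() refers to the last nextDoc()
    }
  }
}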
/**
 * Check if a given document, identified by its url hash as document id, exists.
 *
 * @param id the url hash and document id
 * @return the url and load date if an entry exists in solr, null otherwise
 * @throws IOException
 */
@Override
public LoadTimeURL getLoadTimeURL(String id) throws IOException {
  int responseCount = 0;
  DocListSearcher docListSearcher = null;
  try {
    docListSearcher =
        new DocListSearcher(
            "{!cache=false raw f=" + CollectionSchema.id.getSolrFieldName() + "}" + id,
            null,
            0,
            1,
            CollectionSchema.id.getSolrFieldName(),
            CollectionSchema.load_date_dt.getSolrFieldName());
    responseCount = docListSearcher.response.size();
    if (responseCount == 0) return null;
    SolrIndexSearcher searcher = docListSearcher.request.getSearcher();
    DocIterator iterator = docListSearcher.response.iterator();
    // only the first hit is needed; the search above is limited to a single row
    Document doc =
        searcher.doc(iterator.nextDoc(), AbstractSolrConnector.SOLR_ID_and_LOAD_DATE_FIELDS);
    if (doc == null) return null;
    return AbstractSolrConnector.getLoadTimeURL(doc);
  } catch (Throwable e) {
    ConcurrentLog.logException(e);
    throw new IOException(e.getMessage());
  } finally {
    if (docListSearcher != null) docListSearcher.close();
  }
}
int collect(DocSet docs, int slot) throws IOException {
  int count = 0;
  SolrIndexSearcher searcher = fcontext.searcher;
  final List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
  final Iterator<LeafReaderContext> ctxIt = leaves.iterator();
  LeafReaderContext ctx = null;
  int segBase = 0;
  int segMax;
  int adjustedMax = 0;
  for (DocIterator docsIt = docs.iterator(); docsIt.hasNext(); ) {
    final int doc = docsIt.nextDoc();
    if (doc >= adjustedMax) {
      do {
        ctx = ctxIt.next();
        if (ctx == null) {
          // should be impossible
          throw new RuntimeException("INTERNAL FACET ERROR");
        }
        segBase = ctx.docBase;
        segMax = ctx.reader().maxDoc();
        adjustedMax = segBase + segMax;
      } while (doc >= adjustedMax);
      assert doc >= ctx.docBase;
      setNextReader(ctx);
    }
    count++;
    collect(doc - segBase, slot); // per-seg collectors
  }
  return count;
}
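The do/while above leans on the fact that Lucene leaves are ordered by docBase, with each leaf covering the global id range [docBase, docBase + maxDoc). A toy, self-contained sketch of the same arithmetic (the segment sizes here are made up):

public class DocBaseDemo {
  public static void main(String[] args) {
    // Hypothetical layout: segments with maxDoc 10, 5 and 8 get docBase 0, 10 and 15.
    int[] docBase = {0, 10, 15};
    int[] maxDoc = {10, 5, 8};
    int globalDoc = 12;
    int seg = 0;
    // Advance until globalDoc falls inside the segment's range, mirroring collect().
    while (globalDoc >= docBase[seg] + maxDoc[seg]) {
      seg++;
    }
    // Rebase to the segment-local id that a per-segment collector would receive.
    System.out.println("segment=" + seg + " localDoc=" + (globalDoc - docBase[seg]));
    // prints: segment=1 localDoc=2
  }
}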
public InvertResult invertScan(IndexSchema schema, InvertParams params) throws Exception {
  InvertResult rtn = new InvertResult();
  rtn.setParams(schema, params);
  DocSet docset = params.getDocset();
  DocSet[] subdocset = new DocSet[subReaders.length];
  if (subdocset.length == 1) {
    subdocset[0] = docset;
  } else {
    // split the top-level DocSet into one DocSet per sub-reader,
    // rebasing each global doc id onto its segment
    for (int i = 0; i < subReaders.length; i++) {
      subdocset[i] = new BitDocSet();
    }
    int index = 0;
    int end = this.getend(index);
    DocIterator iter = docset.iterator();
    while (iter.hasNext()) {
      int doc = iter.nextDoc();
      if (doc >= end) {
        index = this.readerIndex(doc);
        end = this.getend(index);
      }
      subdocset[index].add(doc - this.starts[index]);
    }
  }
  for (int i = 0; i < subReaders.length; i++) {
    params.setDocset(subdocset[i]);
    rtn.merge(subReaders[i].invertScan(schema, params));
  }
  return rtn;
}
@Override
public void writeDocList(String name, DocList ids, Set<String> fields, Map otherFields)
    throws IOException {
  boolean includeScore = false;
  if (fields != null) {
    includeScore = fields.contains("score");
    if (fields.size() == 0 || (fields.size() == 1 && includeScore) || fields.contains("*")) {
      fields = null; // null means return all stored fields
    }
  }
  int sz = ids.size();

  writeMapOpener(includeScore ? 4 : 3);
  incLevel();

  writeKey("numFound", false);
  writeInt(null, ids.matches());
  writeMapSeparator();

  writeKey("start", false);
  writeInt(null, ids.offset());

  if (includeScore) {
    writeMapSeparator();
    writeKey("maxScore", false);
    writeFloat(null, ids.maxScore());
  }
  writeMapSeparator();
  // indent();
  writeKey("docs", false);
  writeArrayOpener(sz);
  incLevel();

  boolean first = true;
  SolrIndexSearcher searcher = req.getSearcher();
  DocIterator iterator = ids.iterator();
  for (int i = 0; i < sz; i++) {
    int id = iterator.nextDoc();
    Document doc = searcher.doc(id, fields);

    if (first) {
      first = false;
    } else {
      writeArraySeparator();
    }
    indent();
    writeDoc(null, doc, fields, (includeScore ? iterator.score() : 0.0f), includeScore);
  }
  decLevel();
  writeArrayCloser();

  if (otherFields != null) {
    writeMap(null, otherFields, true, false);
  }

  decLevel();
  indent();
  writeMapCloser();
}
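For orientation, under the JSON flavor of this writer the method emits the familiar response envelope; an illustrative (not real) rendering with includeScore set would be:

{
  "numFound": 2,
  "start": 0,
  "maxScore": 1.0,
  "docs": [ { ... }, { ... } ]
}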
public static SolrReaderSetScorer createReaderSetScorer(
    Weight weight,
    AtomicReaderContext context,
    Bits acceptDocs,
    SolrIndexSearcher searcher,
    String authorities,
    AtomicReader reader)
    throws IOException {

  DocSet readableDocSet =
      (DocSet) searcher.cacheLookup(CacheConstants.ALFRESCO_READER_CACHE, authorities);

  if (readableDocSet == null) {
    // the authorities string is delimited by its own first character
    String[] auths = authorities.substring(1).split(authorities.substring(0, 1));

    readableDocSet = new BitDocSet(new FixedBitSet(searcher.maxDoc()));

    BooleanQuery bQuery = new BooleanQuery();
    for (String current : auths) {
      bQuery.add(new TermQuery(new Term(QueryConstants.FIELD_READER, current)), Occur.SHOULD);
    }
    DocSet aclDocs = searcher.getDocSet(bQuery);

    BooleanQuery aQuery = new BooleanQuery();
    for (DocIterator it = aclDocs.iterator(); it.hasNext(); ) {
      int docID = it.nextDoc();
      // Obtain the ACL ID for this ACL doc.
      long aclID =
          searcher.getAtomicReader().getNumericDocValues(QueryConstants.FIELD_ACLID).get(docID);
      SchemaField schemaField = searcher.getSchema().getField(QueryConstants.FIELD_ACLID);
      Query query = schemaField.getType().getFieldQuery(null, schemaField, Long.toString(aclID));
      aQuery.add(query, Occur.SHOULD);
      // flush the batch before it can hit Lucene's default BooleanQuery clause limit (1024)
      if ((aQuery.clauses().size() > 999) || !it.hasNext()) {
        DocSet docsForAclId = searcher.getDocSet(aQuery);
        readableDocSet = readableDocSet.union(docsForAclId);
        aQuery = new BooleanQuery();
      }
    }
    // Exclude the ACL docs from the results, we only want real docs that match.
    // Probably not very efficient, what we really want is remove(docID)
    readableDocSet = readableDocSet.andNot(aclDocs);
    searcher.cacheInsert(CacheConstants.ALFRESCO_READER_CACHE, authorities, readableDocSet);
  }
  // TODO: cache the full set? e.g.
  // searcher.cacheInsert(CacheConstants.ALFRESCO_READERSET_CACHE, authorities, readableDocSet)
  // plus check of course, for presence in cache at start of method.
  return new SolrReaderSetScorer(weight, readableDocSet, context, acceptDocs, searcher);
}
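A note on the magic number: Lucene's BooleanQuery enforces a maximum clause count (1024 by default), so flushing each ACL batch once it grows past 999 clauses keeps every generated query under that limit. Below is a self-contained sketch of the same flush-on-threshold-or-exhaustion pattern, with plain integers standing in for the ACL-id clauses:

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

public class BatchFlushDemo {
  public static void main(String[] args) {
    List<Integer> aclIds = new ArrayList<>();
    for (int i = 0; i < 2500; i++) aclIds.add(i);

    List<Integer> batch = new ArrayList<>();
    int flushed = 0;
    for (Iterator<Integer> it = aclIds.iterator(); it.hasNext(); ) {
      batch.add(it.next());
      // Same condition as above: flush once the batch exceeds 999 entries
      // or when the iterator is exhausted.
      if (batch.size() > 999 || !it.hasNext()) {
        flushed++; // a real implementation would execute the batched query here
        batch = new ArrayList<>();
      }
    }
    System.out.println(flushed + " batches"); // prints: 3 batches (1000 + 1000 + 500)
  }
}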
/**
 * Generates a list of highlighted query fragments for each item in a list of documents, or
 * returns null if highlighting is disabled.
 *
 * @param docs query results
 * @param query the query
 * @param req the current request
 * @param defaultFields default list of fields to summarize
 * @return NamedList containing a NamedList for each document, which in turn contains
 *     (field, summary) pairs.
 */
@Override
@SuppressWarnings("unchecked")
public NamedList<Object> doHighlighting(
    DocList docs, Query query, SolrQueryRequest req, String[] defaultFields) throws IOException {
  SolrParams params = req.getParams();
  if (!isHighlightingEnabled(params)) return null;

  SolrIndexSearcher searcher = req.getSearcher();
  IndexSchema schema = searcher.getSchema();
  NamedList fragments = new SimpleOrderedMap();
  String[] fieldNames = getHighlightFields(query, req, defaultFields);
  Set<String> fset = new HashSet<String>();

  {
    // pre-fetch documents using the Searcher's doc cache
    for (String f : fieldNames) {
      fset.add(f);
    }
    // fetch unique key if one exists.
    SchemaField keyField = schema.getUniqueKeyField();
    if (null != keyField) fset.add(keyField.getName());
  }

  // get the FastVectorHighlighter instance out of the processing loop
  FastVectorHighlighter fvh =
      new FastVectorHighlighter(
          // FVH cannot process the hl.usePhraseHighlighter parameter on a per-field basis
          params.getBool(HighlightParams.USE_PHRASE_HIGHLIGHTER, true),
          // FVH cannot process the hl.requireFieldMatch parameter on a per-field basis
          params.getBool(HighlightParams.FIELD_MATCH, false));
  fvh.setPhraseLimit(params.getInt(HighlightParams.PHRASE_LIMIT, Integer.MAX_VALUE));
  FieldQuery fieldQuery = fvh.getFieldQuery(query, searcher.getIndexReader());

  // Highlight each document
  DocIterator iterator = docs.iterator();
  for (int i = 0; i < docs.size(); i++) {
    int docId = iterator.nextDoc();
    Document doc = searcher.doc(docId, fset);
    NamedList docSummaries = new SimpleOrderedMap();
    for (String fieldName : fieldNames) {
      fieldName = fieldName.trim();
      if (useFastVectorHighlighter(params, schema, fieldName))
        doHighlightingByFastVectorHighlighter(
            fvh, fieldQuery, req, docSummaries, docId, doc, fieldName);
      else doHighlightingByHighlighter(query, req, docSummaries, docId, doc, fieldName);
    }
    String printId = schema.printableUniqueKey(doc);
    fragments.add(printId == null ? null : printId, docSummaries);
  }
  return fragments;
}
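The parameters consulted above are the stock highlighting request parameters. An illustrative request that drives the FastVectorHighlighter path might look like this (the field names are hypothetical, and FVH additionally requires the fields to be indexed with term vectors, positions and offsets):

q=solr&hl=true&hl.fl=title,content
  &hl.useFastVectorHighlighter=true
  &hl.usePhraseHighlighter=true
  &hl.requireFieldMatch=false
  &hl.phraseLimit=5000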
protected void processIds(
    ResponseBuilder rb, DocList dl, IndexSchema schema, SolrIndexSearcher searcher)
    throws IOException {

  StringBuilder sb = new StringBuilder();

  Set<String> fields = Collections.singleton(schema.getUniqueKeyField().getName());
  for (DocIterator iter = dl.iterator(); iter.hasNext(); ) {
    sb.append(schema.printableUniqueKey(searcher.doc(iter.nextDoc(), fields))).append(',');
  }
  if (sb.length() > 0) {
    // strip the trailing comma before logging
    rb.rsp.addToLog("responseLog", sb.substring(0, sb.length() - 1));
  }
}
@SuppressWarnings({"rawtypes", "unchecked"}) private Iterator<Integer> docIterator(ResponseBuilder rb) { if (rb.grouping()) { List<Integer> docList = new ArrayList<>(); NamedList values = rb.rsp.getValues(); NamedList grouped = (NamedList) values.get("grouped"); for (String field : rb.getGroupingSpec().getFields()) { NamedList fieldResults = (NamedList) grouped.get(field); if (rb.getGroupingSpec().getResponseFormat() == Grouping.Format.grouped) { List<NamedList> groups = (List<NamedList>) fieldResults.get("groups"); for (NamedList group : groups) { for (DocIterator it = ((DocList) group.get("doclist")).iterator(); it.hasNext(); ) { docList.add(it.nextDoc()); } } } else { for (DocIterator it = ((DocList) fieldResults.get("doclist")).iterator(); it.hasNext(); ) { docList.add(it.nextDoc()); } } } return docList.iterator(); } else { return rb.getResults().docList.iterator(); } }
public static SolrCachingAuxDocScorer createAuxDocScorer(
    SolrIndexSearcher searcher, Similarity similarity, Query query, SolrIndexReader reader)
    throws IOException {
  // Get hold of the solr top level searcher
  // Execute the query with caching
  // Translate results to leaf docs
  // Build an ordered doc list

  DocSet auxDocSet = searcher.getDocSet(query);

  CacheEntry[] indexedByDocId =
      (CacheEntry[])
          searcher.cacheLookup(
              AlfrescoSolrEventListener.ALFRESCO_CACHE,
              AlfrescoSolrEventListener.KEY_DBID_LEAF_PATH_BY_DOC_ID);

  OpenBitSet translated = new OpenBitSet();

  if (auxDocSet instanceof BitDocSet) {
    // fast path: walk the set bits directly
    BitDocSet source = (BitDocSet) auxDocSet;
    OpenBitSet openBitSet = source.getBits();
    int current = -1;
    while ((current = openBitSet.nextSetBit(current + 1)) != -1) {
      CacheEntry entry = indexedByDocId[current];
      translated.set(entry.getLeaf());
    }
  } else {
    for (DocIterator it = auxDocSet.iterator(); it.hasNext(); ) {
      CacheEntry entry = indexedByDocId[it.nextDoc()];
      translated.set(entry.getLeaf());
    }
  }

  return new SolrCachingAuxDocScorer(similarity, new BitDocSet(translated), reader);
}
public NamedList get(String[] fields, DocSet baseDocs) throws IOException, ParseException {
  if (this.crcget == null) {
    this.container =
        this.parse.createContainer(fields, baseDocs, this.reader, this.searcher, this.req);
    DocIterator iter = baseDocs.iterator();
    this.recordCount.inc(baseDocs.size());
    Doclist res = new Doclist(this.parse.limit_offset);
    int doc = -1;
    while (iter.hasNext()) {
      doc = iter.nextDoc();
      res.add(doc);
      if (res.index >= this.parse.limit_offset) {
        break;
      }
    }
    PriorityQueue<SelectDetailRow> topItems = this.transGroupValue(res, fields);
    this.container.free();
    return this.toNameList(topItems);
  }

  String hostkey = String.valueOf(this.getkeyCrc());
  ConcurrentHashMap<Long, String> cache =
      MdrillUtils.CRC_CACHE_SIZE.remove(crcget + "@" + hostkey);
  NamedList rtn = new NamedList();
  Map<Long, String> crcvalue = new HashMap<Long, String>();
  rtn.add("fdtcre", crcvalue);
  if (cache != null) {
    MapFieldSelector selector = new MapFieldSelector(fields);
    FieldType[] ftlist = new FieldType[fields.length];
    IndexSchema schema = this.searcher.getSchema();
    for (int j = 0; j < fields.length; j++) {
      ftlist[j] = schema.getFieldType(fields[j]);
    }
    String crcliststr = params.get("mdrill.crc.key.get.crclist");
    if (crcliststr != null) {
      String[] crclist = crcliststr.split(",");
      for (String s : crclist) {
        Long crc = Long.parseLong(s);
        String v = cache.get(crc);
        if (v != null) {
          String[] cols = v.split(UniqConfig.GroupJoinString(), -1);
          if (cols.length >= 2) {
            int doc = Integer.parseInt(cols[0]);
            SortGroupVal buff = new SortGroupVal();
            buff.groupbuff.append("-");
            buff.groupbuff.append(UniqConfig.GroupJoinString());
            buff.groupbuff.append("-");
            Document docfields = this.reader.document(doc, selector);
            if (docfields == null) {
              for (int j = 0; j < fields.length; j++) {
                buff.groupbuff.append(UniqConfig.GroupJoinString());
                buff.groupbuff.append(EncodeUtils.encode("-"));
              }
              if (!crcvalue.containsKey(crc)) {
                crcvalue.put(crc, buff.groupbuff.toString());
              }
            } else {
              for (int j = 0; j < fields.length; j++) {
                buff.groupbuff.append(UniqConfig.GroupJoinString());
                Fieldable fv = docfields.getFieldable(fields[j]);
                if (fv != null) {
                  buff.groupbuff.append(ftlist[j].toExternal(fv));
                } else {
                  buff.groupbuff.append(EncodeUtils.encode("-"));
                }
              }
              crcvalue.put(crc, buff.groupbuff.toString());
            }
          }
        }
      }
    }
  }
  return rtn;
}
protected void doFieldSortValues(ResponseBuilder rb, SolrIndexSearcher searcher)
    throws IOException {
  SolrQueryRequest req = rb.req;
  SolrQueryResponse rsp = rb.rsp;

  // The query cache doesn't currently store sort field values, and SolrIndexSearcher doesn't
  // currently have an option to return sort field values. Because of this, we take the
  // documents given and re-derive the sort values.
  boolean fsv = req.getParams().getBool(ResponseBuilder.FIELD_SORT_VALUES, false);
  if (fsv) {
    Sort sort = rb.getSortSpec().getSort();
    SortField[] sortFields =
        sort == null ? new SortField[] {SortField.FIELD_SCORE} : sort.getSort();
    NamedList sortVals = new NamedList(); // order is important for the sort fields
    Field field = new Field("dummy", "", Field.Store.YES, Field.Index.NO); // a dummy Field

    SolrIndexReader reader = searcher.getReader();
    SolrIndexReader[] readers = reader.getLeafReaders();
    SolrIndexReader subReader = reader;
    if (readers.length == 1) {
      // if there is a single segment, use that subReader and avoid looking up each time
      subReader = readers[0];
      readers = null;
    }
    int[] offsets = reader.getLeafOffsets();

    for (SortField sortField : sortFields) {
      int type = sortField.getType();
      if (type == SortField.SCORE || type == SortField.DOC) continue;

      FieldComparator comparator = null;
      FieldComparator[] comparators =
          (readers == null) ? null : new FieldComparator[readers.length];

      String fieldname = sortField.getField();
      FieldType ft = fieldname == null ? null : req.getSchema().getFieldTypeNoEx(fieldname);

      DocList docList = rb.getResults().docList;
      ArrayList<Object> vals = new ArrayList<Object>(docList.size());
      DocIterator it = rb.getResults().docList.iterator();

      int offset = 0;
      int idx = 0;

      while (it.hasNext()) {
        int doc = it.nextDoc();
        if (readers != null) {
          idx = SolrIndexReader.readerIndex(doc, offsets);
          subReader = readers[idx];
          offset = offsets[idx];
          comparator = comparators[idx];
        }

        if (comparator == null) {
          comparator = sortField.getComparator(1, 0);
          comparator = comparator.setNextReader(subReader, offset);
          if (comparators != null) comparators[idx] = comparator;
        }

        doc -= offset; // adjust for what segment this is in
        comparator.copy(0, doc);
        Object val = comparator.value(0);

        // Sortable float, double, int, long types all just use a string
        // comparator. For these, we need to put the type into a readable
        // format. One reason for this is that XML can't represent all
        // string values (or even all unicode code points).
        // indexedToReadable() should be a no-op and should
        // thus be harmless anyway (for all current ways anyway)
        if (val instanceof String) {
          field.setValue((String) val);
          val = ft.toObject(field);
        }

        // Must do the same conversion when sorting by a
        // String field in Lucene, which returns the terms
        // data as BytesRef:
        if (val instanceof BytesRef) {
          field.setValue(((BytesRef) val).utf8ToString());
          val = ft.toObject(field);
        }

        vals.add(val);
      }

      sortVals.add(fieldname, vals);
    }

    rsp.add("sort_values", sortVals);
  }
}