/**
 * Conversion from a SolrQueryResponse (which is a solr-internal data format) to a
 * SolrDocumentList (which is a solrj format). The conversion is done inside the solrj
 * api using the BinaryResponseWriter and a very complex unfolding process via
 * org.apache.solr.common.util.JavaBinCodec.marshal.
 *
 * @param req the solr query request
 * @param rsp the solr query response
 * @return the response documents as a solrj SolrDocumentList
 */
public SolrDocumentList SolrQueryResponse2SolrDocumentList(
    final SolrQueryRequest req, final SolrQueryResponse rsp) {
  SolrDocumentList sdl = new SolrDocumentList();
  NamedList<?> nl = rsp.getValues();
  ResultContext resultContext = (ResultContext) nl.get("response");
  DocList response =
      resultContext == null
          ? new DocSlice(0, 0, new int[0], new float[0], 0, 0.0f)
          : resultContext.docs;
  sdl.setNumFound(response == null ? 0 : response.matches());
  sdl.setStart(response == null ? 0 : response.offset());
  String originalName = Thread.currentThread().getName();
  if (response != null) {
    try {
      SolrIndexSearcher searcher = req.getSearcher();
      final int responseCount = response.size();
      DocIterator iterator = response.iterator();
      for (int i = 0; i < responseCount; i++) {
        int docid = iterator.nextDoc();
        Thread.currentThread()
            .setName("EmbeddedSolrConnector.SolrQueryResponse2SolrDocumentList: " + docid);
        Document responsedoc = searcher.doc(docid, (Set<String>) null);
        SolrDocument sordoc = doc2SolrDoc(responsedoc);
        sdl.add(sordoc);
      }
    } catch (IOException e) {
      ConcurrentLog.logException(e);
    }
  }
  Thread.currentThread().setName(originalName);
  return sdl;
}
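A minimal usage sketch for the converter above. This is an illustration, not part of the original source: the `core` (a SolrCore) and `connector` (an instance of the class declaring the method above) are assumed to exist in surrounding code; LocalSolrQueryRequest, SolrQueryResponse, and SolrCore.execute are standard Solr API.

// Hypothetical illustration: run a query through a local core, then convert
// the internal response into a solrj SolrDocumentList via the method above.
ModifiableSolrParams params = new ModifiableSolrParams();
params.set(CommonParams.Q, "id:42");
SolrQueryRequest req = new LocalSolrQueryRequest(core, params);
SolrQueryResponse rsp = new SolrQueryResponse();
try {
  core.execute(core.getRequestHandler("/select"), req, rsp);
  SolrDocumentList docs = connector.SolrQueryResponse2SolrDocumentList(req, rsp);
  System.out.println("hits: " + docs.getNumFound());
} finally {
  req.close();
}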
@Override
public void writeDocList(String name, DocList ids, Set<String> fields, Map otherFields)
    throws IOException {
  boolean includeScore = false;
  if (fields != null) {
    includeScore = fields.contains("score");
    if (fields.size() == 0 || (fields.size() == 1 && includeScore) || fields.contains("*")) {
      fields = null; // null means return all stored fields
    }
  }

  int sz = ids.size();

  writeMapOpener(includeScore ? 4 : 3);
  incLevel();

  writeKey("numFound", false);
  writeInt(null, ids.matches());
  writeMapSeparator();

  writeKey("start", false);
  writeInt(null, ids.offset());

  if (includeScore) {
    writeMapSeparator();
    writeKey("maxScore", false);
    writeFloat(null, ids.maxScore());
  }
  writeMapSeparator();
  // indent();
  writeKey("docs", false);
  writeArrayOpener(sz);

  incLevel();
  boolean first = true;

  SolrIndexSearcher searcher = req.getSearcher();
  DocIterator iterator = ids.iterator();
  for (int i = 0; i < sz; i++) {
    int id = iterator.nextDoc();
    Document doc = searcher.doc(id, fields);

    if (first) {
      first = false;
    } else {
      writeArraySeparator();
    }
    indent();
    writeDoc(null, doc, fields, (includeScore ? iterator.score() : 0.0f), includeScore);
  }
  decLevel();
  writeArrayCloser();

  if (otherFields != null) {
    writeMap(null, otherFields, true, false);
  }

  decLevel();
  indent();
  writeMapCloser();
}
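For orientation, the writer above emits the familiar result-list shape: numFound, start, an optional maxScore when "score" was requested, then the docs array. Rendered by a JSON flavor of this writer, the output would look roughly like this (values illustrative):

{
  "numFound": 42,
  "start": 0,
  "maxScore": 1.25,
  "docs": [ { ... }, { ... } ]
}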
/**
 * Generates a list of highlighted query fragments for each item in a list of documents, or
 * returns null if highlighting is disabled.
 *
 * @param docs query results
 * @param query the query
 * @param req the current request
 * @param defaultFields default list of fields to summarize
 * @return NamedList containing a NamedList for each document, which in turn contains
 *     (field, summary) pairs.
 */
@Override
@SuppressWarnings("unchecked")
public NamedList<Object> doHighlighting(
    DocList docs, Query query, SolrQueryRequest req, String[] defaultFields) throws IOException {
  SolrParams params = req.getParams();
  if (!isHighlightingEnabled(params)) return null;

  SolrIndexSearcher searcher = req.getSearcher();
  IndexSchema schema = searcher.getSchema();
  NamedList fragments = new SimpleOrderedMap();
  String[] fieldNames = getHighlightFields(query, req, defaultFields);
  Set<String> fset = new HashSet<String>();

  {
    // pre-fetch documents using the Searcher's doc cache
    for (String f : fieldNames) {
      fset.add(f);
    }
    // fetch unique key if one exists.
    SchemaField keyField = schema.getUniqueKeyField();
    if (null != keyField) fset.add(keyField.getName());
  }

  // get FastVectorHighlighter instance out of the processing loop
  FastVectorHighlighter fvh =
      new FastVectorHighlighter(
          // FVH cannot process the hl.usePhraseHighlighter parameter on a per-field basis
          params.getBool(HighlightParams.USE_PHRASE_HIGHLIGHTER, true),
          // FVH cannot process the hl.requireFieldMatch parameter on a per-field basis
          params.getBool(HighlightParams.FIELD_MATCH, false));
  fvh.setPhraseLimit(params.getInt(HighlightParams.PHRASE_LIMIT, Integer.MAX_VALUE));
  FieldQuery fieldQuery = fvh.getFieldQuery(query, searcher.getIndexReader());

  // Highlight each document
  DocIterator iterator = docs.iterator();
  for (int i = 0; i < docs.size(); i++) {
    int docId = iterator.nextDoc();
    Document doc = searcher.doc(docId, fset);
    NamedList docSummaries = new SimpleOrderedMap();
    for (String fieldName : fieldNames) {
      fieldName = fieldName.trim();
      if (useFastVectorHighlighter(params, schema, fieldName))
        doHighlightingByFastVectorHighlighter(
            fvh, fieldQuery, req, docSummaries, docId, doc, fieldName);
      else doHighlightingByHighlighter(query, req, docSummaries, docId, doc, fieldName);
    }
    String printId = schema.printableUniqueKey(doc);
    fragments.add(printId == null ? null : printId, docSummaries);
  }
  return fragments;
}
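The returned NamedList nests one entry per document, keyed by its printable unique key, and each entry maps field names to snippet arrays. The calling component conventionally adds this under a "highlighting" key, so in a serialized response it looks roughly like this (field names and snippets illustrative):

"highlighting": {
  "42": {
    "title": ["A <em>summer</em> title"],
    "foo_t": ["<em>summer</em> time in the city"]
  }
}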
protected void processIds(
    ResponseBuilder rb, DocList dl, IndexSchema schema, SolrIndexSearcher searcher)
    throws IOException {
  StringBuilder sb = new StringBuilder();
  Set<String> fields = Collections.singleton(schema.getUniqueKeyField().getName());
  for (DocIterator iter = dl.iterator(); iter.hasNext(); ) {
    sb.append(schema.printableUniqueKey(searcher.doc(iter.nextDoc(), fields))).append(',');
  }
  if (sb.length() > 0) {
    rb.rsp.addToLog("responseLog", sb.substring(0, sb.length() - 1));
  }
}
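The effect is a single comma-joined list of unique keys attached to the response log (the trailing comma is trimmed off before logging). For three matching documents with ids 1, 2, and 3, the request-log entry would contain something like this (format illustrative; the exact rendering depends on the log writer):

responseLog=1,2,3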
public void testCopyFieldsAndFieldBoostsAndDocBoosts() throws Exception {
  SolrCore core = h.getCore();
  IndexSchema schema = core.getLatestSchema();
  SolrInputDocument doc = new SolrInputDocument();

  final float DOC_BOOST = 3.0F;
  doc.setDocumentBoost(DOC_BOOST);
  doc.addField("id", "42");

  SolrInputField inTitle = new SolrInputField("title");
  inTitle.addValue("titleA", 2.0F);
  inTitle.addValue("titleB", 7.0F);
  final float TITLE_BOOST = 2.0F * 7.0F;
  assertEquals(TITLE_BOOST, inTitle.getBoost(), 0.0F);
  doc.put(inTitle.getName(), inTitle);

  SolrInputField inFoo = new SolrInputField("foo_t");
  inFoo.addValue("summer time", 1.0F);
  inFoo.addValue("in the city", 5.0F);
  inFoo.addValue("living is easy", 11.0F);
  final float FOO_BOOST = 1.0F * 5.0F * 11.0F;
  assertEquals(FOO_BOOST, inFoo.getBoost(), 0.0F);
  doc.put(inFoo.getName(), inFoo);

  Document out = DocumentBuilder.toDocument(doc, schema);

  IndexableField[] outTitle = out.getFields(inTitle.getName());
  assertEquals("wrong number of title values", 2, outTitle.length);

  IndexableField[] outNoNorms = out.getFields("title_stringNoNorms");
  assertEquals("wrong number of nonorms values", 2, outNoNorms.length);

  IndexableField[] outFoo = out.getFields(inFoo.getName());
  assertEquals("wrong number of foo values", 3, outFoo.length);

  IndexableField[] outText = out.getFields("text");
  assertEquals("wrong number of text values", 5, outText.length);

  // since Lucene no longer has native document boosts, we should find
  // the doc boost multiplied into the boost on the first field value
  // of each field. All other field values should be 1.0f
  // (lucene will multiply all of the field value boosts later)
  assertEquals(TITLE_BOOST * DOC_BOOST, outTitle[0].boost(), 0.0F);
  assertEquals(1.0F, outTitle[1].boost(), 0.0F);
  //
  assertEquals(FOO_BOOST * DOC_BOOST, outFoo[0].boost(), 0.0F);
  assertEquals(1.0F, outFoo[1].boost(), 0.0F);
  assertEquals(1.0F, outFoo[2].boost(), 0.0F);
  //
  assertEquals(TITLE_BOOST * DOC_BOOST, outText[0].boost(), 0.0F);
  assertEquals(1.0F, outText[1].boost(), 0.0F);
  assertEquals(FOO_BOOST, outText[2].boost(), 0.0F);
  assertEquals(1.0F, outText[3].boost(), 0.0F);
  assertEquals(1.0F, outText[4].boost(), 0.0F);

  // copyField dest with no norms should not have received any boost
  assertEquals(1.0F, outNoNorms[0].boost(), 0.0F);
  assertEquals(1.0F, outNoNorms[1].boost(), 0.0F);

  // now index that SolrInputDocument to check the computed norms
  assertU(adoc(doc));
  assertU(commit());
  SolrQueryRequest req = req("q", "id:42");
  try {
    // very hack-ish
    SolrQueryResponse rsp = new SolrQueryResponse();
    core.execute(core.getRequestHandler(req.getParams().get(CommonParams.QT)), req, rsp);

    DocList dl = ((ResultContext) rsp.getValues().get("response")).docs;
    assertTrue("can't find the doc we just added", 1 == dl.size());
    int docid = dl.iterator().nextDoc();

    SolrIndexSearcher searcher = req.getSearcher();
    AtomicReader reader =
        SlowCompositeReaderWrapper.wrap(searcher.getTopReaderContext().reader());

    assertTrue(
        "similarity doesn't extend DefaultSimilarity, "
            + "config or defaults have changed since test was written",
        searcher.getSimilarity() instanceof DefaultSimilarity);

    DefaultSimilarity sim = (DefaultSimilarity) searcher.getSimilarity();
    NumericDocValues titleNorms = reader.getNormValues("title");
    NumericDocValues fooNorms = reader.getNormValues("foo_t");
    NumericDocValues textNorms = reader.getNormValues("text");

    assertEquals(expectedNorm(sim, 2, TITLE_BOOST * DOC_BOOST), titleNorms.get(docid));

    assertEquals(expectedNorm(sim, 8 - 3, FOO_BOOST * DOC_BOOST), fooNorms.get(docid));

    assertEquals(
        expectedNorm(sim, 2 + 8 - 3, TITLE_BOOST * FOO_BOOST * DOC_BOOST),
        textNorms.get(docid));

  } finally {
    req.close();
  }
}
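The expectedNorm helper is not part of this excerpt. A plausible sketch, assuming the classic DefaultSimilarity length norm of 1/sqrt(numTerms) and the Lucene 4.x long-valued norm encoding, would be:

// Hypothetical sketch of the helper used above: encode the norm the way
// DefaultSimilarity computes it at index time (fieldBoost / sqrt(numTerms)),
// so it can be compared against NumericDocValues.get(docid).
private static long expectedNorm(DefaultSimilarity sim, int numTerms, float boost) {
  return sim.encodeNormValue(boost / (float) Math.sqrt(numTerms));
}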
@Override
public void process(ResponseBuilder rb) throws IOException {
  SolrParams params = rb.req.getParams();
  if (!params.getBool(COMPONENT_NAME, false)) {
    return;
  }

  NamedList<Object> termVectors = new NamedList<Object>();
  rb.rsp.add(TERM_VECTORS, termVectors);

  IndexSchema schema = rb.req.getSchema();
  SchemaField keyField = schema.getUniqueKeyField();
  String uniqFieldName = null;
  if (keyField != null) {
    uniqFieldName = keyField.getName();
    termVectors.add("uniqueKeyFieldName", uniqFieldName);
  }

  FieldOptions allFields = new FieldOptions();
  // figure out what options we have, and try to get the appropriate vector
  allFields.termFreq = params.getBool(TermVectorParams.TF, false);
  allFields.positions = params.getBool(TermVectorParams.POSITIONS, false);
  allFields.offsets = params.getBool(TermVectorParams.OFFSETS, false);
  allFields.docFreq = params.getBool(TermVectorParams.DF, false);
  allFields.tfIdf = params.getBool(TermVectorParams.TF_IDF, false);
  // boolean cacheIdf = params.getBool(TermVectorParams.IDF, false);
  // shortcut to all values.
  if (params.getBool(TermVectorParams.ALL, false)) {
    allFields.termFreq = true;
    allFields.positions = true;
    allFields.offsets = true;
    allFields.docFreq = true;
    allFields.tfIdf = true;
  }

  // Build up our per-field mapping
  Map<String, FieldOptions> fieldOptions = new HashMap<String, FieldOptions>();
  NamedList<List<String>> warnings = new NamedList<List<String>>();
  List<String> noTV = new ArrayList<String>();
  List<String> noPos = new ArrayList<String>();
  List<String> noOff = new ArrayList<String>();

  Set<String> fields = getFields(rb);
  if (null != fields) {
    // we have specific fields to retrieve, or no fields
    for (String field : fields) {
      // work around SOLR-3523
      if (null == field || "score".equals(field)) continue;

      // we don't want to issue warnings about the uniqueKey field
      // since it can cause lots of confusion in distributed requests
      // where the uniqueKey field is injected into the fl for merging
      final boolean fieldIsUniqueKey = field.equals(uniqFieldName);

      SchemaField sf = schema.getFieldOrNull(field);
      if (sf != null) {
        if (sf.storeTermVector()) {
          FieldOptions option = fieldOptions.get(field);
          if (option == null) {
            option = new FieldOptions();
            option.fieldName = field;
            fieldOptions.put(field, option);
          }
          // get the per-field mappings
          option.termFreq = params.getFieldBool(field, TermVectorParams.TF, allFields.termFreq);
          option.docFreq = params.getFieldBool(field, TermVectorParams.DF, allFields.docFreq);
          option.tfIdf = params.getFieldBool(field, TermVectorParams.TF_IDF, allFields.tfIdf);
          // validate these are even an option
          option.positions =
              params.getFieldBool(field, TermVectorParams.POSITIONS, allFields.positions);
          if (option.positions && !sf.storeTermPositions() && !fieldIsUniqueKey) {
            noPos.add(field);
          }
          option.offsets =
              params.getFieldBool(field, TermVectorParams.OFFSETS, allFields.offsets);
          if (option.offsets && !sf.storeTermOffsets() && !fieldIsUniqueKey) {
            noOff.add(field);
          }
        } else { // field doesn't have term vectors
          if (!fieldIsUniqueKey) noTV.add(field);
        }
      } else {
        // field doesn't exist
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "undefined field: " + field);
      }
    }
  } // else, deal with all fields

  // NOTE: currently all types of warnings are schema-driven, and guaranteed
  // to be consistent across all shards - if additional types of warnings
  // are added that might be different between shards, finishStage() needs
  // to be changed to account for that.
  boolean hasWarnings = false;
  if (!noTV.isEmpty()) {
    warnings.add("noTermVectors", noTV);
    hasWarnings = true;
  }
  if (!noPos.isEmpty()) {
    warnings.add("noPositions", noPos);
    hasWarnings = true;
  }
  if (!noOff.isEmpty()) {
    warnings.add("noOffsets", noOff);
    hasWarnings = true;
  }
  if (hasWarnings) {
    termVectors.add("warnings", warnings);
  }

  DocListAndSet listAndSet = rb.getResults();
  List<Integer> docIds = getInts(params.getParams(TermVectorParams.DOC_IDS));
  Iterator<Integer> iter;
  if (docIds != null && !docIds.isEmpty()) {
    iter = docIds.iterator();
  } else {
    DocList list = listAndSet.docList;
    iter = list.iterator();
  }

  SolrIndexSearcher searcher = rb.req.getSearcher();
  IndexReader reader = searcher.getIndexReader();
  // the TVMapper is a TermVectorMapper which can be used to optimize loading of Term Vectors

  // Only load the id field to get the uniqueKey of that field
  final String finalUniqFieldName = uniqFieldName;

  final List<String> uniqValues = new ArrayList<String>();
  // TODO: is this required to be single-valued? if so, we should STOP
  // once we find it...
  final StoredFieldVisitor getUniqValue =
      new StoredFieldVisitor() {
        @Override
        public void stringField(FieldInfo fieldInfo, String value) {
          uniqValues.add(value);
        }

        @Override
        public void intField(FieldInfo fieldInfo, int value) {
          uniqValues.add(Integer.toString(value));
        }

        @Override
        public void longField(FieldInfo fieldInfo, long value) {
          uniqValues.add(Long.toString(value));
        }

        @Override
        public Status needsField(FieldInfo fieldInfo) {
          return (fieldInfo.name.equals(finalUniqFieldName)) ? Status.YES : Status.NO;
        }
      };

  TermsEnum termsEnum = null;

  while (iter.hasNext()) {
    Integer docId = iter.next();
    NamedList<Object> docNL = new NamedList<Object>();

    if (keyField != null) {
      reader.document(docId, getUniqValue);
      String uniqVal = null;
      if (uniqValues.size() != 0) {
        uniqVal = uniqValues.get(0);
        uniqValues.clear();
        docNL.add("uniqueKey", uniqVal);
        termVectors.add(uniqVal, docNL);
      }
    } else {
      // support for schemas w/o a unique key,
      termVectors.add("doc-" + docId, docNL);
    }

    if (null != fields) {
      for (Map.Entry<String, FieldOptions> entry : fieldOptions.entrySet()) {
        final String field = entry.getKey();
        final Terms vector = reader.getTermVector(docId, field);
        if (vector != null) {
          termsEnum = vector.iterator(termsEnum);
          mapOneVector(docNL, entry.getValue(), reader, docId, termsEnum, field);
        }
      }
    } else {
      // extract all fields
      final Fields vectors = reader.getTermVectors(docId);
      for (String field : vectors) {
        Terms terms = vectors.terms(field);
        if (terms != null) {
          termsEnum = terms.iterator(termsEnum);
          mapOneVector(docNL, allFields, reader, docId, termsEnum, field);
        }
      }
    }
  }
}
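A request exercising this component might look like the following (the host, core name, and document query are illustrative; the tv.* parameter names come from TermVectorParams, and COMPONENT_NAME resolves to "tv"):

http://localhost:8983/solr/collection1/select?q=id:42&tv=true&tv.tf=true&tv.positions=true&tv.offsets=true&tv.fl=includes

Per-field overrides use the f.<field>.tv.* form that getFieldBool resolves above, e.g. f.includes.tv.positions=false.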
@Override
public void process(ResponseBuilder rb) throws IOException {
  SolrParams params = rb.req.getParams();
  if (!params.getBool(COMPONENT_NAME, false)) {
    return;
  }

  NamedList termVectors = new NamedList();
  rb.rsp.add(TERM_VECTORS, termVectors);
  FieldOptions allFields = new FieldOptions();
  // figure out what options we have, and try to get the appropriate vector
  allFields.termFreq = params.getBool(TermVectorParams.TF, false);
  allFields.positions = params.getBool(TermVectorParams.POSITIONS, false);
  allFields.offsets = params.getBool(TermVectorParams.OFFSETS, false);
  allFields.docFreq = params.getBool(TermVectorParams.DF, false);
  allFields.tfIdf = params.getBool(TermVectorParams.TF_IDF, false);
  // boolean cacheIdf = params.getBool(TermVectorParams.IDF, false);
  // shortcut to all values.
  boolean all = params.getBool(TermVectorParams.ALL, false);
  if (all) {
    allFields.termFreq = true;
    allFields.positions = true;
    allFields.offsets = true;
    allFields.docFreq = true;
    allFields.tfIdf = true;
  }

  String fldLst = params.get(TermVectorParams.FIELDS);
  if (fldLst == null) {
    fldLst = params.get(CommonParams.FL);
  }

  // use this to validate our fields
  IndexSchema schema = rb.req.getSchema();
  // Build up our per-field mapping
  Map<String, FieldOptions> fieldOptions = new HashMap<String, FieldOptions>();
  NamedList warnings = new NamedList();
  List<String> noTV = new ArrayList<String>();
  List<String> noPos = new ArrayList<String>();
  List<String> noOff = new ArrayList<String>();

  // we have specific fields to retrieve
  if (fldLst != null) {
    String[] fields = SolrPluginUtils.split(fldLst);
    for (String field : fields) {
      SchemaField sf = schema.getFieldOrNull(field);
      if (sf != null) {
        if (sf.storeTermVector()) {
          FieldOptions option = fieldOptions.get(field);
          if (option == null) {
            option = new FieldOptions();
            option.fieldName = field;
            fieldOptions.put(field, option);
          }
          // get the per-field mappings
          option.termFreq = params.getFieldBool(field, TermVectorParams.TF, allFields.termFreq);
          option.docFreq = params.getFieldBool(field, TermVectorParams.DF, allFields.docFreq);
          option.tfIdf = params.getFieldBool(field, TermVectorParams.TF_IDF, allFields.tfIdf);
          // validate these are even an option
          option.positions =
              params.getFieldBool(field, TermVectorParams.POSITIONS, allFields.positions);
          if (option.positions && !sf.storeTermPositions()) {
            noPos.add(field);
          }
          option.offsets =
              params.getFieldBool(field, TermVectorParams.OFFSETS, allFields.offsets);
          if (option.offsets && !sf.storeTermOffsets()) {
            noOff.add(field);
          }
        } else { // field doesn't have term vectors
          noTV.add(field);
        }
      } else {
        // field doesn't exist
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "undefined field: " + field);
      }
    }
  } // else, deal with all fields

  boolean hasWarnings = false;
  if (!noTV.isEmpty()) {
    warnings.add("noTermVectors", noTV);
    hasWarnings = true;
  }
  if (!noPos.isEmpty()) {
    warnings.add("noPositions", noPos);
    hasWarnings = true;
  }
  if (!noOff.isEmpty()) {
    warnings.add("noOffsets", noOff);
    hasWarnings = true;
  }
  if (hasWarnings) {
    termVectors.add("warnings", warnings);
  }

  DocListAndSet listAndSet = rb.getResults();
  List<Integer> docIds = getInts(params.getParams(TermVectorParams.DOC_IDS));
  Iterator<Integer> iter;
  if (docIds != null && !docIds.isEmpty()) {
    iter = docIds.iterator();
  } else {
    DocList list = listAndSet.docList;
    iter = list.iterator();
  }
  SolrIndexSearcher searcher = rb.req.getSearcher();
  IndexReader reader = searcher.getReader();
  // the TVMapper is a TermVectorMapper which can be used to optimize loading of Term Vectors

  SchemaField keyField = schema.getUniqueKeyField();
  String uniqFieldName = null;
  if (keyField != null) {
    uniqFieldName = keyField.getName();
  }
  // Only load the id field to get the uniqueKey of that field
  SetBasedFieldSelector fieldSelector =
      new SetBasedFieldSelector(
          Collections.singleton(uniqFieldName), Collections.<String>emptySet());
  TVMapper mapper = new TVMapper(reader);
  // this will only stay set if fieldOptions.isEmpty()
  // (in other words, only if the user didn't set any fields)
  mapper.fieldOptions = allFields;

  while (iter.hasNext()) {
    Integer docId = iter.next();
    NamedList docNL = new NamedList();
    mapper.docNL = docNL;
    termVectors.add("doc-" + docId, docNL);

    if (keyField != null) {
      Document document = reader.document(docId, fieldSelector);
      Fieldable uniqId = document.getFieldable(uniqFieldName);
      String uniqVal = null;
      if (uniqId != null) {
        uniqVal = keyField.getType().storedToReadable(uniqId);
      }
      if (uniqVal != null) {
        docNL.add("uniqueKey", uniqVal);
        termVectors.add("uniqueKeyFieldName", uniqFieldName);
      }
    }

    if (!fieldOptions.isEmpty()) {
      for (Map.Entry<String, FieldOptions> entry : fieldOptions.entrySet()) {
        mapper.fieldOptions = entry.getValue();
        reader.getTermFreqVector(docId, entry.getKey(), mapper);
      }
    } else {
      // deal with all fields by using the allFieldMapper
      reader.getTermFreqVector(docId, mapper);
    }
  }
}
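In this older (pre-Lucene-4) version every document is keyed as "doc-<id>" rather than by its unique key. The serialized section looks roughly like the sketch below; the field name, term, and the exact per-term keys written by TVMapper are illustrative approximations, not verified output:

"termVectors": {
  "doc-5": {
    "uniqueKey": "42",
    "includes": {
      "solr": { "tf": 2, "positions": { ... }, "df": 11, "tf-idf": 0.18 }
    }
  },
  "uniqueKeyFieldName": "id"
}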