/**
  * conversion from a SolrQueryResponse (which is a solr-internal data format) to SolrDocumentList
  * (which is a solrj-format) The conversion is done inside the solrj api using the
  * BinaryResponseWriter and a very complex unfolding process via
  * org.apache.solr.common.util.JavaBinCodec.marshal.
  *
  * @param request
  * @param sqr
  * @return
  */
 public SolrDocumentList SolrQueryResponse2SolrDocumentList(
     final SolrQueryRequest req, final SolrQueryResponse rsp) {
   SolrDocumentList sdl = new SolrDocumentList();
   NamedList<?> nl = rsp.getValues();
   ResultContext resultContext = (ResultContext) nl.get("response");
   DocList response =
       resultContext == null
           ? new DocSlice(0, 0, new int[0], new float[0], 0, 0.0f)
           : resultContext.docs;
   sdl.setNumFound(response == null ? 0 : response.matches());
   sdl.setStart(response == null ? 0 : response.offset());
   String originalName = Thread.currentThread().getName();
   if (response != null) {
     try {
       SolrIndexSearcher searcher = req.getSearcher();
       final int responseCount = response.size();
       DocIterator iterator = response.iterator();
       for (int i = 0; i < responseCount; i++) {
         int docid = iterator.nextDoc();
         Thread.currentThread()
             .setName("EmbeddedSolrConnector.SolrQueryResponse2SolrDocumentList: " + docid);
         Document responsedoc = searcher.doc(docid, (Set<String>) null);
         SolrDocument sordoc = doc2SolrDoc(responsedoc);
         sdl.add(sordoc);
       }
     } catch (IOException e) {
       ConcurrentLog.logException(e);
     }
   }
   Thread.currentThread().setName(originalName);
   return sdl;
 }
  @Override
  public void writeDocList(String name, DocList ids, Set<String> fields, Map otherFields)
      throws IOException {
    boolean includeScore = false;
    if (fields != null) {
      includeScore = fields.contains("score");
      if (fields.size() == 0 || (fields.size() == 1 && includeScore) || fields.contains("*")) {
        fields = null; // null means return all stored fields
      }
    }

    int sz = ids.size();

    writeMapOpener(includeScore ? 4 : 3);
    incLevel();
    writeKey("numFound", false);
    writeInt(null, ids.matches());
    writeMapSeparator();
    writeKey("start", false);
    writeInt(null, ids.offset());

    if (includeScore) {
      writeMapSeparator();
      writeKey("maxScore", false);
      writeFloat(null, ids.maxScore());
    }
    writeMapSeparator();
    // indent();
    writeKey("docs", false);
    writeArrayOpener(sz);

    incLevel();
    boolean first = true;

    SolrIndexSearcher searcher = req.getSearcher();
    DocIterator iterator = ids.iterator();
    for (int i = 0; i < sz; i++) {
      int id = iterator.nextDoc();
      Document doc = searcher.doc(id, fields);

      if (first) {
        first = false;
      } else {
        writeArraySeparator();
      }
      indent();
      writeDoc(null, doc, fields, (includeScore ? iterator.score() : 0.0f), includeScore);
    }
    decLevel();
    writeArrayCloser();

    if (otherFields != null) {
      writeMap(null, otherFields, true, false);
    }

    decLevel();
    indent();
    writeMapCloser();
  }
  /**
   * Generates a list of Highlighted query fragments for each item in a list of documents, or
   * returns null if highlighting is disabled.
   *
   * @param docs query results
   * @param query the query
   * @param req the current request
   * @param defaultFields default list of fields to summarize
   * @return NamedList containing a NamedList for each document, which in turns contains sets
   *     (field, summary) pairs.
   */
  @Override
  @SuppressWarnings("unchecked")
  public NamedList<Object> doHighlighting(
      DocList docs, Query query, SolrQueryRequest req, String[] defaultFields) throws IOException {
    SolrParams params = req.getParams();
    if (!isHighlightingEnabled(params)) return null;

    SolrIndexSearcher searcher = req.getSearcher();
    IndexSchema schema = searcher.getSchema();
    NamedList fragments = new SimpleOrderedMap();
    String[] fieldNames = getHighlightFields(query, req, defaultFields);
    Set<String> fset = new HashSet<String>();

    {
      // pre-fetch documents using the Searcher's doc cache
      for (String f : fieldNames) {
        fset.add(f);
      }
      // fetch unique key if one exists.
      SchemaField keyField = schema.getUniqueKeyField();
      if (null != keyField) fset.add(keyField.getName());
    }

    // get FastVectorHighlighter instance out of the processing loop
    FastVectorHighlighter fvh =
        new FastVectorHighlighter(
            // FVH cannot process hl.usePhraseHighlighter parameter per-field basis
            params.getBool(HighlightParams.USE_PHRASE_HIGHLIGHTER, true),
            // FVH cannot process hl.requireFieldMatch parameter per-field basis
            params.getBool(HighlightParams.FIELD_MATCH, false));
    fvh.setPhraseLimit(params.getInt(HighlightParams.PHRASE_LIMIT, Integer.MAX_VALUE));
    FieldQuery fieldQuery = fvh.getFieldQuery(query, searcher.getIndexReader());

    // Highlight each document
    DocIterator iterator = docs.iterator();
    for (int i = 0; i < docs.size(); i++) {
      int docId = iterator.nextDoc();
      Document doc = searcher.doc(docId, fset);
      NamedList docSummaries = new SimpleOrderedMap();
      for (String fieldName : fieldNames) {
        fieldName = fieldName.trim();
        if (useFastVectorHighlighter(params, schema, fieldName))
          doHighlightingByFastVectorHighlighter(
              fvh, fieldQuery, req, docSummaries, docId, doc, fieldName);
        else doHighlightingByHighlighter(query, req, docSummaries, docId, doc, fieldName);
      }
      String printId = schema.printableUniqueKey(doc);
      fragments.add(printId == null ? null : printId, docSummaries);
    }
    return fragments;
  }
Exemplo n.º 4
0
  protected void processIds(
      ResponseBuilder rb, DocList dl, IndexSchema schema, SolrIndexSearcher searcher)
      throws IOException {

    StringBuilder sb = new StringBuilder();

    Set<String> fields = Collections.singleton(schema.getUniqueKeyField().getName());
    for (DocIterator iter = dl.iterator(); iter.hasNext(); ) {

      sb.append(schema.printableUniqueKey(searcher.doc(iter.nextDoc(), fields))).append(',');
    }
    if (sb.length() > 0) {
      rb.rsp.addToLog("responseLog", sb.substring(0, sb.length() - 1));
    }
  }
  public void testCopyFieldsAndFieldBoostsAndDocBoosts() throws Exception {
    SolrCore core = h.getCore();
    IndexSchema schema = core.getLatestSchema();
    SolrInputDocument doc = new SolrInputDocument();

    final float DOC_BOOST = 3.0F;
    doc.setDocumentBoost(DOC_BOOST);
    doc.addField("id", "42");

    SolrInputField inTitle = new SolrInputField("title");
    inTitle.addValue("titleA", 2.0F);
    inTitle.addValue("titleB", 7.0F);
    final float TITLE_BOOST = 2.0F * 7.0F;
    assertEquals(TITLE_BOOST, inTitle.getBoost(), 0.0F);
    doc.put(inTitle.getName(), inTitle);

    SolrInputField inFoo = new SolrInputField("foo_t");
    inFoo.addValue("summer time", 1.0F);
    inFoo.addValue("in the city", 5.0F);
    inFoo.addValue("living is easy", 11.0F);
    final float FOO_BOOST = 1.0F * 5.0F * 11.0F;
    assertEquals(FOO_BOOST, inFoo.getBoost(), 0.0F);
    doc.put(inFoo.getName(), inFoo);

    Document out = DocumentBuilder.toDocument(doc, schema);

    IndexableField[] outTitle = out.getFields(inTitle.getName());
    assertEquals("wrong number of title values", 2, outTitle.length);

    IndexableField[] outNoNorms = out.getFields("title_stringNoNorms");
    assertEquals("wrong number of nonorms values", 2, outNoNorms.length);

    IndexableField[] outFoo = out.getFields(inFoo.getName());
    assertEquals("wrong number of foo values", 3, outFoo.length);

    IndexableField[] outText = out.getFields("text");
    assertEquals("wrong number of text values", 5, outText.length);

    // since Lucene no longer has native document boosts, we should find
    // the doc boost multiplied into the boost on the first field value
    // of each field.  All other field values should be 1.0f
    // (lucene will multiply all of the field value boosts later)
    assertEquals(TITLE_BOOST * DOC_BOOST, outTitle[0].boost(), 0.0F);
    assertEquals(1.0F, outTitle[1].boost(), 0.0F);
    //
    assertEquals(FOO_BOOST * DOC_BOOST, outFoo[0].boost(), 0.0F);
    assertEquals(1.0F, outFoo[1].boost(), 0.0F);
    assertEquals(1.0F, outFoo[2].boost(), 0.0F);
    //
    assertEquals(TITLE_BOOST * DOC_BOOST, outText[0].boost(), 0.0F);
    assertEquals(1.0F, outText[1].boost(), 0.0F);
    assertEquals(FOO_BOOST, outText[2].boost(), 0.0F);
    assertEquals(1.0F, outText[3].boost(), 0.0F);
    assertEquals(1.0F, outText[4].boost(), 0.0F);

    // copyField dest with no norms should not have recieved any boost
    assertEquals(1.0F, outNoNorms[0].boost(), 0.0F);
    assertEquals(1.0F, outNoNorms[1].boost(), 0.0F);

    // now index that SolrInputDocument to check the computed norms

    assertU(adoc(doc));
    assertU(commit());

    SolrQueryRequest req = req("q", "id:42");
    try {
      // very hack-ish

      SolrQueryResponse rsp = new SolrQueryResponse();
      core.execute(core.getRequestHandler(req.getParams().get(CommonParams.QT)), req, rsp);

      DocList dl = ((ResultContext) rsp.getValues().get("response")).docs;
      assertTrue("can't find the doc we just added", 1 == dl.size());
      int docid = dl.iterator().nextDoc();

      SolrIndexSearcher searcher = req.getSearcher();
      AtomicReader reader =
          SlowCompositeReaderWrapper.wrap(searcher.getTopReaderContext().reader());

      assertTrue(
          "similarity doesn't extend DefaultSimilarity, "
              + "config or defaults have changed since test was written",
          searcher.getSimilarity() instanceof DefaultSimilarity);

      DefaultSimilarity sim = (DefaultSimilarity) searcher.getSimilarity();

      NumericDocValues titleNorms = reader.getNormValues("title");
      NumericDocValues fooNorms = reader.getNormValues("foo_t");
      NumericDocValues textNorms = reader.getNormValues("text");

      assertEquals(expectedNorm(sim, 2, TITLE_BOOST * DOC_BOOST), titleNorms.get(docid));

      assertEquals(expectedNorm(sim, 8 - 3, FOO_BOOST * DOC_BOOST), fooNorms.get(docid));

      assertEquals(
          expectedNorm(sim, 2 + 8 - 3, TITLE_BOOST * FOO_BOOST * DOC_BOOST), textNorms.get(docid));

    } finally {
      req.close();
    }
  }
Exemplo n.º 6
0
  @Override
  public void process(ResponseBuilder rb) throws IOException {
    SolrParams params = rb.req.getParams();
    if (!params.getBool(COMPONENT_NAME, false)) {
      return;
    }

    NamedList<Object> termVectors = new NamedList<Object>();
    rb.rsp.add(TERM_VECTORS, termVectors);

    IndexSchema schema = rb.req.getSchema();
    SchemaField keyField = schema.getUniqueKeyField();
    String uniqFieldName = null;
    if (keyField != null) {
      uniqFieldName = keyField.getName();
      termVectors.add("uniqueKeyFieldName", uniqFieldName);
    }

    FieldOptions allFields = new FieldOptions();
    // figure out what options we have, and try to get the appropriate vector
    allFields.termFreq = params.getBool(TermVectorParams.TF, false);
    allFields.positions = params.getBool(TermVectorParams.POSITIONS, false);
    allFields.offsets = params.getBool(TermVectorParams.OFFSETS, false);
    allFields.docFreq = params.getBool(TermVectorParams.DF, false);
    allFields.tfIdf = params.getBool(TermVectorParams.TF_IDF, false);
    // boolean cacheIdf = params.getBool(TermVectorParams.IDF, false);
    // short cut to all values.
    if (params.getBool(TermVectorParams.ALL, false)) {
      allFields.termFreq = true;
      allFields.positions = true;
      allFields.offsets = true;
      allFields.docFreq = true;
      allFields.tfIdf = true;
    }

    // Build up our per field mapping
    Map<String, FieldOptions> fieldOptions = new HashMap<String, FieldOptions>();
    NamedList<List<String>> warnings = new NamedList<List<String>>();
    List<String> noTV = new ArrayList<String>();
    List<String> noPos = new ArrayList<String>();
    List<String> noOff = new ArrayList<String>();

    Set<String> fields = getFields(rb);
    if (null != fields) {
      // we have specific fields to retrieve, or no fields
      for (String field : fields) {

        // workarround SOLR-3523
        if (null == field || "score".equals(field)) continue;

        // we don't want to issue warnings about the uniqueKey field
        // since it can cause lots of confusion in distributed requests
        // where the uniqueKey field is injected into the fl for merging
        final boolean fieldIsUniqueKey = field.equals(uniqFieldName);

        SchemaField sf = schema.getFieldOrNull(field);
        if (sf != null) {
          if (sf.storeTermVector()) {
            FieldOptions option = fieldOptions.get(field);
            if (option == null) {
              option = new FieldOptions();
              option.fieldName = field;
              fieldOptions.put(field, option);
            }
            // get the per field mappings
            option.termFreq = params.getFieldBool(field, TermVectorParams.TF, allFields.termFreq);
            option.docFreq = params.getFieldBool(field, TermVectorParams.DF, allFields.docFreq);
            option.tfIdf = params.getFieldBool(field, TermVectorParams.TF_IDF, allFields.tfIdf);
            // Validate these are even an option
            option.positions =
                params.getFieldBool(field, TermVectorParams.POSITIONS, allFields.positions);
            if (option.positions && !sf.storeTermPositions() && !fieldIsUniqueKey) {
              noPos.add(field);
            }
            option.offsets =
                params.getFieldBool(field, TermVectorParams.OFFSETS, allFields.offsets);
            if (option.offsets && !sf.storeTermOffsets() && !fieldIsUniqueKey) {
              noOff.add(field);
            }
          } else { // field doesn't have term vectors
            if (!fieldIsUniqueKey) noTV.add(field);
          }
        } else {
          // field doesn't exist
          throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "undefined field: " + field);
        }
      }
    } // else, deal with all fields

    // NOTE: currently all typs of warnings are schema driven, and garunteed
    // to be consistent across all shards - if additional types of warnings
    // are added that might be differnet between shards, finishStage() needs
    // to be changed to account for that.
    boolean hasWarnings = false;
    if (!noTV.isEmpty()) {
      warnings.add("noTermVectors", noTV);
      hasWarnings = true;
    }
    if (!noPos.isEmpty()) {
      warnings.add("noPositions", noPos);
      hasWarnings = true;
    }
    if (!noOff.isEmpty()) {
      warnings.add("noOffsets", noOff);
      hasWarnings = true;
    }
    if (hasWarnings) {
      termVectors.add("warnings", warnings);
    }

    DocListAndSet listAndSet = rb.getResults();
    List<Integer> docIds = getInts(params.getParams(TermVectorParams.DOC_IDS));
    Iterator<Integer> iter;
    if (docIds != null && !docIds.isEmpty()) {
      iter = docIds.iterator();
    } else {
      DocList list = listAndSet.docList;
      iter = list.iterator();
    }
    SolrIndexSearcher searcher = rb.req.getSearcher();

    IndexReader reader = searcher.getIndexReader();
    // the TVMapper is a TermVectorMapper which can be used to optimize loading of Term Vectors

    // Only load the id field to get the uniqueKey of that
    // field

    final String finalUniqFieldName = uniqFieldName;

    final List<String> uniqValues = new ArrayList<String>();

    // TODO: is this required to be single-valued? if so, we should STOP
    // once we find it...
    final StoredFieldVisitor getUniqValue =
        new StoredFieldVisitor() {
          @Override
          public void stringField(FieldInfo fieldInfo, String value) {
            uniqValues.add(value);
          }

          @Override
          public void intField(FieldInfo fieldInfo, int value) {
            uniqValues.add(Integer.toString(value));
          }

          @Override
          public void longField(FieldInfo fieldInfo, long value) {
            uniqValues.add(Long.toString(value));
          }

          @Override
          public Status needsField(FieldInfo fieldInfo) {
            return (fieldInfo.name.equals(finalUniqFieldName)) ? Status.YES : Status.NO;
          }
        };

    TermsEnum termsEnum = null;

    while (iter.hasNext()) {
      Integer docId = iter.next();
      NamedList<Object> docNL = new NamedList<Object>();

      if (keyField != null) {
        reader.document(docId, getUniqValue);
        String uniqVal = null;
        if (uniqValues.size() != 0) {
          uniqVal = uniqValues.get(0);
          uniqValues.clear();
          docNL.add("uniqueKey", uniqVal);
          termVectors.add(uniqVal, docNL);
        }
      } else {
        // support for schemas w/o a unique key,
        termVectors.add("doc-" + docId, docNL);
      }

      if (null != fields) {
        for (Map.Entry<String, FieldOptions> entry : fieldOptions.entrySet()) {
          final String field = entry.getKey();
          final Terms vector = reader.getTermVector(docId, field);
          if (vector != null) {
            termsEnum = vector.iterator(termsEnum);
            mapOneVector(docNL, entry.getValue(), reader, docId, vector.iterator(termsEnum), field);
          }
        }
      } else {
        // extract all fields
        final Fields vectors = reader.getTermVectors(docId);
        for (String field : vectors) {
          Terms terms = vectors.terms(field);
          if (terms != null) {
            termsEnum = terms.iterator(termsEnum);
            mapOneVector(docNL, allFields, reader, docId, termsEnum, field);
          }
        }
      }
    }
  }
  @Override
  public void process(ResponseBuilder rb) throws IOException {
    SolrParams params = rb.req.getParams();
    if (!params.getBool(COMPONENT_NAME, false)) {
      return;
    }

    NamedList termVectors = new NamedList();
    rb.rsp.add(TERM_VECTORS, termVectors);
    FieldOptions allFields = new FieldOptions();
    // figure out what options we have, and try to get the appropriate vector
    allFields.termFreq = params.getBool(TermVectorParams.TF, false);
    allFields.positions = params.getBool(TermVectorParams.POSITIONS, false);
    allFields.offsets = params.getBool(TermVectorParams.OFFSETS, false);
    allFields.docFreq = params.getBool(TermVectorParams.DF, false);
    allFields.tfIdf = params.getBool(TermVectorParams.TF_IDF, false);
    // boolean cacheIdf = params.getBool(TermVectorParams.IDF, false);
    // short cut to all values.
    boolean all = params.getBool(TermVectorParams.ALL, false);
    if (all == true) {
      allFields.termFreq = true;
      allFields.positions = true;
      allFields.offsets = true;
      allFields.docFreq = true;
      allFields.tfIdf = true;
    }

    String fldLst = params.get(TermVectorParams.FIELDS);
    if (fldLst == null) {
      fldLst = params.get(CommonParams.FL);
    }

    // use this to validate our fields
    IndexSchema schema = rb.req.getSchema();
    // Build up our per field mapping
    Map<String, FieldOptions> fieldOptions = new HashMap<String, FieldOptions>();
    NamedList warnings = new NamedList();
    List<String> noTV = new ArrayList<String>();
    List<String> noPos = new ArrayList<String>();
    List<String> noOff = new ArrayList<String>();

    // we have specific fields to retrieve
    if (fldLst != null) {
      String[] fields = SolrPluginUtils.split(fldLst);
      for (String field : fields) {
        SchemaField sf = schema.getFieldOrNull(field);
        if (sf != null) {
          if (sf.storeTermVector()) {
            FieldOptions option = fieldOptions.get(field);
            if (option == null) {
              option = new FieldOptions();
              option.fieldName = field;
              fieldOptions.put(field, option);
            }
            // get the per field mappings
            option.termFreq = params.getFieldBool(field, TermVectorParams.TF, allFields.termFreq);
            option.docFreq = params.getFieldBool(field, TermVectorParams.DF, allFields.docFreq);
            option.tfIdf = params.getFieldBool(field, TermVectorParams.TF_IDF, allFields.tfIdf);
            // Validate these are even an option
            option.positions =
                params.getFieldBool(field, TermVectorParams.POSITIONS, allFields.positions);
            if (option.positions == true && sf.storeTermPositions() == false) {
              noPos.add(field);
            }
            option.offsets =
                params.getFieldBool(field, TermVectorParams.OFFSETS, allFields.offsets);
            if (option.offsets == true && sf.storeTermOffsets() == false) {
              noOff.add(field);
            }
          } else { // field doesn't have term vectors
            noTV.add(field);
          }
        } else {
          // field doesn't exist
          throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "undefined field: " + field);
        }
      }
    } // else, deal with all fields
    boolean hasWarnings = false;
    if (noTV.isEmpty() == false) {
      warnings.add("noTermVectors", noTV);
      hasWarnings = true;
    }
    if (noPos.isEmpty() == false) {
      warnings.add("noPositions", noPos);
      hasWarnings = true;
    }
    if (noOff.isEmpty() == false) {
      warnings.add("noOffsets", noOff);
      hasWarnings = true;
    }
    if (hasWarnings == true) {
      termVectors.add("warnings", warnings);
    }

    DocListAndSet listAndSet = rb.getResults();
    List<Integer> docIds = getInts(params.getParams(TermVectorParams.DOC_IDS));
    Iterator<Integer> iter;
    if (docIds != null && docIds.isEmpty() == false) {
      iter = docIds.iterator();
    } else {
      DocList list = listAndSet.docList;
      iter = list.iterator();
    }
    SolrIndexSearcher searcher = rb.req.getSearcher();

    IndexReader reader = searcher.getReader();
    // the TVMapper is a TermVectorMapper which can be used to optimize loading of Term Vectors
    SchemaField keyField = schema.getUniqueKeyField();
    String uniqFieldName = null;
    if (keyField != null) {
      uniqFieldName = keyField.getName();
    }
    // Only load the id field to get the uniqueKey of that field
    SetBasedFieldSelector fieldSelector =
        new SetBasedFieldSelector(
            Collections.singleton(uniqFieldName), Collections.<String>emptySet());
    TVMapper mapper = new TVMapper(reader);
    mapper.fieldOptions =
        allFields; // this will only stay set if fieldOptions.isEmpty() (in other words, only if the
                   // user didn't set any fields)
    while (iter.hasNext()) {
      Integer docId = iter.next();
      NamedList docNL = new NamedList();
      mapper.docNL = docNL;
      termVectors.add("doc-" + docId, docNL);

      if (keyField != null) {
        Document document = reader.document(docId, fieldSelector);
        Fieldable uniqId = document.getFieldable(uniqFieldName);
        String uniqVal = null;
        if (uniqId != null) {
          uniqVal = keyField.getType().storedToReadable(uniqId);
        }
        if (uniqVal != null) {
          docNL.add("uniqueKey", uniqVal);
          termVectors.add("uniqueKeyFieldName", uniqFieldName);
        }
      }
      if (fieldOptions.isEmpty() == false) {
        for (Map.Entry<String, FieldOptions> entry : fieldOptions.entrySet()) {
          mapper.fieldOptions = entry.getValue();
          reader.getTermFreqVector(docId, entry.getKey(), mapper);
        }
      } else {
        // deal with all fields by using the allFieldMapper
        reader.getTermFreqVector(docId, mapper);
      }
    }
  }