Exemplo n.º 1
0
  @Override
  public void finishStage(ResponseBuilder rb) {
    if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS) {

      NamedList termVectors = new NamedList<Object>();
      Map.Entry<String, Object>[] arr = new NamedList.NamedListEntry[rb.resultIds.size()];

      for (ShardRequest sreq : rb.finished) {
        if ((sreq.purpose & ShardRequest.PURPOSE_GET_FIELDS) == 0
            || !sreq.params.getBool(COMPONENT_NAME, false)) {
          continue;
        }
        for (ShardResponse srsp : sreq.responses) {
          NamedList<Object> nl =
              (NamedList<Object>) srsp.getSolrResponse().getResponse().get(TERM_VECTORS);
          for (int i = 0; i < nl.size(); i++) {
            String key = nl.getName(i);
            ShardDoc sdoc = rb.resultIds.get(key);
            if (null == sdoc) {
              // metadata, only need from one node, leave in order
              if (termVectors.indexOf(key, 0) < 0) {
                termVectors.add(key, nl.getVal(i));
              }
            } else {
              int idx = sdoc.positionInResponse;
              arr[idx] = new NamedList.NamedListEntry<Object>(key, nl.getVal(i));
            }
          }
        }
      }
      // remove nulls in case not all docs were able to be retrieved
      termVectors.addAll(SolrPluginUtils.removeNulls(new NamedList<Object>(arr)));
      rb.rsp.add(TERM_VECTORS, termVectors);
    }
  }
Exemplo n.º 2
0
  /**
   * Helper method for determining the list of fields that we should try to find term vectors on.
   *
   * <p>Does simple (non-glob-supporting) parsing on the {@link TermVectorParams#FIELDS} param if
   * specified, otherwise it returns the concrete field values specified in {@link CommonParams#FL}
   * -- ignoring functions, transformers, or literals.
   *
   * <p>If "fl=*" is used, or neither param is specified, then <code>null</code> will be returned.
   * If the empty set is returned, it means the "fl" specified consisted entirely of things that are
   * not real fields (ie: functions, transformers, partial-globs, score, etc...) and not supported
   * by this component.
   */
  private Set<String> getFields(ResponseBuilder rb) {
    SolrParams params = rb.req.getParams();
    String[] fldLst = params.getParams(TermVectorParams.FIELDS);
    if (null == fldLst || 0 == fldLst.length || (1 == fldLst.length && 0 == fldLst[0].length())) {

      // no tv.fl, parse the main fl
      ReturnFields rf = new SolrReturnFields(params.getParams(CommonParams.FL), rb.req);

      if (rf.wantsAllFields()) {
        return null;
      }

      Set<String> fieldNames = rf.getLuceneFieldNames();
      return (null != fieldNames)
          ? fieldNames
          :
          // return empty set indicating no fields should be used
          Collections.<String>emptySet();
    }

    // otherwise us the raw fldList as is, no special parsing or globs
    Set<String> fieldNames = new LinkedHashSet<String>();
    for (String fl : fldLst) {
      fieldNames.addAll(Arrays.asList(SolrPluginUtils.split(fl)));
    }
    return fieldNames;
  }
Exemplo n.º 3
0
 protected void doPrefetch(ResponseBuilder rb) throws IOException {
   SolrQueryRequest req = rb.req;
   SolrQueryResponse rsp = rb.rsp;
   // pre-fetch returned documents
   if (!req.getParams().getBool(ShardParams.IS_SHARD, false)
       && rb.getResults().docList != null
       && rb.getResults().docList.size() <= 50) {
     // TODO: this may depend on the highlighter component (or other components?)
     SolrPluginUtils.optimizePreFetchDocs(rb.getResults().docList, rb.getQuery(), req, rsp);
   }
 }
 @Override
 public void transform(SolrDocument doc, int docid) {
   if (context != null && context.query != null) {
     try {
       Explanation exp = context.searcher.explain(context.query, docid);
       if (style == Style.nl) {
         doc.setField(name, SolrPluginUtils.explanationToNamedList(exp));
       } else if (style == Style.html) {
         doc.setField(name, exp.toHtml());
       } else {
         doc.setField(name, exp.toString());
       }
     } catch (IOException e) {
       e.printStackTrace();
     }
   }
 }
Exemplo n.º 5
0
  @SuppressWarnings("rawtypes")
  @Override
  public void init(NamedList args) {

    defaultConfig = new SolrParserConfigParams();

    NamedList defs = (NamedList) args.get("defaults");
    if (defs == null) {
      defs = new NamedList();
    }

    if (defs.get("virtual-fields") != null) {
      NamedList vf = (NamedList) defs.get("virtual-fields");
      for (int i = 0; i < vf.size(); i++) {
        String fName = vf.getName(i);
        String fValue = (String) vf.getVal(i);
        defaultConfig.virtualFields.put(fName, SolrPluginUtils.parseFieldBoosts(fValue));
      }
    }
  }
Exemplo n.º 6
0
  @Override
  public void process(ResponseBuilder rb) throws IOException {
    SolrParams params = rb.req.getParams();
    if (!params.getBool(COMPONENT_NAME, false)) {
      return;
    }
    String name = getClusteringEngineName(rb);
    boolean useResults = params.getBool(ClusteringParams.USE_SEARCH_RESULTS, false);
    if (useResults == true) {
      SearchClusteringEngine engine = getSearchClusteringEngine(rb);
      if (engine != null) {
        DocListAndSet results = rb.getResults();
        Map<SolrDocument, Integer> docIds = Maps.newHashMapWithExpectedSize(results.docList.size());
        SolrDocumentList solrDocList =
            SolrPluginUtils.docListToSolrDocumentList(
                results.docList, rb.req.getSearcher(), engine.getFieldsToLoad(rb.req), docIds);
        Object clusters = engine.cluster(rb.getQuery(), solrDocList, docIds, rb.req);
        rb.rsp.add("clusters", clusters);
      } else {
        log.warn("No engine for: " + name);
      }
    }
    boolean useCollection = params.getBool(ClusteringParams.USE_COLLECTION, false);
    if (useCollection == true) {
      DocumentClusteringEngine engine = documentClusteringEngines.get(name);
      if (engine != null) {
        boolean useDocSet = params.getBool(ClusteringParams.USE_DOC_SET, false);
        NamedList<?> nl = null;

        // TODO: This likely needs to be made into a background task that runs in an executor
        if (useDocSet == true) {
          nl = engine.cluster(rb.getResults().docSet, params);
        } else {
          nl = engine.cluster(params);
        }
        rb.rsp.add("clusters", nl);
      } else {
        log.warn("No engine for " + name);
      }
    }
  }
  @Override
  public void finishStage(ResponseBuilder rb) {
    if (rb.doHighlights && rb.stage == ResponseBuilder.STAGE_GET_FIELDS) {

      Map.Entry<String, Object>[] arr = new NamedList.NamedListEntry[rb.resultIds.size()];

      // TODO: make a generic routine to do automatic merging of id keyed data
      for (ShardRequest sreq : rb.finished) {
        if ((sreq.purpose & ShardRequest.PURPOSE_GET_HIGHLIGHTS) == 0) continue;
        for (ShardResponse srsp : sreq.responses) {
          NamedList hl = (NamedList) srsp.getSolrResponse().getResponse().get("highlighting");
          for (int i = 0; i < hl.size(); i++) {
            String id = hl.getName(i);
            ShardDoc sdoc = rb.resultIds.get(id);
            int idx = sdoc.positionInResponse;
            arr[idx] = new NamedList.NamedListEntry<Object>(id, hl.getVal(i));
          }
        }
      }

      // remove nulls in case not all docs were able to be retrieved
      rb.rsp.add("highlighting", SolrPluginUtils.removeNulls(new SimpleOrderedMap(arr)));
    }
  }
    public SimpleQParser(
        String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {

      super(qstr, localParams, params, req);
      // Some of the parameters may come in through localParams, so combine them with params.
      SolrParams defaultParams = SolrParams.wrapDefaults(localParams, params);

      // This will be used to specify what fields and boosts will be used by SimpleQueryParser.
      Map<String, Float> queryFields =
          SolrPluginUtils.parseFieldBoosts(defaultParams.get(SimpleParams.QF));

      if (queryFields.isEmpty()) {
        // It qf is not specified setup up the queryFields map to use the defaultField.
        String defaultField =
            QueryParsing.getDefaultField(req.getSchema(), defaultParams.get(CommonParams.DF));

        if (defaultField == null) {
          // A query cannot be run without having a field or set of fields to run against.
          throw new IllegalStateException(
              "Neither "
                  + SimpleParams.QF
                  + ", "
                  + CommonParams.DF
                  + ", nor the default search field are present.");
        }

        queryFields.put(defaultField, 1.0F);
      } else {
        for (Map.Entry<String, Float> queryField : queryFields.entrySet()) {
          if (queryField.getValue() == null) {
            // Some fields may be specified without a boost, so default the boost to 1.0 since a
            // null value
            // will not be accepted by SimpleQueryParser.
            queryField.setValue(1.0F);
          }
        }
      }

      // Setup the operations that are enabled for the query.
      int enabledOps = 0;
      String opParam = defaultParams.get(SimpleParams.QO);

      if (opParam == null) {
        // All operations will be enabled.
        enabledOps = -1;
      } else {
        // Parse the specified enabled operations to be used by the query.
        String[] operations = opParam.split(",");

        for (String operation : operations) {
          Integer enabledOp = OPERATORS.get(operation.trim().toUpperCase(Locale.ROOT));

          if (enabledOp != null) {
            enabledOps |= enabledOp;
          }
        }
      }

      // Create a SimpleQueryParser using the analyzer from the schema.
      final IndexSchema schema = req.getSchema();
      parser =
          new SolrSimpleQueryParser(
              req.getSchema().getQueryAnalyzer(), queryFields, enabledOps, this, schema);

      // Set the default operator to be either 'AND' or 'OR' for the query.
      QueryParser.Operator defaultOp =
          QueryParsing.getQueryParserDefaultOperator(
              req.getSchema(), defaultParams.get(QueryParsing.OP));

      if (defaultOp == QueryParser.Operator.AND) {
        parser.setDefaultOperator(BooleanClause.Occur.MUST);
      }
    }
Exemplo n.º 9
0
  @Override
  public void prepare(ResponseBuilder rb) throws IOException {

    SolrQueryRequest req = rb.req;
    SolrParams params = req.getParams();
    if (!params.getBool(COMPONENT_NAME, true)) {
      return;
    }
    SolrQueryResponse rsp = rb.rsp;

    // Set field flags
    String fl = params.get(CommonParams.FL);
    int fieldFlags = 0;
    if (fl != null) {
      fieldFlags |= SolrPluginUtils.setReturnFields(fl, rsp);
    }
    rb.setFieldFlags(fieldFlags);

    String defType = params.get(QueryParsing.DEFTYPE, QParserPlugin.DEFAULT_QTYPE);

    // get it from the response builder to give a different component a chance
    // to set it.
    String queryString = rb.getQueryString();
    if (queryString == null) {
      // this is the normal way it's set.
      queryString = params.get(CommonParams.Q);
      rb.setQueryString(queryString);
    }

    try {
      QParser parser = QParser.getParser(rb.getQueryString(), defType, req);
      Query q = parser.getQuery();
      if (q == null) {
        // normalize a null query to a query that matches nothing
        q = new BooleanQuery();
      }
      rb.setQuery(q);
      rb.setSortSpec(parser.getSort(true));
      rb.setQparser(parser);

      String[] fqs = req.getParams().getParams(CommonParams.FQ);
      if (fqs != null && fqs.length != 0) {
        List<Query> filters = rb.getFilters();
        if (filters == null) {
          filters = new ArrayList<Query>();
          rb.setFilters(filters);
        }
        for (String fq : fqs) {
          if (fq != null && fq.trim().length() != 0) {
            QParser fqp = QParser.getParser(fq, null, req);
            filters.add(fqp.getQuery());
          }
        }
      }
    } catch (ParseException e) {
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
    }

    // TODO: temporary... this should go in a different component.
    String shards = params.get(ShardParams.SHARDS);
    if (shards != null) {
      List<String> lst = StrUtils.splitSmart(shards, ",", true);
      rb.shards = lst.toArray(new String[lst.size()]);
    }
    String shards_rows = params.get(ShardParams.SHARDS_ROWS);
    if (shards_rows != null) {
      rb.shards_rows = Integer.parseInt(shards_rows);
    }
    String shards_start = params.get(ShardParams.SHARDS_START);
    if (shards_start != null) {
      rb.shards_start = Integer.parseInt(shards_start);
    }
  }
  @Override
  public void process(ResponseBuilder rb) throws IOException {
    SolrParams params = rb.req.getParams();
    if (!params.getBool(COMPONENT_NAME, false)) {
      return;
    }

    NamedList termVectors = new NamedList();
    rb.rsp.add(TERM_VECTORS, termVectors);
    FieldOptions allFields = new FieldOptions();
    // figure out what options we have, and try to get the appropriate vector
    allFields.termFreq = params.getBool(TermVectorParams.TF, false);
    allFields.positions = params.getBool(TermVectorParams.POSITIONS, false);
    allFields.offsets = params.getBool(TermVectorParams.OFFSETS, false);
    allFields.docFreq = params.getBool(TermVectorParams.DF, false);
    allFields.tfIdf = params.getBool(TermVectorParams.TF_IDF, false);
    // boolean cacheIdf = params.getBool(TermVectorParams.IDF, false);
    // short cut to all values.
    boolean all = params.getBool(TermVectorParams.ALL, false);
    if (all == true) {
      allFields.termFreq = true;
      allFields.positions = true;
      allFields.offsets = true;
      allFields.docFreq = true;
      allFields.tfIdf = true;
    }

    String fldLst = params.get(TermVectorParams.FIELDS);
    if (fldLst == null) {
      fldLst = params.get(CommonParams.FL);
    }

    // use this to validate our fields
    IndexSchema schema = rb.req.getSchema();
    // Build up our per field mapping
    Map<String, FieldOptions> fieldOptions = new HashMap<String, FieldOptions>();
    NamedList warnings = new NamedList();
    List<String> noTV = new ArrayList<String>();
    List<String> noPos = new ArrayList<String>();
    List<String> noOff = new ArrayList<String>();

    // we have specific fields to retrieve
    if (fldLst != null) {
      String[] fields = SolrPluginUtils.split(fldLst);
      for (String field : fields) {
        SchemaField sf = schema.getFieldOrNull(field);
        if (sf != null) {
          if (sf.storeTermVector()) {
            FieldOptions option = fieldOptions.get(field);
            if (option == null) {
              option = new FieldOptions();
              option.fieldName = field;
              fieldOptions.put(field, option);
            }
            // get the per field mappings
            option.termFreq = params.getFieldBool(field, TermVectorParams.TF, allFields.termFreq);
            option.docFreq = params.getFieldBool(field, TermVectorParams.DF, allFields.docFreq);
            option.tfIdf = params.getFieldBool(field, TermVectorParams.TF_IDF, allFields.tfIdf);
            // Validate these are even an option
            option.positions =
                params.getFieldBool(field, TermVectorParams.POSITIONS, allFields.positions);
            if (option.positions == true && sf.storeTermPositions() == false) {
              noPos.add(field);
            }
            option.offsets =
                params.getFieldBool(field, TermVectorParams.OFFSETS, allFields.offsets);
            if (option.offsets == true && sf.storeTermOffsets() == false) {
              noOff.add(field);
            }
          } else { // field doesn't have term vectors
            noTV.add(field);
          }
        } else {
          // field doesn't exist
          throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "undefined field: " + field);
        }
      }
    } // else, deal with all fields
    boolean hasWarnings = false;
    if (noTV.isEmpty() == false) {
      warnings.add("noTermVectors", noTV);
      hasWarnings = true;
    }
    if (noPos.isEmpty() == false) {
      warnings.add("noPositions", noPos);
      hasWarnings = true;
    }
    if (noOff.isEmpty() == false) {
      warnings.add("noOffsets", noOff);
      hasWarnings = true;
    }
    if (hasWarnings == true) {
      termVectors.add("warnings", warnings);
    }

    DocListAndSet listAndSet = rb.getResults();
    List<Integer> docIds = getInts(params.getParams(TermVectorParams.DOC_IDS));
    Iterator<Integer> iter;
    if (docIds != null && docIds.isEmpty() == false) {
      iter = docIds.iterator();
    } else {
      DocList list = listAndSet.docList;
      iter = list.iterator();
    }
    SolrIndexSearcher searcher = rb.req.getSearcher();

    IndexReader reader = searcher.getReader();
    // the TVMapper is a TermVectorMapper which can be used to optimize loading of Term Vectors
    SchemaField keyField = schema.getUniqueKeyField();
    String uniqFieldName = null;
    if (keyField != null) {
      uniqFieldName = keyField.getName();
    }
    // Only load the id field to get the uniqueKey of that field
    SetBasedFieldSelector fieldSelector =
        new SetBasedFieldSelector(
            Collections.singleton(uniqFieldName), Collections.<String>emptySet());
    TVMapper mapper = new TVMapper(reader);
    mapper.fieldOptions =
        allFields; // this will only stay set if fieldOptions.isEmpty() (in other words, only if the
                   // user didn't set any fields)
    while (iter.hasNext()) {
      Integer docId = iter.next();
      NamedList docNL = new NamedList();
      mapper.docNL = docNL;
      termVectors.add("doc-" + docId, docNL);

      if (keyField != null) {
        Document document = reader.document(docId, fieldSelector);
        Fieldable uniqId = document.getFieldable(uniqFieldName);
        String uniqVal = null;
        if (uniqId != null) {
          uniqVal = keyField.getType().storedToReadable(uniqId);
        }
        if (uniqVal != null) {
          docNL.add("uniqueKey", uniqVal);
          termVectors.add("uniqueKeyFieldName", uniqFieldName);
        }
      }
      if (fieldOptions.isEmpty() == false) {
        for (Map.Entry<String, FieldOptions> entry : fieldOptions.entrySet()) {
          mapper.fieldOptions = entry.getValue();
          reader.getTermFreqVector(docId, entry.getKey(), mapper);
        }
      } else {
        // deal with all fields by using the allFieldMapper
        reader.getTermFreqVector(docId, mapper);
      }
    }
  }