public void build(SolrCore core, SolrIndexSearcher searcher) {
    IndexReader reader = null;
    try {
      if (sourceLocation == null) {
        // Load from Solr's index
        reader = searcher.getReader();
      } else {
        // Load from Lucene index at given sourceLocation
        reader = this.reader;
      }

      // Create the dictionary
      dictionary = new HighFrequencyDictionary(reader, field, threshold);
      spellChecker.clearIndex();
      spellChecker.indexDictionary(dictionary);

    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }
Ejemplo n.º 2
0
  protected void doFieldSortValues(ResponseBuilder rb, SolrIndexSearcher searcher)
      throws IOException {
    SolrQueryRequest req = rb.req;
    SolrQueryResponse rsp = rb.rsp;

    // The query cache doesn't currently store sort field values, and SolrIndexSearcher doesn't
    // currently have an option to return sort field values.  Because of this, we
    // take the documents given and re-derive the sort values.
    boolean fsv = req.getParams().getBool(ResponseBuilder.FIELD_SORT_VALUES, false);
    if (fsv) {
      Sort sort = rb.getSortSpec().getSort();
      SortField[] sortFields =
          sort == null ? new SortField[] {SortField.FIELD_SCORE} : sort.getSort();
      NamedList sortVals = new NamedList(); // order is important for the sort fields
      Field field = new Field("dummy", "", Field.Store.YES, Field.Index.NO); // a dummy Field

      SolrIndexReader reader = searcher.getReader();
      SolrIndexReader[] readers = reader.getLeafReaders();
      SolrIndexReader subReader = reader;
      if (readers.length == 1) {
        // if there is a single segment, use that subReader and avoid looking up each time
        subReader = readers[0];
        readers = null;
      }
      int[] offsets = reader.getLeafOffsets();

      for (SortField sortField : sortFields) {
        int type = sortField.getType();
        if (type == SortField.SCORE || type == SortField.DOC) continue;

        FieldComparator comparator = null;
        FieldComparator comparators[] =
            (readers == null) ? null : new FieldComparator[readers.length];

        String fieldname = sortField.getField();
        FieldType ft = fieldname == null ? null : req.getSchema().getFieldTypeNoEx(fieldname);

        DocList docList = rb.getResults().docList;
        ArrayList<Object> vals = new ArrayList<Object>(docList.size());
        DocIterator it = rb.getResults().docList.iterator();

        int offset = 0;
        int idx = 0;

        while (it.hasNext()) {
          int doc = it.nextDoc();
          if (readers != null) {
            idx = SolrIndexReader.readerIndex(doc, offsets);
            subReader = readers[idx];
            offset = offsets[idx];
            comparator = comparators[idx];
          }

          if (comparator == null) {
            comparator = sortField.getComparator(1, 0);
            comparator = comparator.setNextReader(subReader, offset);
            if (comparators != null) comparators[idx] = comparator;
          }

          doc -= offset; // adjust for what segment this is in
          comparator.copy(0, doc);
          Object val = comparator.value(0);

          // Sortable float, double, int, long types all just use a string
          // comparator. For these, we need to put the type into a readable
          // format.  One reason for this is that XML can't represent all
          // string values (or even all unicode code points).
          // indexedToReadable() should be a no-op and should
          // thus be harmless anyway (for all current ways anyway)
          if (val instanceof String) {
            field.setValue((String) val);
            val = ft.toObject(field);
          }

          // Must do the same conversion when sorting by a
          // String field in Lucene, which returns the terms
          // data as BytesRef:
          if (val instanceof BytesRef) {
            field.setValue(((BytesRef) val).utf8ToString());
            val = ft.toObject(field);
          }

          vals.add(val);
        }

        sortVals.add(fieldname, vals);
      }

      rsp.add("sort_values", sortVals);
    }
  }
  @Override
  public void process(ResponseBuilder rb) throws IOException {
    SolrParams params = rb.req.getParams();
    if (!params.getBool(COMPONENT_NAME, false)) {
      return;
    }

    NamedList termVectors = new NamedList();
    rb.rsp.add(TERM_VECTORS, termVectors);
    FieldOptions allFields = new FieldOptions();
    // figure out what options we have, and try to get the appropriate vector
    allFields.termFreq = params.getBool(TermVectorParams.TF, false);
    allFields.positions = params.getBool(TermVectorParams.POSITIONS, false);
    allFields.offsets = params.getBool(TermVectorParams.OFFSETS, false);
    allFields.docFreq = params.getBool(TermVectorParams.DF, false);
    allFields.tfIdf = params.getBool(TermVectorParams.TF_IDF, false);
    // boolean cacheIdf = params.getBool(TermVectorParams.IDF, false);
    // short cut to all values.
    boolean all = params.getBool(TermVectorParams.ALL, false);
    if (all == true) {
      allFields.termFreq = true;
      allFields.positions = true;
      allFields.offsets = true;
      allFields.docFreq = true;
      allFields.tfIdf = true;
    }

    String fldLst = params.get(TermVectorParams.FIELDS);
    if (fldLst == null) {
      fldLst = params.get(CommonParams.FL);
    }

    // use this to validate our fields
    IndexSchema schema = rb.req.getSchema();
    // Build up our per field mapping
    Map<String, FieldOptions> fieldOptions = new HashMap<String, FieldOptions>();
    NamedList warnings = new NamedList();
    List<String> noTV = new ArrayList<String>();
    List<String> noPos = new ArrayList<String>();
    List<String> noOff = new ArrayList<String>();

    // we have specific fields to retrieve
    if (fldLst != null) {
      String[] fields = SolrPluginUtils.split(fldLst);
      for (String field : fields) {
        SchemaField sf = schema.getFieldOrNull(field);
        if (sf != null) {
          if (sf.storeTermVector()) {
            FieldOptions option = fieldOptions.get(field);
            if (option == null) {
              option = new FieldOptions();
              option.fieldName = field;
              fieldOptions.put(field, option);
            }
            // get the per field mappings
            option.termFreq = params.getFieldBool(field, TermVectorParams.TF, allFields.termFreq);
            option.docFreq = params.getFieldBool(field, TermVectorParams.DF, allFields.docFreq);
            option.tfIdf = params.getFieldBool(field, TermVectorParams.TF_IDF, allFields.tfIdf);
            // Validate these are even an option
            option.positions =
                params.getFieldBool(field, TermVectorParams.POSITIONS, allFields.positions);
            if (option.positions == true && sf.storeTermPositions() == false) {
              noPos.add(field);
            }
            option.offsets =
                params.getFieldBool(field, TermVectorParams.OFFSETS, allFields.offsets);
            if (option.offsets == true && sf.storeTermOffsets() == false) {
              noOff.add(field);
            }
          } else { // field doesn't have term vectors
            noTV.add(field);
          }
        } else {
          // field doesn't exist
          throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "undefined field: " + field);
        }
      }
    } // else, deal with all fields
    boolean hasWarnings = false;
    if (noTV.isEmpty() == false) {
      warnings.add("noTermVectors", noTV);
      hasWarnings = true;
    }
    if (noPos.isEmpty() == false) {
      warnings.add("noPositions", noPos);
      hasWarnings = true;
    }
    if (noOff.isEmpty() == false) {
      warnings.add("noOffsets", noOff);
      hasWarnings = true;
    }
    if (hasWarnings == true) {
      termVectors.add("warnings", warnings);
    }

    DocListAndSet listAndSet = rb.getResults();
    List<Integer> docIds = getInts(params.getParams(TermVectorParams.DOC_IDS));
    Iterator<Integer> iter;
    if (docIds != null && docIds.isEmpty() == false) {
      iter = docIds.iterator();
    } else {
      DocList list = listAndSet.docList;
      iter = list.iterator();
    }
    SolrIndexSearcher searcher = rb.req.getSearcher();

    IndexReader reader = searcher.getReader();
    // the TVMapper is a TermVectorMapper which can be used to optimize loading of Term Vectors
    SchemaField keyField = schema.getUniqueKeyField();
    String uniqFieldName = null;
    if (keyField != null) {
      uniqFieldName = keyField.getName();
    }
    // Only load the id field to get the uniqueKey of that field
    SetBasedFieldSelector fieldSelector =
        new SetBasedFieldSelector(
            Collections.singleton(uniqFieldName), Collections.<String>emptySet());
    TVMapper mapper = new TVMapper(reader);
    mapper.fieldOptions =
        allFields; // this will only stay set if fieldOptions.isEmpty() (in other words, only if the
                   // user didn't set any fields)
    while (iter.hasNext()) {
      Integer docId = iter.next();
      NamedList docNL = new NamedList();
      mapper.docNL = docNL;
      termVectors.add("doc-" + docId, docNL);

      if (keyField != null) {
        Document document = reader.document(docId, fieldSelector);
        Fieldable uniqId = document.getFieldable(uniqFieldName);
        String uniqVal = null;
        if (uniqId != null) {
          uniqVal = keyField.getType().storedToReadable(uniqId);
        }
        if (uniqVal != null) {
          docNL.add("uniqueKey", uniqVal);
          termVectors.add("uniqueKeyFieldName", uniqFieldName);
        }
      }
      if (fieldOptions.isEmpty() == false) {
        for (Map.Entry<String, FieldOptions> entry : fieldOptions.entrySet()) {
          mapper.fieldOptions = entry.getValue();
          reader.getTermFreqVector(docId, entry.getKey(), mapper);
        }
      } else {
        // deal with all fields by using the allFieldMapper
        reader.getTermFreqVector(docId, mapper);
      }
    }
  }
Ejemplo n.º 4
0
  @Override
  public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
    IndexSchema schema = req.getSchema();
    SolrIndexSearcher searcher = req.getSearcher();
    IndexReader reader = searcher.getReader();
    SolrParams params = req.getParams();
    int numTerms = params.getInt(NUMTERMS, DEFAULT_COUNT);

    // Always show the core lucene info
    rsp.add("index", getIndexInfo(reader, numTerms > 0));

    Integer docId = params.getInt(DOC_ID);
    if (docId == null && params.get(ID) != null) {
      // Look for something with a given solr ID
      SchemaField uniqueKey = schema.getUniqueKeyField();
      String v = uniqueKey.getType().toInternal(params.get(ID));
      Term t = new Term(uniqueKey.getName(), v);
      docId = searcher.getFirstMatch(t);
      if (docId < 0) {
        throw new SolrException(
            SolrException.ErrorCode.NOT_FOUND, "Can't find document: " + params.get(ID));
      }
    }

    // Read the document from the index
    if (docId != null) {
      Document doc = null;
      try {
        doc = reader.document(docId);
      } catch (Exception ex) {
      }
      if (doc == null) {
        throw new SolrException(SolrException.ErrorCode.NOT_FOUND, "Can't find document: " + docId);
      }

      SimpleOrderedMap<Object> info = getDocumentFieldsInfo(doc, docId, reader, schema);

      SimpleOrderedMap<Object> docinfo = new SimpleOrderedMap<Object>();
      docinfo.add("docId", docId);
      docinfo.add("lucene", info);
      docinfo.add("solr", doc);
      rsp.add("doc", docinfo);
    } else if ("schema".equals(params.get("show"))) {
      rsp.add("schema", getSchemaInfo(req.getSchema()));
    } else {
      // If no doc is given, show all fields and top terms
      Set<String> fields = null;
      if (params.get(CommonParams.FL) != null) {
        fields = new HashSet<String>();
        for (String f : params.getParams(CommonParams.FL)) {
          fields.add(f);
        }
      }
      rsp.add("fields", getIndexedFieldsInfo(searcher, fields, numTerms));
    }

    // Add some generally helpful information
    NamedList<Object> info = new SimpleOrderedMap<Object>();
    info.add("key", getFieldFlagsKey());
    info.add(
        "NOTE",
        "Document Frequency (df) is not updated when a document is marked for deletion.  df values include deleted documents.");
    rsp.add("info", info);
    rsp.setHttpCaching(false);
  }
Ejemplo n.º 5
0
  @SuppressWarnings("unchecked")
  private static SimpleOrderedMap<Object> getIndexedFieldsInfo(
      final SolrIndexSearcher searcher, final Set<String> fields, final int numTerms)
      throws Exception {

    IndexReader reader = searcher.getReader();
    IndexSchema schema = searcher.getSchema();

    // Walk the term enum and keep a priority queue for each map in our set
    Map<String, TopTermQueue> ttinfo = null;
    if (numTerms > 0) {
      ttinfo = getTopTerms(reader, fields, numTerms, null);
    }
    SimpleOrderedMap<Object> finfo = new SimpleOrderedMap<Object>();
    Collection<String> fieldNames = reader.getFieldNames(IndexReader.FieldOption.ALL);
    for (String fieldName : fieldNames) {
      if (fields != null && !fields.contains(fieldName)) {
        continue; // if a field is specified, only them
      }

      SimpleOrderedMap<Object> f = new SimpleOrderedMap<Object>();

      SchemaField sfield = schema.getFieldOrNull(fieldName);
      FieldType ftype = (sfield == null) ? null : sfield.getType();

      f.add("type", (ftype == null) ? null : ftype.getTypeName());
      f.add("schema", getFieldFlags(sfield));
      if (sfield != null
          && schema.isDynamicField(sfield.getName())
          && schema.getDynamicPattern(sfield.getName()) != null) {
        f.add("dynamicBase", schema.getDynamicPattern(sfield.getName()));
      }

      // If numTerms==0, the call is just asking for a quick field list
      if (ttinfo != null && sfield != null && sfield.indexed()) {
        Query q = new TermRangeQuery(fieldName, null, null, false, false);
        TopDocs top = searcher.search(q, 1);
        if (top.totalHits > 0) {
          // Find a document with this field
          try {
            Document doc = searcher.doc(top.scoreDocs[0].doc);
            Fieldable fld = doc.getFieldable(fieldName);
            if (fld != null) {
              f.add("index", getFieldFlags(fld));
            } else {
              // it is a non-stored field...
              f.add("index", "(unstored field)");
            }
          } catch (Exception ex) {
            log.warn("error reading field: " + fieldName);
          }
        }
        f.add("docs", top.totalHits);

        TopTermQueue topTerms = ttinfo.get(fieldName);
        if (topTerms != null) {
          f.add("distinct", topTerms.distinctTerms);

          // Include top terms
          f.add("topTerms", topTerms.toNamedList(searcher.getSchema()));

          // Add a histogram
          f.add("histogram", topTerms.histogram.toNamedList());
        }
      }

      // Add the field
      finfo.add(fieldName, f);
    }
    return finfo;
  }