public void build(SolrCore core, SolrIndexSearcher searcher) { IndexReader reader = null; try { if (sourceLocation == null) { // Load from Solr's index reader = searcher.getReader(); } else { // Load from Lucene index at given sourceLocation reader = this.reader; } // Create the dictionary dictionary = new HighFrequencyDictionary(reader, field, threshold); spellChecker.clearIndex(); spellChecker.indexDictionary(dictionary); } catch (IOException e) { throw new RuntimeException(e); } }
protected void doFieldSortValues(ResponseBuilder rb, SolrIndexSearcher searcher) throws IOException { SolrQueryRequest req = rb.req; SolrQueryResponse rsp = rb.rsp; // The query cache doesn't currently store sort field values, and SolrIndexSearcher doesn't // currently have an option to return sort field values. Because of this, we // take the documents given and re-derive the sort values. boolean fsv = req.getParams().getBool(ResponseBuilder.FIELD_SORT_VALUES, false); if (fsv) { Sort sort = rb.getSortSpec().getSort(); SortField[] sortFields = sort == null ? new SortField[] {SortField.FIELD_SCORE} : sort.getSort(); NamedList sortVals = new NamedList(); // order is important for the sort fields Field field = new Field("dummy", "", Field.Store.YES, Field.Index.NO); // a dummy Field SolrIndexReader reader = searcher.getReader(); SolrIndexReader[] readers = reader.getLeafReaders(); SolrIndexReader subReader = reader; if (readers.length == 1) { // if there is a single segment, use that subReader and avoid looking up each time subReader = readers[0]; readers = null; } int[] offsets = reader.getLeafOffsets(); for (SortField sortField : sortFields) { int type = sortField.getType(); if (type == SortField.SCORE || type == SortField.DOC) continue; FieldComparator comparator = null; FieldComparator comparators[] = (readers == null) ? null : new FieldComparator[readers.length]; String fieldname = sortField.getField(); FieldType ft = fieldname == null ? null : req.getSchema().getFieldTypeNoEx(fieldname); DocList docList = rb.getResults().docList; ArrayList<Object> vals = new ArrayList<Object>(docList.size()); DocIterator it = rb.getResults().docList.iterator(); int offset = 0; int idx = 0; while (it.hasNext()) { int doc = it.nextDoc(); if (readers != null) { idx = SolrIndexReader.readerIndex(doc, offsets); subReader = readers[idx]; offset = offsets[idx]; comparator = comparators[idx]; } if (comparator == null) { comparator = sortField.getComparator(1, 0); comparator = comparator.setNextReader(subReader, offset); if (comparators != null) comparators[idx] = comparator; } doc -= offset; // adjust for what segment this is in comparator.copy(0, doc); Object val = comparator.value(0); // Sortable float, double, int, long types all just use a string // comparator. For these, we need to put the type into a readable // format. One reason for this is that XML can't represent all // string values (or even all unicode code points). // indexedToReadable() should be a no-op and should // thus be harmless anyway (for all current ways anyway) if (val instanceof String) { field.setValue((String) val); val = ft.toObject(field); } // Must do the same conversion when sorting by a // String field in Lucene, which returns the terms // data as BytesRef: if (val instanceof BytesRef) { field.setValue(((BytesRef) val).utf8ToString()); val = ft.toObject(field); } vals.add(val); } sortVals.add(fieldname, vals); } rsp.add("sort_values", sortVals); } }
@Override public void process(ResponseBuilder rb) throws IOException { SolrParams params = rb.req.getParams(); if (!params.getBool(COMPONENT_NAME, false)) { return; } NamedList termVectors = new NamedList(); rb.rsp.add(TERM_VECTORS, termVectors); FieldOptions allFields = new FieldOptions(); // figure out what options we have, and try to get the appropriate vector allFields.termFreq = params.getBool(TermVectorParams.TF, false); allFields.positions = params.getBool(TermVectorParams.POSITIONS, false); allFields.offsets = params.getBool(TermVectorParams.OFFSETS, false); allFields.docFreq = params.getBool(TermVectorParams.DF, false); allFields.tfIdf = params.getBool(TermVectorParams.TF_IDF, false); // boolean cacheIdf = params.getBool(TermVectorParams.IDF, false); // short cut to all values. boolean all = params.getBool(TermVectorParams.ALL, false); if (all == true) { allFields.termFreq = true; allFields.positions = true; allFields.offsets = true; allFields.docFreq = true; allFields.tfIdf = true; } String fldLst = params.get(TermVectorParams.FIELDS); if (fldLst == null) { fldLst = params.get(CommonParams.FL); } // use this to validate our fields IndexSchema schema = rb.req.getSchema(); // Build up our per field mapping Map<String, FieldOptions> fieldOptions = new HashMap<String, FieldOptions>(); NamedList warnings = new NamedList(); List<String> noTV = new ArrayList<String>(); List<String> noPos = new ArrayList<String>(); List<String> noOff = new ArrayList<String>(); // we have specific fields to retrieve if (fldLst != null) { String[] fields = SolrPluginUtils.split(fldLst); for (String field : fields) { SchemaField sf = schema.getFieldOrNull(field); if (sf != null) { if (sf.storeTermVector()) { FieldOptions option = fieldOptions.get(field); if (option == null) { option = new FieldOptions(); option.fieldName = field; fieldOptions.put(field, option); } // get the per field mappings option.termFreq = params.getFieldBool(field, TermVectorParams.TF, allFields.termFreq); option.docFreq = params.getFieldBool(field, TermVectorParams.DF, allFields.docFreq); option.tfIdf = params.getFieldBool(field, TermVectorParams.TF_IDF, allFields.tfIdf); // Validate these are even an option option.positions = params.getFieldBool(field, TermVectorParams.POSITIONS, allFields.positions); if (option.positions == true && sf.storeTermPositions() == false) { noPos.add(field); } option.offsets = params.getFieldBool(field, TermVectorParams.OFFSETS, allFields.offsets); if (option.offsets == true && sf.storeTermOffsets() == false) { noOff.add(field); } } else { // field doesn't have term vectors noTV.add(field); } } else { // field doesn't exist throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "undefined field: " + field); } } } // else, deal with all fields boolean hasWarnings = false; if (noTV.isEmpty() == false) { warnings.add("noTermVectors", noTV); hasWarnings = true; } if (noPos.isEmpty() == false) { warnings.add("noPositions", noPos); hasWarnings = true; } if (noOff.isEmpty() == false) { warnings.add("noOffsets", noOff); hasWarnings = true; } if (hasWarnings == true) { termVectors.add("warnings", warnings); } DocListAndSet listAndSet = rb.getResults(); List<Integer> docIds = getInts(params.getParams(TermVectorParams.DOC_IDS)); Iterator<Integer> iter; if (docIds != null && docIds.isEmpty() == false) { iter = docIds.iterator(); } else { DocList list = listAndSet.docList; iter = list.iterator(); } SolrIndexSearcher searcher = rb.req.getSearcher(); IndexReader reader = searcher.getReader(); // the TVMapper is a TermVectorMapper which can be used to optimize loading of Term Vectors SchemaField keyField = schema.getUniqueKeyField(); String uniqFieldName = null; if (keyField != null) { uniqFieldName = keyField.getName(); } // Only load the id field to get the uniqueKey of that field SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector( Collections.singleton(uniqFieldName), Collections.<String>emptySet()); TVMapper mapper = new TVMapper(reader); mapper.fieldOptions = allFields; // this will only stay set if fieldOptions.isEmpty() (in other words, only if the // user didn't set any fields) while (iter.hasNext()) { Integer docId = iter.next(); NamedList docNL = new NamedList(); mapper.docNL = docNL; termVectors.add("doc-" + docId, docNL); if (keyField != null) { Document document = reader.document(docId, fieldSelector); Fieldable uniqId = document.getFieldable(uniqFieldName); String uniqVal = null; if (uniqId != null) { uniqVal = keyField.getType().storedToReadable(uniqId); } if (uniqVal != null) { docNL.add("uniqueKey", uniqVal); termVectors.add("uniqueKeyFieldName", uniqFieldName); } } if (fieldOptions.isEmpty() == false) { for (Map.Entry<String, FieldOptions> entry : fieldOptions.entrySet()) { mapper.fieldOptions = entry.getValue(); reader.getTermFreqVector(docId, entry.getKey(), mapper); } } else { // deal with all fields by using the allFieldMapper reader.getTermFreqVector(docId, mapper); } } }
@Override public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception { IndexSchema schema = req.getSchema(); SolrIndexSearcher searcher = req.getSearcher(); IndexReader reader = searcher.getReader(); SolrParams params = req.getParams(); int numTerms = params.getInt(NUMTERMS, DEFAULT_COUNT); // Always show the core lucene info rsp.add("index", getIndexInfo(reader, numTerms > 0)); Integer docId = params.getInt(DOC_ID); if (docId == null && params.get(ID) != null) { // Look for something with a given solr ID SchemaField uniqueKey = schema.getUniqueKeyField(); String v = uniqueKey.getType().toInternal(params.get(ID)); Term t = new Term(uniqueKey.getName(), v); docId = searcher.getFirstMatch(t); if (docId < 0) { throw new SolrException( SolrException.ErrorCode.NOT_FOUND, "Can't find document: " + params.get(ID)); } } // Read the document from the index if (docId != null) { Document doc = null; try { doc = reader.document(docId); } catch (Exception ex) { } if (doc == null) { throw new SolrException(SolrException.ErrorCode.NOT_FOUND, "Can't find document: " + docId); } SimpleOrderedMap<Object> info = getDocumentFieldsInfo(doc, docId, reader, schema); SimpleOrderedMap<Object> docinfo = new SimpleOrderedMap<Object>(); docinfo.add("docId", docId); docinfo.add("lucene", info); docinfo.add("solr", doc); rsp.add("doc", docinfo); } else if ("schema".equals(params.get("show"))) { rsp.add("schema", getSchemaInfo(req.getSchema())); } else { // If no doc is given, show all fields and top terms Set<String> fields = null; if (params.get(CommonParams.FL) != null) { fields = new HashSet<String>(); for (String f : params.getParams(CommonParams.FL)) { fields.add(f); } } rsp.add("fields", getIndexedFieldsInfo(searcher, fields, numTerms)); } // Add some generally helpful information NamedList<Object> info = new SimpleOrderedMap<Object>(); info.add("key", getFieldFlagsKey()); info.add( "NOTE", "Document Frequency (df) is not updated when a document is marked for deletion. df values include deleted documents."); rsp.add("info", info); rsp.setHttpCaching(false); }
@SuppressWarnings("unchecked") private static SimpleOrderedMap<Object> getIndexedFieldsInfo( final SolrIndexSearcher searcher, final Set<String> fields, final int numTerms) throws Exception { IndexReader reader = searcher.getReader(); IndexSchema schema = searcher.getSchema(); // Walk the term enum and keep a priority queue for each map in our set Map<String, TopTermQueue> ttinfo = null; if (numTerms > 0) { ttinfo = getTopTerms(reader, fields, numTerms, null); } SimpleOrderedMap<Object> finfo = new SimpleOrderedMap<Object>(); Collection<String> fieldNames = reader.getFieldNames(IndexReader.FieldOption.ALL); for (String fieldName : fieldNames) { if (fields != null && !fields.contains(fieldName)) { continue; // if a field is specified, only them } SimpleOrderedMap<Object> f = new SimpleOrderedMap<Object>(); SchemaField sfield = schema.getFieldOrNull(fieldName); FieldType ftype = (sfield == null) ? null : sfield.getType(); f.add("type", (ftype == null) ? null : ftype.getTypeName()); f.add("schema", getFieldFlags(sfield)); if (sfield != null && schema.isDynamicField(sfield.getName()) && schema.getDynamicPattern(sfield.getName()) != null) { f.add("dynamicBase", schema.getDynamicPattern(sfield.getName())); } // If numTerms==0, the call is just asking for a quick field list if (ttinfo != null && sfield != null && sfield.indexed()) { Query q = new TermRangeQuery(fieldName, null, null, false, false); TopDocs top = searcher.search(q, 1); if (top.totalHits > 0) { // Find a document with this field try { Document doc = searcher.doc(top.scoreDocs[0].doc); Fieldable fld = doc.getFieldable(fieldName); if (fld != null) { f.add("index", getFieldFlags(fld)); } else { // it is a non-stored field... f.add("index", "(unstored field)"); } } catch (Exception ex) { log.warn("error reading field: " + fieldName); } } f.add("docs", top.totalHits); TopTermQueue topTerms = ttinfo.get(fieldName); if (topTerms != null) { f.add("distinct", topTerms.distinctTerms); // Include top terms f.add("topTerms", topTerms.toNamedList(searcher.getSchema())); // Add a histogram f.add("histogram", topTerms.histogram.toNamedList()); } } // Add the field finfo.add(fieldName, f); } return finfo; }