@Override public void process(ResponseBuilder rb) throws IOException { SolrParams params = rb.req.getParams(); if (params.getBool(TermsParams.TERMS, false)) { String lowerStr = params.get(TermsParams.TERMS_LOWER, null); String[] fields = params.getParams(TermsParams.TERMS_FIELD); if (fields != null && fields.length > 0) { NamedList terms = new SimpleOrderedMap(); rb.rsp.add("terms", terms); int limit = params.getInt(TermsParams.TERMS_LIMIT, 10); if (limit < 0) { limit = Integer.MAX_VALUE; } String upperStr = params.get(TermsParams.TERMS_UPPER); boolean upperIncl = params.getBool(TermsParams.TERMS_UPPER_INCLUSIVE, false); boolean lowerIncl = params.getBool(TermsParams.TERMS_LOWER_INCLUSIVE, true); boolean sort = !TermsParams.TERMS_SORT_INDEX.equals( params.get(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_COUNT)); int freqmin = params.getInt(TermsParams.TERMS_MINCOUNT, 1); // initialize freqmin int freqmax = params.getInt(TermsParams.TERMS_MAXCOUNT, UNLIMITED_MAX_COUNT); // initialize freqmax if (freqmax < 0) { freqmax = Integer.MAX_VALUE; } String prefix = params.get(TermsParams.TERMS_PREFIX_STR); String regexp = params.get(TermsParams.TERMS_REGEXP_STR); Pattern pattern = regexp != null ? Pattern.compile(regexp, resolveRegexpFlags(params)) : null; boolean raw = params.getBool(TermsParams.TERMS_RAW, false); for (int j = 0; j < fields.length; j++) { String field = StringHelper.intern(fields[j]); FieldType ft = raw ? null : rb.req.getSchema().getFieldTypeNoEx(field); if (ft == null) ft = new StrField(); // If no lower bound was specified, use the prefix String lower = lowerStr == null ? prefix : (raw ? lowerStr : ft.toInternal(lowerStr)); if (lower == null) lower = ""; String upper = upperStr == null ? null : (raw ? upperStr : ft.toInternal(upperStr)); Term lowerTerm = new Term(field, lower); Term upperTerm = upper == null ? null : new Term(field, upper); TermEnum termEnum = rb.req .getSearcher() .getReader() .terms(lowerTerm); // this will be positioned ready to go int i = 0; BoundedTreeSet<CountPair<String, Integer>> queue = (sort ? new BoundedTreeSet<CountPair<String, Integer>>(limit) : null); NamedList fieldTerms = new NamedList(); terms.add(field, fieldTerms); Term lowerTestTerm = termEnum.term(); // Only advance the enum if we are excluding the lower bound and the lower Term actually // matches if (lowerTestTerm != null && lowerIncl == false && lowerTestTerm.field() == field // intern'd comparison && lowerTestTerm.text().equals(lower)) { termEnum.next(); } while (i < limit || sort) { Term theTerm = termEnum.term(); // check for a different field, or the end of the index. if (theTerm == null || field != theTerm.field()) // intern'd comparison break; String indexedText = theTerm.text(); // stop if the prefix doesn't match if (prefix != null && !indexedText.startsWith(prefix)) break; if (pattern != null && !pattern.matcher(indexedText).matches()) { termEnum.next(); continue; } if (upperTerm != null) { int upperCmp = theTerm.compareTo(upperTerm); // if we are past the upper term, or equal to it (when don't include upper) then stop. if (upperCmp > 0 || (upperCmp == 0 && !upperIncl)) break; } // This is a good term in the range. Check if mincount/maxcount conditions are // satisfied. int docFreq = termEnum.docFreq(); if (docFreq >= freqmin && docFreq <= freqmax) { // add the term to the list String label = raw ? indexedText : ft.indexedToReadable(indexedText); if (sort) { queue.add(new CountPair<String, Integer>(label, docFreq)); } else { fieldTerms.add(label, docFreq); i++; } } termEnum.next(); } termEnum.close(); if (sort) { for (CountPair<String, Integer> item : queue) { if (i < limit) { fieldTerms.add(item.key, item.val); i++; } else { break; } } } } } else { throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "No terms.fl parameter specified"); } } }
BloomFilterLoader(IndexReader reader, String field) { this.reader = reader; this.field = StringHelper.intern(field); }
public JSONWriter(Writer writer, SolrQueryRequest req, SolrQueryResponse rsp) { super(writer, req, rsp); namedListStyle = StringHelper.intern(req.getParams().get(JSON_NL_STYLE, JSON_NL_FLAT)); wrapperFunction = req.getParams().get(JSON_WRAPPER_FUNCTION); }
private static float[] getFloats(FileFloatSource ffs, IndexReader reader) { float[] vals = new float[reader.maxDoc()]; if (ffs.defVal != 0) { Arrays.fill(vals, ffs.defVal); } InputStream is; String fname = "external_" + ffs.field.getName(); try { is = VersionedFile.getLatestFile(ffs.dataDir, fname); } catch (IOException e) { // log, use defaults SolrCore.log.error("Error opening external value source file: " + e); return vals; } BufferedReader r = new BufferedReader(new InputStreamReader(is)); String idName = StringHelper.intern(ffs.keyField.getName()); FieldType idType = ffs.keyField.getType(); boolean sorted = true; // assume sorted until we discover it's not // warning: lucene's termEnum.skipTo() is not optimized... it simply does a next() // because of this, simply ask the reader for a new termEnum rather than // trying to use skipTo() List<String> notFound = new ArrayList<String>(); int notFoundCount = 0; int otherErrors = 0; TermDocs termDocs = null; Term protoTerm = new Term(idName, ""); TermEnum termEnum = null; // Number of times to try termEnum.next() before resorting to skip int numTimesNext = 10; char delimiter = '='; String termVal; boolean hasNext = true; String prevKey = ""; String lastVal = "\uFFFF\uFFFF\uFFFF\uFFFF\uFFFF\uFFFF\uFFFF\uFFFF"; try { termDocs = reader.termDocs(); termEnum = reader.terms(protoTerm); Term t = termEnum.term(); if (t != null && t.field() == idName) { // intern'd comparison termVal = t.text(); } else { termVal = lastVal; } for (String line; (line = r.readLine()) != null; ) { int delimIndex = line.indexOf(delimiter); if (delimIndex < 0) continue; int endIndex = line.length(); /* EOLs should already be removed for BufferedReader.readLine() for(int endIndex = line.length();endIndex>delimIndex+1; endIndex--) { char ch = line.charAt(endIndex-1); if (ch!='\n' && ch!='\r') break; } */ String key = line.substring(0, delimIndex); String val = line.substring(delimIndex + 1, endIndex); String internalKey = idType.toInternal(key); float fval; try { fval = Float.parseFloat(val); } catch (Exception e) { if (++otherErrors <= 10) { SolrCore.log.error( "Error loading external value source + fileName + " + e + (otherErrors < 10 ? "" : "\tSkipping future errors for this file.")); } continue; // go to next line in file.. leave values as default. } if (sorted) { // make sure this key is greater than the previous key sorted = internalKey.compareTo(prevKey) >= 0; prevKey = internalKey; if (sorted) { int countNext = 0; for (; ; ) { int cmp = internalKey.compareTo(termVal); if (cmp == 0) { termDocs.seek(termEnum); while (termDocs.next()) { vals[termDocs.doc()] = fval; } break; } else if (cmp < 0) { // term enum has already advanced past current key... we didn't find it. if (notFoundCount < 10) { // collect first 10 not found for logging notFound.add(key); } notFoundCount++; break; } else { // termEnum is less than our current key, so skip ahead // try next() a few times to see if we hit or pass the target. // Lucene's termEnum.skipTo() is currently unoptimized (it just does next()) // so the best thing is to simply ask the reader for a new termEnum(target) // if we really need to skip. if (++countNext > numTimesNext) { termEnum = reader.terms(protoTerm.createTerm(internalKey)); t = termEnum.term(); } else { hasNext = termEnum.next(); t = hasNext ? termEnum.term() : null; } if (t != null && t.field() == idName) { // intern'd comparison termVal = t.text(); } else { termVal = lastVal; } } } // end for(;;) } } if (!sorted) { termEnum = reader.terms(protoTerm.createTerm(internalKey)); t = termEnum.term(); if (t != null && t.field() == idName // intern'd comparison && internalKey.equals(t.text())) { termDocs.seek(termEnum); while (termDocs.next()) { vals[termDocs.doc()] = fval; } } else { if (notFoundCount < 10) { // collect first 10 not found for logging notFound.add(key); } notFoundCount++; } } } } catch (IOException e) { // log, use defaults SolrCore.log.error("Error loading external value source: " + e); } finally { // swallow exceptions on close so we don't override any // exceptions that happened in the loop if (termDocs != null) try { termDocs.close(); } catch (Exception e) { } if (termEnum != null) try { termEnum.close(); } catch (Exception e) { } try { r.close(); } catch (Exception e) { } } SolrCore.log.info( "Loaded external value source " + fname + (notFoundCount == 0 ? "" : " :" + notFoundCount + " missing keys " + notFound)); return vals; }