public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
  SolrParams params = req.getParams();
  params = adjustParams(params);
  req.setParams(params);
  TupleStream tupleStream = null;

  try {
    tupleStream = this.streamFactory.constructStream(params.get("expr"));
  } catch (Exception e) {
    // Catch exceptions that occur while the stream is being created. This includes
    // streaming expression parse errors.
    SolrException.log(logger, e);
    rsp.add("result-set", new DummyErrorStream(e));
    return;
  }

  int worker = params.getInt("workerID", 0);
  int numWorkers = params.getInt("numWorkers", 1);
  StreamContext context = new StreamContext();
  context.workerID = worker;
  context.numWorkers = numWorkers;
  context.setSolrClientCache(clientCache);
  tupleStream.setStreamContext(context);
  rsp.add("result-set", new TimerStream(new ExceptionStream(tupleStream)));
}
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
  SolrParams params = req.getParams();
  params = adjustParams(params);
  req.setParams(params);

  if (params.get("action") != null) {
    handleAdmin(req, rsp, params);
    return;
  }

  TupleStream tupleStream;

  try {
    tupleStream = this.streamFactory.constructStream(params.get("expr"));
  } catch (Exception e) {
    // Catch exceptions that occur while the stream is being created. This includes
    // streaming expression parse errors.
    SolrException.log(logger, e);
    rsp.add("result-set", new DummyErrorStream(e));
    return;
  }

  int worker = params.getInt("workerID", 0);
  int numWorkers = params.getInt("numWorkers", 1);
  StreamContext context = new StreamContext();
  context.workerID = worker;
  context.numWorkers = numWorkers;
  context.setSolrClientCache(clientCache);
  context.setModelCache(modelCache);
  context.put("core", this.coreName);
  context.put("solr-core", req.getCore());
  tupleStream.setStreamContext(context);

  // If an explanation was requested, attach it to the response.
  if (params.getBool("explain", false)) {
    rsp.add("explanation", tupleStream.toExplanation(this.streamFactory));
  }

  if (tupleStream instanceof DaemonStream) {
    DaemonStream daemonStream = (DaemonStream) tupleStream;
    if (daemons.containsKey(daemonStream.getId())) {
      daemons.remove(daemonStream.getId()).close();
    }
    daemonStream.setDaemons(daemons);
    daemonStream.open(); // This will start the daemonStream
    daemons.put(daemonStream.getId(), daemonStream);
    rsp.add(
        "result-set",
        new DaemonResponseStream("Daemon:" + daemonStream.getId() + " started on " + coreName));
  } else {
    rsp.add("result-set", new TimerStream(new ExceptionStream(tupleStream)));
  }
}
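// A minimal client-side sketch of how the streaming handler above might be invoked,
// assuming SolrJ's SolrStream API. The base URL, collection name, and the expression
// itself are hypothetical placeholders, not values taken from the handler.
import java.io.IOException;

import org.apache.solr.client.solrj.io.Tuple;
import org.apache.solr.client.solrj.io.stream.SolrStream;
import org.apache.solr.common.params.ModifiableSolrParams;

public class StreamClientSketch {
  public static void main(String[] args) throws IOException {
    ModifiableSolrParams params = new ModifiableSolrParams();
    // "expr" is the same parameter the handler reads via params.get("expr").
    params.set("expr", "search(collection1, q=\"*:*\", fl=\"id\", sort=\"id asc\")");
    params.set("qt", "/stream");

    SolrStream stream = new SolrStream("http://localhost:8983/solr/collection1", params);
    try {
      stream.open();
      Tuple tuple = stream.read();
      while (!tuple.EOF) { // the result-set is terminated by an EOF tuple
        System.out.println(tuple.getString("id"));
        tuple = stream.read();
      }
    } finally {
      stream.close();
    }
  }
}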
@Override
public XMLLoader init(SolrParams args) {
  // Init StAX parser:
  inputFactory = XMLInputFactory.newInstance();
  EmptyEntityResolver.configureXMLInputFactory(inputFactory);
  inputFactory.setXMLReporter(xmllog);
  try {
    // The java 1.6 bundled stax parser (sjsxp) does not currently have a thread-safe
    // XMLInputFactory, as that implementation tries to cache and reuse the
    // XMLStreamReader. Setting the parser-specific "reuse-instance" property to false
    // prevents this.
    // All other known open-source stax parsers (and the bea ref impl)
    // have thread-safe factories.
    inputFactory.setProperty("reuse-instance", Boolean.FALSE);
  } catch (IllegalArgumentException ex) {
    // Other implementations will likely throw this exception since "reuse-instance"
    // is implementation-specific.
    log.debug("Unable to set the 'reuse-instance' property for the input chain: " + inputFactory);
  }

  // Init SAX parser (for XSL):
  saxFactory = SAXParserFactory.newInstance();
  saxFactory.setNamespaceAware(true); // XSL needs this!
  EmptyEntityResolver.configureSAXParserFactory(saxFactory);

  xsltCacheLifetimeSeconds = XSLT_CACHE_DEFAULT;
  if (args != null) {
    xsltCacheLifetimeSeconds = args.getInt(XSLT_CACHE_PARAM, XSLT_CACHE_DEFAULT);
    log.info("xsltCacheLifetimeSeconds=" + xsltCacheLifetimeSeconds);
  }
  return this;
}
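// A standalone sketch of the "reuse-instance" workaround used in init() above:
// the property is specific to the JDK-bundled SJSXP parser, so any other StAX
// implementation is expected to reject it with an IllegalArgumentException.
import javax.xml.stream.XMLInputFactory;

public class StaxFactorySketch {
  public static XMLInputFactory newThreadSafeFactory() {
    XMLInputFactory factory = XMLInputFactory.newInstance();
    try {
      // Prevent SJSXP from caching and reusing XMLStreamReader instances,
      // which would make the factory unsafe to share across threads.
      factory.setProperty("reuse-instance", Boolean.FALSE);
    } catch (IllegalArgumentException expectedForOtherImpls) {
      // Non-SJSXP parsers don't know this property; their factories are
      // already thread-safe, so the failure is safe to ignore.
    }
    return factory;
  }
}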
public void processGetVersions(ResponseBuilder rb) throws IOException {
  SolrQueryRequest req = rb.req;
  SolrQueryResponse rsp = rb.rsp;
  SolrParams params = req.getParams();

  if (!params.getBool(COMPONENT_NAME, true)) {
    return;
  }

  int nVersions = params.getInt("getVersions", -1);
  if (nVersions == -1) return;

  String sync = params.get("sync");
  if (sync != null) {
    processSync(rb, nVersions, sync);
    return;
  }

  UpdateLog ulog = req.getCore().getUpdateHandler().getUpdateLog();
  if (ulog == null) return;

  UpdateLog.RecentUpdates recentUpdates = ulog.getRecentUpdates();
  try {
    rb.rsp.add("versions", recentUpdates.getVersions(nVersions));
  } finally {
    recentUpdates.close(); // cache this somehow?
  }
}
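// A hedged sketch of the parameters processGetVersions() consumes. The parameter
// names "getVersions" and "sync" come straight from the method above; the replica
// URL in the sync example is a hypothetical placeholder.
import org.apache.solr.common.params.ModifiableSolrParams;

public class GetVersionsSketch {
  public static ModifiableSolrParams recentVersions() {
    ModifiableSolrParams params = new ModifiableSolrParams();
    params.set("getVersions", "100"); // return the 100 most recent update versions
    return params;
  }

  public static ModifiableSolrParams syncWithPeer() {
    ModifiableSolrParams params = new ModifiableSolrParams();
    params.set("getVersions", "100");
    // a non-null "sync" value routes the request into processSync() instead
    params.set("sync", "http://host:8983/solr/collection1_shard1_replica1/");
    return params;
  }
}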
/**
 * Generates a list of highlighted query fragments for each item in a list of documents, or
 * returns null if highlighting is disabled.
 *
 * @param docs query results
 * @param query the query
 * @param req the current request
 * @param defaultFields default list of fields to summarize
 * @return NamedList containing a NamedList for each document, which in turn contains
 *     (field, summary) pairs.
 */
@Override
@SuppressWarnings("unchecked")
public NamedList<Object> doHighlighting(
    DocList docs, Query query, SolrQueryRequest req, String[] defaultFields) throws IOException {
  SolrParams params = req.getParams();
  if (!isHighlightingEnabled(params)) return null;

  SolrIndexSearcher searcher = req.getSearcher();
  IndexSchema schema = searcher.getSchema();
  NamedList fragments = new SimpleOrderedMap();
  String[] fieldNames = getHighlightFields(query, req, defaultFields);
  Set<String> fset = new HashSet<String>();

  {
    // pre-fetch documents using the Searcher's doc cache
    for (String f : fieldNames) {
      fset.add(f);
    }
    // fetch unique key if one exists.
    SchemaField keyField = schema.getUniqueKeyField();
    if (null != keyField) fset.add(keyField.getName());
  }

  // get FastVectorHighlighter instance out of the processing loop
  FastVectorHighlighter fvh =
      new FastVectorHighlighter(
          // FVH cannot process the hl.usePhraseHighlighter parameter on a per-field basis
          params.getBool(HighlightParams.USE_PHRASE_HIGHLIGHTER, true),
          // FVH cannot process the hl.requireFieldMatch parameter on a per-field basis
          params.getBool(HighlightParams.FIELD_MATCH, false));
  fvh.setPhraseLimit(params.getInt(HighlightParams.PHRASE_LIMIT, Integer.MAX_VALUE));
  FieldQuery fieldQuery = fvh.getFieldQuery(query, searcher.getIndexReader());

  // Highlight each document
  DocIterator iterator = docs.iterator();
  for (int i = 0; i < docs.size(); i++) {
    int docId = iterator.nextDoc();
    Document doc = searcher.doc(docId, fset);
    NamedList docSummaries = new SimpleOrderedMap();
    for (String fieldName : fieldNames) {
      fieldName = fieldName.trim();
      if (useFastVectorHighlighter(params, schema, fieldName))
        doHighlightingByFastVectorHighlighter(
            fvh, fieldQuery, req, docSummaries, docId, doc, fieldName);
      else doHighlightingByHighlighter(query, req, docSummaries, docId, doc, fieldName);
    }
    String printId = schema.printableUniqueKey(doc);
    fragments.add(printId == null ? null : printId, docSummaries);
  }
  return fragments;
}
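// A hedged request-side sketch of the parameters doHighlighting() reads globally
// (the comments above note they cannot vary per field). The string literals are
// the usual names behind the HighlightParams constants; values are illustrative.
import org.apache.solr.common.params.ModifiableSolrParams;

public class HighlightParamsSketch {
  public static ModifiableSolrParams exampleParams() {
    ModifiableSolrParams params = new ModifiableSolrParams();
    params.set("hl", "true");                      // enables highlighting
    params.set("hl.usePhraseHighlighter", "true"); // HighlightParams.USE_PHRASE_HIGHLIGHTER
    params.set("hl.requireFieldMatch", "false");   // HighlightParams.FIELD_MATCH
    params.set("hl.phraseLimit", "5000");          // HighlightParams.PHRASE_LIMIT
    return params;
  }
}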
public ExtractingDocumentLoader(
    SolrQueryRequest req,
    UpdateRequestProcessor processor,
    TikaConfig config,
    ParseContextConfig parseContextConfig,
    SolrContentHandlerFactory factory) {
  this.params = req.getParams();
  this.core = req.getCore();
  this.config = config;
  this.parseContextConfig = parseContextConfig;
  this.processor = processor;

  templateAdd = new AddUpdateCommand(req);
  templateAdd.overwrite = params.getBool(UpdateParams.OVERWRITE, true);
  templateAdd.commitWithin = params.getInt(UpdateParams.COMMIT_WITHIN, -1);

  // this is lightweight
  autoDetectParser = new AutoDetectParser(config);
  this.factory = factory;

  ignoreTikaException = params.getBool(ExtractingParams.IGNORE_TIKA_EXCEPTION, false);
}
CSVLoaderBase(SolrQueryRequest req, UpdateRequestProcessor processor) {
  this.processor = processor;
  this.params = req.getParams();
  this.literals = new HashMap<>();

  templateAdd = new AddUpdateCommand(req);
  templateAdd.overwrite = params.getBool(OVERWRITE, true);
  templateAdd.commitWithin = params.getInt(UpdateParams.COMMIT_WITHIN, -1);

  strategy =
      new CSVStrategy(
          ',',
          '"',
          CSVStrategy.COMMENTS_DISABLED,
          CSVStrategy.ESCAPE_DISABLED,
          false,
          false,
          false,
          true);
  String sep = params.get(SEPARATOR);
  if (sep != null) {
    if (sep.length() != 1)
      throw new SolrException(
          SolrException.ErrorCode.BAD_REQUEST, "Invalid separator:'" + sep + "'");
    strategy.setDelimiter(sep.charAt(0));
  }

  String encapsulator = params.get(ENCAPSULATOR);
  if (encapsulator != null) {
    if (encapsulator.length() != 1)
      throw new SolrException(
          SolrException.ErrorCode.BAD_REQUEST, "Invalid encapsulator:'" + encapsulator + "'");
  }

  String escape = params.get(ESCAPE);
  if (escape != null) {
    if (escape.length() != 1)
      throw new SolrException(
          SolrException.ErrorCode.BAD_REQUEST, "Invalid escape:'" + escape + "'");
  }
  rowId = params.get(ROW_ID);
  rowIdOffset = params.getInt(ROW_ID_OFFSET, 0);

  // if only encapsulator or escape is set, disable the other escaping mechanism
  if (encapsulator == null && escape != null) {
    strategy.setEncapsulator(CSVStrategy.ENCAPSULATOR_DISABLED);
    strategy.setEscape(escape.charAt(0));
  } else {
    if (encapsulator != null) {
      strategy.setEncapsulator(encapsulator.charAt(0));
    }
    if (escape != null) {
      char ch = escape.charAt(0);
      strategy.setEscape(ch);
      if (ch == '\\') {
        // If the escape is the standard backslash, then also enable
        // unicode escapes (it's harmless since 'u' would not otherwise
        // be escaped).
        strategy.setUnicodeEscapeInterpretation(true);
      }
    }
  }

  String fn = params.get(FIELDNAMES);
  fieldnames = fn != null ? commaSplit.split(fn, -1) : null;

  Boolean hasHeader = params.getBool(HEADER);
  skipLines = params.getInt(SKIPLINES, 0);

  if (fieldnames == null) {
    if (null == hasHeader) {
      // assume the file has the headers if they aren't supplied in the args
      hasHeader = true;
    } else if (!hasHeader) {
      throw new SolrException(
          SolrException.ErrorCode.BAD_REQUEST,
          "CSVLoader: must specify fieldnames=<fields>* or header=true");
    }
  } else {
    // if the fieldnames were supplied and the file has a header, we need to
    // skip over that header.
    if (hasHeader != null && hasHeader) skipLines++;
    prepareFields();
  }
}
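// A hedged sketch of the parameter combinations the constructor above accepts.
// The names are the literal request parameters it reads (separator, escape,
// fieldnames, header); the values are illustrative only.
import org.apache.solr.common.params.ModifiableSolrParams;

public class CsvParamsSketch {
  public static ModifiableSolrParams tabSeparatedWithEscape() {
    ModifiableSolrParams params = new ModifiableSolrParams();
    params.set("separator", "\t"); // must be exactly one character
    // Setting escape without encapsulator disables encapsulation entirely,
    // per the "disable the other escaping mechanism" rule above.
    params.set("escape", "\\");    // backslash also turns on unicode escapes
    params.set("fieldnames", "id,name,price");
    params.set("header", "true");  // the file's own header line is skipped
    return params;
  }
}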
@Override
public void process(ResponseBuilder rb) throws IOException {
  SolrParams params = rb.req.getParams();
  if (!params.getBool(TermsParams.TERMS, false)) return;

  String[] fields = params.getParams(TermsParams.TERMS_FIELD);

  NamedList<Object> termsResult = new SimpleOrderedMap<>();
  rb.rsp.add("terms", termsResult);

  if (fields == null || fields.length == 0) return;

  int limit = params.getInt(TermsParams.TERMS_LIMIT, 10);
  if (limit < 0) {
    limit = Integer.MAX_VALUE;
  }

  String lowerStr = params.get(TermsParams.TERMS_LOWER);
  String upperStr = params.get(TermsParams.TERMS_UPPER);
  boolean upperIncl = params.getBool(TermsParams.TERMS_UPPER_INCLUSIVE, false);
  boolean lowerIncl = params.getBool(TermsParams.TERMS_LOWER_INCLUSIVE, true);
  boolean sort =
      !TermsParams.TERMS_SORT_INDEX.equals(
          params.get(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_COUNT));
  int freqmin = params.getInt(TermsParams.TERMS_MINCOUNT, 1);
  int freqmax = params.getInt(TermsParams.TERMS_MAXCOUNT, UNLIMITED_MAX_COUNT);
  if (freqmax < 0) {
    freqmax = Integer.MAX_VALUE;
  }
  String prefix = params.get(TermsParams.TERMS_PREFIX_STR);
  String regexp = params.get(TermsParams.TERMS_REGEXP_STR);
  Pattern pattern = regexp != null ? Pattern.compile(regexp, resolveRegexpFlags(params)) : null;

  boolean raw = params.getBool(TermsParams.TERMS_RAW, false);

  final AtomicReader indexReader = rb.req.getSearcher().getAtomicReader();
  Fields lfields = indexReader.fields();

  for (String field : fields) {
    NamedList<Integer> fieldTerms = new NamedList<>();
    termsResult.add(field, fieldTerms);

    Terms terms = lfields == null ? null : lfields.terms(field);
    if (terms == null) {
      // no terms for this field
      continue;
    }

    FieldType ft = raw ? null : rb.req.getSchema().getFieldTypeNoEx(field);
    if (ft == null) ft = new StrField();

    // prefix must currently be text
    BytesRef prefixBytes = prefix == null ? null : new BytesRef(prefix);

    BytesRef upperBytes = null;
    if (upperStr != null) {
      upperBytes = new BytesRef();
      ft.readableToIndexed(upperStr, upperBytes);
    }

    BytesRef lowerBytes;
    if (lowerStr == null) {
      // If no lower bound was specified, use the prefix
      lowerBytes = prefixBytes;
    } else {
      if (raw) {
        // TODO: how to handle binary? perhaps we don't for "raw"... or if the field exists
        // perhaps we detect if the FieldType is non-character and expect hex if so?
        lowerBytes = new BytesRef(lowerStr);
      } else {
        lowerBytes = new BytesRef();
        ft.readableToIndexed(lowerStr, lowerBytes);
      }
    }

    TermsEnum termsEnum = terms.iterator(null);
    BytesRef term = null;

    if (lowerBytes != null) {
      if (termsEnum.seekCeil(lowerBytes) == TermsEnum.SeekStatus.END) {
        termsEnum = null;
      } else {
        term = termsEnum.term();
        // Only advance the enum if we are excluding the lower bound and the lower Term actually
        // matches
        if (!lowerIncl && term.equals(lowerBytes)) {
          term = termsEnum.next();
        }
      }
    } else {
      // position termsEnum on first term
      term = termsEnum.next();
    }

    int i = 0;
    BoundedTreeSet<CountPair<BytesRef, Integer>> queue =
        (sort ? new BoundedTreeSet<CountPair<BytesRef, Integer>>(limit) : null);
    CharsRef external = new CharsRef();
    while (term != null && (i < limit || sort)) {
      boolean externalized = false; // did we fill in "external" yet for this term?

      // stop if the prefix doesn't match
      if (prefixBytes != null && !StringHelper.startsWith(term, prefixBytes)) break;

      if (pattern != null) {
        // indexed text or external text?
        // TODO: support "raw" mode?
        ft.indexedToReadable(term, external);
        externalized = true;
        if (!pattern.matcher(external).matches()) {
          term = termsEnum.next();
          continue;
        }
      }

      if (upperBytes != null) {
        int upperCmp = term.compareTo(upperBytes);
        // if we are past the upper term, or equal to it (when don't include upper) then stop.
        if (upperCmp > 0 || (upperCmp == 0 && !upperIncl)) break;
      }

      // This is a good term in the range. Check if mincount/maxcount conditions are satisfied.
      int docFreq = termsEnum.docFreq();
      if (docFreq >= freqmin && docFreq <= freqmax) {
        // add the term to the list
        if (sort) {
          queue.add(new CountPair<>(BytesRef.deepCopyOf(term), docFreq));
        } else {
          // TODO: handle raw somehow
          if (!externalized) {
            ft.indexedToReadable(term, external);
          }
          fieldTerms.add(external.toString(), docFreq);
          i++;
        }
      }

      term = termsEnum.next();
    }

    if (sort) {
      for (CountPair<BytesRef, Integer> item : queue) {
        if (i >= limit) break;
        ft.indexedToReadable(item.key, external);
        fieldTerms.add(external.toString(), item.val);
        i++;
      }
    }
  }
}
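// A hedged sketch of a request that exercises the TermsComponent above. The
// string literals are the usual terms.* names behind the TermsParams constants;
// the field and values are illustrative.
import org.apache.solr.common.params.ModifiableSolrParams;

public class TermsRequestSketch {
  public static ModifiableSolrParams exampleParams() {
    ModifiableSolrParams params = new ModifiableSolrParams();
    params.set("terms", "true");       // TermsParams.TERMS: enables the component
    params.set("terms.fl", "name");    // TermsParams.TERMS_FIELD
    params.set("terms.prefix", "sol"); // TermsParams.TERMS_PREFIX_STR
    params.set("terms.limit", "25");   // a negative value means "no limit"
    params.set("terms.mincount", "2"); // skip terms with docFreq < 2
    params.set("terms.sort", "count"); // or "index" for index order
    return params;
  }
}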
@Override
public void process(ResponseBuilder rb) throws IOException {
  SolrParams params = rb.req.getParams();
  if (params.getBool(TermsParams.TERMS, false)) {
    String lowerStr = params.get(TermsParams.TERMS_LOWER, null);
    String[] fields = params.getParams(TermsParams.TERMS_FIELD);
    if (fields != null && fields.length > 0) {
      NamedList terms = new SimpleOrderedMap();
      rb.rsp.add("terms", terms);
      int limit = params.getInt(TermsParams.TERMS_LIMIT, 10);
      if (limit < 0) {
        limit = Integer.MAX_VALUE;
      }
      String upperStr = params.get(TermsParams.TERMS_UPPER);
      boolean upperIncl = params.getBool(TermsParams.TERMS_UPPER_INCLUSIVE, false);
      boolean lowerIncl = params.getBool(TermsParams.TERMS_LOWER_INCLUSIVE, true);
      boolean sort =
          !TermsParams.TERMS_SORT_INDEX.equals(
              params.get(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_COUNT));
      int freqmin = params.getInt(TermsParams.TERMS_MINCOUNT, 1);
      int freqmax = params.getInt(TermsParams.TERMS_MAXCOUNT, UNLIMITED_MAX_COUNT);
      if (freqmax < 0) {
        freqmax = Integer.MAX_VALUE;
      }
      String prefix = params.get(TermsParams.TERMS_PREFIX_STR);
      String regexp = params.get(TermsParams.TERMS_REGEXP_STR);
      Pattern pattern =
          regexp != null ? Pattern.compile(regexp, resolveRegexpFlags(params)) : null;

      boolean raw = params.getBool(TermsParams.TERMS_RAW, false);
      for (int j = 0; j < fields.length; j++) {
        String field = StringHelper.intern(fields[j]);
        FieldType ft = raw ? null : rb.req.getSchema().getFieldTypeNoEx(field);
        if (ft == null) ft = new StrField();

        // If no lower bound was specified, use the prefix
        String lower = lowerStr == null ? prefix : (raw ? lowerStr : ft.toInternal(lowerStr));
        if (lower == null) lower = "";
        String upper = upperStr == null ? null : (raw ? upperStr : ft.toInternal(upperStr));

        Term lowerTerm = new Term(field, lower);
        Term upperTerm = upper == null ? null : new Term(field, upper);

        TermEnum termEnum =
            rb.req.getSearcher().getReader().terms(lowerTerm); // this will be positioned ready to go
        int i = 0;
        BoundedTreeSet<CountPair<String, Integer>> queue =
            (sort ? new BoundedTreeSet<CountPair<String, Integer>>(limit) : null);
        NamedList fieldTerms = new NamedList();
        terms.add(field, fieldTerms);
        Term lowerTestTerm = termEnum.term();

        // Only advance the enum if we are excluding the lower bound and the lower Term actually
        // matches
        if (lowerTestTerm != null
            && !lowerIncl
            && lowerTestTerm.field() == field // intern'd comparison
            && lowerTestTerm.text().equals(lower)) {
          termEnum.next();
        }

        while (i < limit || sort) {
          Term theTerm = termEnum.term();

          // check for a different field, or the end of the index.
          if (theTerm == null || field != theTerm.field()) // intern'd comparison
            break;

          String indexedText = theTerm.text();

          // stop if the prefix doesn't match
          if (prefix != null && !indexedText.startsWith(prefix)) break;

          if (pattern != null && !pattern.matcher(indexedText).matches()) {
            termEnum.next();
            continue;
          }

          if (upperTerm != null) {
            int upperCmp = theTerm.compareTo(upperTerm);
            // if we are past the upper term, or equal to it (when don't include upper) then stop.
            if (upperCmp > 0 || (upperCmp == 0 && !upperIncl)) break;
          }

          // This is a good term in the range. Check if mincount/maxcount conditions are
          // satisfied.
          int docFreq = termEnum.docFreq();
          if (docFreq >= freqmin && docFreq <= freqmax) {
            // add the term to the list
            String label = raw ? indexedText : ft.indexedToReadable(indexedText);
            if (sort) {
              queue.add(new CountPair<String, Integer>(label, docFreq));
            } else {
              fieldTerms.add(label, docFreq);
              i++;
            }
          }
          termEnum.next();
        }

        termEnum.close();

        if (sort) {
          for (CountPair<String, Integer> item : queue) {
            if (i < limit) {
              fieldTerms.add(item.key, item.val);
              i++;
            } else {
              break;
            }
          }
        }
      }
    } else {
      throw new SolrException(
          SolrException.ErrorCode.BAD_REQUEST, "No terms.fl parameter specified");
    }
  }
}
private void initParams(SolrParams params) {
  if (params != null) {
    // Document-centric langId params
    setEnabled(params.getBool(LANGUAGE_ID, true));
    if (params.get(FIELDS_PARAM, "").length() > 0) {
      inputFields = params.get(FIELDS_PARAM, "").split(",");
    }
    langField = params.get(LANG_FIELD, DOCID_LANGFIELD_DEFAULT);
    langsField = params.get(LANGS_FIELD, DOCID_LANGSFIELD_DEFAULT);

    SchemaField uniqueKeyField = schema.getUniqueKeyField();
    docIdField =
        params.get(
            DOCID_PARAM, uniqueKeyField == null ? DOCID_FIELD_DEFAULT : uniqueKeyField.getName());

    fallbackValue = params.get(FALLBACK);
    if (params.get(FALLBACK_FIELDS, "").length() > 0) {
      fallbackFields = params.get(FALLBACK_FIELDS).split(",");
    }
    overwrite = params.getBool(OVERWRITE, false);
    langWhitelist = new HashSet<>();
    threshold = params.getDouble(THRESHOLD, DOCID_THRESHOLD_DEFAULT);
    if (params.get(LANG_WHITELIST, "").length() > 0) {
      for (String lang : params.get(LANG_WHITELIST, "").split(",")) {
        langWhitelist.add(lang);
      }
    }

    // Mapping params (field centric)
    enableMapping = params.getBool(MAP_ENABLE, false);
    if (params.get(MAP_FL, "").length() > 0) {
      mapFields = params.get(MAP_FL, "").split(",");
    } else {
      mapFields = inputFields;
    }
    mapKeepOrig = params.getBool(MAP_KEEP_ORIG, false);
    mapOverwrite = params.getBool(MAP_OVERWRITE, false);
    mapIndividual = params.getBool(MAP_INDIVIDUAL, false);

    // Process individual fields
    String[] mapIndividualFields = {};
    if (params.get(MAP_INDIVIDUAL_FL, "").length() > 0) {
      mapIndividualFields = params.get(MAP_INDIVIDUAL_FL, "").split(",");
    } else {
      mapIndividualFields = mapFields;
    }
    mapIndividualFieldsSet = new HashSet<>(Arrays.asList(mapIndividualFields));

    // Compile a union of the lists of fields to map
    allMapFieldsSet = new HashSet<>(Arrays.asList(mapFields));
    allMapFieldsSet.addAll(mapIndividualFieldsSet);

    // Normalize detected language codes via the langid.lcmap mappings
    lcMap = new HashMap<>();
    if (params.get(LCMAP) != null) {
      for (String mapping : params.get(LCMAP).split("[, ]")) {
        String[] keyVal = mapping.split(":");
        if (keyVal.length == 2) {
          lcMap.put(keyVal[0], keyVal[1]);
        } else {
          log.error(
              "Unsupported format for langid.lcmap: " + mapping + ". Skipping this mapping.");
        }
      }
    }

    // Language code mappings applied when naming mapped fields (langid.map.lcmap)
    mapLcMap = new HashMap<>();
    if (params.get(MAP_LCMAP) != null) {
      for (String mapping : params.get(MAP_LCMAP).split("[, ]")) {
        String[] keyVal = mapping.split(":");
        if (keyVal.length == 2) {
          mapLcMap.put(keyVal[0], keyVal[1]);
        } else {
          log.error(
              "Unsupported format for langid.map.lcmap: " + mapping + ". Skipping this mapping.");
        }
      }
    }
    enforceSchema = params.getBool(ENFORCE_SCHEMA, true);

    mapPattern = Pattern.compile(params.get(MAP_PATTERN, MAP_PATTERN_DEFAULT));
    mapReplaceStr = params.get(MAP_REPLACE, MAP_REPLACE_DEFAULT);
    maxFieldValueChars = params.getInt(MAX_FIELD_VALUE_CHARS, MAX_FIELD_VALUE_CHARS_DEFAULT);
    maxTotalChars = params.getInt(MAX_TOTAL_CHARS, MAX_TOTAL_CHARS_DEFAULT);
    if (maxFieldValueChars > maxTotalChars) {
      if (maxTotalChars == MAX_TOTAL_CHARS_DEFAULT) {
        // If the user specified only maxFieldValueChars, raise maxTotalChars to match it
        log.warn(
            MAX_FIELD_VALUE_CHARS
                + " ("
                + maxFieldValueChars
                + ") is greater than "
                + MAX_TOTAL_CHARS
                + " ("
                + maxTotalChars
                + "). Setting "
                + MAX_TOTAL_CHARS
                + " to "
                + maxFieldValueChars
                + ".");
        maxTotalChars = maxFieldValueChars;
      } else {
        // If the user specified maxTotalChars, cap maxFieldValueChars at it
        log.warn(
            MAX_FIELD_VALUE_CHARS
                + " ("
                + maxFieldValueChars
                + ") is greater than "
                + MAX_TOTAL_CHARS
                + " ("
                + maxTotalChars
                + "). Setting "
                + MAX_FIELD_VALUE_CHARS
                + " to "
                + maxTotalChars
                + ".");
        maxFieldValueChars = maxTotalChars;
      }
    }
  }
  log.debug("LangId configured");

  if (inputFields.length == 0) {
    throw new SolrException(
        ErrorCode.BAD_REQUEST,
        "Missing or faulty configuration of LanguageIdentifierUpdateProcessor."
            + " Input fields must be specified as a comma-separated list");
  }
}
/**
 * Verify that the PivotFields we're looking at don't violate any of the expected behaviors based
 * on the <code>TRACE_*</code> params found in the base params
 */
private void assertTraceOk(String pivotName, SolrParams baseParams, List<PivotField> constraints) {
  if (null == constraints || 0 == constraints.size()) {
    return;
  }
  final int maxIdx = constraints.size() - 1;

  final int min = baseParams.getInt(TRACE_MIN, -1);
  final boolean expectMissing = baseParams.getBool(TRACE_MISS, false);
  final boolean checkCount = "count".equals(baseParams.get(TRACE_SORT, "count"));

  int prevCount = Integer.MAX_VALUE;

  for (int i = 0; i <= maxIdx; i++) {
    final PivotField constraint = constraints.get(i);
    final int count = constraint.getCount();

    if (0 < min) {
      assertTrue(
          pivotName + ": val #" + i + " of " + maxIdx + ": count(" + count
              + ") < facet.mincount(" + min + "): " + constraint,
          min <= count);
    }
    // missing value must always come last, but only if facet.missing was used
    // and may not exist at all (mincount, none missing for this sub-facet, etc...)
    if ((i < maxIdx) || (!expectMissing)) {
      assertNotNull(
          pivotName + ": val #" + i + " of " + maxIdx + " has null value: " + constraint,
          constraint.getValue());
    }
    // if we are expecting count based sort, then the count of each constraint
    // must be lt-or-eq the count that came before -- or it must be the last value and
    // be "missing"
    if (checkCount) {
      assertTrue(
          pivotName + ": val #" + i + " of " + maxIdx + ": count(" + count
              + ") > prevCount(" + prevCount + "): " + constraint,
          ((count <= prevCount)
              || (expectMissing && i == maxIdx && null == constraint.getValue())));
      prevCount = count;
    }
  }
}
/** Actually run the query */
@Override
public void process(ResponseBuilder rb) throws IOException {
  SolrQueryRequest req = rb.req;
  SolrQueryResponse rsp = rb.rsp;
  SolrParams params = req.getParams();
  if (!params.getBool(COMPONENT_NAME, true)) {
    return;
  }
  SolrIndexSearcher searcher = req.getSearcher();

  if (rb.getQueryCommand().getOffset() < 0) {
    throw new SolrException(
        SolrException.ErrorCode.BAD_REQUEST, "'start' parameter cannot be negative");
  }

  // -1 as flag if not set.
  long timeAllowed = (long) params.getInt(CommonParams.TIME_ALLOWED, -1);

  // Optional: This could also be implemented by the top-level searcher sending
  // a filter that lists the ids... that would be transparent to
  // the request handler, but would be more expensive (and would preserve score
  // too if desired).
  String ids = params.get(ShardParams.IDS);
  if (ids != null) {
    SchemaField idField = req.getSchema().getUniqueKeyField();
    List<String> idArr = StrUtils.splitSmart(ids, ",", true);
    int[] luceneIds = new int[idArr.size()];
    int docs = 0;
    for (int i = 0; i < idArr.size(); i++) {
      int id =
          req.getSearcher()
              .getFirstMatch(
                  new Term(idField.getName(), idField.getType().toInternal(idArr.get(i))));
      if (id >= 0) luceneIds[docs++] = id;
    }

    DocListAndSet res = new DocListAndSet();
    res.docList = new DocSlice(0, docs, luceneIds, null, docs, 0);
    if (rb.isNeedDocSet()) {
      List<Query> queries = new ArrayList<Query>();
      queries.add(rb.getQuery());
      List<Query> filters = rb.getFilters();
      if (filters != null) queries.addAll(filters);
      res.docSet = searcher.getDocSet(queries);
    }
    rb.setResults(res);
    rsp.add("response", rb.getResults().docList);
    return;
  }

  SolrIndexSearcher.QueryCommand cmd = rb.getQueryCommand();
  cmd.setTimeAllowed(timeAllowed);
  SolrIndexSearcher.QueryResult result = new SolrIndexSearcher.QueryResult();

  //
  // grouping / field collapsing
  //
  boolean doGroup = params.getBool(GroupParams.GROUP, false);
  if (doGroup) {
    try {
      cmd.groupCommands = new ArrayList<Grouping.Command>();

      String[] fields = params.getParams(GroupParams.GROUP_FIELD);
      String[] funcs = params.getParams(GroupParams.GROUP_FUNC);
      String[] queries = params.getParams(GroupParams.GROUP_QUERY);
      String groupSortStr = params.get(GroupParams.GROUP_SORT);
      Sort groupSort =
          groupSortStr != null ? QueryParsing.parseSort(groupSortStr, req.getSchema()) : null;

      int limitDefault = cmd.getLen(); // this is normally from "rows"
      int docsPerGroupDefault = params.getInt(GroupParams.GROUP_LIMIT, 1);

      // temporary: implement all group-by-field as group-by-func
      if (funcs == null) {
        funcs = fields;
      } else if (fields != null) {
        // concatenate functions and fields
        String[] both = new String[fields.length + funcs.length];
        System.arraycopy(fields, 0, both, 0, fields.length);
        System.arraycopy(funcs, 0, both, fields.length, funcs.length);
        funcs = both;
      }

      if (funcs != null) {
        for (String groupByStr : funcs) {
          QParser parser = QParser.getParser(groupByStr, "func", rb.req);
          Query q = parser.getQuery();
          Grouping.CommandFunc gc = new Grouping.CommandFunc();
          gc.groupSort = groupSort;

          if (q instanceof FunctionQuery) {
            gc.groupBy = ((FunctionQuery) q).getValueSource();
          } else {
            gc.groupBy = new QueryValueSource(q, 0.0f);
          }
          gc.key = groupByStr;
          gc.groupLimit = limitDefault;
          gc.docsPerGroup = docsPerGroupDefault;

          cmd.groupCommands.add(gc);
        }
      }

      if (cmd.groupCommands.size() == 0) cmd.groupCommands = null;

      if (cmd.groupCommands != null) {
        if (rb.doHighlights || rb.isDebug()) {
          // we need a single list of the returned docs
          cmd.setFlags(SolrIndexSearcher.GET_DOCLIST);
        }

        searcher.search(result, cmd);
        rb.setResult(result);
        rsp.add("grouped", result.groupedResults);
        // TODO: get "hits" a different way to log
        return;
      }
    } catch (ParseException e) {
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
    }
  }

  // normal search result
  searcher.search(result, cmd);
  rb.setResult(result);

  rsp.add("response", rb.getResults().docList);
  rsp.getToLog().add("hits", rb.getResults().docList.matches());

  doFieldSortValues(rb, searcher);
  doPrefetch(rb);
}
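// A hedged sketch of request parameters that reach the grouping branch above.
// The group.* strings are the usual literals behind the GroupParams constants,
// and the field and sort values are illustrative.
import org.apache.solr.common.params.ModifiableSolrParams;

public class GroupingParamsSketch {
  public static ModifiableSolrParams exampleParams() {
    ModifiableSolrParams params = new ModifiableSolrParams();
    params.set("q", "*:*");
    params.set("group", "true");             // GroupParams.GROUP
    params.set("group.field", "manu_exact"); // handled as group-by-func internally (see above)
    params.set("group.limit", "3");          // docs returned per group
    params.set("group.sort", "price asc");   // GroupParams.GROUP_SORT
    return params;
  }
}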
@Override
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
  IndexSchema schema = req.getSchema();
  SolrIndexSearcher searcher = req.getSearcher();
  IndexReader reader = searcher.getReader();
  SolrParams params = req.getParams();
  int numTerms = params.getInt(NUMTERMS, DEFAULT_COUNT);

  // Always show the core lucene info
  rsp.add("index", getIndexInfo(reader, numTerms > 0));

  Integer docId = params.getInt(DOC_ID);
  if (docId == null && params.get(ID) != null) {
    // Look for something with a given solr ID
    SchemaField uniqueKey = schema.getUniqueKeyField();
    String v = uniqueKey.getType().toInternal(params.get(ID));
    Term t = new Term(uniqueKey.getName(), v);
    docId = searcher.getFirstMatch(t);
    if (docId < 0) {
      throw new SolrException(
          SolrException.ErrorCode.NOT_FOUND, "Can't find document: " + params.get(ID));
    }
  }

  // Read the document from the index
  if (docId != null) {
    Document doc = null;
    try {
      doc = reader.document(docId);
    } catch (Exception ex) {
      // ignore -- the null check below reports the missing document
    }
    if (doc == null) {
      throw new SolrException(SolrException.ErrorCode.NOT_FOUND, "Can't find document: " + docId);
    }

    SimpleOrderedMap<Object> info = getDocumentFieldsInfo(doc, docId, reader, schema);

    SimpleOrderedMap<Object> docinfo = new SimpleOrderedMap<Object>();
    docinfo.add("docId", docId);
    docinfo.add("lucene", info);
    docinfo.add("solr", doc);
    rsp.add("doc", docinfo);
  } else if ("schema".equals(params.get("show"))) {
    rsp.add("schema", getSchemaInfo(req.getSchema()));
  } else {
    // If no doc is given, show all fields and top terms
    Set<String> fields = null;
    if (params.get(CommonParams.FL) != null) {
      fields = new HashSet<String>();
      for (String f : params.getParams(CommonParams.FL)) {
        fields.add(f);
      }
    }
    rsp.add("fields", getIndexedFieldsInfo(searcher, fields, numTerms));
  }

  // Add some generally helpful information
  NamedList<Object> info = new SimpleOrderedMap<Object>();
  info.add("key", getFieldFlagsKey());
  info.add(
      "NOTE",
      "Document Frequency (df) is not updated when a document is marked for deletion. df values include deleted documents.");
  rsp.add("info", info);
  rsp.setHttpCaching(false);
}
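// A hedged sketch of the request shapes the Luke-style handler above serves.
// "id" and "show" are literals taken from the handler; the document id and the
// assumption that ID resolves to "id" are illustrative.
import org.apache.solr.common.params.ModifiableSolrParams;

public class LukeRequestSketch {
  public static ModifiableSolrParams byUniqueKey() {
    ModifiableSolrParams params = new ModifiableSolrParams();
    params.set("id", "doc-1"); // resolved to a lucene docId via the unique key field
    return params;
  }

  public static ModifiableSolrParams schemaOnly() {
    ModifiableSolrParams params = new ModifiableSolrParams();
    params.set("show", "schema"); // returns schema info instead of a document
    return params;
  }
}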
/** @since solr 1.3 */
void processDelete(SolrQueryRequest req, UpdateRequestProcessor processor, XMLStreamReader parser)
    throws XMLStreamException, IOException {
  // Parse the command
  DeleteUpdateCommand deleteCmd = new DeleteUpdateCommand(req);

  // First look for commitWithin parameter on the request, will be overwritten for individual
  // <delete>'s
  SolrParams params = req.getParams();
  deleteCmd.commitWithin = params.getInt(UpdateParams.COMMIT_WITHIN, -1);

  for (int i = 0; i < parser.getAttributeCount(); i++) {
    String attrName = parser.getAttributeLocalName(i);
    String attrVal = parser.getAttributeValue(i);
    if ("fromPending".equals(attrName)) {
      // deprecated
    } else if ("fromCommitted".equals(attrName)) {
      // deprecated
    } else if (UpdateRequestHandler.COMMIT_WITHIN.equals(attrName)) {
      deleteCmd.commitWithin = Integer.parseInt(attrVal);
    } else {
      log.warn("XML element <delete> has invalid XML attr: " + attrName);
    }
  }

  StringBuilder text = new StringBuilder();

  while (true) {
    int event = parser.next();
    switch (event) {
      case XMLStreamConstants.START_ELEMENT:
        String mode = parser.getLocalName();
        if (!("id".equals(mode) || "query".equals(mode))) {
          String msg = "XML element <delete> has invalid XML child element: " + mode;
          log.warn(msg);
          throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, msg);
        }
        text.setLength(0);

        if ("id".equals(mode)) {
          for (int i = 0; i < parser.getAttributeCount(); i++) {
            String attrName = parser.getAttributeLocalName(i);
            String attrVal = parser.getAttributeValue(i);
            if (UpdateRequestHandler.VERSION.equals(attrName)) {
              deleteCmd.setVersion(Long.parseLong(attrVal));
            }
            if (UpdateRequest.ROUTE.equals(attrName)) {
              deleteCmd.setRoute(attrVal);
            }
          }
        }
        break;

      case XMLStreamConstants.END_ELEMENT:
        String currTag = parser.getLocalName();
        if ("id".equals(currTag)) {
          deleteCmd.setId(text.toString());
        } else if ("query".equals(currTag)) {
          deleteCmd.setQuery(text.toString());
        } else if ("delete".equals(currTag)) {
          return;
        } else {
          String msg = "XML element <delete> has invalid XML (closing) child element: " + currTag;
          log.warn(msg);
          throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, msg);
        }
        processor.processDelete(deleteCmd);
        deleteCmd.clear();
        break;

      // Add everything to the text
      case XMLStreamConstants.SPACE:
      case XMLStreamConstants.CDATA:
      case XMLStreamConstants.CHARACTERS:
        text.append(parser.getText());
        break;
    }
  }
}
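// A hedged sketch of the <delete> message the parser above accepts: <id> may
// carry an optional version attribute (plus a route attribute whose literal
// name comes from UpdateRequest.ROUTE), and commitWithin can be set on the
// element itself. The id and query values are illustrative.
public class DeleteXmlSketch {
  public static final String EXAMPLE_DELETE =
      "<delete commitWithin=\"1000\">"
          + "<id version=\"12345\">doc-1</id>" // setVersion(12345), then setId("doc-1")
          + "<query>category:obsolete</query>" // setQuery(...)
          + "</delete>"; // closing </delete> returns from processDelete()
}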
/** @since solr 1.2 */
void processUpdate(SolrQueryRequest req, UpdateRequestProcessor processor, XMLStreamReader parser)
    throws XMLStreamException, IOException, FactoryConfigurationError {
  AddUpdateCommand addCmd = null;
  SolrParams params = req.getParams();
  while (true) {
    int event = parser.next();
    switch (event) {
      case XMLStreamConstants.END_DOCUMENT:
        parser.close();
        return;

      case XMLStreamConstants.START_ELEMENT:
        String currTag = parser.getLocalName();
        if (currTag.equals(UpdateRequestHandler.ADD)) {
          log.trace("SolrCore.update(add)");

          addCmd = new AddUpdateCommand(req);

          // First look for commitWithin parameter on the request, will be overwritten for
          // individual <add>'s
          addCmd.commitWithin = params.getInt(UpdateParams.COMMIT_WITHIN, -1);
          addCmd.overwrite = params.getBool(UpdateParams.OVERWRITE, true);

          for (int i = 0; i < parser.getAttributeCount(); i++) {
            String attrName = parser.getAttributeLocalName(i);
            String attrVal = parser.getAttributeValue(i);
            if (UpdateRequestHandler.OVERWRITE.equals(attrName)) {
              addCmd.overwrite = StrUtils.parseBoolean(attrVal);
            } else if (UpdateRequestHandler.COMMIT_WITHIN.equals(attrName)) {
              addCmd.commitWithin = Integer.parseInt(attrVal);
            } else {
              log.warn("XML element <add> has invalid XML attr: " + attrName);
            }
          }
        } else if ("doc".equals(currTag)) {
          if (addCmd != null) {
            log.trace("adding doc...");
            addCmd.clear();
            addCmd.solrDoc = readDoc(parser);
            processor.processAdd(addCmd);
          } else {
            throw new SolrException(
                SolrException.ErrorCode.BAD_REQUEST,
                "Unexpected <doc> tag without an <add> tag surrounding it.");
          }
        } else if (UpdateRequestHandler.COMMIT.equals(currTag)
            || UpdateRequestHandler.OPTIMIZE.equals(currTag)) {
          log.trace("parsing " + currTag);

          CommitUpdateCommand cmd =
              new CommitUpdateCommand(req, UpdateRequestHandler.OPTIMIZE.equals(currTag));
          ModifiableSolrParams mp = new ModifiableSolrParams();

          for (int i = 0; i < parser.getAttributeCount(); i++) {
            String attrName = parser.getAttributeLocalName(i);
            String attrVal = parser.getAttributeValue(i);
            mp.set(attrName, attrVal);
          }

          RequestHandlerUtils.validateCommitParams(mp);
          SolrParams p =
              SolrParams.wrapDefaults(
                  mp, req.getParams()); // default to the normal request params for commit options
          RequestHandlerUtils.updateCommit(cmd, p);

          processor.processCommit(cmd);
        } // end commit
        else if (UpdateRequestHandler.ROLLBACK.equals(currTag)) {
          log.trace("parsing rollback");

          RollbackUpdateCommand cmd = new RollbackUpdateCommand(req);

          processor.processRollback(cmd);
        } // end rollback
        else if (UpdateRequestHandler.DELETE.equals(currTag)) {
          log.trace("parsing delete");
          processDelete(req, processor, parser);
        } // end delete
        break;
    }
  }
}
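// A hedged companion to the delete sketch: the <add>/<doc>/<commit> shapes the
// processUpdate() loop above recognizes. The <update> root element, field names,
// and values are illustrative assumptions, not taken from the handler itself.
public class UpdateXmlSketch {
  public static final String EXAMPLE_UPDATE =
      "<update>"
          + "<add commitWithin=\"5000\" overwrite=\"true\">" // attrs override request params
          + "<doc><field name=\"id\">doc-1</field></doc>"    // each <doc> triggers processAdd()
          + "</add>"
          + "<commit waitSearcher=\"true\"/>" // attributes become commit options
          + "</update>";
}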
@Override
protected Directory create(String path, DirContext dirContext) throws IOException {
  LOG.info("creating directory factory for path {}", path);
  Configuration conf = getConf();

  if (metrics == null) {
    metrics = new Metrics(conf);
  }

  boolean blockCacheEnabled = params.getBool(BLOCKCACHE_ENABLED, true);
  boolean blockCacheReadEnabled = params.getBool(BLOCKCACHE_READ_ENABLED, true);
  boolean blockCacheWriteEnabled = params.getBool(BLOCKCACHE_WRITE_ENABLED, true);

  if (blockCacheEnabled) {
    LOG.warn("Block cache is enabled: " + BLOCKCACHE_ENABLED + " == true");
  }
  if (blockCacheWriteEnabled) {
    LOG.warn(
        "Using "
            + BLOCKCACHE_WRITE_ENABLED
            + " is currently buggy and can result in readers seeing a corrupted view of the index.");
  }
  Directory dir = null;

  if (blockCacheEnabled && dirContext != DirContext.META_DATA) {
    int numberOfBlocksPerBank = params.getInt(NUMBEROFBLOCKSPERBANK, 16384);
    int blockSize = BlockDirectory.BLOCK_SIZE;
    int bankCount = params.getInt(BLOCKCACHE_SLAB_COUNT, 1);
    boolean directAllocation = params.getBool(BLOCKCACHE_DIRECT_MEMORY_ALLOCATION, true);

    BlockCache blockCache;
    int slabSize = numberOfBlocksPerBank * blockSize;
    LOG.info(
        "Number of slabs of block cache [{}] with direct memory allocation set to [{}]",
        bankCount,
        directAllocation);
    LOG.info(
        "Block cache target memory usage, slab size of [{}] will allocate [{}] slabs and use ~[{}] bytes",
        new Object[] {slabSize, bankCount, ((long) bankCount * (long) slabSize)});

    int bufferSize = params.getInt("solr.hdfs.blockcache.bufferstore.buffersize", 128);
    int bufferCount = params.getInt("solr.hdfs.blockcache.bufferstore.buffercount", 128 * 128);

    BufferStore.initNewBuffer(bufferSize, bufferCount);
    long totalMemory = (long) bankCount * (long) numberOfBlocksPerBank * (long) blockSize;
    try {
      blockCache = new BlockCache(metrics, directAllocation, totalMemory, slabSize, blockSize);
    } catch (OutOfMemoryError e) {
      throw new RuntimeException(
          "The max direct memory is likely too low. Either increase it (by adding -XX:MaxDirectMemorySize=<size>g -XX:+UseLargePages to your container's startup args)"
              + " or disable direct allocation using solr.hdfs.blockcache.direct.memory.allocation=false in solrconfig.xml. If you are putting the block cache on the heap,"
              + " your java heap size might not be large enough."
              + " Failed allocating ~" + totalMemory / 1000000.0 + " MB.",
          e);
    }
    Cache cache = new BlockDirectoryCache(blockCache, metrics);
    HdfsDirectory hdfsDirectory = new HdfsDirectory(new Path(path), conf);
    dir =
        new BlockDirectory(
            "solrcore", hdfsDirectory, cache, null, blockCacheReadEnabled, blockCacheWriteEnabled);
  } else {
    dir = new HdfsDirectory(new Path(path), conf);
  }

  boolean nrtCachingDirectory = params.getBool(NRTCACHINGDIRECTORY_ENABLE, false);
  if (nrtCachingDirectory) {
    double nrtCacheMaxMergeSizeMB = params.getInt(NRTCACHINGDIRECTORY_MAXMERGESIZEMB, 16);
    double nrtCacheMaxCacheMB = params.getInt(NRTCACHINGDIRECTORY_MAXCACHEMB, 192);

    return new NRTCachingDirectory(dir, nrtCacheMaxMergeSizeMB, nrtCacheMaxCacheMB);
  }
  return dir;
}
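// A standalone sketch of the block-cache sizing arithmetic above. BLOCK_SIZE is
// assumed to be 8192 bytes here, standing in for BlockDirectory.BLOCK_SIZE; the
// other values are the defaults read by the factory.
public class BlockCacheSizingSketch {
  static final int BLOCK_SIZE = 8192; // assumption standing in for BlockDirectory.BLOCK_SIZE

  public static void main(String[] args) {
    int numberOfBlocksPerBank = 16384; // default for NUMBEROFBLOCKSPERBANK
    int bankCount = 1;                 // default for BLOCKCACHE_SLAB_COUNT

    int slabSize = numberOfBlocksPerBank * BLOCK_SIZE; // bytes per slab
    long totalMemory = (long) bankCount * (long) numberOfBlocksPerBank * (long) BLOCK_SIZE;

    // With these defaults: 16384 * 8192 = 134217728 bytes (~128 MB) per slab,
    // and totalMemory == bankCount * slabSize, allocated as direct memory
    // unless direct allocation is disabled.
    System.out.println("slabSize=" + slabSize + " totalMemory=" + totalMemory);
  }
}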