@Override
public void modifyRequest(ResponseBuilder rb, SearchComponent who, ShardRequest sreq) {
  SolrParams params = rb.req.getParams();
  if (!params.getBool(COMPONENT_NAME, false)
      || !params.getBool(ClusteringParams.USE_SEARCH_RESULTS, false)) {
    return;
  }
  sreq.params.remove(COMPONENT_NAME);
  if ((sreq.purpose & ShardRequest.PURPOSE_GET_FIELDS) != 0) {
    String fl = sreq.params.get(CommonParams.FL, "*");
    // if fl contains a wildcard, every field is fetched anyway and no check is needed
    if (fl.indexOf('*') >= 0) return;
    Set<String> fields = getSearchClusteringEngine(rb).getFieldsToLoad(rb.req);
    if (fields == null || fields.size() == 0) return;
    StringBuilder sb = new StringBuilder();
    String[] flparams = fl.split("[,\\s]+");
    Set<String> flParamSet = new HashSet<String>(flparams.length);
    for (String flparam : flparams) {
      // no need to trim(): the split regex already consumes surrounding whitespace
      flParamSet.add(flparam);
    }
    for (String aFieldToLoad : fields) {
      if (!flParamSet.contains(aFieldToLoad)) {
        sb.append(',').append(aFieldToLoad);
      }
    }
    if (sb.length() > 0) {
      sreq.params.set(CommonParams.FL, fl + sb.toString());
    }
  }
}
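// A minimal, self-contained sketch (not part of the Solr code above) of the field-list
// merge rule modifyRequest() applies: fields the clustering engine needs are appended
// to fl only when they are not already requested, and a wildcard short-circuits the check.
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

public class FlMergeSketch {
  static String mergeFl(String fl, Set<String> required) {
    if (fl.indexOf('*') >= 0) return fl; // wildcard already covers everything
    Set<String> present = new HashSet<>(Arrays.asList(fl.split("[,\\s]+")));
    StringBuilder sb = new StringBuilder(fl);
    for (String f : required) {
      if (!present.contains(f)) sb.append(',').append(f);
    }
    return sb.toString();
  }

  public static void main(String[] args) {
    Set<String> required = new HashSet<>(Arrays.asList("title", "snippet"));
    System.out.println(mergeFl("id,title", required)); // id,title,snippet
    System.out.println(mergeFl("*", required));        // * (unchanged)
  }
}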
@Override
public void finishStage(ResponseBuilder rb) {
  SolrParams params = rb.req.getParams();
  if (!params.getBool(COMPONENT_NAME, false)
      || !params.getBool(ClusteringParams.USE_SEARCH_RESULTS, false)) {
    return;
  }
  if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS) {
    SearchClusteringEngine engine = getSearchClusteringEngine(rb);
    if (engine != null) {
      SolrDocumentList solrDocList = (SolrDocumentList) rb.rsp.getValues().get("response");
      // TODO: Currently, docIds is set to null in a distributed environment, so
      // CarrotParams.PRODUCE_SUMMARY does not work. To make it work in distributed mode,
      // we could do either of the following:
      // (a) have ClusteringComponent produce summaries in each shard and merge them
      //     in finishStage(), or
      // (b) add a doHighlighting(SolrDocumentList, ...) method to SolrHighlighter and
      //     make SolrHighlighter use "external text" rather than stored values to
      //     produce snippets.
      Map<SolrDocument, Integer> docIds = null;
      Object clusters = engine.cluster(rb.getQuery(), solrDocList, docIds, rb.req);
      rb.rsp.add("clusters", clusters);
    } else {
      String name = getClusteringEngineName(rb);
      log.warn("No engine for: " + name);
    }
  }
}
/**
 * Generates a list of highlighted query fragments for each item in a list of documents, or
 * returns null if highlighting is disabled.
 *
 * @param docs query results
 * @param query the query
 * @param req the current request
 * @param defaultFields default list of fields to summarize
 * @return NamedList containing a NamedList for each document, which in turn contains
 *     (field, summary) pairs
 */
@Override
@SuppressWarnings("unchecked")
public NamedList<Object> doHighlighting(
    DocList docs, Query query, SolrQueryRequest req, String[] defaultFields) throws IOException {
  SolrParams params = req.getParams();
  if (!isHighlightingEnabled(params)) return null;

  SolrIndexSearcher searcher = req.getSearcher();
  IndexSchema schema = searcher.getSchema();
  NamedList fragments = new SimpleOrderedMap();
  String[] fieldNames = getHighlightFields(query, req, defaultFields);
  Set<String> fset = new HashSet<String>();
  {
    // pre-fetch documents using the Searcher's doc cache
    for (String f : fieldNames) {
      fset.add(f);
    }
    // fetch unique key if one exists.
    SchemaField keyField = schema.getUniqueKeyField();
    if (null != keyField) fset.add(keyField.getName());
  }

  // get a FastVectorHighlighter instance outside the processing loop
  FastVectorHighlighter fvh =
      new FastVectorHighlighter(
          // FVH cannot process the hl.usePhraseHighlighter parameter on a per-field basis
          params.getBool(HighlightParams.USE_PHRASE_HIGHLIGHTER, true),
          // FVH cannot process the hl.requireFieldMatch parameter on a per-field basis
          params.getBool(HighlightParams.FIELD_MATCH, false));
  fvh.setPhraseLimit(params.getInt(HighlightParams.PHRASE_LIMIT, Integer.MAX_VALUE));
  FieldQuery fieldQuery = fvh.getFieldQuery(query, searcher.getIndexReader());

  // Highlight each document
  DocIterator iterator = docs.iterator();
  for (int i = 0; i < docs.size(); i++) {
    int docId = iterator.nextDoc();
    Document doc = searcher.doc(docId, fset);
    NamedList docSummaries = new SimpleOrderedMap();
    for (String fieldName : fieldNames) {
      fieldName = fieldName.trim();
      if (useFastVectorHighlighter(params, schema, fieldName))
        doHighlightingByFastVectorHighlighter(
            fvh, fieldQuery, req, docSummaries, docId, doc, fieldName);
      else doHighlightingByHighlighter(query, req, docSummaries, docId, doc, fieldName);
    }
    String printId = schema.printableUniqueKey(doc);
    fragments.add(printId == null ? null : printId, docSummaries);
  }
  return fragments;
}
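// A hedged SolrJ sketch of a request that exercises doHighlighting(); the hl.* names are
// the stock highlighting parameters read above (via HighlightParams), while the URL,
// collection, and query are illustrative assumptions. Client construction is SolrJ 6-style
// and varies across SolrJ versions.
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;

public class HighlightRequestSketch {
  public static void main(String[] args) throws Exception {
    try (SolrClient client =
        new HttpSolrClient.Builder("http://localhost:8983/solr/techproducts").build()) {
      SolrQuery q = new SolrQuery("features:power");
      q.setHighlight(true);                   // hl=true, so isHighlightingEnabled() passes
      q.addHighlightField("features");        // contributes to getHighlightFields()
      q.set("hl.usePhraseHighlighter", true); // global switch; FVH cannot vary it per field
      q.set("hl.requireFieldMatch", false);   // likewise global for FVH
      q.set("hl.phraseLimit", 5000);
      QueryResponse rsp = client.query(q);
      System.out.println(rsp.getHighlighting());
    }
  }
}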
/** Generate external process results (if they have not already been generated). */
@Override
public void prepare(ResponseBuilder rb) throws IOException {
  SolrParams params = rb.req.getParams();
  if (!params.getBool(getName(), false)) {
    return;
  }
  XJoinResults<?> results = (XJoinResults<?>) rb.req.getContext().get(getResultsTag());
  if (results != null) {
    return;
  }

  // generate external process results, by passing 'external'-prefixed parameters
  // from the query string to our factory
  String prefix = getName() + "." + XJoinParameters.EXTERNAL_PREFIX + ".";
  ModifiableSolrParams externalParams = new ModifiableSolrParams();
  for (Iterator<String> it = params.getParameterNamesIterator(); it.hasNext(); ) {
    String name = it.next();
    if (name.startsWith(prefix)) {
      externalParams.set(name.substring(prefix.length()), params.get(name));
    }
  }
  results = factory.getResults(externalParams);
  rb.req.getContext().put(getResultsTag(), results);
}
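// A small self-contained sketch (illustrative names, not part of the plugin) of the
// prefix-stripping done in prepare(): a parameter such as "xjoin.external.q=beer" is
// forwarded to the factory as "q=beer". "xjoin" stands in for getName() and is an
// assumption.
import java.util.Iterator;
import org.apache.solr.common.params.ModifiableSolrParams;

public class ExternalParamSketch {
  public static void main(String[] args) {
    ModifiableSolrParams request = new ModifiableSolrParams();
    request.set("q", "*:*");
    request.set("xjoin.external.q", "beer");
    request.set("xjoin.external.rows", "10");

    String prefix = "xjoin.external.";
    ModifiableSolrParams external = new ModifiableSolrParams();
    for (Iterator<String> it = request.getParameterNamesIterator(); it.hasNext(); ) {
      String name = it.next();
      if (name.startsWith(prefix)) {
        external.set(name.substring(prefix.length()), request.get(name));
      }
    }
    System.out.println(external); // q=beer&rows=10
  }
}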
public void processGetVersions(ResponseBuilder rb) throws IOException {
  SolrQueryRequest req = rb.req;
  SolrQueryResponse rsp = rb.rsp;
  SolrParams params = req.getParams();

  if (!params.getBool(COMPONENT_NAME, true)) {
    return;
  }

  int nVersions = params.getInt("getVersions", -1);
  if (nVersions == -1) return;

  String sync = params.get("sync");
  if (sync != null) {
    processSync(rb, nVersions, sync);
    return;
  }

  UpdateLog ulog = req.getCore().getUpdateHandler().getUpdateLog();
  if (ulog == null) return;

  UpdateLog.RecentUpdates recentUpdates = ulog.getRecentUpdates();
  try {
    rb.rsp.add("versions", recentUpdates.getVersions(nVersions));
  } finally {
    recentUpdates.close(); // cache this somehow?
  }
}
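// A hedged SolrJ sketch of a request that reaches processGetVersions(); this component is
// conventionally registered under the /get handler, but check your solrconfig.xml. The
// URL and collection are illustrative assumptions; client construction is SolrJ 6-style.
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;

public class GetVersionsSketch {
  public static void main(String[] args) throws Exception {
    try (SolrClient client =
        new HttpSolrClient.Builder("http://localhost:8983/solr/techproducts").build()) {
      SolrQuery q = new SolrQuery();
      q.setRequestHandler("/get"); // a qt starting with '/' selects the handler path
      q.set("getVersions", 10);    // ask for up to 10 recent update versions
      QueryResponse rsp = client.query(q);
      System.out.println(rsp.getResponse().get("versions"));
    }
  }
}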
@Test
public void testGroupCollapseFilterFieldAllFiltered() throws IOException {
  mockResponse(true);
  when(rb.grouping()).thenReturn(true);
  when(params.getBool(GroupCollapseParams.GROUP_COLLAPSE, false)).thenReturn(true);
  when(params.get(GroupCollapseParams.GROUP_COLLAPSE_FL))
      .thenReturn("price,discount,isCloseout,color,colorFamily");
  when(params.get(GroupCollapseParams.GROUP_COLLAPSE_FF)).thenReturn(FIELD_CLOSEOUT);
  component.process(rb);
  verify(rb).grouping();
  verify(rb).getGroupingSpec();
  verify(params).getBool(GroupCollapseParams.GROUP_COLLAPSE, false);
  verify(params).get(GroupCollapseParams.GROUP_COLLAPSE_FL);
  verify(params).get(GroupCollapseParams.GROUP_COLLAPSE_FF);
  verify(params).getParams(GroupCollapseParams.GROUP_COLLAPSE_FQ);
  verifyNoMoreInteractions(rb);
  verifyNoMoreInteractions(params);

  ArgumentCaptor<NamedList> namedListArgument = ArgumentCaptor.forClass(NamedList.class);
  verify(rsp).add(eq("groups_summary"), namedListArgument.capture());
  NamedList groupsSummary = namedListArgument.getValue();
  NamedList productId = (NamedList) groupsSummary.get("productId");
  assertNotNull(productId);

  Set<String> colorFamilies = new HashSet<String>();
  colorFamilies.add("RedColorFamily");
  colorFamilies.add("BlackColorFamily");
  verifyProductSummary(
      (NamedList) productId.get("product1"), 80.0f, 100.0f, 0.0f, 20.0f, 2, colorFamilies);

  colorFamilies = new HashSet<String>();
  colorFamilies.add("OrangeColorFamily");
  colorFamilies.add("BrownColorFamily");
  verifyProductSummary(
      (NamedList) productId.get("product2"), 60.0f, 80.0f, 20.0f, 40.0f, 2, colorFamilies);
}
@Override
public void prepare(ResponseBuilder rb) throws IOException {
  SolrParams params = rb.req.getParams();
  if (!params.getBool(COMPONENT_NAME, false)) {
    return;
  }
}
/** * Handle "UNLOAD" Action * * @param req * @param rsp * @return true if a modification has resulted that requires persistance of the CoreContainer * configuration. */ protected boolean handleUnloadAction(SolrQueryRequest req, SolrQueryResponse rsp) throws SolrException { SolrParams params = req.getParams(); String cname = params.get(CoreAdminParams.CORE); SolrCore core = coreContainer.remove(cname); if (core == null) { throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "No such core exists '" + cname + "'"); } if (params.getBool(CoreAdminParams.DELETE_INDEX, false)) { core.addCloseHook( new CloseHook() { @Override public void preClose(SolrCore core) {} @Override public void postClose(SolrCore core) { File dataDir = new File(core.getIndexDir()); for (File file : dataDir.listFiles()) { if (!file.delete()) { log.error(file.getAbsolutePath() + " could not be deleted on core unload"); } } if (!dataDir.delete()) log.error(dataDir.getAbsolutePath() + " could not be deleted on core unload"); } }); } core.close(); return coreContainer.isPersistent(); }
private void initParameters(SolrParams parameters) {
  if (parameters != null) {
    this.setEnabled(parameters.getBool("enabled", true));
    this.inputFieldname = parameters.get(INPUT_FIELD_PARAM, DEFAULT_INPUT_FIELDNAME);
    this.boostFieldname = parameters.get(BOOST_FIELD_PARAM, DEFAULT_BOOST_FIELDNAME);
    this.boostFilename = parameters.get(BOOST_FILENAME_PARAM);
  }
}
@Test
public void testNoGroupCollapse() throws IOException {
  when(rb.grouping()).thenReturn(true);
  when(params.getBool(GroupCollapseParams.GROUP_COLLAPSE, false)).thenReturn(false);
  component.process(rb);
  verify(rb).grouping();
  verify(params).getBool(GroupCollapseParams.GROUP_COLLAPSE, false);
  verifyNoMoreInteractions(rb);
  verifyNoMoreInteractions(params);
}
/** * Handle "SWAP" action * * @param req * @param rsp * @return true if a modification has resulted that requires persistance of the CoreContainer * configuration. */ protected boolean handleSwapAction(SolrQueryRequest req, SolrQueryResponse rsp) { final SolrParams params = req.getParams(); final SolrParams required = params.required(); final String cname = params.get(CoreAdminParams.CORE); boolean doPersist = params.getBool(CoreAdminParams.PERSISTENT, coreContainer.isPersistent()); String other = required.get(CoreAdminParams.OTHER); coreContainer.swap(cname, other); return doPersist; }
@Override
public void init(SolrParams params) {
  super.init(params);
  discountOverlaps = params.getBool("discountOverlaps", true);
  distribution = parseDistribution(params.get("distribution"));
  lambda = parseLambda(params.get("lambda"));
  normalization =
      DFRSimilarityFactory.parseNormalization(
          params.get("normalization"), params.get("c"), params.get("mu"), params.get("z"));
}
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
  SolrParams params = req.getParams();
  params = adjustParams(params);
  req.setParams(params);

  if (params.get("action") != null) {
    handleAdmin(req, rsp, params);
    return;
  }

  TupleStream tupleStream;

  try {
    tupleStream = this.streamFactory.constructStream(params.get("expr"));
  } catch (Exception e) {
    // Catch exceptions that occur while the stream is being created. This will include
    // streaming expression parse errors.
    SolrException.log(logger, e);
    rsp.add("result-set", new DummyErrorStream(e));
    return;
  }

  int worker = params.getInt("workerID", 0);
  int numWorkers = params.getInt("numWorkers", 1);
  StreamContext context = new StreamContext();
  context.workerID = worker;
  context.numWorkers = numWorkers;
  context.setSolrClientCache(clientCache);
  context.setModelCache(modelCache);
  context.put("core", this.coreName);
  context.put("solr-core", req.getCore());
  tupleStream.setStreamContext(context);

  // if an explanation was asked for, go get it
  if (params.getBool("explain", false)) {
    rsp.add("explanation", tupleStream.toExplanation(this.streamFactory));
  }

  if (tupleStream instanceof DaemonStream) {
    DaemonStream daemonStream = (DaemonStream) tupleStream;
    if (daemons.containsKey(daemonStream.getId())) {
      daemons.remove(daemonStream.getId()).close();
    }
    daemonStream.setDaemons(daemons);
    daemonStream.open(); // This will start the daemonStream
    daemons.put(daemonStream.getId(), daemonStream);
    rsp.add(
        "result-set",
        new DaemonResponseStream("Daemon:" + daemonStream.getId() + " started on " + coreName));
  } else {
    rsp.add("result-set", new TimerStream(new ExceptionStream(tupleStream)));
  }
}
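// A hedged SolrJ sketch of posting a streaming expression to this handler (typically
// registered at /stream). The expression, URL, and collection are illustrative; for large
// result sets a streaming client is preferable to a plain query.
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;

public class StreamExprSketch {
  public static void main(String[] args) throws Exception {
    try (SolrClient client =
        new HttpSolrClient.Builder("http://localhost:8983/solr/techproducts").build()) {
      SolrQuery q = new SolrQuery();
      q.setRequestHandler("/stream");
      q.set("expr", "search(techproducts, q=\"*:*\", fl=\"id\", sort=\"id asc\")");
      q.set("explain", true); // adds an "explanation" section alongside the result-set
      QueryResponse rsp = client.query(q);
      System.out.println(rsp.getResponse().get("result-set"));
    }
  }
}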
public void processGetUpdates(ResponseBuilder rb) throws IOException {
  SolrQueryRequest req = rb.req;
  SolrQueryResponse rsp = rb.rsp;
  SolrParams params = req.getParams();

  if (!params.getBool(COMPONENT_NAME, true)) {
    return;
  }

  String versionsStr = params.get("getUpdates");
  if (versionsStr == null) return;

  UpdateLog ulog = req.getCore().getUpdateHandler().getUpdateLog();
  if (ulog == null) return;

  List<String> versions = StrUtils.splitSmart(versionsStr, ",", true);
  List<Object> updates = new ArrayList<Object>(versions.size());
  long minVersion = Long.MAX_VALUE;

  // TODO: get this from cache instead of rebuilding?
  UpdateLog.RecentUpdates recentUpdates = ulog.getRecentUpdates();
  try {
    for (String versionStr : versions) {
      long version = Long.parseLong(versionStr);
      try {
        Object o = recentUpdates.lookup(version);
        if (o == null) continue;
        if (version > 0) {
          minVersion = Math.min(minVersion, version);
        }
        // TODO: do any kind of validation here?
        updates.add(o);
      } catch (SolrException e) {
        log.warn("Exception reading log for updates", e);
      } catch (ClassCastException e) {
        log.warn("Exception reading log for updates", e);
      }
    }
    // Must return all delete-by-query commands that occur after the first add requested,
    // since they may apply.
    updates.addAll(recentUpdates.getDeleteByQuery(minVersion));
    rb.rsp.add("updates", updates);
  } finally {
    recentUpdates.close(); // cache this somehow?
  }
}
@Override
public void process(ResponseBuilder rb) throws IOException {
  SolrParams params = rb.req.getParams();
  if (!params.getBool(COMPONENT_NAME, false)) {
    return;
  }

  String name = getClusteringEngineName(rb);
  boolean useResults = params.getBool(ClusteringParams.USE_SEARCH_RESULTS, false);
  if (useResults) {
    SearchClusteringEngine engine = getSearchClusteringEngine(rb);
    if (engine != null) {
      DocListAndSet results = rb.getResults();
      Map<SolrDocument, Integer> docIds =
          Maps.newHashMapWithExpectedSize(results.docList.size());
      SolrDocumentList solrDocList =
          SolrPluginUtils.docListToSolrDocumentList(
              results.docList, rb.req.getSearcher(), engine.getFieldsToLoad(rb.req), docIds);
      Object clusters = engine.cluster(rb.getQuery(), solrDocList, docIds, rb.req);
      rb.rsp.add("clusters", clusters);
    } else {
      log.warn("No engine for: " + name);
    }
  }

  boolean useCollection = params.getBool(ClusteringParams.USE_COLLECTION, false);
  if (useCollection) {
    DocumentClusteringEngine engine = documentClusteringEngines.get(name);
    if (engine != null) {
      boolean useDocSet = params.getBool(ClusteringParams.USE_DOC_SET, false);
      NamedList<?> nl;
      // TODO: This likely needs to be made into a background task that runs in an executor
      if (useDocSet) {
        nl = engine.cluster(rb.getResults().docSet, params);
      } else {
        nl = engine.cluster(params);
      }
      rb.rsp.add("clusters", nl);
    } else {
      log.warn("No engine for " + name);
    }
  }
}
@Override
public void init(NamedList args) {
  params = SolrParams.toSolrParams(args);
  this.hdfsDataDir = params.get(HDFS_HOME);
  if (this.hdfsDataDir != null && this.hdfsDataDir.length() == 0) {
    this.hdfsDataDir = null;
  }
  boolean kerberosEnabled = params.getBool(KERBEROS_ENABLED, false);
  LOG.info("Solr Kerberos Authentication " + (kerberosEnabled ? "enabled" : "disabled"));
  if (kerberosEnabled) {
    initKerberos();
  }
}
public ExtractingDocumentLoader(
    SolrQueryRequest req,
    UpdateRequestProcessor processor,
    TikaConfig config,
    ParseContextConfig parseContextConfig,
    SolrContentHandlerFactory factory) {
  this.params = req.getParams();
  this.core = req.getCore();
  this.config = config;
  this.parseContextConfig = parseContextConfig;
  this.processor = processor;

  templateAdd = new AddUpdateCommand(req);
  templateAdd.overwrite = params.getBool(UpdateParams.OVERWRITE, true);
  templateAdd.commitWithin = params.getInt(UpdateParams.COMMIT_WITHIN, -1);

  // this is lightweight
  autoDetectParser = new AutoDetectParser(config);
  this.factory = factory;

  ignoreTikaException = params.getBool(ExtractingParams.IGNORE_TIKA_EXCEPTION, false);
}
/**
 * Match up search results and add corresponding data for each result (if we have query results
 * available).
 */
@Override
@SuppressWarnings({"rawtypes", "unchecked"})
public void process(ResponseBuilder rb) throws IOException {
  SolrParams params = rb.req.getParams();
  if (!params.getBool(getName(), false)) {
    return;
  }
  XJoinResults<?> results = (XJoinResults<?>) rb.req.getContext().get(getResultsTag());
  if (results == null || rb.getResults() == null) {
    return;
  }

  // general results
  FieldAppender appender =
      new FieldAppender(params.get(getName() + "." + XJoinParameters.RESULTS_FIELD_LIST, "*"));
  NamedList general = appender.addNamedList(rb.rsp.getValues(), getName(), results);

  // per join id results
  FieldAppender docAppender =
      new FieldAppender(params.get(getName() + "." + XJoinParameters.DOC_FIELD_LIST, "*"));
  Set<String> joinFields = new HashSet<>();
  joinFields.add(joinField);

  List<String> joinIds = new ArrayList<>();
  for (Iterator<Integer> it = docIterator(rb); it.hasNext(); ) {
    StoredDocument doc = rb.req.getSearcher().doc(it.next(), joinFields);
    for (String joinId : doc.getValues(joinField)) {
      if (!joinIds.contains(joinId)) {
        joinIds.add(joinId);
      }
    }
  }

  for (String joinId : joinIds) {
    Object object = results.getResult(joinId);
    if (object == null) continue;
    NamedList external = new NamedList<>();
    general.add("external", external);
    external.add("joinId", joinId);
    if (object instanceof Iterable) {
      for (Object item : (Iterable) object) {
        docAppender.addNamedList(external, "doc", item);
      }
    } else {
      docAppender.addNamedList(external, "doc", object);
    }
  }
}
/**
 * Executes a query and compares the results with the data available in the {@link PivotField}
 * constraint -- this method is not recursive, and doesn't check anything about the sub-pivots
 * (if any).
 *
 * @param pivotName pivot name
 * @param constraint filters on pivot
 * @param params base solr parameters
 */
private void assertPivotData(String pivotName, PivotField constraint, SolrParams params)
    throws SolrServerException, IOException {
  SolrParams p = SolrParams.wrapDefaults(params("rows", "0"), params);
  QueryResponse res = cloudClient.query(p);
  String msg = pivotName + ": " + p;

  assertNumFound(msg, constraint.getCount(), res);

  if (p.getBool(StatsParams.STATS, false)) {
    // only check stats if stats are expected
    assertPivotStats(msg, constraint, res);
  }
}
@Test
public void testGroupCollapseNoGroups() throws IOException {
  NamedList values = mock(NamedList.class);
  when(rsp.getValues()).thenReturn(values);
  when(values.get("grouped")).thenReturn(null);
  when(rb.grouping()).thenReturn(true);
  when(params.getBool(GroupCollapseParams.GROUP_COLLAPSE, false)).thenReturn(true);
  when(params.get(GroupCollapseParams.GROUP_COLLAPSE_FL)).thenReturn("price,discount,isCloseout");
  component.process(rb);
  verify(rsp, never()).add(anyString(), anyObject());
}
public void process(ResponseBuilder rb) throws IOException {
  SolrParams params = rb.req.getParams();
  if (!params.getBool(COMPONENT_NAME, false)) {
    return;
  }
  String name = params.get(ClusteringParams.ENGINE_NAME, ClusteringEngine.DEFAULT_ENGINE_NAME);
  boolean useResults = params.getBool(ClusteringParams.USE_SEARCH_RESULTS, false);
  if (useResults) {
    SearchClusteringEngine engine = searchClusteringEngines.get(name);
    if (engine != null) {
      DocListAndSet results = rb.getResults();
      Object clusters = engine.cluster(rb.getQuery(), results.docList, rb.req);
      rb.rsp.add("clusters", clusters);
    } else {
      log.warn("No engine for: " + name);
    }
  }
  boolean useCollection = params.getBool(ClusteringParams.USE_COLLECTION, false);
  if (useCollection) {
    DocumentClusteringEngine engine = documentClusteringEngines.get(name);
    if (engine != null) {
      boolean useDocSet = params.getBool(ClusteringParams.USE_DOC_SET, false);
      NamedList nl;
      // TODO: This likely needs to be made into a background task that runs in an executor
      if (useDocSet) {
        nl = engine.cluster(rb.getResults().docSet, params);
      } else {
        nl = engine.cluster(params);
      }
      rb.rsp.add("clusters", nl);
    } else {
      log.warn("No engine for " + name);
    }
  }
}
@Override
public void prepare(ResponseBuilder rb) throws IOException {
  SolrParams params = rb.req.getParams();
  if (params.getBool(TermsParams.TERMS, false)) {
    rb.doTerms = true;
  }

  // TODO: temporary... this should go in a different component.
  String shards = params.get(ShardParams.SHARDS);
  if (shards != null) {
    if (params.get(ShardParams.SHARDS_QT) == null) {
      throw new SolrException(
          SolrException.ErrorCode.BAD_REQUEST, "No shards.qt parameter specified");
    }
    List<String> lst = StrUtils.splitSmart(shards, ",", true);
    rb.shards = lst.toArray(new String[lst.size()]);
  }
}
@Override
public void process(ResponseBuilder rb) throws IOException {
  SolrParams params = rb.req.getParams();
  if (!params.getBool(COMPONENT_NAME, false)) return;

  SolrIndexSearcher searcher = rb.req.getSearcher();
  IndexSchema schema = searcher.getSchema();
  if (schema.getUniqueKeyField() == null) return;

  ResultContext rc = (ResultContext) rb.rsp.getResponse();
  DocList docs = rc.getDocList();
  if (docs.hasScores()) {
    processScores(rb, docs, schema, searcher);
  } else {
    processIds(rb, docs, schema, searcher);
  }
}
public NamedList getHeatmapCounts() throws IOException, SyntaxError {
  final NamedList<Object> resOuter = new SimpleOrderedMap<>();
  String[] unparsedFields = rb.req.getParams().getParams(FacetParams.FACET_HEATMAP);
  if (unparsedFields == null || unparsedFields.length == 0) {
    return resOuter;
  }
  if (global.getBool(GroupParams.GROUP_FACET, false)) {
    throw new SolrException(
        SolrException.ErrorCode.BAD_REQUEST,
        "Heatmaps can't be used with " + GroupParams.GROUP_FACET);
  }
  for (String unparsedField : unparsedFields) {
    // populates facetValue, rb, params, docs
    final ParsedParams parsed = parseParams(FacetParams.FACET_HEATMAP, unparsedField);
    resOuter.add(
        parsed.key,
        SpatialHeatmapFacets.getHeatmapForField(
            parsed.key, parsed.facetValue, rb, parsed.params, parsed.docs));
  }
  return resOuter;
}
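// A hedged sketch of a faceting request that reaches getHeatmapCounts(); facet.heatmap is
// the parameter read above, while the URL, collection, and spatial (RPT) field name are
// illustrative assumptions.
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;

public class HeatmapFacetSketch {
  public static void main(String[] args) throws Exception {
    try (SolrClient client =
        new HttpSolrClient.Builder("http://localhost:8983/solr/spatial").build()) {
      SolrQuery q = new SolrQuery("*:*");
      q.setFacet(true);
      q.set("facet.heatmap", "geo_rpt"); // one heatmap per facet.heatmap parameter
      QueryResponse rsp = client.query(q);
      System.out.println(rsp.getResponse().get("facet_counts"));
    }
  }
}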
CSVLoaderBase(SolrQueryRequest req, UpdateRequestProcessor processor) {
  this.processor = processor;
  this.params = req.getParams();
  this.literals = new HashMap<>();

  templateAdd = new AddUpdateCommand(req);
  templateAdd.overwrite = params.getBool(OVERWRITE, true);
  templateAdd.commitWithin = params.getInt(UpdateParams.COMMIT_WITHIN, -1);

  strategy =
      new CSVStrategy(
          ',',
          '"',
          CSVStrategy.COMMENTS_DISABLED,
          CSVStrategy.ESCAPE_DISABLED,
          false,
          false,
          false,
          true);
  String sep = params.get(SEPARATOR);
  if (sep != null) {
    if (sep.length() != 1)
      throw new SolrException(
          SolrException.ErrorCode.BAD_REQUEST, "Invalid separator:'" + sep + "'");
    strategy.setDelimiter(sep.charAt(0));
  }

  String encapsulator = params.get(ENCAPSULATOR);
  if (encapsulator != null) {
    if (encapsulator.length() != 1)
      throw new SolrException(
          SolrException.ErrorCode.BAD_REQUEST, "Invalid encapsulator:'" + encapsulator + "'");
  }

  String escape = params.get(ESCAPE);
  if (escape != null) {
    if (escape.length() != 1)
      throw new SolrException(
          SolrException.ErrorCode.BAD_REQUEST, "Invalid escape:'" + escape + "'");
  }
  rowId = params.get(ROW_ID);
  rowIdOffset = params.getInt(ROW_ID_OFFSET, 0);

  // if only encapsulator or escape is set, disable the other escaping mechanism
  if (encapsulator == null && escape != null) {
    strategy.setEncapsulator(CSVStrategy.ENCAPSULATOR_DISABLED);
    strategy.setEscape(escape.charAt(0));
  } else {
    if (encapsulator != null) {
      strategy.setEncapsulator(encapsulator.charAt(0));
    }
    if (escape != null) {
      char ch = escape.charAt(0);
      strategy.setEscape(ch);
      if (ch == '\\') {
        // If the escape is the standard backslash, then also enable
        // unicode escapes (it's harmless, since 'u' would not otherwise
        // be escaped).
        strategy.setUnicodeEscapeInterpretation(true);
      }
    }
  }

  String fn = params.get(FIELDNAMES);
  fieldnames = fn != null ? commaSplit.split(fn, -1) : null;

  Boolean hasHeader = params.getBool(HEADER);
  skipLines = params.getInt(SKIPLINES, 0);

  if (fieldnames == null) {
    if (null == hasHeader) {
      // assume the file has the headers if they aren't supplied in the args
      hasHeader = true;
    } else if (!hasHeader) {
      throw new SolrException(
          SolrException.ErrorCode.BAD_REQUEST,
          "CSVLoader: must specify fieldnames=<fields>* or header=true");
    }
  } else {
    // if the fieldnames were supplied and the file has a header, we need to
    // skip over that header.
    if (hasHeader != null && hasHeader) skipLines++;
    prepareFields();
  }
}
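// A hedged SolrJ sketch of a CSV update that exercises the parameters parsed above;
// /update/csv, the URL, file name, and field names are illustrative assumptions.
import java.io.File;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;

public class CsvUpdateSketch {
  public static void main(String[] args) throws Exception {
    try (SolrClient client =
        new HttpSolrClient.Builder("http://localhost:8983/solr/techproducts").build()) {
      ContentStreamUpdateRequest req = new ContentStreamUpdateRequest("/update/csv");
      req.addFile(new File("products.csv"), "text/csv");
      req.setParam("separator", ";");              // must be a single character
      req.setParam("fieldnames", "id,name,price"); // with header=true, the header row is skipped
      req.setParam("header", "true");
      req.setParam("commit", "true");
      client.request(req);
    }
  }
}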
@Override
public void process(ResponseBuilder rb) throws IOException {
  SolrParams params = rb.req.getParams();
  if (!params.getBool(TermsParams.TERMS, false)) return;

  String[] fields = params.getParams(TermsParams.TERMS_FIELD);

  NamedList<Object> termsResult = new SimpleOrderedMap<>();
  rb.rsp.add("terms", termsResult);

  if (fields == null || fields.length == 0) return;

  int limit = params.getInt(TermsParams.TERMS_LIMIT, 10);
  if (limit < 0) {
    limit = Integer.MAX_VALUE;
  }

  String lowerStr = params.get(TermsParams.TERMS_LOWER);
  String upperStr = params.get(TermsParams.TERMS_UPPER);
  boolean upperIncl = params.getBool(TermsParams.TERMS_UPPER_INCLUSIVE, false);
  boolean lowerIncl = params.getBool(TermsParams.TERMS_LOWER_INCLUSIVE, true);
  boolean sort =
      !TermsParams.TERMS_SORT_INDEX.equals(
          params.get(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_COUNT));
  int freqmin = params.getInt(TermsParams.TERMS_MINCOUNT, 1);
  int freqmax = params.getInt(TermsParams.TERMS_MAXCOUNT, UNLIMITED_MAX_COUNT);
  if (freqmax < 0) {
    freqmax = Integer.MAX_VALUE;
  }
  String prefix = params.get(TermsParams.TERMS_PREFIX_STR);
  String regexp = params.get(TermsParams.TERMS_REGEXP_STR);
  Pattern pattern = regexp != null ? Pattern.compile(regexp, resolveRegexpFlags(params)) : null;

  boolean raw = params.getBool(TermsParams.TERMS_RAW, false);

  final AtomicReader indexReader = rb.req.getSearcher().getAtomicReader();
  Fields lfields = indexReader.fields();

  for (String field : fields) {
    NamedList<Integer> fieldTerms = new NamedList<>();
    termsResult.add(field, fieldTerms);

    Terms terms = lfields == null ? null : lfields.terms(field);
    if (terms == null) {
      // no terms for this field
      continue;
    }

    FieldType ft = raw ? null : rb.req.getSchema().getFieldTypeNoEx(field);
    if (ft == null) ft = new StrField();

    // prefix must currently be text
    BytesRef prefixBytes = prefix == null ? null : new BytesRef(prefix);

    BytesRef upperBytes = null;
    if (upperStr != null) {
      upperBytes = new BytesRef();
      ft.readableToIndexed(upperStr, upperBytes);
    }

    BytesRef lowerBytes;
    if (lowerStr == null) {
      // If no lower bound was specified, use the prefix
      lowerBytes = prefixBytes;
    } else if (raw) {
      // TODO: how to handle binary? perhaps we don't for "raw"... or if the field exists
      // perhaps we detect if the FieldType is non-character and expect hex if so?
      lowerBytes = new BytesRef(lowerStr);
    } else {
      lowerBytes = new BytesRef();
      ft.readableToIndexed(lowerStr, lowerBytes);
    }

    TermsEnum termsEnum = terms.iterator(null);
    BytesRef term = null;

    if (lowerBytes != null) {
      if (termsEnum.seekCeil(lowerBytes) == TermsEnum.SeekStatus.END) {
        termsEnum = null;
      } else {
        term = termsEnum.term();
        // Only advance the enum if we are excluding the lower bound and the lower Term actually
        // matches
        if (!lowerIncl && term.equals(lowerBytes)) {
          term = termsEnum.next();
        }
      }
    } else {
      // position termsEnum on first term
      term = termsEnum.next();
    }

    int i = 0;
    BoundedTreeSet<CountPair<BytesRef, Integer>> queue =
        (sort ? new BoundedTreeSet<CountPair<BytesRef, Integer>>(limit) : null);
    CharsRef external = new CharsRef();
    while (term != null && (i < limit || sort)) {
      boolean externalized = false; // did we fill in "external" yet for this term?

      // stop if the prefix doesn't match
      if (prefixBytes != null && !StringHelper.startsWith(term, prefixBytes)) break;

      if (pattern != null) {
        // indexed text or external text?
        // TODO: support "raw" mode?
        ft.indexedToReadable(term, external);
        externalized = true;
        if (!pattern.matcher(external).matches()) {
          term = termsEnum.next();
          continue;
        }
      }

      if (upperBytes != null) {
        int upperCmp = term.compareTo(upperBytes);
        // if we are past the upper term, or equal to it (when don't include upper) then stop.
        if (upperCmp > 0 || (upperCmp == 0 && !upperIncl)) break;
      }

      // This is a good term in the range. Check if mincount/maxcount conditions are satisfied.
      int docFreq = termsEnum.docFreq();
      if (docFreq >= freqmin && docFreq <= freqmax) {
        // add the term to the list
        if (sort) {
          queue.add(new CountPair<>(BytesRef.deepCopyOf(term), docFreq));
        } else {
          // TODO: handle raw somehow
          if (!externalized) {
            ft.indexedToReadable(term, external);
          }
          fieldTerms.add(external.toString(), docFreq);
          i++;
        }
      }

      term = termsEnum.next();
    }

    if (sort) {
      for (CountPair<BytesRef, Integer> item : queue) {
        if (i >= limit) break;
        ft.indexedToReadable(item.key, external);
        fieldTerms.add(external.toString(), item.val);
        i++;
      }
    }
  }
}
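// A hedged SolrJ sketch of a terms request handled by the component above; the terms.*
// names are the stock parameters it reads, while the URL, collection, field, and prefix
// are illustrative assumptions.
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;

public class TermsRequestSketch {
  public static void main(String[] args) throws Exception {
    try (SolrClient client =
        new HttpSolrClient.Builder("http://localhost:8983/solr/techproducts").build()) {
      SolrQuery q = new SolrQuery();
      q.setRequestHandler("/terms");
      q.set("terms", true);
      q.set("terms.fl", "name");
      q.set("terms.prefix", "ba"); // enumeration stops once the prefix no longer matches
      q.set("terms.limit", 20);
      q.set("terms.sort", "count"); // "index" disables the count-sorted bounded queue
      QueryResponse rsp = client.query(q);
      System.out.println(rsp.getTermsResponse().getTermMap());
    }
  }
}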
/** @since solr 1.3 */
void processDelete(UpdateRequestProcessor processor, XMLStreamReader parser, SolrParams params)
    throws XMLStreamException, IOException {
  // Parse the command
  DeleteUpdateCommand deleteCmd = new DeleteUpdateCommand();
  deleteCmd.fromPending = true;
  deleteCmd.fromCommitted = true;

  Boolean updateStore = params.getBool(UpdateParams.UPDATE_STORE);
  if (updateStore != null) {
    deleteCmd.setUpdateStore(updateStore.booleanValue());
  }

  for (int i = 0; i < parser.getAttributeCount(); i++) {
    String attrName = parser.getAttributeLocalName(i);
    String attrVal = parser.getAttributeValue(i);
    if ("fromPending".equals(attrName)) {
      deleteCmd.fromPending = StrUtils.parseBoolean(attrVal);
    } else if ("fromCommitted".equals(attrName)) {
      deleteCmd.fromCommitted = StrUtils.parseBoolean(attrVal);
    } else {
      XmlUpdateRequestHandler.log.warn("unexpected attribute delete/@" + attrName);
    }
  }

  StringBuilder text = new StringBuilder();
  while (true) {
    int event = parser.next();
    switch (event) {
      case XMLStreamConstants.START_ELEMENT:
        String mode = parser.getLocalName();
        if (!("id".equals(mode) || "query".equals(mode))) {
          XmlUpdateRequestHandler.log.warn("unexpected XML tag /delete/" + mode);
          throw new SolrException(
              SolrException.ErrorCode.BAD_REQUEST, "unexpected XML tag /delete/" + mode);
        }
        text.setLength(0);
        break;

      case XMLStreamConstants.END_ELEMENT:
        String currTag = parser.getLocalName();
        if ("id".equals(currTag)) {
          deleteCmd.id = text.toString();
        } else if ("query".equals(currTag)) {
          deleteCmd.query = text.toString();
        } else if ("delete".equals(currTag)) {
          return;
        } else {
          XmlUpdateRequestHandler.log.warn("unexpected XML tag /delete/" + currTag);
          throw new SolrException(
              SolrException.ErrorCode.BAD_REQUEST, "unexpected XML tag /delete/" + currTag);
        }
        processor.processDelete(deleteCmd);
        deleteCmd.id = null;
        deleteCmd.query = null;
        break;

      // Add everything to the text
      case XMLStreamConstants.SPACE:
      case XMLStreamConstants.CDATA:
      case XMLStreamConstants.CHARACTERS:
        text.append(parser.getText());
        break;
    }
  }
}
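// An illustrative delete message in the XML shape processDelete() accepts: <id> and/or
// <query> children under <delete>, each dispatched as its own DeleteUpdateCommand when
// its closing tag is reached. The id and query values are assumptions for illustration.
public class DeleteXmlExample {
  public static final String DELETE_XML =
      "<delete fromPending=\"true\">"
          + "<id>SP2514N</id>"
          + "<query>category:discontinued</query>"
          + "</delete>";
}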
@Override
public void load(
    SolrQueryRequest req,
    SolrQueryResponse rsp,
    ContentStream stream,
    UpdateRequestProcessor processor)
    throws Exception {
  Parser parser = null;
  String streamType = req.getParams().get(ExtractingParams.STREAM_TYPE, null);
  if (streamType != null) {
    // Cache? Parsers are lightweight to construct and thread-safe, so I'm told
    MediaType mt = MediaType.parse(streamType.trim().toLowerCase(Locale.ROOT));
    parser = new DefaultParser(config.getMediaTypeRegistry()).getParsers().get(mt);
  } else {
    parser = autoDetectParser;
  }
  if (parser != null) {
    Metadata metadata = new Metadata();

    // If you specify the resource name (the filename, roughly) with this parameter,
    // then Tika can use it to help guess the appropriate MIME type:
    String resourceName = req.getParams().get(ExtractingParams.RESOURCE_NAME, null);
    if (resourceName != null) {
      metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, resourceName);
    }
    // Provide the stream's content type as a hint for auto detection
    if (stream.getContentType() != null) {
      metadata.add(HttpHeaders.CONTENT_TYPE, stream.getContentType());
    }

    InputStream inputStream = null;
    try {
      inputStream = stream.getStream();
      metadata.add(ExtractingMetadataConstants.STREAM_NAME, stream.getName());
      metadata.add(ExtractingMetadataConstants.STREAM_SOURCE_INFO, stream.getSourceInfo());
      metadata.add(ExtractingMetadataConstants.STREAM_SIZE, String.valueOf(stream.getSize()));
      metadata.add(ExtractingMetadataConstants.STREAM_CONTENT_TYPE, stream.getContentType());
      // HtmlParser and TXTParser honor Metadata.CONTENT_ENCODING in the metadata
      String charset = ContentStreamBase.getCharsetFromContentType(stream.getContentType());
      if (charset != null) {
        metadata.add(HttpHeaders.CONTENT_ENCODING, charset);
      }

      String xpathExpr = params.get(ExtractingParams.XPATH_EXPRESSION);
      boolean extractOnly = params.getBool(ExtractingParams.EXTRACT_ONLY, false);
      SolrContentHandler handler =
          factory.createSolrContentHandler(metadata, params, req.getSchema());
      ContentHandler parsingHandler = handler;

      StringWriter writer = null;
      BaseMarkupSerializer serializer = null;
      if (extractOnly) {
        String extractFormat = params.get(ExtractingParams.EXTRACT_FORMAT, "xml");
        writer = new StringWriter();
        if (extractFormat.equals(TEXT_FORMAT)) {
          serializer = new TextSerializer();
          serializer.setOutputCharStream(writer);
          serializer.setOutputFormat(new OutputFormat("Text", "UTF-8", true));
        } else {
          serializer = new XMLSerializer(writer, new OutputFormat("XML", "UTF-8", true));
        }
        if (xpathExpr != null) {
          Matcher matcher = PARSER.parse(xpathExpr);
          // The MatchingContentHandler does not invoke startDocument. See
          // http://tika.markmail.org/message/kknu3hw7argwiqin
          serializer.startDocument();
          parsingHandler = new MatchingContentHandler(serializer, matcher);
        } else {
          parsingHandler = serializer;
        }
      } else if (xpathExpr != null) {
        Matcher matcher = PARSER.parse(xpathExpr);
        parsingHandler = new MatchingContentHandler(handler, matcher);
      } // else leave it as is

      try {
        // potentially use a wrapper handler for parsing, but we still need the
        // SolrContentHandler for getting the document.
        ParseContext context = parseContextConfig.create();
        context.set(Parser.class, parser);
        context.set(HtmlMapper.class, MostlyPassthroughHtmlMapper.INSTANCE);

        // Password handling
        RegexRulesPasswordProvider epp = new RegexRulesPasswordProvider();
        String pwMapFile = params.get(ExtractingParams.PASSWORD_MAP_FILE);
        if (pwMapFile != null && pwMapFile.length() > 0) {
          InputStream is = req.getCore().getResourceLoader().openResource(pwMapFile);
          if (is != null) {
            log.debug("Password file supplied: " + pwMapFile);
            epp.parse(is);
          }
        }
        context.set(PasswordProvider.class, epp);
        String resourcePassword = params.get(ExtractingParams.RESOURCE_PASSWORD);
        if (resourcePassword != null) {
          epp.setExplicitPassword(resourcePassword);
          log.debug("Literal password supplied for file " + resourceName);
        }
        parser.parse(inputStream, parsingHandler, metadata, context);
      } catch (TikaException e) {
        if (ignoreTikaException)
          log.warn(
              new StringBuilder("skip extracting text due to ")
                  .append(e.getLocalizedMessage())
                  .append(". metadata=")
                  .append(metadata.toString())
                  .toString());
        else throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
      }
      if (!extractOnly) {
        addDoc(handler);
      } else {
        // serializer is non-null when extractOnly is true; if an xpath was supplied,
        // we still need to call endDocument() on it
        if (xpathExpr != null) {
          serializer.endDocument();
        }
        rsp.add(stream.getName(), writer.toString());
        writer.close();
        String[] names = metadata.names();
        NamedList metadataNL = new NamedList();
        for (int i = 0; i < names.length; i++) {
          String[] vals = metadata.getValues(names[i]);
          metadataNL.add(names[i], vals);
        }
        rsp.add(stream.getName() + "_metadata", metadataNL);
      }
    } catch (SAXException e) {
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
    } finally {
      IOUtils.closeQuietly(inputStream);
    }
  } else {
    throw new SolrException(
        SolrException.ErrorCode.BAD_REQUEST,
        "Stream type of "
            + streamType
            + " didn't match any known parsers. Please supply the "
            + ExtractingParams.STREAM_TYPE
            + " parameter.");
  }
}
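// A hedged SolrJ sketch of an extract-only request against the handler backed by this
// loader; /update/extract, the URL, and the file are illustrative assumptions.
import java.io.File;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;

public class ExtractOnlySketch {
  public static void main(String[] args) throws Exception {
    try (SolrClient client =
        new HttpSolrClient.Builder("http://localhost:8983/solr/techproducts").build()) {
      ContentStreamUpdateRequest req = new ContentStreamUpdateRequest("/update/extract");
      req.addFile(new File("report.pdf"), "application/pdf");
      req.setParam("resource.name", "report.pdf"); // helps Tika guess the MIME type
      req.setParam("extractOnly", "true");         // return extracted content, don't index
      req.setParam("extractFormat", "text");       // "xml" is the default above
      System.out.println(client.request(req));
    }
  }
}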
@Override
public void process(ResponseBuilder rb) throws IOException {
  SolrParams params = rb.req.getParams();
  if (params.getBool(TermsParams.TERMS, false)) {
    String lowerStr = params.get(TermsParams.TERMS_LOWER, null);
    String[] fields = params.getParams(TermsParams.TERMS_FIELD);
    if (fields != null && fields.length > 0) {
      NamedList terms = new SimpleOrderedMap();
      rb.rsp.add("terms", terms);
      int limit = params.getInt(TermsParams.TERMS_LIMIT, 10);
      if (limit < 0) {
        limit = Integer.MAX_VALUE;
      }
      String upperStr = params.get(TermsParams.TERMS_UPPER);
      boolean upperIncl = params.getBool(TermsParams.TERMS_UPPER_INCLUSIVE, false);
      boolean lowerIncl = params.getBool(TermsParams.TERMS_LOWER_INCLUSIVE, true);
      boolean sort =
          !TermsParams.TERMS_SORT_INDEX.equals(
              params.get(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_COUNT));
      int freqmin = params.getInt(TermsParams.TERMS_MINCOUNT, 1);
      int freqmax = params.getInt(TermsParams.TERMS_MAXCOUNT, UNLIMITED_MAX_COUNT);
      if (freqmax < 0) {
        freqmax = Integer.MAX_VALUE;
      }
      String prefix = params.get(TermsParams.TERMS_PREFIX_STR);
      String regexp = params.get(TermsParams.TERMS_REGEXP_STR);
      Pattern pattern =
          regexp != null ? Pattern.compile(regexp, resolveRegexpFlags(params)) : null;

      boolean raw = params.getBool(TermsParams.TERMS_RAW, false);
      for (int j = 0; j < fields.length; j++) {
        String field = StringHelper.intern(fields[j]);
        FieldType ft = raw ? null : rb.req.getSchema().getFieldTypeNoEx(field);
        if (ft == null) ft = new StrField();

        // If no lower bound was specified, use the prefix
        String lower = lowerStr == null ? prefix : (raw ? lowerStr : ft.toInternal(lowerStr));
        if (lower == null) lower = "";
        String upper = upperStr == null ? null : (raw ? upperStr : ft.toInternal(upperStr));

        Term lowerTerm = new Term(field, lower);
        Term upperTerm = upper == null ? null : new Term(field, upper);

        // the enum comes back positioned and ready to go
        TermEnum termEnum = rb.req.getSearcher().getReader().terms(lowerTerm);
        int i = 0;
        BoundedTreeSet<CountPair<String, Integer>> queue =
            (sort ? new BoundedTreeSet<CountPair<String, Integer>>(limit) : null);
        NamedList fieldTerms = new NamedList();
        terms.add(field, fieldTerms);
        Term lowerTestTerm = termEnum.term();

        // Only advance the enum if we are excluding the lower bound and the lower Term
        // actually matches
        if (lowerTestTerm != null
            && !lowerIncl
            && lowerTestTerm.field() == field // intern'd comparison
            && lowerTestTerm.text().equals(lower)) {
          termEnum.next();
        }

        while (i < limit || sort) {
          Term theTerm = termEnum.term();

          // check for a different field, or the end of the index.
          if (theTerm == null || field != theTerm.field()) // intern'd comparison
            break;

          String indexedText = theTerm.text();

          // stop if the prefix doesn't match
          if (prefix != null && !indexedText.startsWith(prefix)) break;

          if (pattern != null && !pattern.matcher(indexedText).matches()) {
            termEnum.next();
            continue;
          }

          if (upperTerm != null) {
            int upperCmp = theTerm.compareTo(upperTerm);
            // if we are past the upper term, or equal to it (when don't include upper) then
            // stop.
            if (upperCmp > 0 || (upperCmp == 0 && !upperIncl)) break;
          }

          // This is a good term in the range. Check if mincount/maxcount conditions are
          // satisfied.
          int docFreq = termEnum.docFreq();
          if (docFreq >= freqmin && docFreq <= freqmax) {
            // add the term to the list
            String label = raw ? indexedText : ft.indexedToReadable(indexedText);
            if (sort) {
              queue.add(new CountPair<String, Integer>(label, docFreq));
            } else {
              fieldTerms.add(label, docFreq);
              i++;
            }
          }
          termEnum.next();
        }

        termEnum.close();

        if (sort) {
          for (CountPair<String, Integer> item : queue) {
            if (i < limit) {
              fieldTerms.add(item.key, item.val);
              i++;
            } else {
              break;
            }
          }
        }
      }
    } else {
      throw new SolrException(
          SolrException.ErrorCode.BAD_REQUEST, "No terms.fl parameter specified");
    }
  }
}
@Override
public void process(ResponseBuilder rb) throws IOException {
  SolrParams params = rb.req.getParams();
  if (!params.getBool(COMPONENT_NAME, false)) {
    return;
  }

  NamedList<Object> termVectors = new NamedList<Object>();
  rb.rsp.add(TERM_VECTORS, termVectors);

  IndexSchema schema = rb.req.getSchema();
  SchemaField keyField = schema.getUniqueKeyField();
  String uniqFieldName = null;
  if (keyField != null) {
    uniqFieldName = keyField.getName();
    termVectors.add("uniqueKeyFieldName", uniqFieldName);
  }

  FieldOptions allFields = new FieldOptions();
  // figure out what options we have, and try to get the appropriate vector
  allFields.termFreq = params.getBool(TermVectorParams.TF, false);
  allFields.positions = params.getBool(TermVectorParams.POSITIONS, false);
  allFields.offsets = params.getBool(TermVectorParams.OFFSETS, false);
  allFields.docFreq = params.getBool(TermVectorParams.DF, false);
  allFields.tfIdf = params.getBool(TermVectorParams.TF_IDF, false);
  // boolean cacheIdf = params.getBool(TermVectorParams.IDF, false);
  // shortcut to all values.
  if (params.getBool(TermVectorParams.ALL, false)) {
    allFields.termFreq = true;
    allFields.positions = true;
    allFields.offsets = true;
    allFields.docFreq = true;
    allFields.tfIdf = true;
  }

  // Build up our per-field mapping
  Map<String, FieldOptions> fieldOptions = new HashMap<String, FieldOptions>();
  NamedList<List<String>> warnings = new NamedList<List<String>>();
  List<String> noTV = new ArrayList<String>();
  List<String> noPos = new ArrayList<String>();
  List<String> noOff = new ArrayList<String>();

  Set<String> fields = getFields(rb);
  if (null != fields) {
    // we have specific fields to retrieve, or no fields
    for (String field : fields) {

      // workaround for SOLR-3523
      if (null == field || "score".equals(field)) continue;

      // we don't want to issue warnings about the uniqueKey field
      // since it can cause lots of confusion in distributed requests
      // where the uniqueKey field is injected into the fl for merging
      final boolean fieldIsUniqueKey = field.equals(uniqFieldName);

      SchemaField sf = schema.getFieldOrNull(field);
      if (sf != null) {
        if (sf.storeTermVector()) {
          FieldOptions option = fieldOptions.get(field);
          if (option == null) {
            option = new FieldOptions();
            option.fieldName = field;
            fieldOptions.put(field, option);
          }

          // get the per-field mappings
          option.termFreq = params.getFieldBool(field, TermVectorParams.TF, allFields.termFreq);
          option.docFreq = params.getFieldBool(field, TermVectorParams.DF, allFields.docFreq);
          option.tfIdf = params.getFieldBool(field, TermVectorParams.TF_IDF, allFields.tfIdf);
          // Validate these are even an option
          option.positions =
              params.getFieldBool(field, TermVectorParams.POSITIONS, allFields.positions);
          if (option.positions && !sf.storeTermPositions() && !fieldIsUniqueKey) {
            noPos.add(field);
          }
          option.offsets =
              params.getFieldBool(field, TermVectorParams.OFFSETS, allFields.offsets);
          if (option.offsets && !sf.storeTermOffsets() && !fieldIsUniqueKey) {
            noOff.add(field);
          }
        } else { // field doesn't have term vectors
          if (!fieldIsUniqueKey) noTV.add(field);
        }
      } else {
        // field doesn't exist
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "undefined field: " + field);
      }
    }
  } // else, deal with all fields

  // NOTE: currently all types of warnings are schema-driven, and guaranteed
  // to be consistent across all shards - if additional types of warnings
  // are added that might differ between shards, finishStage() needs
  // to be changed to account for that.
  boolean hasWarnings = false;
  if (!noTV.isEmpty()) {
    warnings.add("noTermVectors", noTV);
    hasWarnings = true;
  }
  if (!noPos.isEmpty()) {
    warnings.add("noPositions", noPos);
    hasWarnings = true;
  }
  if (!noOff.isEmpty()) {
    warnings.add("noOffsets", noOff);
    hasWarnings = true;
  }
  if (hasWarnings) {
    termVectors.add("warnings", warnings);
  }

  DocListAndSet listAndSet = rb.getResults();
  List<Integer> docIds = getInts(params.getParams(TermVectorParams.DOC_IDS));
  Iterator<Integer> iter;
  if (docIds != null && !docIds.isEmpty()) {
    iter = docIds.iterator();
  } else {
    DocList list = listAndSet.docList;
    iter = list.iterator();
  }
  SolrIndexSearcher searcher = rb.req.getSearcher();

  IndexReader reader = searcher.getIndexReader();
  // the TVMapper is a TermVectorMapper which can be used to optimize loading of Term Vectors

  // Only load the id field to get the uniqueKey of that field
  final String finalUniqFieldName = uniqFieldName;

  final List<String> uniqValues = new ArrayList<String>();

  // TODO: is this required to be single-valued? if so, we should STOP
  // once we find it...
  final StoredFieldVisitor getUniqValue =
      new StoredFieldVisitor() {
        @Override
        public void stringField(FieldInfo fieldInfo, String value) {
          uniqValues.add(value);
        }

        @Override
        public void intField(FieldInfo fieldInfo, int value) {
          uniqValues.add(Integer.toString(value));
        }

        @Override
        public void longField(FieldInfo fieldInfo, long value) {
          uniqValues.add(Long.toString(value));
        }

        @Override
        public Status needsField(FieldInfo fieldInfo) {
          return (fieldInfo.name.equals(finalUniqFieldName)) ? Status.YES : Status.NO;
        }
      };

  TermsEnum termsEnum = null;

  while (iter.hasNext()) {
    Integer docId = iter.next();
    NamedList<Object> docNL = new NamedList<Object>();

    if (keyField != null) {
      reader.document(docId, getUniqValue);
      String uniqVal = null;
      if (uniqValues.size() != 0) {
        uniqVal = uniqValues.get(0);
        uniqValues.clear();
        docNL.add("uniqueKey", uniqVal);
        termVectors.add(uniqVal, docNL);
      }
    } else {
      // support for schemas w/o a unique key
      termVectors.add("doc-" + docId, docNL);
    }

    if (null != fields) {
      for (Map.Entry<String, FieldOptions> entry : fieldOptions.entrySet()) {
        final String field = entry.getKey();
        final Terms vector = reader.getTermVector(docId, field);
        if (vector != null) {
          termsEnum = vector.iterator(termsEnum);
          mapOneVector(docNL, entry.getValue(), reader, docId, termsEnum, field);
        }
      }
    } else {
      // extract all fields
      final Fields vectors = reader.getTermVectors(docId);
      for (String field : vectors) {
        Terms terms = vectors.terms(field);
        if (terms != null) {
          termsEnum = terms.iterator(termsEnum);
          mapOneVector(docNL, allFields, reader, docId, termsEnum, field);
        }
      }
    }
  }
}
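// A hedged SolrJ sketch of a request that exercises TermVectorComponent; the tv.* names
// are the stock parameters read above, while /tvrh, the URL, collection, and field are
// illustrative assumptions.
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;

public class TermVectorSketch {
  public static void main(String[] args) throws Exception {
    try (SolrClient client =
        new HttpSolrClient.Builder("http://localhost:8983/solr/techproducts").build()) {
      SolrQuery q = new SolrQuery("*:*");
      q.setRows(1);
      q.setRequestHandler("/tvrh");
      q.set("tv", true);
      q.set("tv.all", true);      // shorthand for tf, positions, offsets, df, tf_idf
      q.set("tv.fl", "includes"); // restrict to a field that stores term vectors
      QueryResponse rsp = client.query(q);
      System.out.println(rsp.getResponse().get("termVectors"));
    }
  }
}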