public void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { response.setContentType(XML_RESPONSE_HEADER); // Talkback happens in XML form. response.setCharacterEncoding("UTF-8"); // Unicode++ request.setCharacterEncoding("UTF-8"); PrintWriter out = null; // The talkback buffer. // handle startrecord Integer startRecord = 0; if (!(request.getParameter("startRecord") == null)) { try { startRecord = Integer.parseInt(request.getParameter("startRecord")) - 1; } catch (NumberFormatException e) { startRecord = 0; } } // maximumrecords Integer maximumRecords = Integer.parseInt(this.config.getProperty("default_maximumRecords")); if (!(request.getParameter("maximumRecords") == null)) { maximumRecords = Integer.parseInt(request.getParameter("maximumRecords")); } // operation String operation = request.getParameter("operation"); // x_collection String x_collection = request.getParameter("x-collection"); if (x_collection == null) x_collection = this.config.getProperty("default_x_collection"); if (x_collection == null) operation = null; // sortkeys String sortKeys = request.getParameter("sortKeys"); // sortorder String sortOrder = request.getParameter("sortOrder"); // recordschema String recordSchema = request.getParameter("recordSchema"); if (recordSchema == null) recordSchema = "dc"; if (recordSchema.equalsIgnoreCase("dcx")) { recordSchema = "dcx"; } if (recordSchema.equalsIgnoreCase("solr")) { recordSchema = "solr"; } // query request String query = request.getParameter("query"); String q = request.getParameter("q"); // who is requestor ? String remote_ip = request.getHeader("X-FORWARDED-FOR"); if (remote_ip == null) { remote_ip = request.getRemoteAddr().trim(); } else { remote_ip = request.getHeader("X-FORWARDED-FOR"); } // handle debug Boolean debug = Boolean.parseBoolean(request.getParameter("debug")); if (!debug) { out = new PrintWriter(new OutputStreamWriter(response.getOutputStream(), "UTF8"), true); } // handle query if ((query == null) && (q != null)) { query = q; } else { if ((query != null) && (q == null)) { q = query; } else { operation = null; } } // handle operation if (operation == null) { if (query != null) { operation = "searchRetrieve"; } else { operation = "explain"; } } // searchRetrieve if (operation.equalsIgnoreCase("searchRetrieve")) { if (query == null) { operation = "explain"; log.debug(operation + ":" + query); } } // start talking back. String[] sq = {""}; String solrquery = ""; // facet String facet = null; List<FacetField> fct = null; if (request.getParameter("facet") != null) { facet = request.getParameter("facet"); log.debug("facet : " + facet); } if (operation == null) { operation = "searchretrieve"; } else { // explain response if (operation.equalsIgnoreCase("explain")) { log.debug("operation = explain"); out.write("<srw:explainResponse xmlns:srw=\"http://www.loc.gov/zing/srw/\">"); out.write("</srw:explainResponse>"); } else { // DEBUG routine operation = "searchretrieve"; String triplequery = null; if (query.matches(".*?\\[.+?\\].*?")) { // New symantic syntax triplequery = symantic_query(query); query = query.split("\\[")[0] + " " + triplequery; log.fatal(triplequery); solrquery = CQLtoLucene.translate(query, log, config); } else { solrquery = CQLtoLucene.translate(query, log, config); } log.debug(solrquery); if (debug == true) { response.setContentType(HTML_RESPONSE_HEADER); out = new PrintWriter(new OutputStreamWriter(response.getOutputStream(), "UTF8"), true); out.write("<html><body>\n\n"); out.write("'" + remote_ip + "'<br>\n"); out.write("<form action='http://www.kbresearch.nl/kbSRU'>"); out.write("<input type=text name=q value='" + query + "' size=120>"); out.write("<input type=hidden name=debug value=True>"); out.write("<input type=submit>"); out.write("<table border=1><tr><td>"); out.write("q</td><td>" + query + "</td></tr><tr>"); out.write("<td>query out</td><td>" + URLDecoder.decode(solrquery) + "</td></tr>"); out.write( "<tr><td>SOLR_URL</td><td> <a href='" + this.config.getProperty( "collection." + x_collection.toLowerCase() + ".solr_baseurl") + "/?q=" + solrquery + "'>" + this.config.getProperty( "collection." + x_collection.toLowerCase() + ".solr_baseurl") + "/select/?q=" + solrquery + "</a><br>" + this.config.getProperty("solr_url") + solrquery + "</td></tr>"); out.write( "<b>SOLR_QUERY</b> : <BR> <iframe width=900 height=400 src='" + this.config.getProperty( "collection." + x_collection.toLowerCase() + ".solr_baseurl") + "/../?q=" + solrquery + "'></iframe><BR>"); out.write( "<b>SRU_QUERY</b> : <BR> <a href=" + this.config.getProperty("baseurl") + "?q=" + query + "'>" + this.config.getProperty("baseurl") + "?q=" + query + "</a><br><iframe width=901 height=400 src='http://www.kbresearch.nl/kbSRU/?q=" + query + "'></iframe><BR>"); out.write( "<br><b>JSRU_QUERY</b> : <BR><a href='http://jsru.kb.nl/sru/?query=" + query + "&x-collection=" + x_collection + "'>http://jsru.kb.nl/sru/?query=" + query + "&x-collection=GGC</a><br><iframe width=900 height=400 src='http://jsru.kb.nl/sru/?query=" + query + "&x-collection=GGC'></iframe>"); } else { // XML SearchRetrieve response String url = this.config.getProperty("collection." + x_collection.toLowerCase() + ".solr_baseurl"); String buffer = ""; CommonsHttpSolrServer server = null; server = new CommonsHttpSolrServer(url); log.fatal("URSING " + url); server.setParser(new XMLResponseParser()); int numfound = 0; try { SolrQuery do_query = new SolrQuery(); do_query.setQuery(solrquery); do_query.setRows(maximumRecords); do_query.setStart(startRecord); if ((sortKeys != null) && (sortKeys.length() > 1)) { if (sortOrder != null) { if (sortOrder.equals("asc")) { do_query.setSortField(sortKeys, SolrQuery.ORDER.asc); } if (sortOrder.equals("desc")) { do_query.setSortField(sortKeys, SolrQuery.ORDER.desc); } } else { for (String str : sortKeys.trim().split(",")) { str = str.trim(); if (str.length() > 1) { if (str.equals("date")) { do_query.setSortField("date_date", SolrQuery.ORDER.desc); log.debug("SORTORDERDEBUG | DATE! " + str + " | "); break; } else { do_query.setSortField(str + "_str", SolrQuery.ORDER.asc); log.debug("SORTORDERDEBUG | " + str + " | "); break; } } } } } if (facet != null) { if (facet.indexOf(",") > 1) { for (String str : facet.split(",")) { if (str.indexOf("date") > 1) { do_query.addFacetField(str); } else { do_query.addFacetField(str); } // do_query.setParam("facet.method", "enum"); } // q.setFacetSort(false); } else { do_query.addFacetField(facet); } do_query.setFacet(true); do_query.setFacetMinCount(1); do_query.setFacetLimit(-1); } log.fatal(solrquery); QueryResponse rsp = null; boolean do_err = false; boolean do_sugg = false; SolrDocumentList sdl = null; String diag = ""; StringBuffer suggest = new StringBuffer(""); String content = "1"; SolrQuery spellq = do_query; try { rsp = server.query(do_query); } catch (SolrServerException e) { String header = this.SRW_HEADER.replaceAll("\\$numberOfRecords", "0"); out.write(header); diag = this.SRW_DIAG.replaceAll("\\$error", e.getMessage()); do_err = true; rsp = null; } log.fatal("query done.."); if (!(do_err)) { // XML dc response SolrDocumentList docs = rsp.getResults(); numfound = (int) docs.getNumFound(); int count = startRecord; String header = this.SRW_HEADER.replaceAll("\\$numberOfRecords", Integer.toString(numfound)); out.write(header); out.write("<srw:records>"); Iterator<SolrDocument> iter = rsp.getResults().iterator(); while (iter.hasNext()) { count += 1; if (recordSchema.equalsIgnoreCase("dc")) { SolrDocument resultDoc = iter.next(); content = (String) resultDoc.getFieldValue("id"); out.write("<srw:record>"); out.write("<srw:recordPacking>xml</srw:recordPacking>"); out.write("<srw:recordSchema>info:srw/schema/1/dc-v1.1</srw:recordSchema>"); out.write( "<srw:recordData xmlns:srw_dc=\"info:srw/schema/1/dc-v1.1\" xmlns:mods=\"http://www.loc.gov/mods\" xmlns:dcterms=\"http://purl.org/dc/terms/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xmlns:dcx=\"http://krait.kb.nl/coop/tel/handbook/telterms.html\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:marcrel=\"http://www.loc.gov/loc.terms/relators/OTH\" xmlns:facets=\"info:srw/extension/4/facets\" >"); StringBuffer result = new StringBuffer(""); construct_lucene_dc(result, resultDoc); out.write(result.toString()); out.write("</srw:recordData>"); out.write( "<srw:recordPosition>" + Integer.toString(count) + "</srw:recordPosition>"); out.write("</srw:record>"); } if (recordSchema.equalsIgnoreCase("solr")) { SolrDocument resultDoc = iter.next(); content = (String) resultDoc.getFieldValue("id"); out.write("<srw:record>"); out.write("<srw:recordPacking>xml</srw:recordPacking>"); out.write("<srw:recordSchema>info:srw/schema/1/solr</srw:recordSchema>"); out.write("<srw:recordData xmlns:expand=\"http://www.kbresearch.nl/expand\">"); StringBuffer result = new StringBuffer(""); construct_lucene_solr(result, resultDoc); out.write(result.toString()); out.write("</srw:recordData>"); out.write( "<srw:recordPosition>" + Integer.toString(count) + "</srw:recordPosition>"); out.write("</srw:record>"); } if (recordSchema.equalsIgnoreCase("dcx")) { // XML dcx response out.write("<srw:record>"); out.write("<srw:recordPacking>xml</srw:recordPacking>"); out.write("<srw:recordSchema>info:srw/schema/1/dc-v1.1</srw:recordSchema>"); out.write( "<srw:recordData><srw_dc:dc xmlns:srw_dc=\"info:srw/schema/1/dc-v1.1\" xmlns:mods=\"http://www.loc.gov/mods\" xmlns:dcterms=\"http://purl.org/dc/terms/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xmlns:dcx=\"http://krait.kb.nl/coop/tel/handbook/telterms.html\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:marcrel=\"http://www.loc.gov/marc.relators/\" xmlns:expand=\"http://www.kbresearch.nl/expand\" xmlns:skos=\"http://www.w3.org/2004/02/skos/core#\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\" >"); SolrDocument resultDoc = iter.next(); content = (String) resultDoc.getFieldValue("id"); String dcx_data = helpers.getOAIdcx( "http://services.kb.nl/mdo/oai?verb=GetRecord&identifier=" + content, log); if (x_collection.equalsIgnoreCase("ggc-thes")) { dcx_data = helpers.getOAIdcx( "http://serviceso.kb.nl/mdo/oai?verb=GetRecord&identifier=" + content, log); } if (!(dcx_data.length() == 0)) { out.write(dcx_data); } else { // Should not do this!! out.write("<srw:record>"); out.write("<srw:recordPacking>xml</srw:recordPacking>"); out.write("<srw:recordSchema>info:srw/schema/1/dc-v1.1</srw:recordSchema>"); out.write( "<srw:recordData xmlns:srw_dc=\"info:srw/schema/1/dc-v1.1\" xmlns:mods=\"http://www.loc.gov/mods\" xmlns:dcterms=\"http://purl.org/dc/terms/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xmlns:dcx=\"http://krait.kb.nl/coop/tel/handbook/telterms.html\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:marcrel=\"http://www.loc.gov/loc.terms/relators/OTH\" >"); StringBuffer result = new StringBuffer(""); construct_lucene_dc(result, resultDoc); out.write(result.toString()); out.write("</srw:recordData>"); out.write( "<srw:recordPosition>" + Integer.toString(count) + "</srw:recordPosition>"); out.write("</srw:record>"); } out.write("</srw_dc:dc>"); StringBuffer expand_data; boolean expand = false; if (content.startsWith("GGC-THES:AC:")) { String tmp_content = ""; tmp_content = content.replaceFirst("GGC-THES:AC:", ""); log.fatal("calling get"); expand_data = new StringBuffer( helpers.getExpand( "http://www.kbresearch.nl/general/lod_new/get/" + tmp_content + "?format=rdf", log)); log.fatal("get finini"); if (expand_data.toString().length() > 4) { out.write( "<srw_dc:expand xmlns:srw_dc=\"info:srw/schema/1/dc-v1.1\" xmlns:expand=\"http://www.kbresearch.nl/expand\" xmlns:skos=\"http://www.w3.org/2004/02/skos/core#\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\" >"); out.write(expand_data.toString()); expand = true; } } else { expand_data = new StringBuffer( helpers.getExpand( "http://www.kbresearch.nl/ANP.cgi?q=" + content, log)); if (expand_data.toString().length() > 0) { if (!expand) { out.write( "<srw_dc:expand xmlns:srw_dc=\"info:srw/schema/1/dc-v1.1\" xmlns:expand=\"http://www.kbresearch.nl/expand\" xmlns:skos=\"http://www.w3.org/2004/02/skos/core#\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\" >"); expand = true; } out.write(expand_data.toString()); } } if (expand) { out.write("</srw_dc:expand>"); } out.write("</srw:recordData>"); out.write( "<srw:recordPosition>" + Integer.toString(count) + "</srw:recordPosition>"); out.write("</srw:record>"); } } } if ((do_err) || (numfound == 0)) { log.fatal("I haz suggestions"); try { spellq.setParam("spellcheck", true); spellq.setQueryType("/spell"); server = new CommonsHttpSolrServer(url); rsp = server.query(spellq); sdl = rsp.getResults(); SpellCheckResponse spell; spell = rsp.getSpellCheckResponse(); List<SpellCheckResponse.Suggestion> suggestions = spell.getSuggestions(); if (suggestions.isEmpty() == false) { suggest.append("<srw:extraResponseData>"); suggest.append("<suggestions>"); for (SpellCheckResponse.Suggestion sugg : suggestions) { suggest.append("<suggestionfor>" + sugg.getToken() + "</suggestionfor>"); for (String item : sugg.getSuggestions()) { suggest.append("<suggestion>" + item + "</suggestion>"); } suggest.append("</suggestions>"); suggest.append("</srw:extraResponseData>"); } do_sugg = true; } } catch (Exception e) { rsp = null; // log.fatal(e.toString()); } ; } ; if (!do_err) { if (facet != null) { try { fct = rsp.getFacetFields(); out.write("<srw:facets>"); for (String str : facet.split(",")) { out.write("<srw:facet>"); out.write("<srw:facetType>"); out.write(str); out.write("</srw:facetType>"); for (FacetField f : fct) { log.debug(f.getName()); // if (f.getName().equals(str+"_str") || (f.getName().equals(str+"_date")) ) { List<FacetField.Count> facetEnties = f.getValues(); for (FacetField.Count fcount : facetEnties) { out.write("<srw:facetValue>"); out.write("<srw:valueString>"); out.write(helpers.xmlEncode(fcount.getName())); out.write("</srw:valueString>"); out.write("<srw:count>"); out.write(Double.toString(fcount.getCount())); out.write("</srw:count>"); out.write("</srw:facetValue>"); // } } } out.write("</srw:facet>"); } out.write("</srw:facets>"); startRecord += 1; } catch (Exception e) { } // log.fatal(e.toString()); } } } else { out.write(diag); } out.write("</srw:records>"); // SearchRetrieve response footer String footer = this.SRW_FOOTER.replaceAll("\\$query", helpers.xmlEncode(query)); footer = footer.replaceAll("\\$startRecord", (startRecord).toString()); footer = footer.replaceAll("\\$maximumRecords", maximumRecords.toString()); footer = footer.replaceAll("\\$recordSchema", recordSchema); if (do_sugg) { out.write(suggest.toString()); } out.write(footer); } catch (MalformedURLException e) { out.write(e.getMessage()); } catch (IOException e) { out.write("TO ERR is Human"); } } } } out.close(); }
/** Integrate spelling suggestions from the various shards in a distributed environment. */ public SpellingResult mergeSuggestions( SpellCheckMergeData mergeData, int numSug, int count, boolean extendedResults) { float min = 0.5f; try { min = getAccuracy(); } catch (UnsupportedOperationException uoe) { // just use .5 as a default } StringDistance sd = null; try { sd = getStringDistance() == null ? new LevensteinDistance() : getStringDistance(); } catch (UnsupportedOperationException uoe) { sd = new LevensteinDistance(); } SpellingResult result = new SpellingResult(); for (Map.Entry<String, HashSet<String>> entry : mergeData.origVsSuggested.entrySet()) { String original = entry.getKey(); // Only use this suggestion if all shards reported it as misspelled, // unless it was not a term original to the user's query // (WordBreakSolrSpellChecker can add new terms to the response, and we want to keep these) Integer numShards = mergeData.origVsShards.get(original); if (numShards < mergeData.totalNumberShardResponses && mergeData.isOriginalToQuery(original)) { continue; } HashSet<String> suggested = entry.getValue(); SuggestWordQueue sugQueue = new SuggestWordQueue(numSug); for (String suggestion : suggested) { SuggestWord sug = mergeData.suggestedVsWord.get(suggestion); sug.score = sd.getDistance(original, sug.string); if (sug.score < min) continue; sugQueue.insertWithOverflow(sug); if (sugQueue.size() == numSug) { // if queue full, maintain the minScore score min = sugQueue.top().score; } } // create token SpellCheckResponse.Suggestion suggestion = mergeData.origVsSuggestion.get(original); Token token = new Token(original, suggestion.getStartOffset(), suggestion.getEndOffset()); // get top 'count' suggestions out of 'sugQueue.size()' candidates SuggestWord[] suggestions = new SuggestWord[Math.min(count, sugQueue.size())]; // skip the first sugQueue.size() - count elements for (int k = 0; k < sugQueue.size() - count; k++) sugQueue.pop(); // now collect the top 'count' responses for (int k = Math.min(count, sugQueue.size()) - 1; k >= 0; k--) { suggestions[k] = sugQueue.pop(); } if (extendedResults) { Integer o = mergeData.origVsFreq.get(original); if (o != null) result.addFrequency(token, o); for (SuggestWord word : suggestions) result.add(token, word.string, word.freq); } else { List<String> words = new ArrayList<>(sugQueue.size()); for (SuggestWord word : suggestions) words.add(word.string); result.add(token, words); } } return result; }