/**
 * The ActionListener implementation: validates the search text, optionally asks the user for an
 * output file, and then starts a {@code Searcher}.
 *
 * <p>A blank search is rejected when results are not being saved to a file and {@code fileLength}
 * exceeds 10,000,000. When "save as" is selected, the chooser is pre-populated with the previously
 * selected file renamed to a {@code .txt} extension. Note that dismissing the save dialog
 * terminates the JVM via {@code System.exit(0)}.
 *
 * @param event the event.
 */
public void actionPerformed(ActionEvent event) {
  String query = textField.getText().trim();
  boolean writeToFile = saveAs.isSelected();

  // Guard: refuse an unbounded on-screen dump of a large table.
  if (query.length() == 0 && !writeToFile && (fileLength > 10000000)) {
    textPane.setText("Blank search text is not allowed for large IdTables.");
    return;
  }

  File target = null;
  if (writeToFile) {
    target = chooser.getSelectedFile();
    if (target != null) {
      // Swap whatever extension the previous selection had for ".txt".
      String baseName = target.getName();
      int dot = baseName.lastIndexOf(".");
      if (dot != -1) {
        baseName = baseName.substring(0, dot);
      }
      File directory = target.getAbsoluteFile().getParentFile();
      target = new File(directory, baseName + ".txt");
      chooser.setSelectedFile(target);
    }

    int choice = chooser.showSaveDialog(this);
    if (choice != JFileChooser.APPROVE_OPTION) {
      System.exit(0);
    }
    target = chooser.getSelectedFile();
  }

  textPane.setText("");

  // The second constructor argument is true when the event came from the searchPHI control.
  boolean fromSearchPhi = event.getSource().equals(searchPHI);
  Searcher worker = new Searcher(query, fromSearchPhi, target);
  worker.start();
}
// QHandler handles incoming queries. public void run() { // hmmm.. this seems like potential bug. I want to check that query is not in table. But even // if query table contains key, // that does not necessarily mean it is in table, b/c two queries can have SAME HASHCODE VALUE. // I need to have some other means. // Will talk to Rusty @ this on Monday. if (!qt.containsKey(query)) // check that query is not already in table { Searcher.inform(query); // Give information to the Search Monitor panel NetworkManager.writeButOne(query.getIP(), query); /*Query is forwarded to all connected nodes except one from which query came. */ qt.put((Packet) query, query); // add query to table, indexed by its unique MessageID searchResult = SharedDirectory.search(query.getSearchString()); // check shared directory for query match numHits = searchResult.getSize(); if (numHits != 0) // package a query hit to send out if there is at least one query match { queryID = query.getMessageID(); port = Mine.getPort(); myIP = Mine.getIPAddress(); speed = Mine.getSpeed(); serventID = Mine.getServentIdentifier(); queryHit = new QueryHit(numHits, port, myIP, speed, searchResult, serventID, queryID); NetworkManager.writeToOne( query.getIP(), queryHit); // send qHit back to node that sent original query } } }
/**
 * Builds the help description for the requested task.
 *
 * <p>For the tasks {@code define}, {@code compare}, {@code search} and {@code wikify} the
 * description is delegated to the corresponding service object. For any other value (including
 * {@code null}) a general "Description" element is returned that links to the help page of each
 * task and documents the {@code task} and {@code help} parameters.
 *
 * @param task the value of the {@code task} request parameter; may be {@code null}
 * @return the description element
 */
private Element getDescription(String task) {
  // Constant-first equals() makes a null task fall through to the general description.
  if ("define".equals(task)) {
    return definer.getDescription();
  } else if ("compare".equals(task)) {
    return comparer.getDescription();
  } else if ("search".equals(task)) {
    return searcher.getDescription();
  } else if ("wikify".equals(task)) {
    return wikifier.getDescription();
  }

  String serviceName = context.getInitParameter("service_name");

  String overview =
      "<p>This servlet provides a range of services for mining information from Wikipedia. Further details depend on what you want to do.</p>"
          + "<p>You can <a href=\""
          + serviceName
          + "?task=search&help\">search for pages</a>, <a href=\""
          + serviceName
          + "?task=compare&help\">measure how terms or articles related to each other</a>, <a href=\""
          + serviceName
          + "?task=define&help\">obtain short definitions from articles</a>, and <a href=\""
          + serviceName
          + "?task=wikify&help\">detect topics in web pages</a>.</p>";

  Element root = doc.createElement("Description");
  root.appendChild(createElement("Details", overview));

  // Documentation for the "task" parameter.
  Element taskParam =
      createElement(
          "Parameter",
          "Specifies what you want to do: can be <em>search</em>, <em>compare</em>, <em>define</em>, or <em>wikify</em>");
  taskParam.setAttribute("name", "task");
  root.appendChild(taskParam);

  // Documentation for the "help" parameter.
  Element helpParam = doc.createElement("Parameter");
  helpParam.setAttribute("name", "help");
  helpParam.appendChild(doc.createTextNode("Specifies that you want help about the service."));
  root.appendChild(helpParam);

  return root;
}
/**
 * Makes sure a search is under way: reuses the current {@code Searcher} if its {@code wakeup()}
 * call returns true, otherwise installs a new one.
 *
 * <p>The check-and-replace runs while holding this object's monitor. Any {@code Throwable} raised
 * along the way is passed to {@code Debug.out} rather than propagated.
 *
 * @param persistent passed through to the {@code Searcher} constructor
 * @param async passed through to the {@code Searcher} constructor
 */
protected void search(boolean persistent, boolean async) {
  try {
    synchronized (this) {
      // Short-circuit keeps wakeup() from being called when there is no current searcher.
      boolean needsNewSearcher = (current_search == null) || !current_search.wakeup();
      if (needsNewSearcher) {
        current_search = new Searcher(persistent, async);
      }
    }
  } catch (Throwable e) {
    Debug.out(e);
  }
}
public void doGet(HttpServletRequest request, HttpServletResponse response) throws IOException, ServletException { try { response.setHeader("Cache-Control", "no-cache"); response.setCharacterEncoding("UTF-8"); String task = request.getParameter("task"); Element data = null; // process help request if (request.getParameter("help") != null) data = getDescription(task); // redirect to home page if there is no task if (data == null && task == null) { response.setContentType("text/html"); response .getWriter() .append( "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\"><html><head><meta http-equiv=\"REFRESH\" content=\"0;url=" + context.getInitParameter("server_path") + "></head><body></body></html>"); return; } // process definition request if (data == null && task.equals("define")) { int id = resolveIntegerArg(request.getParameter("id"), -1); int length = resolveIntegerArg(request.getParameter("length"), definer.getDefaultLength()); int format = resolveIntegerArg(request.getParameter("format"), definer.getDefaultFormat()); int maxImageWidth = resolveIntegerArg( request.getParameter("maxImageWidth"), definer.getDefaultMaxImageWidth()); int maxImageHeight = resolveIntegerArg( request.getParameter("maxImageHeight"), definer.getDefaultMaxImageHeight()); int linkDestination = resolveIntegerArg( request.getParameter("linkDestination"), definer.getDefaultLinkDestination()); boolean getImages = resolveBooleanArg(request.getParameter("getImages"), false); data = definer.getDefinition( id, length, format, linkDestination, getImages, maxImageWidth, maxImageHeight); } // all of the remaining tasks require data to be cached, so lets make sure that is finished // before continuing. if (!cachingThread.isOk()) throw new ServletException("Could not cache wikipedia data"); double progress = cachingThread.getProgress(); if (data == null && (progress < 1 || task.equals("progress"))) { // still caching up data, not ready to return a response yet. 
data = doc.createElement("loading"); data.setAttribute("progress", df.format(progress)); task = "loading"; } // process search request if (data == null && task.equals("search")) { String term = request.getParameter("term"); String id = request.getParameter("id"); int linkLimit = resolveIntegerArg(request.getParameter("linkLimit"), searcher.getDefaultMaxLinkCount()); int senseLimit = resolveIntegerArg( request.getParameter("senseLimit"), searcher.getDefaultMaxSenseCount()); if (id == null) data = searcher.doSearch(term, linkLimit, senseLimit); else data = searcher.doSearch(Integer.parseInt(id), linkLimit); } // process compare request if (data == null && task.equals("compare")) { String term1 = request.getParameter("term1"); String term2 = request.getParameter("term2"); int linkLimit = resolveIntegerArg(request.getParameter("linkLimit"), comparer.getDefaultMaxLinkCount()); boolean details = resolveBooleanArg(request.getParameter("details"), comparer.getDefaultShowDetails()); data = comparer.getRelatedness(term1, term2, details, linkLimit); } // process wikify request if (data == null && task.equals("wikify")) { if (this.wikifier == null) throw new ServletException( "Wikifier is not available. 
You must configure the servlet so that it has access to link detection and disambiguation models."); String source = request.getParameter("source"); int sourceMode = resolveIntegerArg(request.getParameter("sourceMode"), Wikifier.SOURCE_AUTODETECT); String linkColor = request.getParameter("linkColor"); String baseColor = request.getParameter("baseColor"); double minProb = resolveDoubleArg( request.getParameter("minProbability"), wikifier.getDefaultMinProbability()); int repeatMode = resolveIntegerArg(request.getParameter("repeatMode"), wikifier.getDefaultRepeatMode()); boolean showTooltips = resolveBooleanArg( request.getParameter("showTooltips"), wikifier.getDefaultShowTooltips()); String bannedTopics = request.getParameter("bannedTopics"); boolean wrapInXml = resolveBooleanArg(request.getParameter("wrapInXml"), true); if (wrapInXml) { data = wikifier.wikifyAndWrapInXML( source, sourceMode, minProb, repeatMode, bannedTopics, baseColor, linkColor, showTooltips); } else { response.setContentType("text/html"); response .getWriter() .append( wikifier.wikify( source, sourceMode, minProb, repeatMode, bannedTopics, baseColor, linkColor, showTooltips)); return; } } if (data == null) throw new Exception("Unknown Task"); // wrap data Element wrapper = doc.createElement("WikipediaMinerResponse"); wrapper.setAttribute("server_path", context.getInitParameter("server_path")); wrapper.setAttribute("service_name", context.getInitParameter("service_name")); wrapper.appendChild(data); data = wrapper; // Transform or serialize xml data as appropriate Transformer tf = null; if (request.getParameter("xml") == null) { // we need to transform the data into html tf = transformersByName.get(task); if (request.getParameter("help") != null) tf = transformersByName.get("help"); } if (tf == null) { // we need to serialize the data as xml tf = transformersByName.get("serializer"); response.setContentType("application/xml"); } else { // output will be transformed to html 
response.setContentType("text/html"); response .getWriter() .append( "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\n"); } tf.transform(new DOMSource(data), new StreamResult(response.getWriter())); } catch (Exception error) { response.reset(); response.setContentType("application/xml"); response.setHeader("Cache-Control", "no-cache"); response.setCharacterEncoding("UTF8"); Element xmlError = doc.createElement("Error"); if (error.getMessage() != null) xmlError.setAttribute("message", error.getMessage()); Element xmlStackTrace = doc.createElement("StackTrace"); xmlError.appendChild(xmlStackTrace); for (StackTraceElement ste : error.getStackTrace()) { Element xmlSte = doc.createElement("StackTraceElement"); xmlSte.setAttribute("message", ste.toString()); xmlStackTrace.appendChild(xmlSte); } try { transformersByName .get("serializer") .transform(new DOMSource(xmlError), new StreamResult(response.getWriter())); } catch (Exception e) { // TODO: something for when an error is thrown processing an error???? } ; } }
// private static int[] oldToNew(IndexReader reader, Searcher searcher) throws IOException { private static DocScore[] newToOld(IndexReader reader, Searcher searcher) throws IOException { int readerMax = reader.maxDoc(); DocScore[] newToOld = new DocScore[readerMax]; // use site, an indexed, un-tokenized field to get boost // byte[] boosts = reader.norms("site"); TODO MC /* TODO MC */ Document docMeta; Pattern includes = Pattern.compile("\\|"); String value = NutchConfiguration.create().get(INCLUDE_EXTENSIONS_KEY, ""); String includeExtensions[] = includes.split(value); Hashtable<String, Boolean> validExtensions = new Hashtable<String, Boolean>(); for (int i = 0; i < includeExtensions.length; i++) { validExtensions.put(includeExtensions[i], true); System.out.println("extension boosted " + includeExtensions[i]); } /* TODO MC */ for (int oldDoc = 0; oldDoc < readerMax; oldDoc++) { float score; if (reader.isDeleted(oldDoc)) { // score = 0.0f; score = -1f; // TODO MC } else { // score = Similarity.decodeNorm(boosts[oldDoc]); TODO MC /* TODO MC */ docMeta = searcher.doc(oldDoc); if (validExtensions.get(docMeta.get("subType")) == null) { // searched extensions will have higher scores score = -0.5f; } else { score = Integer.parseInt(docMeta.get("inlinks")); /* if (score==0) { score=0.001f; // TODO MC - to not erase } */ } /* TODO MC */ // System.out.println("Score for old document "+oldDoc+" is "+score+" and type // "+docMeta.get("subType")); // TODO MC debug remove } DocScore docScore = new DocScore(); docScore.doc = oldDoc; docScore.score = score; newToOld[oldDoc] = docScore; } System.out.println("Sorting " + newToOld.length + " documents."); Arrays.sort(newToOld); // HeapSorter.sort(newToOld); // TODO MC - due to the lack of space /* TODO MC int[] oldToNew = new int[readerMax]; for (int newDoc = 0; newDoc < readerMax; newDoc++) { DocScore docScore = newToOld[newDoc]; //oldToNew[docScore.oldDoc] = docScore.score > 0.0f ? 
newDoc : -1; // TODO MC oldToNew[docScore.oldDoc] = newDoc; // TODO MC } */ /* TODO MC * for (int newDoc = 0; newDoc < readerMax; newDoc++) { DocScore docScore = newToOld[newDoc]; System.out.println("Score for new document "+newDoc+" is "+docScore.score); // TODO MC debug remove } * TODO MC */ // return oldToNew; TODO MC return newToOld; // TODO MC }