Esempio n. 1
0
 /**
  * The ActionListener implementation
  *
  * @param event the event.
  */
 public void actionPerformed(ActionEvent event) {
   String searchText = textField.getText().trim();
   if (searchText.equals("") && !saveAs.isSelected() && (fileLength > 10000000)) {
     textPane.setText("Blank search text is not allowed for large IdTables.");
   } else {
     File outputFile = null;
     if (saveAs.isSelected()) {
       outputFile = chooser.getSelectedFile();
       if (outputFile != null) {
         String name = outputFile.getName();
         int k = name.lastIndexOf(".");
         if (k != -1) name = name.substring(0, k);
         name += ".txt";
         File parent = outputFile.getAbsoluteFile().getParentFile();
         outputFile = new File(parent, name);
         chooser.setSelectedFile(outputFile);
       }
       if (chooser.showSaveDialog(this) != JFileChooser.APPROVE_OPTION) System.exit(0);
       outputFile = chooser.getSelectedFile();
     }
     textPane.setText("");
     Searcher searcher = new Searcher(searchText, event.getSource().equals(searchPHI), outputFile);
     searcher.start();
   }
 }
Esempio n. 2
0
  // QHandler handles incoming queries.
  public void run() {
    // hmmm.. this seems like potential bug.  I want to check that query is not in table.  But even
    // if query table contains key,
    // that does not necessarily mean it is in table, b/c two queries can have SAME HASHCODE VALUE.
    // I need to have some other means.
    // Will talk to Rusty @ this on Monday.

    if (!qt.containsKey(query)) // check that query is not already in table
    {
      Searcher.inform(query); // Give information to the Search Monitor panel
      NetworkManager.writeButOne(query.getIP(), query); /*Query is forwarded to all connected nodes
								     except one from which query came. */
      qt.put((Packet) query, query); // add query to table, indexed by its unique MessageID
      searchResult =
          SharedDirectory.search(query.getSearchString()); // check shared directory for query match
      numHits = searchResult.getSize();

      if (numHits != 0) // package a query hit to send out if there is at least one query match
      {
        queryID = query.getMessageID();
        port = Mine.getPort();
        myIP = Mine.getIPAddress();
        speed = Mine.getSpeed();
        serventID = Mine.getServentIdentifier();
        queryHit = new QueryHit(numHits, port, myIP, speed, searchResult, serventID, queryID);
        NetworkManager.writeToOne(
            query.getIP(), queryHit); // send qHit back to node that sent original query
      }
    }
  }
  private Element getDescription(String task) {

    if (task != null) {

      if (task.equals("define")) return definer.getDescription();

      if (task.equals("compare")) return comparer.getDescription();

      if (task.equals("search")) return searcher.getDescription();

      if (task.equals("wikify")) return wikifier.getDescription();
    }

    Element description = doc.createElement("Description");

    description.appendChild(
        createElement(
            "Details",
            "<p>This servlet provides a range of services for mining information from Wikipedia. Further details depend on what you want to do.</p>"
                + "<p>You can <a href=\""
                + context.getInitParameter("service_name")
                + "?task=search&help\">search for pages</a>, <a href=\""
                + context.getInitParameter("service_name")
                + "?task=compare&help\">measure how terms or articles related to each other</a>, <a href=\""
                + context.getInitParameter("service_name")
                + "?task=define&help\">obtain short definitions from articles</a>, and <a href=\""
                + context.getInitParameter("service_name")
                + "?task=wikify&help\">detect topics in web pages</a>.</p>"));

    Element paramTask =
        createElement(
            "Parameter",
            "Specifies what you want to do: can be <em>search</em>, <em>compare</em>, <em>define</em>, or <em>wikify</em>");
    paramTask.setAttribute("name", "task");
    description.appendChild(paramTask);

    Element paramId = doc.createElement("Parameter");
    paramId.setAttribute("name", "help");
    paramId.appendChild(doc.createTextNode("Specifies that you want help about the service."));
    description.appendChild(paramId);

    return description;
  }
  protected void search(boolean persistent, boolean async) {
    try {
      synchronized (this) {
        if (current_search == null) {

          current_search = new Searcher(persistent, async);

        } else {

          if (!current_search.wakeup()) {

            current_search = new Searcher(persistent, async);
          }
        }
      }
    } catch (Throwable e) {

      Debug.out(e);
    }
  }
  public void doGet(HttpServletRequest request, HttpServletResponse response)
      throws IOException, ServletException {

    try {

      response.setHeader("Cache-Control", "no-cache");
      response.setCharacterEncoding("UTF-8");

      String task = request.getParameter("task");

      Element data = null;

      // process help request
      if (request.getParameter("help") != null) data = getDescription(task);

      // redirect to home page if there is no task
      if (data == null && task == null) {
        response.setContentType("text/html");
        response
            .getWriter()
            .append(
                "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\"><html><head><meta http-equiv=\"REFRESH\" content=\"0;url="
                    + context.getInitParameter("server_path")
                    + "></head><body></body></html>");
        return;
      }

      // process definition request
      if (data == null && task.equals("define")) {
        int id = resolveIntegerArg(request.getParameter("id"), -1);
        int length = resolveIntegerArg(request.getParameter("length"), definer.getDefaultLength());
        int format = resolveIntegerArg(request.getParameter("format"), definer.getDefaultFormat());
        int maxImageWidth =
            resolveIntegerArg(
                request.getParameter("maxImageWidth"), definer.getDefaultMaxImageWidth());
        int maxImageHeight =
            resolveIntegerArg(
                request.getParameter("maxImageHeight"), definer.getDefaultMaxImageHeight());
        int linkDestination =
            resolveIntegerArg(
                request.getParameter("linkDestination"), definer.getDefaultLinkDestination());
        boolean getImages = resolveBooleanArg(request.getParameter("getImages"), false);

        data =
            definer.getDefinition(
                id, length, format, linkDestination, getImages, maxImageWidth, maxImageHeight);
      }

      // all of the remaining tasks require data to be cached, so lets make sure that is finished
      // before continuing.
      if (!cachingThread.isOk()) throw new ServletException("Could not cache wikipedia data");

      double progress = cachingThread.getProgress();
      if (data == null && (progress < 1 || task.equals("progress"))) {
        // still caching up data, not ready to return a response yet.

        data = doc.createElement("loading");
        data.setAttribute("progress", df.format(progress));
        task = "loading";
      }

      // process search request
      if (data == null && task.equals("search")) {
        String term = request.getParameter("term");
        String id = request.getParameter("id");
        int linkLimit =
            resolveIntegerArg(request.getParameter("linkLimit"), searcher.getDefaultMaxLinkCount());
        int senseLimit =
            resolveIntegerArg(
                request.getParameter("senseLimit"), searcher.getDefaultMaxSenseCount());

        if (id == null) data = searcher.doSearch(term, linkLimit, senseLimit);
        else data = searcher.doSearch(Integer.parseInt(id), linkLimit);
      }

      // process compare request
      if (data == null && task.equals("compare")) {
        String term1 = request.getParameter("term1");
        String term2 = request.getParameter("term2");
        int linkLimit =
            resolveIntegerArg(request.getParameter("linkLimit"), comparer.getDefaultMaxLinkCount());
        boolean details =
            resolveBooleanArg(request.getParameter("details"), comparer.getDefaultShowDetails());

        data = comparer.getRelatedness(term1, term2, details, linkLimit);
      }

      // process wikify request
      if (data == null && task.equals("wikify")) {

        if (this.wikifier == null)
          throw new ServletException(
              "Wikifier is not available. You must configure the servlet so that it has access to link detection and disambiguation models.");

        String source = request.getParameter("source");
        int sourceMode =
            resolveIntegerArg(request.getParameter("sourceMode"), Wikifier.SOURCE_AUTODETECT);
        String linkColor = request.getParameter("linkColor");
        String baseColor = request.getParameter("baseColor");
        double minProb =
            resolveDoubleArg(
                request.getParameter("minProbability"), wikifier.getDefaultMinProbability());
        int repeatMode =
            resolveIntegerArg(request.getParameter("repeatMode"), wikifier.getDefaultRepeatMode());
        boolean showTooltips =
            resolveBooleanArg(
                request.getParameter("showTooltips"), wikifier.getDefaultShowTooltips());
        String bannedTopics = request.getParameter("bannedTopics");

        boolean wrapInXml = resolveBooleanArg(request.getParameter("wrapInXml"), true);

        if (wrapInXml) {
          data =
              wikifier.wikifyAndWrapInXML(
                  source,
                  sourceMode,
                  minProb,
                  repeatMode,
                  bannedTopics,
                  baseColor,
                  linkColor,
                  showTooltips);
        } else {
          response.setContentType("text/html");
          response
              .getWriter()
              .append(
                  wikifier.wikify(
                      source,
                      sourceMode,
                      minProb,
                      repeatMode,
                      bannedTopics,
                      baseColor,
                      linkColor,
                      showTooltips));
          return;
        }
      }

      if (data == null) throw new Exception("Unknown Task");

      // wrap data
      Element wrapper = doc.createElement("WikipediaMinerResponse");
      wrapper.setAttribute("server_path", context.getInitParameter("server_path"));
      wrapper.setAttribute("service_name", context.getInitParameter("service_name"));
      wrapper.appendChild(data);

      data = wrapper;

      // Transform or serialize xml data as appropriate

      Transformer tf = null;

      if (request.getParameter("xml") == null) {
        // we need to transform the data into html
        tf = transformersByName.get(task);

        if (request.getParameter("help") != null) tf = transformersByName.get("help");
      }

      if (tf == null) {
        // we need to serialize the data as xml
        tf = transformersByName.get("serializer");
        response.setContentType("application/xml");
      } else {
        // output will be transformed to html
        response.setContentType("text/html");
        response
            .getWriter()
            .append(
                "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\n");
      }

      tf.transform(new DOMSource(data), new StreamResult(response.getWriter()));

    } catch (Exception error) {
      response.reset();
      response.setContentType("application/xml");
      response.setHeader("Cache-Control", "no-cache");
      response.setCharacterEncoding("UTF8");

      Element xmlError = doc.createElement("Error");
      if (error.getMessage() != null) xmlError.setAttribute("message", error.getMessage());

      Element xmlStackTrace = doc.createElement("StackTrace");
      xmlError.appendChild(xmlStackTrace);

      for (StackTraceElement ste : error.getStackTrace()) {

        Element xmlSte = doc.createElement("StackTraceElement");
        xmlSte.setAttribute("message", ste.toString());
        xmlStackTrace.appendChild(xmlSte);
      }
      try {
        transformersByName
            .get("serializer")
            .transform(new DOMSource(xmlError), new StreamResult(response.getWriter()));
      } catch (Exception e) {
        // TODO: something for when an error is thrown processing an error????

      }
      ;
    }
  }
  // private static int[] oldToNew(IndexReader reader, Searcher searcher) throws IOException {
  private static DocScore[] newToOld(IndexReader reader, Searcher searcher) throws IOException {
    int readerMax = reader.maxDoc();
    DocScore[] newToOld = new DocScore[readerMax];

    // use site, an indexed, un-tokenized field to get boost
    // byte[] boosts = reader.norms("site"); TODO MC
    /* TODO MC */
    Document docMeta;
    Pattern includes = Pattern.compile("\\|");
    String value = NutchConfiguration.create().get(INCLUDE_EXTENSIONS_KEY, "");
    String includeExtensions[] = includes.split(value);
    Hashtable<String, Boolean> validExtensions = new Hashtable<String, Boolean>();
    for (int i = 0; i < includeExtensions.length; i++) {
      validExtensions.put(includeExtensions[i], true);
      System.out.println("extension boosted " + includeExtensions[i]);
    }
    /* TODO MC */

    for (int oldDoc = 0; oldDoc < readerMax; oldDoc++) {
      float score;
      if (reader.isDeleted(oldDoc)) {
        // score = 0.0f;
        score = -1f; // TODO MC
      } else {
        // score = Similarity.decodeNorm(boosts[oldDoc]); TODO MC
        /* TODO MC */
        docMeta = searcher.doc(oldDoc);
        if (validExtensions.get(docMeta.get("subType"))
            == null) { // searched extensions will have higher scores
          score = -0.5f;
        } else {
          score = Integer.parseInt(docMeta.get("inlinks"));
          /*
          if (score==0) {
          	score=0.001f; // TODO MC - to not erase
          }
          */
        }
        /* TODO MC */
        // System.out.println("Score for old document "+oldDoc+" is "+score+" and type
        // "+docMeta.get("subType")); // TODO MC debug remove
      }
      DocScore docScore = new DocScore();
      docScore.doc = oldDoc;
      docScore.score = score;
      newToOld[oldDoc] = docScore;
    }

    System.out.println("Sorting " + newToOld.length + " documents.");
    Arrays.sort(newToOld);
    // HeapSorter.sort(newToOld); // TODO MC - due to the lack of space

    /* TODO MC
    int[] oldToNew = new int[readerMax];
    for (int newDoc = 0; newDoc < readerMax; newDoc++) {
      DocScore docScore = newToOld[newDoc];
      //oldToNew[docScore.oldDoc] = docScore.score > 0.0f ? newDoc : -1; // TODO MC
      oldToNew[docScore.oldDoc] = newDoc; // TODO MC
    }
    */

    /* TODO MC *
    for (int newDoc = 0; newDoc < readerMax; newDoc++) {
    	DocScore docScore = newToOld[newDoc];
    	System.out.println("Score for new document "+newDoc+" is "+docScore.score); // TODO MC debug remove
    }
    * TODO MC */

    // return oldToNew; TODO MC
    return newToOld; // TODO MC
  }