Example #1
0
  /**
   * Issues a clustering request in which the documents are fetched by a document source
   * registered in the DCS (a search engine or a similar component) instead of being supplied
   * with the request.
   *
   * <p>Besides selecting the source and the algorithm, the request carries a few
   * algorithm-specific attributes and asks for the fetched documents to be left out of the
   * output, so that only clusters are returned.
   *
   * @throws IOException propagated from the calls that post the request and display the response
   */
  public void clusterFromSearchEngine() throws IOException {
    System.out.println("## Clustering search results from a search engine");

    final Map<String, String> request = new LinkedHashMap<String, String>();

    // Document source: the etools meta search engine input component, with its query settings.
    request.put("dcs.source", "etools");
    request.put("query", "test");
    request.put("results", "20");

    // Clustering algorithm to run on the fetched results.
    request.put("dcs.algorithm", "lingo");

    // Skip the fetched documents in the output (retrieve clusters only).
    request.put("dcs.clusters.only", "true");

    // Additional, customized attributes passed to the clustering algorithm.
    request.put("LingoClusteringAlgorithm.desiredClusterCountBase", "10");
    request.put("LingoClusteringAlgorithm.factorizationQuality", "LOW");
    request.put(
        "LingoClusteringAlgorithm.factorizationFactory",
        "org.carrot2.matrix.factorization.PartialSingularValueDecompositionFactory");

    displayResults(httpPoster.post(dcsURI, request));
  }
Example #2
0
  /**
   * Issues a clustering request whose input is an external XML stream feed; the request carries
   * the URL of that feed rather than the documents themselves.
   *
   * @throws IOException propagated from the calls that post the request and display the response
   */
  public void clusterFromRemoteXML() throws IOException {
    System.out.println("## Clustering documents from a remote XML feed");

    final Map<String, String> request = new LinkedHashMap<String, String>();

    // Use the XML document source together with the STC algorithm.
    request.put("dcs.source", "xml");
    request.put("dcs.algorithm", "stc");

    // Location of the remote feed the XML source should read from.
    request.put("XmlDocumentSource.xml", XML_FEED);

    displayResults(httpPoster.post(dcsURI, request));
  }
Example #3
0
  /**
   * Issues a clustering request whose input documents are read from a local XML file and sent
   * inline with the request.
   *
   * @throws IOException if the file cannot be opened or read, or propagated from the calls that
   *     post the request and display the response
   */
  public void clusterFromFile() throws IOException {
    System.out.println("## Clustering documents from a local file");

    final Map<String, String> request = new LinkedHashMap<String, String>();

    // Load the whole file and decode it as UTF-8; the content is passed as the document stream.
    final byte[] xmlBytes = StreamUtils.readFullyAndClose(new FileInputStream(XML_FILE_PATH));
    final String xmlContent = new String(xmlBytes, "UTF-8");
    request.put("dcs.c2stream", xmlContent);

    // The query attribute is optional; providing it helps avoid creation of trivial clusters.
    request.put("query", "data mining");

    displayResults(httpPoster.post(dcsURI, request));
  }