/** * Cluster data retrieved from a search engine or some other source registered in the DCS as a * document source. */ public void clusterFromSearchEngine() throws IOException { final Map<String, String> attributes = new LinkedHashMap<String, String>(); /* * For this request, we will pass some additional attributes to the default * algorithm and ask to skip the fetched documents in the output (retrieve * clusters only). */ System.out.println("## Clustering search results from a search engine"); // We use etools meta search engine input component. attributes.put("dcs.source", "etools"); attributes.put("query", "test"); attributes.put("results", "20"); attributes.put("dcs.algorithm", "lingo"); attributes.put("dcs.clusters.only", "true"); // Some customized algorithm parameters. attributes.put("LingoClusteringAlgorithm.desiredClusterCountBase", "10"); attributes.put("LingoClusteringAlgorithm.factorizationQuality", "LOW"); attributes.put( "LingoClusteringAlgorithm.factorizationFactory", "org.carrot2.matrix.factorization.PartialSingularValueDecompositionFactory"); displayResults(httpPoster.post(dcsURI, attributes)); }
/**
 * Clusters documents taken from an external XML feed.
 *
 * <p>Only the feed's URL ({@code XML_FEED}) is sent in the request; the XML content itself is
 * not posted by this method.
 *
 * @throws IOException propagated from posting the request or displaying the response
 */
public void clusterFromRemoteXML() throws IOException {
  System.out.println("## Clustering documents from a remote XML feed");

  final Map<String, String> request = new LinkedHashMap<String, String>();

  // Select the "xml" document source and the "stc" algorithm, then point the source at the feed.
  request.put("dcs.source", "xml");
  request.put("dcs.algorithm", "stc");
  request.put("XmlDocumentSource.xml", XML_FEED);

  displayResults(httpPoster.post(dcsURI, request));
}
/**
 * Clusters documents stored in a local XML file.
 *
 * <p>The file at {@code XML_FILE_PATH} is read completely, decoded as UTF-8 and posted inline
 * as the {@code dcs.c2stream} attribute.
 *
 * @throws IOException if the file cannot be read, or propagated from posting the request or
 *     displaying the response
 */
public void clusterFromFile() throws IOException {
  System.out.println("## Clustering documents from a local file");

  final Map<String, String> request = new LinkedHashMap<String, String>();

  // Read the whole file; the helper is also responsible for closing the stream.
  final byte[] rawXml = StreamUtils.readFullyAndClose(new FileInputStream(XML_FILE_PATH));
  final String xml = new String(rawXml, "UTF-8");
  request.put("dcs.c2stream", xml);

  // The query attribute is optional; providing it helps avoid creation of trivial clusters.
  request.put("query", "data mining");

  displayResults(httpPoster.post(dcsURI, request));
}