/**
 * Merges the running totals of another cluster into this one.
 *
 * <p>The given model is cast to {@link AbstractCluster}; its s0 value is added to this
 * cluster's s0, and its s1 and s2 values are combined with this cluster's via {@code plus}.
 *
 * @param x the model to fold in; it must be an {@code AbstractCluster}, otherwise the cast
 *          fails with a {@link ClassCastException}
 */
@Override
public void observe(Model<VectorWritable> x) {
  AbstractCluster other = (AbstractCluster) x;

  // Each total is updated independently, in the order s0, s1, s2.
  setS0(getS0() + other.getS0());
  setS1(getS1().plus(other.getS1()));
  setS2(getS2().plus(other.getS2()));
}
/**
 * Checks the formatted classification output of the classifier returned by
 * {@code newGaussianClassifier()} for two sample vectors.
 *
 * <p>Each result is rendered with {@code AbstractCluster.formatVector(pdf, null)} and compared
 * with a fixed expected string; the assertion messages label the two inputs "[0,0]" and "[2,2]".
 */
@Test
public void testGaussianClusterClassification() {
  ClusterClassifier gaussian = newGaussianClassifier();

  // First sample: a fresh 2-element DenseVector.
  Vector originPdf = gaussian.classify(new DenseVector(2));
  String originText = AbstractCluster.formatVector(originPdf, null);
  assertEquals("[0,0]", "[0.212, 0.576, 0.212]", originText);

  // Second sample: a 2-element DenseVector after assign(2).
  Vector shiftedPdf = gaussian.classify(new DenseVector(2).assign(2));
  String shiftedText = AbstractCluster.formatVector(shiftedPdf, null);
  assertEquals("[2,2]", "[0.952, 0.047, 0.000]", shiftedText);
}
/**
 * Checks the formatted classification output of the classifier returned by
 * {@code newSoftClusterClassifier()} for two sample vectors.
 *
 * <p>Each result is rendered with {@code AbstractCluster.formatVector(pdf, null)} and compared
 * with a fixed expected string; the assertion messages label the two inputs "[0,0]" and "[2,2]".
 */
@Test
public void testSoftClusterClassification() {
  ClusterClassifier soft = newSoftClusterClassifier();

  // First sample: a fresh 2-element DenseVector.
  Vector originPdf = soft.classify(new DenseVector(2));
  String originText = AbstractCluster.formatVector(originPdf, null);
  assertEquals("[0,0]", "[0.000, 1.000, 0.000]", originText);

  // Second sample: a 2-element DenseVector after assign(2).
  Vector shiftedPdf = soft.classify(new DenseVector(2).assign(2));
  String shiftedText = AbstractCluster.formatVector(shiftedPdf, null);
  assertEquals("[2,2]", "[0.735, 0.184, 0.082]", shiftedText);
}
/**
 * Checks the formatted classification output of the classifier returned by
 * {@code newClusterClassifier()} for two sample vectors.
 *
 * <p>Each result is rendered with {@code AbstractCluster.formatVector(pdf, null)} and compared
 * with a fixed expected string; the assertion messages label the two inputs "[0,0]" and "[2,2]".
 */
@Test
public void testClusterClassification() {
  ClusterClassifier plain = newClusterClassifier();

  // First sample: a fresh 2-element DenseVector.
  Vector originPdf = plain.classify(new DenseVector(2));
  String originText = AbstractCluster.formatVector(originPdf, null);
  assertEquals("[0,0]", "[0.107, 0.787, 0.107]", originText);

  // Second sample: a 2-element DenseVector after assign(2).
  Vector shiftedPdf = plain.classify(new DenseVector(2).assign(2));
  String shiftedText = AbstractCluster.formatVector(shiftedPdf, null);
  assertEquals("[2,2]", "[0.867, 0.117, 0.016]", shiftedText);
}
/**
 * Checks the formatted classification output of a {@code ClusterClassifier} built directly from
 * three {@code Canopy} models that share one {@code ManhattanDistanceMeasure}.
 *
 * <p>The canopies are created from 2-element DenseVectors (after {@code assign(1)}, untouched,
 * and after {@code assign(-1)}) with second constructor arguments 0, 1 and 2. Each
 * classification result is rendered with {@code AbstractCluster.formatVector(pdf, null)} and
 * compared with a fixed expected string.
 */
@Test
public void testCanopyClassification() {
  DistanceMeasure manhattan = new ManhattanDistanceMeasure();

  List<Cluster> canopies = Lists.newArrayList();
  canopies.add(new Canopy(new DenseVector(2).assign(1), 0, manhattan));
  canopies.add(new Canopy(new DenseVector(2), 1, manhattan));
  canopies.add(new Canopy(new DenseVector(2).assign(-1), 2, manhattan));

  ClusterClassifier canopyClassifier = new ClusterClassifier(canopies);

  // First sample: a fresh 2-element DenseVector.
  Vector originPdf = canopyClassifier.classify(new DenseVector(2));
  String originText = AbstractCluster.formatVector(originPdf, null);
  assertEquals("[0,0]", "[0.107, 0.787, 0.107]", originText);

  // Second sample: a 2-element DenseVector after assign(2).
  Vector shiftedPdf = canopyClassifier.classify(new DenseVector(2).assign(2));
  String shiftedText = AbstractCluster.formatVector(shiftedPdf, null);
  assertEquals("[2,2]", "[0.867, 0.117, 0.016]", shiftedText);
}
public void printClusters(String[] dictionary) throws IOException, InstantiationException, IllegalAccessException { Configuration conf = new Configuration(); if (this.termDictionary != null) { if ("text".equals(dictionaryFormat)) { dictionary = VectorHelper.loadTermDictionary(new File(this.termDictionary)); } else if ("sequencefile".equals(dictionaryFormat)) { FileSystem fs = FileSystem.get(new Path(this.termDictionary).toUri(), conf); dictionary = VectorHelper.loadTermDictionary(conf, fs, this.termDictionary); } else { throw new IllegalArgumentException("Invalid dictionary format"); } } Writer writer = this.outputFile == null ? new OutputStreamWriter(System.out) : new FileWriter(this.outputFile); try { FileSystem fs = seqFileDir.getFileSystem(conf); for (FileStatus seqFile : fs.globStatus(new Path(seqFileDir, "part-*"))) { Path path = seqFile.getPath(); // System.out.println("Input Path: " + path); doesn't this interfere with output? SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf); try { Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance(); Writable value = reader.getValueClass().asSubclass(Writable.class).newInstance(); while (reader.next(key, value)) { Cluster cluster = (Cluster) value; String fmtStr = useJSON ? 
cluster.asJsonString() : cluster.asFormatString(dictionary); if (subString > 0 && fmtStr.length() > subString) { writer.write(':'); writer.write(fmtStr, 0, Math.min(subString, fmtStr.length())); } else { writer.write(fmtStr); } writer.write('\n'); if (dictionary != null) { String topTerms = getTopFeatures(cluster.getCenter(), dictionary, numTopFeatures); writer.write("\tTop Terms: "); writer.write(topTerms); writer.write('\n'); } List<WeightedVectorWritable> points = clusterIdToPoints.get(cluster.getId()); if (points != null) { writer.write("\tWeight: Point:\n\t"); for (Iterator<WeightedVectorWritable> iterator = points.iterator(); iterator.hasNext(); ) { WeightedVectorWritable point = iterator.next(); writer.write(String.valueOf(point.getWeight())); writer.write(": "); writer.write(AbstractCluster.formatVector(point.getVector(), dictionary)); if (iterator.hasNext()) { writer.write("\n\t"); } } writer.write('\n'); } } } finally { reader.close(); } } } finally { writer.close(); } }