/**
 * Exercises the file-based {@code ClusterIterator.iterate} entry point with a
 * {@link KMeansClusteringPolicy}.
 *
 * <p>The reference points are written to a file under the "points" temp directory and a prior
 * classifier is written under the "prior" temp directory. After five iterations, each
 * "classifier-N" entry (N = 1..5) under the "output" temp directory is read back and must
 * contain three models. Every classifier's clusters are also printed to stdout.
 *
 * @throws IOException propagated from the file system and classifier read/write calls
 */
@Test
public void testSeqFileClusterIteratorKMeans() throws IOException {
  Path inputDir = getTestTempDirPath("points");
  Path priorDir = getTestTempDirPath("prior");
  Path outputDir = getTestTempDirPath("output");
  Configuration configuration = new Configuration();
  FileSystem fileSystem = FileSystem.get(configuration);

  // Stage the reference points as the iteration input.
  List<VectorWritable> referencePoints =
      TestKmeansClustering.getPointsWritable(TestKmeansClustering.REFERENCE);
  ClusteringTestUtils.writePointsToFile(
      referencePoints, new Path(inputDir, "file1"), fileSystem, configuration);

  // Persist the prior classifier so the iterator can load it from disk.
  Path priorClassifierPath = new Path(priorDir, "priorClassifier");
  ClusterClassifier priorClassifier = newClusterClassifier();
  writeClassifier(priorClassifier, configuration, priorClassifierPath, fileSystem);
  assertEquals(3, priorClassifier.getModels().size());

  System.out.println("Prior");
  for (Cluster model : priorClassifier.getModels()) {
    System.out.println(model.asFormatString(null));
  }

  // The same count drives both the iteration and the verification loop below.
  int numIterations = 5;
  ClusteringPolicy kMeansPolicy = new KMeansClusteringPolicy();
  ClusterIterator clusterIterator = new ClusterIterator(kMeansPolicy);
  clusterIterator.iterate(inputDir, priorClassifierPath, outputDir, numIterations);

  // Read back the classifier stored for each iteration and check its model count.
  int iteration = 1;
  while (iteration <= numIterations) {
    System.out.println("Classifier-" + iteration);
    Path classifierPath = new Path(outputDir, "classifier-" + iteration);
    ClusterClassifier posteriorClassifier = readClassifier(configuration, classifierPath, fileSystem);
    assertEquals(3, posteriorClassifier.getModels().size());
    for (Cluster model : posteriorClassifier.getModels()) {
      System.out.println(model.asFormatString(null));
    }
    iteration++;
  }
}
/**
 * Exercises the in-memory {@code ClusterIterator.iterate} entry point with a
 * {@link DirichletClusteringPolicy}.
 *
 * <p>Starting from the classifier returned by {@code newClusterClassifier()}, the iterator is
 * run for five iterations over the reference points. The resulting classifier must contain
 * three models, each of which is printed to stdout.
 */
@Test
public void testClusterIteratorDirichlet() {
  List<Vector> referencePoints = TestKmeansClustering.getPoints(TestKmeansClustering.REFERENCE);
  ClusteringPolicy dirichletPolicy = new DirichletClusteringPolicy(3, 1);
  ClusterClassifier priorClassifier = newClusterClassifier();
  ClusterIterator clusterIterator = new ClusterIterator(dirichletPolicy);

  int numIterations = 5;
  ClusterClassifier posteriorClassifier =
      clusterIterator.iterate(referencePoints, priorClassifier, numIterations);

  assertEquals(3, posteriorClassifier.getModels().size());
  for (Cluster model : posteriorClassifier.getModels()) {
    System.out.println(model.asFormatString(null));
  }
}