/** * Validate {@link ClusterContingencyTable} with respect to its ability to compare data * clusterings. * * @throws ParameterException on errors. */ @Test public void testCompareDatabases() { ListParameterization params = new ListParameterization(); // Input params.addParameter(FileBasedDatabaseConnection.Parameterizer.INPUT_ID, dataset); // get database Database db = ClassGenericsUtil.parameterizeOrAbort(StaticArrayDatabase.class, params); db.initialize(); // verify data set size. Relation<?> rel = db.getRelation(TypeUtil.ANY); assertTrue(rel.size() == shoulds); // run all-in-one TrivialAllInOne allinone = new TrivialAllInOne(); Clustering<Model> rai = allinone.run(db); // run all-in-noise TrivialAllNoise allinnoise = new TrivialAllNoise(); Clustering<Model> ran = allinnoise.run(db); // run by-label ByLabelClustering bylabel = new ByLabelClustering(); Clustering<?> rbl = bylabel.run(db); assertEquals(1.0, computeFMeasure(rai, rai, false), Double.MIN_VALUE); assertEquals(1.0, computeFMeasure(ran, ran, false), Double.MIN_VALUE); assertEquals(1.0, computeFMeasure(rbl, rbl, false), Double.MIN_VALUE); assertEquals(0.009950248756218905, computeFMeasure(ran, rbl, true), Double.MIN_VALUE); assertEquals(0.0033277870216306157, computeFMeasure(rai, ran, true), Double.MIN_VALUE); assertEquals(0.5 /* 0.3834296724470135 */, computeFMeasure(rai, rbl, false), Double.MIN_VALUE); }
/** * Run KMeans with fixed parameters and compare the result to a golden standard. * * @throws ParameterException */ @Test public void testKMeansHybridLloydMacQueen() { Database db = makeSimpleDatabase(UNITTEST + "different-densities-2d-no-noise.ascii", 1000); // Setup algorithm ListParameterization params = new ListParameterization(); params.addParameter(KMeans.K_ID, 5); params.addParameter(KMeans.SEED_ID, 2); AbstractKMeans<DoubleVector, ?> kmeans = ClassGenericsUtil.parameterizeOrAbort(KMeansHybridLloydMacQueen.class, params); testParameterizationOk(params); // run KMeans on database Clustering<?> result = kmeans.run(db); testFMeasure(db, result, 0.998005); testClusterSizes(result, new int[] {199, 200, 200, 200, 201}); }
@Test public void testLOF() { Database db = makeSimpleDatabase(UNITTEST + "outlier-axis-subspaces-6d.ascii", 1345); // Parameterization ListParameterization params = new ListParameterization(); params.addParameter(LOF.Parameterizer.K_ID, 10); // setup Algorithm LOF<DoubleVector> lof = ClassGenericsUtil.parameterizeOrAbort(LOF.class, params); testParameterizationOk(params); // run LOF on database OutlierResult result = lof.run(db); testSingleScore(result, 1293, 1.1945314199156365); testAUC(db, "Noise", result, 0.8921680672268908); }
@Test public void testKNNWeightOutlier() { Database db = makeSimpleDatabase(UNITTEST + "outlier-3d-3clusters.ascii", 960); // Parameterization ListParameterization params = new ListParameterization(); params.addParameter(KNNWeightOutlier.Parameterizer.K_ID, 4); // setup Algorithm KNNWeightOutlier<DoubleVector> knnWeightOutlier = ClassGenericsUtil.parameterizeOrAbort(KNNWeightOutlier.class, params); testParameterizationOk(params); // run KNNWeightOutlier on database OutlierResult result = knnWeightOutlier.run(db); testSingleScore(result, 945, 2.384117261027324); testAUC(db, "Noise", result, 0.9912777777777778); }
@Test public void testDBOutlierDetection() { Database db = makeSimpleDatabase(UNITTEST + "outlier-fire.ascii", 1025); // Parameterization ListParameterization params = new ListParameterization(); params.addParameter(DBOutlierDetection.Parameterizer.D_ID, 0.175); params.addParameter(DBOutlierDetection.Parameterizer.P_ID, 0.98); // setup Algorithm DBOutlierDetection<DoubleVector> dbOutlierDetection = ClassGenericsUtil.parameterizeOrAbort(DBOutlierDetection.class, params); testParameterizationOk(params); // run DBOutlierDetection on database OutlierResult result = dbOutlierDetection.run(db); testSingleScore(result, 1025, 0.0); testAUC(db, "Noise", result, 0.97487179); }
/**
 * Lets the superclass process its options first, then instantiates a
 * {@link CSVReaderFormat} from the same parameterization and stores it in
 * {@code format}.
 *
 * @param config parameterization to read the options from
 */
@Override
protected void makeOptions(Parameterization config) {
  super.makeOptions(config);
  final CSVReaderFormat readerFormat = ClassGenericsUtil.parameterizeOrAbort(CSVReaderFormat.class, config);
  format = readerFormat;
}