/**
   * Validate {@link ClusterContingencyTable} with respect to its ability to compare data
   * clusterings.
   *
   * <p>The configured data set is loaded into a {@link StaticArrayDatabase} and its size is
   * checked against {@code shoulds}. Three reference clusterings are then produced (all-in-one,
   * all-noise and by-label) and compared pairwise through {@code computeFMeasure}: every
   * clustering compared with itself must yield 1.0, the remaining pairs must match fixed
   * reference values.
   */
  @Test
  public void testCompareDatabases() {
    ListParameterization params = new ListParameterization();
    // Input
    params.addParameter(FileBasedDatabaseConnection.Parameterizer.INPUT_ID, dataset);

    // get database
    Database db = ClassGenericsUtil.parameterizeOrAbort(StaticArrayDatabase.class, params);
    db.initialize();

    // verify data set size; assertEquals reports both values when the check fails.
    Relation<?> rel = db.getRelation(TypeUtil.ANY);
    assertEquals("Unexpected data set size.", shoulds, rel.size());

    // run all-in-one
    TrivialAllInOne allinone = new TrivialAllInOne();
    Clustering<Model> rai = allinone.run(db);

    // run all-in-noise
    TrivialAllNoise allinnoise = new TrivialAllNoise();
    Clustering<Model> ran = allinnoise.run(db);

    // run by-label
    ByLabelClustering bylabel = new ByLabelClustering();
    Clustering<?> rbl = bylabel.run(db);

    // NOTE: Double.MIN_VALUE is the smallest positive double, so the comparisons below are
    // effectively exact-equality checks rather than tolerance-based ones.

    // A clustering compared with itself must be a perfect match.
    assertEquals(1.0, computeFMeasure(rai, rai, false), Double.MIN_VALUE);
    assertEquals(1.0, computeFMeasure(ran, ran, false), Double.MIN_VALUE);
    assertEquals(1.0, computeFMeasure(rbl, rbl, false), Double.MIN_VALUE);

    // Reference values for the mixed comparisons.
    // NOTE(review): the meaning of the boolean flag is defined by computeFMeasure, which is not
    // visible here -- confirm before changing it.
    assertEquals(0.009950248756218905, computeFMeasure(ran, rbl, true), Double.MIN_VALUE);
    assertEquals(0.0033277870216306157, computeFMeasure(rai, ran, true), Double.MIN_VALUE);

    assertEquals(0.5 /* 0.3834296724470135 */, computeFMeasure(rai, rbl, false), Double.MIN_VALUE);
  }
  /**
   * Executes {@link KMeansHybridLloydMacQueen} with a fixed configuration ({@code K_ID} = 5,
   * {@code SEED_ID} = 2) and checks the outcome against stored reference values for the
   * F-measure and the cluster sizes.
   */
  @Test
  public void testKMeansHybridLloydMacQueen() {
    // Load the test data.
    String dataFile = UNITTEST + "different-densities-2d-no-noise.ascii";
    Database data = makeSimpleDatabase(dataFile, 1000);

    // Fixed algorithm configuration.
    ListParameterization config = new ListParameterization();
    config.addParameter(KMeans.K_ID, 5);
    config.addParameter(KMeans.SEED_ID, 2);

    // Instantiate the algorithm, then verify the parameterization.
    AbstractKMeans<DoubleVector, ?> algorithm =
        ClassGenericsUtil.parameterizeOrAbort(KMeansHybridLloydMacQueen.class, config);
    testParameterizationOk(config);

    // Cluster the data and compare with the reference values.
    Clustering<?> clustering = algorithm.run(data);
    testFMeasure(data, clustering, 0.998005);

    int[] expectedSizes = new int[] {199, 200, 200, 200, 201};
    testClusterSizes(clustering, expectedSizes);
  }
  // Example #3
  // 0
  /**
   * Executes {@link LOF} with {@code K_ID} = 10 on the "outlier-axis-subspaces-6d" unit test data
   * and checks one individual score as well as the AUC for the label "Noise" against stored
   * reference values.
   */
  @Test
  public void testLOF() {
    // Load the test data.
    String dataFile = UNITTEST + "outlier-axis-subspaces-6d.ascii";
    Database data = makeSimpleDatabase(dataFile, 1345);

    // Algorithm configuration.
    ListParameterization config = new ListParameterization();
    config.addParameter(LOF.Parameterizer.K_ID, 10);

    // Instantiate the algorithm, then verify the parameterization.
    LOF<DoubleVector> algorithm = ClassGenericsUtil.parameterizeOrAbort(LOF.class, config);
    testParameterizationOk(config);

    // Score the data and compare with the reference values.
    OutlierResult outliers = algorithm.run(data);
    testSingleScore(outliers, 1293, 1.1945314199156365);
    testAUC(data, "Noise", outliers, 0.8921680672268908);
  }
  // Example #4
  // 0
  /**
   * Executes {@link KNNWeightOutlier} with {@code K_ID} = 4 on the "outlier-3d-3clusters" unit
   * test data and checks one individual score as well as the AUC for the label "Noise" against
   * stored reference values.
   */
  @Test
  public void testKNNWeightOutlier() {
    // Load the test data.
    String dataFile = UNITTEST + "outlier-3d-3clusters.ascii";
    Database data = makeSimpleDatabase(dataFile, 960);

    // Algorithm configuration.
    ListParameterization config = new ListParameterization();
    config.addParameter(KNNWeightOutlier.Parameterizer.K_ID, 4);

    // Instantiate the algorithm, then verify the parameterization.
    KNNWeightOutlier<DoubleVector> algorithm =
        ClassGenericsUtil.parameterizeOrAbort(KNNWeightOutlier.class, config);
    testParameterizationOk(config);

    // Score the data and compare with the reference values.
    OutlierResult outliers = algorithm.run(data);
    testSingleScore(outliers, 945, 2.384117261027324);
    testAUC(data, "Noise", outliers, 0.9912777777777778);
  }
  /**
   * Executes {@link DBOutlierDetection} with {@code D_ID} = 0.175 and {@code P_ID} = 0.98 on the
   * "outlier-fire" unit test data and checks one individual score as well as the AUC for the
   * label "Noise" against stored reference values.
   */
  @Test
  public void testDBOutlierDetection() {
    // Load the test data.
    String dataFile = UNITTEST + "outlier-fire.ascii";
    Database data = makeSimpleDatabase(dataFile, 1025);

    // Algorithm configuration.
    ListParameterization config = new ListParameterization();
    config.addParameter(DBOutlierDetection.Parameterizer.D_ID, 0.175);
    config.addParameter(DBOutlierDetection.Parameterizer.P_ID, 0.98);

    // Instantiate the algorithm, then verify the parameterization.
    DBOutlierDetection<DoubleVector> algorithm =
        ClassGenericsUtil.parameterizeOrAbort(DBOutlierDetection.class, config);
    testParameterizationOk(config);

    // Score the data and compare with the reference values.
    OutlierResult outliers = algorithm.run(data);
    testSingleScore(outliers, 1025, 0.0);
    testAUC(data, "Noise", outliers, 0.97487179);
  }
 /**
  * Reads the options of this instance from the given parameterization.
  *
  * <p>The superclass options are processed first; afterwards a {@link CSVReaderFormat} is
  * instantiated from the same parameterization and stored in {@code format}.
  *
  * @param config parameterization to read the options from
  */
 @Override
 protected void makeOptions(Parameterization config) {
   // Superclass options must be consumed before the reader format is built.
   super.makeOptions(config);

   final CSVReaderFormat readerFormat =
       ClassGenericsUtil.parameterizeOrAbort(CSVReaderFormat.class, config);
   format = readerFormat;
 }