예제 #1
0
파일: HCTest.java 프로젝트: deric/clueminer
  /**
   * Checks that clustering the columns of the fixture dataset yields a
   * non-null proximity matrix whose row count and column count both equal the
   * number of attributes in the dataset.
   *
   * @throws IOException declared because a call in the body may raise it
   *     (which call is not visible from this method alone)
   */
  @Test
  public void testColumnClustering() throws IOException {
    Dataset<? extends Instance> dataset = FakeClustering.schoolData();
    Props pref = new Props();
    pref.put(AlgParams.LINKAGE, SingleLinkage.name);
    pref.put(AlgParams.CLUSTERING_TYPE, ClusteringType.COLUMNS_CLUSTERING);
    // Presumably asks the algorithm to retain the proximity matrix so that it
    // can be fetched from the result below -- confirm against AlgParams.
    pref.put(PropType.PERFORMANCE, AlgParams.KEEP_PROXIMITY, true);
    HierarchicalResult result = subject.hierarchy(dataset, pref);
    Matrix similarityMatrix = result.getProximityMatrix();
    assertNotNull(similarityMatrix);
    // Expected value goes first, actual second, so that a failure message
    // reports the two numbers under the right labels.
    assertEquals(dataset.attributeCount(), similarityMatrix.rowsCount());
    assertEquals(dataset.attributeCount(), similarityMatrix.columnsCount());

    result.getTreeData().print();
  }
예제 #2
0
  /**
   * Computes a scalar spread measure for the dataset: for every row of
   * {@code dataset.asMatrix()} the vector of per-attribute means is
   * subtracted, the squared length of the difference is accumulated, and the
   * total is divided by {@code rowsCount - 1}.
   *
   * <p>Despite the method name, the result is a single number and not a
   * covariance matrix. NOTE(review): if {@code StatsNum.MEAN} is the
   * arithmetic mean of each attribute, this value equals the trace of the
   * sample covariance matrix -- confirm before relying on that.
   *
   * <p>No guard exists for a matrix with a single row: the divisor is then
   * zero and the floating-point division produces {@code NaN} or an infinity
   * instead of throwing.
   *
   * @param dataset dataset to measure
   * @return summed squared deviations from the mean vector divided by
   *     {@code rowsCount - 1}
   */
  protected double covariance(Dataset<E> dataset) {
    Matrix m = dataset.asMatrix();

    // Mean vector: one entry per attribute, taken from the attribute statistics.
    DenseVector mean = new DenseVector(dataset.attributeCount());
    for (int i = 0; i < mean.size(); i++) {
      mean.set(i, dataset.getAttribute(i).statistics(StatsNum.MEAN));
    }

    double sum = 0.0;
    for (int i = 0; i < m.rowsCount(); i++) {
      // Deviation of row i from the mean vector; dot(v, v) is its squared length.
      Vector v = m.getRowVector(i).minus(mean);
      sum += v.dot(v);
    }
    return sum / (m.rowsCount() - 1);
  }
예제 #3
0
 /**
  * Returns the number of clustered items, which depends on the clustering
  * direction stored in {@code resultType}: the attribute count of the dataset
  * for column clustering, the dataset size for row clustering.
  *
  * @return number of attributes or number of rows, according to {@code resultType}
  * @throws RuntimeException if {@code resultType} is neither of the two handled values
  */
 @Override
 public int size() {
   final int count;
   switch (resultType) {
     case ROWS_CLUSTERING:
       count = dataset.size();
       break;
     case COLUMNS_CLUSTERING:
       count = dataset.attributeCount();
       break;
     default:
       throw new RuntimeException("Don't know wether cluster rows or columns.");
   }
   return count;
 }