Beispiel #1
0
  /**
   * Divides a neighborhood into two groups with an iterative two-center
   * (k-means style, k = 2) assignment.
   *
   * <p>Both centers are seeded with copies of the data of randomly drawn members
   * (the two draws are independent, so they may pick the same member). In every
   * pass each gene is assigned to center A when its {@code distance} to A is
   * strictly smaller than its distance to B, otherwise to B. The loop ends when
   * no assignment changed, when the summed distance is zero, when (after the
   * first pass) the relative change of the summed distance drops below
   * {@code desc.getEps()}, or after {@code desc.getMaxit()} passes.
   *
   * @param neighborhood the genes to partition; {@code r.nextInt(size)} is called
   *     on its size, so it is presumably expected to be non-empty
   * @return the result of {@code split(neighborhood, membership)}, where the
   *     membership bit set marks the genes assigned to center A
   */
  private Pair<List<Gene>, List<Gene>> twoMeanClusterSplit(List<Gene> neighborhood) {
    final int size = neighborhood.size();
    final int maxIterations = desc.getMaxit();
    final double tolerance = desc.getEps();

    // seed both centers from randomly chosen members
    final int seedA = r.nextInt(size);
    final int seedB = r.nextInt(size);
    final Gene centerA = new Gene(1, -1, Arrays.copyOf(neighborhood.get(seedA).data, samples));
    final Gene centerB = new Gene(1, -1, Arrays.copyOf(neighborhood.get(seedB).data, samples));

    // per-cluster accumulators; NaN means "nothing accumulated yet" for that sample
    final float[] sumA = new float[samples];
    final float[] sumB = new float[samples];
    Arrays.fill(sumA, Float.NaN);
    Arrays.fill(sumB, Float.NaN);

    // bit i is set when the i-th gene of the neighborhood currently belongs to A
    final BitSet memberOfA = new BitSet(size);

    double previousTotal = 0;
    for (int iteration = 0; iteration < maxIterations; ++iteration) {
      int flips = 0;
      double total = 0;
      int index = 0;

      for (Gene gene : neighborhood) {
        final double toA = distance(centerA, gene);
        final double toB = distance(centerB, gene);
        // ties (and non-comparable distances) fall to B
        final boolean closerToA = toA < toB;

        if (memberOfA.get(index) != closerToA) {
          memberOfA.set(index, closerToA);
          flips++;
        }

        final float[] sums;
        if (closerToA) {
          total += toA;
          sums = sumA;
        } else {
          total += toB;
          sums = sumB;
        }

        // add the gene's present values to the accumulator of its cluster
        for (int k = 0; k < samples; ++k) {
          if (gene.isNaN(k)) {
            continue;
          }
          sums[k] = Float.isNaN(sums[k]) ? gene.get(k) : sums[k] + gene.get(k);
        }
        index++;
      }

      // converged: nothing moved, or every gene sits exactly on its center
      if (flips == 0 || total == 0) {
        break;
      }
      // relative improvement is only meaningful once a previous total exists
      if (iteration > 0) {
        final double relativeChange = Math.abs(total - previousTotal) / previousTotal;
        if (relativeChange < tolerance) {
          break;
        }
      }
      previousTotal = total;

      final int countA = memberOfA.cardinality();
      final int countB = size - countA;
      // FIXME: an empty cluster (countA == 0 || countB == 0) is not treated specially
      updateCenter(centerA, sumA, countA);
      updateCenter(centerB, sumB, countB);
    }

    return split(neighborhood, memberOfA);
  }
Beispiel #2
0
 /**
  * Computes the mean squared difference between two genes, using only the
  * samples for which neither gene reports a missing value.
  *
  * @param target the first gene
  * @param neighbor the second gene
  * @return the sum of squared differences divided by the number of shared
  *     samples, or {@link Double#POSITIVE_INFINITY} when no sample is present
  *     in both genes
  */
 private double distance(Gene target, Gene neighbor) {
   double squaredSum = 0;
   int shared = 0;
   for (int s = 0; s < samples; ++s) {
     final boolean bothPresent = !target.isNaN(s) && !neighbor.isNaN(s);
     if (bothPresent) {
       double diff = target.get(s) - neighbor.get(s);
       squaredSum += diff * diff;
       shared++;
     }
   }
   if (shared == 0) {
     // nothing to compare: treat the genes as infinitely far apart
     return Double.POSITIVE_INFINITY;
   }
   // FIXME according to the Fortran code, this is not the Euclidean distance
   // (the Euclidean variant would be the square root of squaredSum)
   return squaredSum / shared;
 }
Beispiel #3
0
 /**
  * Summarizes one sample column across all {@code genes}: the mean of the
  * values that are not NaN and the number of values that are NaN.
  *
  * @param sample index of the sample to summarize
  * @return a {@code Sample} built from the mean and the NaN count; the mean is
  *     NaN when no gene contributes a non-NaN value
  */
 private Sample computeSample(final int sample) {
   double total = 0;
   int present = 0;
   int missing = 0;
   for (Gene gene : genes) {
     double value = gene.get(sample);
     if (!isNaN(value)) {
       total += value;
       present++;
     } else {
       missing++;
     }
   }
   final double mean = total / present;
   return new Sample(mean, missing);
 }
Beispiel #4
0
 /**
  * Returns the value at the given sample index by delegating to {@code gene}.
  *
  * @param sample index of the sample to read
  * @return the value {@code gene.get(sample)} reports, as a double
  */
 public double get(int sample) {
   final double value = gene.get(sample);
   return value;
 }