Beispiel #1
0
  /**
   * Builds a clustering from labelled points, creating one {@link SphereCluster} for every class
   * label that has at least one point assigned to it.
   *
   * <p>If the last value of the class attribute is named {@code "noise"}, points carrying that
   * label are left out. Each resulting cluster receives the class value of its first point as both
   * its id and its ground truth.
   *
   * @param points labelled instances; must contain at least one element, because the dataset
   *     header is read from the first point
   */
  public Clustering(List<? extends Instance> points) {
    HashMap<Integer, Integer> labelMap = classValues(points);
    // One attribute is subtracted from the header's attribute count -- presumably the class
    // attribute; confirm against SphereCluster's expectations.
    int dim = points.get(0).dataset().numAttributes() - 1;
    int numClasses = labelMap.size();

    Attribute classLabel = points.get(0).dataset().classAttribute();
    int lastLabelIndex = classLabel.numValues() - 1;
    // Compare string contents with equals(): the previous reference comparison (==) only matched
    // when both strings happened to be the same interned object. The index guard avoids asking
    // for value(-1) when the class attribute reports no values.
    int noiseLabel =
        (lastLabelIndex >= 0 && "noise".equals(classLabel.value(lastLabelIndex)))
            ? lastLabelIndex
            : -1;

    // Generic array creation is not allowed in Java; the cast is safe because the array is only
    // ever filled with ArrayList<Instance> below.
    @SuppressWarnings("unchecked")
    ArrayList<Instance>[] sorted_points = (ArrayList<Instance>[]) new ArrayList[numClasses];
    for (int i = 0; i < numClasses; i++) {
      sorted_points[i] = new ArrayList<Instance>();
    }

    // Bucket every non-noise point by the slot that labelMap assigns to its class value.
    for (Instance point : points) {
      int clusterid = (int) point.classValue();
      if (clusterid == noiseLabel) {
        continue;
      }
      sorted_points[labelMap.get(clusterid)].add(point);
    }

    this.clusters = new AutoExpandVector<Cluster>();
    for (int i = 0; i < numClasses; i++) {
      ArrayList<Instance> members = sorted_points[i];
      if (members.isEmpty()) {
        continue;
      }
      SphereCluster s = new SphereCluster(members, dim);
      double label = members.get(0).classValue();
      s.setId(label);
      s.setGroundTruth(label);
      clusters.add(s);
    }
  }
Beispiel #2
0
  /**
   * Builds a clustering of micro-clusters from labelled points.
   *
   * <p>Points are grouped by class value; points whose class value is {@code -1} are skipped. For
   * each class the points are consumed in batches of up to {@code initMinPoints} to seed a
   * {@link SphereCluster}. A new seed is merged with every existing micro-cluster of the same
   * class whose {@code overlapRadiusDegree} with it exceeds {@code overlapThreshold}, and it then
   * absorbs every remaining point whose inclusion probability is above 0.8. Afterwards the
   * micro-clusters of the class are merged pairwise until no pair exceeds the threshold. Finally
   * all clusters are numbered consecutively, starting at 0, in the order they were added.
   *
   * @param points labelled points; must contain at least one element, because the dataset header
   *     is read from the first point
   * @param overlapThreshold overlap degree above which two micro-clusters are merged
   * @param initMinPoints maximum number of points used to seed a micro-cluster; must be at least 1
   * @throws IllegalArgumentException if {@code initMinPoints} is less than 1 (previously such a
   *     value made the seeding loop run forever, because no point was ever consumed)
   */
  public Clustering(ArrayList<DataPoint> points, double overlapThreshold, int initMinPoints) {
    if (initMinPoints < 1) {
      throw new IllegalArgumentException(
          "initMinPoints must be at least 1, got " + initMinPoints);
    }

    HashMap<Integer, Integer> labelMap = Clustering.classValues(points);
    // One attribute is subtracted from the header's attribute count -- presumably the class
    // attribute; confirm against SphereCluster's expectations.
    int dim = points.get(0).dataset().numAttributes() - 1;
    int numClasses = labelMap.size();
    final double inclusionThreshold = 0.8;

    // Generic array creation is not allowed in Java; the cast is safe because the array is only
    // ever filled with ArrayList<DataPoint> below.
    @SuppressWarnings("unchecked")
    ArrayList<DataPoint>[] sorted_points = (ArrayList<DataPoint>[]) new ArrayList[numClasses];
    for (int i = 0; i < numClasses; i++) {
      sorted_points[i] = new ArrayList<DataPoint>();
    }
    for (DataPoint point : points) {
      int clusterid = (int) point.classValue();
      if (clusterid == -1) {
        continue;
      }
      sorted_points[labelMap.get(clusterid)].add(point);
    }

    clusters = new AutoExpandVector<Cluster>();
    for (int i = 0; i < numClasses; i++) {
      // microByClass and pointInMicroClusters are parallel lists: index c in one corresponds to
      // index c in the other.
      ArrayList<SphereCluster> microByClass = new ArrayList<SphereCluster>();
      ArrayList<ArrayList<Instance>> pointInMicroClusters = new ArrayList<ArrayList<Instance>>();
      // Working copy, so the class bucket itself stays intact for the ground-truth lookup below.
      ArrayList<DataPoint> pointInCluster = new ArrayList<DataPoint>(sorted_points[i]);

      while (!pointInCluster.isEmpty()) {
        // Seed a micro-cluster with the next batch of up to initMinPoints points.
        ArrayList<Instance> micro_points = new ArrayList<Instance>();
        for (int j = 0; j < initMinPoints && !pointInCluster.isEmpty(); j++) {
          micro_points.add(pointInCluster.remove(0));
        }
        SphereCluster s = new SphereCluster(micro_points, dim);

        // Merge the seed with every existing micro-cluster it overlaps too strongly. The index
        // only advances when nothing was removed; otherwise the element that slides into slot c
        // would be skipped.
        int c = 0;
        while (c < microByClass.size()) {
          if (microByClass.get(c).overlapRadiusDegree(s) > overlapThreshold) {
            micro_points.addAll(pointInMicroClusters.get(c));
            s = new SphereCluster(micro_points, dim);
            pointInMicroClusters.remove(c);
            microByClass.remove(c);
          } else {
            c++;
          }
        }

        // Absorb remaining points that the sphere already covers. The sphere is not rebuilt
        // here; only its weight is set afterwards. As above, the index stays put after a removal
        // so that no point is skipped.
        int j = 0;
        while (j < pointInCluster.size()) {
          DataPoint candidate = pointInCluster.get(j);
          if (s.getInclusionProbability(candidate) > inclusionThreshold) {
            pointInCluster.remove(j);
            micro_points.add(candidate);
          } else {
            j++;
          }
        }

        s.setWeight(micro_points.size());
        microByClass.add(s);
        pointInMicroClusters.add(micro_points);
      }

      // Repeatedly merge overlapping micro-clusters of this class until a full pass changes
      // nothing.
      boolean changed = true;
      while (changed) {
        changed = false;
        for (int c = 0; c < microByClass.size(); c++) {
          for (int c1 = c + 1; c1 < microByClass.size(); c1++) {
            double overlap = microByClass.get(c).overlapRadiusDegree(microByClass.get(c1));
            if (overlap > overlapThreshold) {
              pointInMicroClusters.get(c).addAll(pointInMicroClusters.get(c1));
              SphereCluster s = new SphereCluster(pointInMicroClusters.get(c), dim);
              microByClass.set(c, s);
              pointInMicroClusters.remove(c1);
              microByClass.remove(c1);
              changed = true;
              // Indices after c1 have shifted; leave the inner loop and let the next pass
              // re-examine the pairs.
              break;
            }
          }
        }
      }

      // The class bucket is only read when at least one micro-cluster exists, which implies the
      // bucket is non-empty.
      for (int j = 0; j < microByClass.size(); j++) {
        microByClass.get(j).setGroundTruth(sorted_points[i].get(0).classValue());
        clusters.add(microByClass.get(j));
      }
    }

    // Number all clusters consecutively across classes.
    for (int j = 0; j < clusters.size(); j++) {
      clusters.get(j).setId(j);
    }
  }