/**
  * Fold the given canopy into the working collection of canopies.
  *
  * <p>Each existing canopy whose center is closer than T1 to the given canopy's center is touched
  * by the given canopy. Of the existing canopies whose centers are closer than T2, the nearest one
  * absorbs the given canopy through {@code merge}. If no existing canopy is closer than T2, the
  * given canopy is appended to the collection instead.
  *
  * @param aCanopy a MeanShiftCanopy to be merged
  * @param canopies the collection of canopies to merge into, or to append to
  */
 public void mergeCanopy(MeanShiftCanopy aCanopy, Collection<MeanShiftCanopy> canopies) {
   MeanShiftCanopy nearestCover = null;
   double nearestDistance = Double.MAX_VALUE;
   for (MeanShiftCanopy candidate : canopies) {
     double separation = measure.distance(candidate.getCenter(), aCanopy.getCenter());
     if (separation < t1) {
       aCanopy.touch(candidate);
     }
     boolean withinT2 = separation < t2;
     boolean beatsCurrent = nearestCover == null || separation < nearestDistance;
     if (withinT2 && beatsCurrent) {
       nearestCover = candidate;
       nearestDistance = separation;
     }
   }
   if (nearestCover != null) {
     // A covering canopy exists: it absorbs the newcomer rather than the list growing.
     nearestCover.merge(aCanopy);
     return;
   }
   canopies.add(aCanopy);
 }
  /**
   * Consider one clustered point as a candidate for its cluster's most distant point.
   *
   * <p>The point's score is the sum of its distances to every representative point currently
   * registered for the cluster. If the cluster has no most distant point yet, or the recorded one
   * has a smaller weight than this score, the entry is replaced by a copy of this point's vector
   * weighted by the score. A cluster id with no representative points registered yields a score of
   * 0 instead of failing.
   *
   * @param clusterId id of the cluster the point belongs to
   * @param point the clustered point to evaluate
   * @param measure the distance measure used to score the point
   * @param representativePoints representative points keyed by cluster id
   * @param mostDistantPoints most distant point found so far per cluster id; updated in place
   */
  public static void mapPoint(
      IntWritable clusterId,
      WeightedVectorWritable point,
      DistanceMeasure measure,
      Map<Integer, List<VectorWritable>> representativePoints,
      Map<Integer, WeightedVectorWritable> mostDistantPoints) {
    int key = clusterId.get();
    WeightedVectorWritable currentMDP = mostDistantPoints.get(key);

    List<VectorWritable> repPoints = representativePoints.get(key);
    double totalDistance = 0.0;
    // Map.get returns null for an unknown cluster id; iterating that would throw a
    // NullPointerException, so such clusters simply contribute no distance.
    if (repPoints != null) {
      for (VectorWritable refPoint : repPoints) {
        totalDistance += measure.distance(refPoint.get(), point.getVector());
      }
    }
    if (currentMDP == null || currentMDP.getWeight() < totalDistance) {
      // Clone the vector so the stored entry does not alias the caller's point.
      mostDistantPoints.put(
          key, new WeightedVectorWritable(totalDistance, point.getVector().clone()));
    }
  }
 public MeanShiftCanopyClusterer(Configuration configuration) {
   try {
     measure =
         Class.forName(configuration.get(MeanShiftCanopyConfigKeys.DISTANCE_MEASURE_KEY))
             .asSubclass(DistanceMeasure.class)
             .newInstance();
     measure.configure(configuration);
   } catch (ClassNotFoundException e) {
     throw new IllegalStateException(e);
   } catch (IllegalAccessException e) {
     throw new IllegalStateException(e);
   } catch (InstantiationException e) {
     throw new IllegalStateException(e);
   }
   // nextCanopyId = 0; // never read?
   t1 = Double.parseDouble(configuration.get(MeanShiftCanopyConfigKeys.T1_KEY));
   t2 = Double.parseDouble(configuration.get(MeanShiftCanopyConfigKeys.T2_KEY));
   convergenceDelta =
       Double.parseDouble(configuration.get(MeanShiftCanopyConfigKeys.CLUSTER_CONVERGENCE_KEY));
 }
  /**
   * Manual benchmark comparing LocalitySensitiveHash search results with exhaustive search.
   *
   * <p>The first 40000 vectors of the input file are indexed. Vectors 40100 onwards are then used
   * as queries ({@code nearest} of them). For every query the method measures the search time,
   * the overlap between the LSH result and the true nearest neighbours (found by sorting all
   * indexed vectors by distance to the query), and the mean distance from the query to three
   * neighbour sets: the LSH result, the true neighbours, and a fixed baseline made of the first
   * {@code numberOfNeighbors} indexed vectors. Averages over all queries are printed.
   *
   * <p>The {@code @Test} annotation is commented out and the input path is machine specific, so
   * this does not run as part of the normal test suite.
   *
   * @throws Exception if the input file cannot be read or the search fails
   */
  //    @Test
  public void testSearch() throws Exception {
    int nVar = 10;
    final DistanceMeasure distance = new EuclideanDistanceMeasure();
    double t1 = 0;
    double t2 = 0;
    double t3 = 0;
    double tsim = 0.0;
    int nearest = 100;
    int numberOfNeighbors = 100;
    int tsz = 0;
    LocalitySensitiveHash lsh = new LocalitySensitiveHash(distance, nVar, 2000);
    List<Vector> randomNeighbor = Lists.newArrayList();
    List<Vector> orgNeighbor = Lists.newArrayList();
    List<Vector> ref = Lists.newArrayList();
    List<Vector> inputList = readInputFile("/Users/dixu/Documents/Amex/kNN/kMeansTestFile.csv");
    for (int i = 0; i < 40000; i++) {
      lsh.add(inputList.get(i), i);
      ref.add(inputList.get(i));
      orgNeighbor.add(inputList.get(i));
    }
    // Baseline: an arbitrary fixed set of indexed vectors, independent of the query.
    randomNeighbor.addAll(ref.subList(0, numberOfNeighbors));

    long runningTime = 0;
    for (int i = 40100; i < (40100 + nearest); i++) {
      final Vector v = inputList.get(i);
      long time1 = System.nanoTime();
      List<WeightedVector> rx = lsh.search(v, numberOfNeighbors);

      List<Vector> lshNeighbor = Lists.newArrayList();
      for (WeightedVector obs : rx) {
        lshNeighbor.add(obs.getVector());
      }
      long time2 = System.nanoTime();
      runningTime = runningTime + time2 - time1;

      int sz = lsh.countVectors();

      Ordering<Vector> queryOrder =
          new Ordering<Vector>() {
            @Override
            public int compare(Vector v1, Vector v2) {
              return Double.compare(distance.distance(v, v1), distance.distance(v, v2));
            }
          };
      // Exhaustive ground truth: order every indexed vector by distance to the query.
      Collections.sort(orgNeighbor, queryOrder);
      List<Vector> trueNeighbor = orgNeighbor.subList(0, numberOfNeighbors);
      List<Vector> intersection1 = ListUtils.intersection(trueNeighbor, lshNeighbor);
      double sim = intersection1.size() / (double) numberOfNeighbors;

      // Per-query accumulators must start from zero for every query; keeping them outside the
      // loop would leak the previous query's mean into the next one and skew the averages.
      double d1 = 0;
      double d2 = 0;
      double d3 = 0;
      // NOTE(review): assumes the search returns at least numberOfNeighbors results - confirm.
      for (int j = 0; j < numberOfNeighbors; j++) {
        d1 += distance.distance(v, lshNeighbor.get(j));
        d2 += distance.distance(v, randomNeighbor.get(j));
        d3 += distance.distance(v, trueNeighbor.get(j));
      }
      t1 += d1 / numberOfNeighbors;
      t2 += d2 / numberOfNeighbors;
      t3 += d3 / numberOfNeighbors;
      tsim += sim;
      tsz += sz;
    }
    t1 = t1 / nearest;
    t2 = t2 / nearest;
    t3 = t3 / nearest;
    tsim = tsim / nearest;
    tsz = tsz / nearest;
    System.out.printf(
        "ave_search=%d ave_sim=%.2f trueNeighbor_dist=%.2f proxyNeighbor_dist=%.2f "
            + "randomNeighbor_dist=%.2f \n",
        tsz, tsim, t3, t1, t2);
    System.out.printf("running time = %.2f seconds \n", runningTime / 1e9);
  }
 /**
  * Decide whether this cluster has converged, and remember the outcome.
  *
  * <p>The freshly computed centroid is compared with the current center; the cluster counts as
  * converged when the measured distance between them does not exceed the convergence delta. The
  * result is stored in the {@code converged} field before being returned.
  *
  * @param measure The distance measure to use for comparing centroid and center.
  * @param convergenceDelta the largest distance still regarded as converged.
  * @return true if the cluster is converged
  */
 public boolean computeConvergence(DistanceMeasure measure, double convergenceDelta) {
   Vector newCentroid = computeCentroid();
   double centroidLengthSquared = newCentroid.getLengthSquared();
   double shift = measure.distance(centroidLengthSquared, newCentroid, getCenter());
   converged = shift <= convergenceDelta;
   return converged;
 }
 /**
  * Tell whether a point lies within the tight (T2) radius of a canopy.
  *
  * @param canopy the canopy whose center is the reference.
  * @param point a Vector point
  * @return true if the distance from the canopy's center to the point is strictly less than T2
  */
 public boolean closelyBound(MeanShiftCanopy canopy, Vector point) {
   double distanceToCenter = measure.distance(canopy.getCenter(), point);
   return distanceToCenter < t2;
 }
 /**
  * Tell whether a point lies within the loose (T1) radius of a canopy.
  *
  * @param canopy the canopy whose center is the reference.
  * @param point a Vector point
  * @return true if the distance from the canopy's center to the point is strictly less than T1
  */
 boolean covers(MeanShiftCanopy canopy, Vector point) {
   double distanceToCenter = measure.distance(canopy.getCenter(), point);
   return distanceToCenter < t1;
 }