Пример #1
0
  /**
   * Assigns one input point to a cluster and emits it keyed by that cluster's id.
   *
   * <p>The text value is parsed into a vector; if parsing yields {@code null} the record is
   * dropped without output. Every cluster whose center is not rejected by
   * {@code Deny} is measured against the point, and the {@code Clustering/similar} counter is
   * incremented once per measured cluster. The cluster with the smallest distance wins; on a
   * tie the cluster visited later replaces the earlier one. Nothing is written when every
   * cluster was rejected (or there are no clusters).
   *
   * @param key     input key; not used by this method
   * @param point   textual form of the point to assign
   * @param context task context used for counters and output
   * @throws IOException          if writing the output record fails
   * @throws InterruptedException if the write is interrupted
   */
  @Override
  protected void map(WritableComparable<?> key, Text point, Context context)
      throws IOException, InterruptedException {

    Vector candidate = parse.parseVector(point.toString());
    if (candidate == null) {
      // Unparseable input line: skip silently.
      return;
    }
    candidate.setNumPoints(1);

    Cluster best = null;
    double bestDistance = Double.MAX_VALUE;

    for (Cluster current : clusters) {
      Vector center = current.getCenter();

      // Only centers that are not denied for this point take part in the comparison.
      if (!candidate.Deny(center)) {
        double measured = center.distiance(candidate);
        context.getCounter("Clustering", "similar").increment(1);

        // "<=" keeps the original tie-breaking: a later cluster at equal distance wins.
        if (best == null || measured <= bestDistance) {
          best = current;
          bestDistance = measured;
        }
      }
    }

    if (best == null) {
      return;
    }
    Text clusterId = new Text(String.valueOf(best.getId()));
    context.write(clusterId, candidate);
  }
Пример #2
0
  /**
   * Prepares the task: resets the cluster list, configures the parser and, when a cluster
   * path is configured, loads the clusters from it.
   *
   * <p>The path is read from the configuration under {@code KMeansDriver.CLUSTER_PATH_KEY}.
   * If that value is {@code null} or empty, no clusters are loaded and the list stays empty.
   * NOTE(review): an empty cluster list is accepted silently in that case — confirm this is
   * intended.
   *
   * @param context task context supplying the job configuration
   * @throws IOException           declared by the overridden method
   * @throws InterruptedException  declared by the overridden method
   * @throws IllegalStateException if a cluster path is configured but yields no clusters, or
   *                               if loading the clusters fails (the original failure is the
   *                               cause)
   */
  @Override
  protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    this.clusters.clear();

    Configuration conf = context.getConfiguration();
    parse.setup(conf);

    try {
      String clusterPath = conf.get(KMeansDriver.CLUSTER_PATH_KEY);
      if (clusterPath != null && !clusterPath.isEmpty()) {
        KmeansPublic.configureWithClusterInfo(conf, new Path(clusterPath), clusters);
        if (clusters.isEmpty()) {
          throw new IllegalStateException("No clusters found. Check your -c path.");
        }
      }
    } catch (IllegalStateException e) {
      // Already the failure type this method reports; rethrow unchanged so the
      // "No clusters found" message is not buried inside a second wrapper.
      throw e;
    } catch (Exception e) {
      // Wrap ordinary failures only; JVM Errors (e.g. OutOfMemoryError) propagate untouched.
      throw new IllegalStateException(e);
    }
  }