예제 #1
0
  /**
   * Reads the job parameters from the task configuration and loads the current cluster
   * definitions before any record is mapped.
   *
   * <p>Values read (with the default used when a key is absent):
   *
   * <ul>
   *   <li>{@code ArgumentsConstants.DELIMITER} - column delimiter, default {@code "\t"}
   *   <li>{@code ArgumentsConstants.TARGET_INDEX}, {@code NOMINAL_INDEX}, {@code EXCEPTION_INDEX}
   *       - index strings, default {@code "-1"}, converted with {@code
   *       CommonMethods.convertIndexStr2IntArr}
   *   <li>{@code ArgumentsConstants.CLUSTER_COUNT} - number of clusters, default {@code "1"}
   *   <li>{@code ArgumentsConstants.CLUSTER_PATH} - location of the cluster info file; required
   * </ul>
   *
   * @param context task context supplying the job {@code Configuration}
   * @throws IOException declared by the overridden method; presumably raised while the cluster
   *     info file is loaded (confirm against {@code KMeansClusterInfoMgr})
   * @throws InterruptedException declared by the overridden method
   * @throws NumberFormatException if the configured cluster count is not a valid integer
   * @throws IllegalArgumentException if the cluster path is missing or empty
   */
  @Override
  protected void setup(Context context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();

    mDelimiter = conf.get(ArgumentsConstants.DELIMITER, "\t");

    mIndexArr =
        CommonMethods.convertIndexStr2IntArr(conf.get(ArgumentsConstants.TARGET_INDEX, "-1"));
    mNominalIndexArr =
        CommonMethods.convertIndexStr2IntArr(conf.get(ArgumentsConstants.NOMINAL_INDEX, "-1"));
    mExceptionIndexArr =
        CommonMethods.convertIndexStr2IntArr(conf.get(ArgumentsConstants.EXCEPTION_INDEX, "-1"));

    mClusterCnt = Integer.parseInt(conf.get(ArgumentsConstants.CLUSTER_COUNT, "1"));

    // The cluster path has no usable default. Validate it here so a misconfigured job fails
    // with a message that names the missing parameter, instead of the generic error raised
    // when a Path is built from a null or empty string.
    String clusterPathStr = conf.get(ArgumentsConstants.CLUSTER_PATH);
    if (clusterPathStr == null || clusterPathStr.isEmpty()) {
      throw new IllegalArgumentException(
          "Missing required job parameter: " + ArgumentsConstants.CLUSTER_PATH);
    }

    // Load the cluster definitions that map() measures each record against.
    Path clusterPath = new Path(clusterPathStr);
    mClusters =
        KMeansClusterInfoMgr.loadClusterInfoFile(conf, clusterPath, mClusterCnt, mDelimiter);
  }
예제 #2
0
  /**
   * Assigns one input record to its nearest cluster and emits the record keyed by that cluster's
   * index.
   *
   * <p>The record is split into columns with {@code mDelimiter} (which {@link
   * String#split(String)} interprets as a regular expression). For each of the first {@code
   * mClusterCnt} entries of {@code mClusters}, the per-column distances returned by {@code
   * getAttributeDistance} are squared, summed and square-rooted (Euclidean distance). A column
   * takes part only if {@code CommonMethods.isContainIndex} accepts it for {@code mIndexArr} and
   * rejects it for {@code mExceptionIndexArr}; columns accepted for {@code mNominalIndexArr} are
   * measured as nominal attributes, all others as numeric.
   *
   * <p>Ties go to the cluster with the lowest index because the comparison is strict. If no
   * cluster yields a distance below positive infinity (for example {@code mClusterCnt} is 0 or
   * every distance is NaN), the record is emitted with key {@code -1}.
   *
   * @param key input key; not used
   * @param value one delimited input record; written back unchanged as the output value
   * @param context task context that receives the {@code (clusterIndex, record)} pair
   * @throws IOException if writing the output pair fails
   * @throws InterruptedException if writing the output pair is interrupted
   */
  @Override
  protected void map(Object key, Text value, Context context)
      throws IOException, InterruptedException {
    String[] columns = value.toString().split(mDelimiter);

    int clusterIndex = -1;
    // Start from +infinity so any finite distance wins, however large. A fixed numeric
    // sentinel would leave records with large-magnitude features unassigned (key -1).
    double distMin = Double.POSITIVE_INFINITY;

    for (int k = 0; k < mClusterCnt; k++) {
      double attrDistanceSum = 0;

      // Accumulate the squared per-attribute distances for cluster k.
      for (int i = 0; i < columns.length; i++) {
        // NOTE(review): the boolean passed to isContainIndex is presumably the result used when
        // the index array is unset - confirm against CommonMethods.
        boolean selected =
            CommonMethods.isContainIndex(mIndexArr, i, true)
                && !CommonMethods.isContainIndex(mExceptionIndexArr, i, false);
        if (!selected) {
          continue;
        }

        double distAttr;
        if (CommonMethods.isContainIndex(mNominalIndexArr, i, false)) {
          distAttr =
              mClusters[k].getAttributeDistance(
                  i, columns[i], ConfigurationVariable.NOMINAL_ATTRIBUTE);
        } else {
          distAttr =
              mClusters[k].getAttributeDistance(
                  i, columns[i], ConfigurationVariable.NUMERIC_ATTRIBUTE);
        }

        attrDistanceSum += distAttr * distAttr;
      }

      double dist = Math.sqrt(attrDistanceSum);
      // Strict comparison: on equal distances the earlier cluster is kept.
      if (dist < distMin) {
        distMin = dist;
        clusterIndex = k;
      }
    }

    context.write(new IntWritable(clusterIndex), value);
  }