@Override protected void setup(Context context) throws IOException, InterruptedException { Configuration conf = context.getConfiguration(); mDelimiter = conf.get(ArgumentsConstants.DELIMITER, "\t"); mIndexArr = CommonMethods.convertIndexStr2IntArr(conf.get(ArgumentsConstants.TARGET_INDEX, "-1")); mNominalIndexArr = CommonMethods.convertIndexStr2IntArr(conf.get(ArgumentsConstants.NOMINAL_INDEX, "-1")); mExceptionIndexArr = CommonMethods.convertIndexStr2IntArr(conf.get(ArgumentsConstants.EXCEPTION_INDEX, "-1")); mClusterCnt = Integer.parseInt(conf.get(ArgumentsConstants.CLUSTER_COUNT, "1")); // cluster load and setting Path clusterPath = new Path(conf.get(ArgumentsConstants.CLUSTER_PATH, null)); mClusters = KMeansClusterInfoMgr.loadClusterInfoFile(conf, clusterPath, mClusterCnt, mDelimiter); }
/**
 * Assigns one input record to its nearest cluster and emits the record keyed by that
 * cluster's index.
 *
 * <p>The record is split into columns with {@code mDelimiter}. For each of the
 * {@code mClusterCnt} clusters, the per-attribute distances reported by the cluster are
 * squared and summed over the selected columns, and the square root of that sum is the
 * record-to-cluster distance. A column is selected when it is accepted by
 * {@code mIndexArr} and not listed in {@code mExceptionIndexArr}; columns listed in
 * {@code mNominalIndexArr} are measured as nominal attributes, all others as numeric.
 * On ties the cluster with the lowest index wins.
 *
 * <p>If no cluster yields a comparable distance (for example {@code mClusterCnt} is 0 or
 * every distance is NaN), the record is emitted with key {@code -1}.
 *
 * @param key     input key, unused
 * @param value   one delimited input record; written to the output unchanged
 * @param context mapper context receiving the {@code (clusterIndex, record)} pair
 * @throws IOException if writing the output fails
 * @throws InterruptedException if writing the output is interrupted
 */
@Override
protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
    // NOTE(review): String.split treats mDelimiter as a regular expression, so delimiters
    // such as "|" or "." need escaping by whoever configures the job.
    String[] columns = value.toString().split(mDelimiter);

    int clusterIndex = -1;
    // Start from +infinity so any finite distance is accepted. A fixed large sentinel would
    // leave the record unassigned whenever every cluster is farther away than the sentinel.
    double distMin = Double.POSITIVE_INFINITY;

    for (int k = 0; k < mClusterCnt; k++) {
        double attrDistanceSum = 0;

        for (int i = 0; i < columns.length; i++) {
            // Skip columns that are not selected or are explicitly excluded.
            // NOTE(review): the boolean passed to isContainIndex looks like the result to use
            // when no index list was configured - confirm against CommonMethods.
            if (!CommonMethods.isContainIndex(mIndexArr, i, true)
                    || CommonMethods.isContainIndex(mExceptionIndexArr, i, false)) {
                continue;
            }

            double distAttr;
            if (CommonMethods.isContainIndex(mNominalIndexArr, i, false)) {
                distAttr = mClusters[k].getAttributeDistance(
                        i, columns[i], ConfigurationVariable.NOMINAL_ATTRIBUTE);
            } else {
                distAttr = mClusters[k].getAttributeDistance(
                        i, columns[i], ConfigurationVariable.NUMERIC_ATTRIBUTE);
            }
            attrDistanceSum += distAttr * distAttr;
        }

        // Euclidean distance over the selected attributes.
        double dist = Math.sqrt(attrDistanceSum);
        if (dist < distMin) {
            distMin = dist;
            clusterIndex = k;
        }
    }

    context.write(new IntWritable(clusterIndex), value);
}