/** Reads the input data from the file and stores the data points in the vector */ public void readData() throws IOException { BufferedReader in = new BufferedReader(new FileReader(this.inputFileName)); String line = ""; while ((line = in.readLine()) != null) { // StringTokenizer st = new StringTokenizer(line, " \t\n\r\f,"); String[] st = line.split(":"); // 3维 if (st.length == 3) { kMeansPoint dp = new kMeansPoint( Double.parseDouble(st[0]), Double.parseDouble(st[1]), Double.parseDouble(st[2])); dp.assignToCluster(0); this.kMeansPoints.add(dp); } if (st.length >= 2) { List<Double> listST = new ArrayList<Double>(); for (String str : st) { listST.add(Double.parseDouble(str)); } kMeansPoint dp = new kMeansPoint(listST); dp.assignToCluster(0); this.kMeansPoints.add(dp); } } in.close(); } // end of readData()
/**
 * Assigns a data point to the cluster whose mean is nearest to it.
 *
 * <p>The point's current cluster is the starting candidate; another cluster replaces it only
 * when its mean is strictly closer, so ties keep the earlier choice. Points whose coordinate
 * list has exactly three entries are measured with {@code kMeansPoint.distance}; all other
 * points are measured with {@code kMeansPoint.comPointsDis}.
 *
 * @param dp data point to be assigned
 */
public void assignToCluster(kMeansPoint dp) {
  int currentCluster = dp.getClusterNumber();
  // Choose the metric once instead of duplicating the whole search for each case.
  final boolean threeDimensional = dp.getListData().size() == 3;

  kMeansPoint currentMean = this.clusters[currentCluster].getMean();
  double minDistance =
      threeDimensional
          ? kMeansPoint.distance(dp, currentMean)
          : kMeansPoint.comPointsDis(dp, currentMean);

  for (int i = 0; i < this.k; i++) {
    kMeansPoint mean = this.clusters[i].getMean();
    // Compute each candidate distance exactly once and reuse it for compare and update.
    double candidate =
        threeDimensional
            ? kMeansPoint.distance(dp, mean)
            : kMeansPoint.comPointsDis(dp, mean);
    if (candidate < minDistance) {
      minDistance = candidate;
      currentCluster = i;
    }
  }
  dp.assignToCluster(currentCluster);
} // end of assignToCluster
/**
 * Recomputes the mean of every cluster from the points currently assigned to it and reports
 * whether any mean moved.
 *
 * <p>The dimensionality is taken from the coordinate list of the first stored point. When it is
 * exactly three, the means are built from {@code getX()/getY()/getZ()} and compared with
 * {@code kMeansPoint.distance}; otherwise they are built from the coordinate lists and compared
 * with {@code kMeansPoint.comPointsDis}. A cluster with no assigned points keeps its previous
 * mean.
 *
 * @return {@code true} if the distance between the old and the new mean is non-zero for at least
 *     one non-empty cluster; {@code false} otherwise, including when there are no data points
 */
private boolean updateMeans() {
  // Without data there is no first point to take the dimension from and nothing to update.
  if (this.kMeansPoints.isEmpty()) {
    return false;
  }

  // Looked up once; every point is treated as having the first point's dimension.
  final int dimension = ((kMeansPoint) this.kMeansPoints.get(0)).getListData().size();
  boolean changed = false;
  // Freshly allocated numeric arrays are zero-filled, so no explicit reset is needed.
  int[] counts = new int[this.k];

  if (dimension == 3) {
    double[] sumX = new double[this.k];
    double[] sumY = new double[this.k];
    double[] sumZ = new double[this.k];

    for (Object element : this.kMeansPoints) {
      kMeansPoint dp = (kMeansPoint) element;
      int cluster = dp.getClusterNumber();
      sumX[cluster] += dp.getX();
      sumY[cluster] += dp.getY();
      sumZ[cluster] += dp.getZ();
      counts[cluster]++;
    }

    for (int j = 0; j < this.k; j++) {
      if (counts[j] == 0) {
        continue; // empty cluster: leave its mean untouched
      }
      kMeansPoint previousMean = this.clusters[j].getMean();
      kMeansPoint newMean =
          new kMeansPoint(sumX[j] / counts[j], sumY[j] / counts[j], sumZ[j] / counts[j]);
      newMean.assignToCluster(j);
      this.clusters[j].setMean(newMean);
      // Exact comparison: any non-zero displacement counts as a change.
      if (kMeansPoint.distance(previousMean, this.clusters[j].getMean()) != 0) {
        changed = true;
      }
    }
  } else {
    // sums[cluster][coordinate]
    double[][] sums = new double[this.k][dimension];

    for (Object element : this.kMeansPoints) {
      kMeansPoint dp = (kMeansPoint) element;
      int cluster = dp.getClusterNumber();
      for (int d = 0; d < dimension; d++) {
        sums[cluster][d] += dp.getListData().get(d);
      }
      counts[cluster]++;
    }

    for (int j = 0; j < this.k; j++) {
      if (counts[j] == 0) {
        continue; // empty cluster: leave its mean untouched
      }
      List<Double> averaged = new ArrayList<Double>(dimension);
      for (int d = 0; d < dimension; d++) {
        averaged.add(sums[j][d] / counts[j]);
      }
      kMeansPoint previousMean = this.clusters[j].getMean();
      kMeansPoint newMean = new kMeansPoint(averaged);
      newMean.assignToCluster(j);
      this.clusters[j].setMean(newMean);
      // Exact comparison: any non-zero displacement counts as a change.
      if (kMeansPoint.comPointsDis(previousMean, this.clusters[j].getMean()) != 0) {
        changed = true;
      }
    }
  }
  return changed;
} // end of updateMeans()