Пример #1
0
  /**
   * Reads the input file line by line and appends one data point per parsed line to
   * {@code kMeansPoints}.
   *
   * <p>Each line is split on {@code ':'} and every field is parsed as a {@code double}. A line with
   * exactly three fields becomes a point built from its three coordinates; a line with two fields,
   * or with four or more, becomes a point built from the list of all its values. Lines with fewer
   * than two fields (including empty lines) are skipped. Every new point starts out assigned to
   * cluster 0.
   *
   * @throws IOException if the input file cannot be opened or read
   * @throws NumberFormatException if a field of a line with two or more fields is not a valid
   *     double
   */
  public void readData() throws IOException {

    BufferedReader in = new BufferedReader(new FileReader(this.inputFileName));
    try {
      String line;
      while ((line = in.readLine()) != null) {
        String[] st = line.split(":");

        kMeansPoint dp;
        if (st.length == 3) {
          // Three-dimensional input: use the dedicated three-coordinate constructor.
          dp =
              new kMeansPoint(
                  Double.parseDouble(st[0]), Double.parseDouble(st[1]), Double.parseDouble(st[2]));
        } else if (st.length >= 2) {
          // Any other dimensionality: keep all values in a list. The "else" matters here, because
          // otherwise a three-field line would be stored a second time as a list-based point.
          List<Double> listST = new ArrayList<Double>();
          for (String str : st) {
            listST.add(Double.parseDouble(str));
          }
          dp = new kMeansPoint(listST);
        } else {
          // Fewer than two fields: nothing usable on this line.
          continue;
        }

        dp.assignToCluster(0);
        this.kMeansPoints.add(dp);
      }
    } finally {
      // Release the file handle even when reading or parsing fails part-way through.
      in.close();
    }
  } // end of readData()
Пример #2
0
  /**
   * Assigns a data point to one of the k clusters based on its distance from the means of the
   * clusters.
   *
   * <p>The search starts from the cluster the point currently belongs to, so the point only moves
   * when some other cluster mean is strictly closer. Points whose value list has exactly three
   * entries are measured with {@code kMeansPoint.distance}; all other points are measured with
   * {@code kMeansPoint.comPointsDis}.
   *
   * @param dp data point to be assigned
   */
  public void assignToCluster(kMeansPoint dp) {

    // Decide once which distance function applies to this point.
    boolean threeDimensional = dp.getListData().size() == 3;

    int closestCluster = dp.getClusterNumber();
    double minDistance =
        threeDimensional
            ? kMeansPoint.distance(dp, this.clusters[closestCluster].getMean())
            : kMeansPoint.comPointsDis(dp, this.clusters[closestCluster].getMean());

    for (int i = 0; i < this.k; i++) {
      // Evaluate each candidate distance a single time and reuse it for the comparison and update.
      double candidate =
          threeDimensional
              ? kMeansPoint.distance(dp, this.clusters[i].getMean())
              : kMeansPoint.comPointsDis(dp, this.clusters[i].getMean());
      if (candidate < minDistance) {
        minDistance = candidate;
        closestCluster = i;
      }
    }

    dp.assignToCluster(closestCluster);
  } // end of assignToCluster
Пример #3
0
  /**
   * Main method -- to test the kMeans class.
   *
   * <p>For every cluster count from 238 through 300 this builds a kMeans instance over a fixed
   * input file, reads the data, runs the clustering, prints the number of data points together
   * with the first and the last point, and finally hands one "index:clusterNumber" string per data
   * point to {@code write} along with an output path that embeds the cluster count. If reading the
   * data fails, the exception is printed to standard error and the JVM exits with status -1.
   *
   * @param args command line arguments (not used)
   */
  public static void main(String[] args) {

    // Earlier experiments, previously kept here as commented-out code, ran the same procedure for
    // 3..100 clusters on two other inputs:
    //   user matrix:  "D:/pca/pcauserM0.85.txt"
    //                 -> "D:/pca/difClusters/kM_<z>kind_0.85_0.3user.txt"
    //   movie matrix: "D:/pca/pcauserMovieM0.85.txt"
    //                 -> "D:/pca/difMovieClusters/kM_<z>kind_0.85_0.3movie.txt"

    // Cluster the user / movie-category matrix for each cluster count and write out the results.
    final String inputPath = "D:/pca/recommendWork/pcamovieByAdd1Matrix0.85.txt";

    for (int clusterCount = 238; clusterCount <= 300; clusterCount++) {
      // Compute the clustering for this number of clusters.
      kMeans km = new kMeans(clusterCount, inputPath);

      try {
        km.readData();
      } catch (Exception e) {
        System.err.println(e);
        System.exit(-1);
      }

      km.runKMeans();
      System.out.println(km.getDataPoints().size());
      System.out.println(km.getDataPoints().get(0));
      System.out.println(km.getDataPoints().get(km.getDataPoints().size() - 1));

      // Every point of this run goes to the same file, so build the path once per run.
      String outputPath =
          "D:/pca/recommendWork/difMovClusters/kM_" + clusterCount + "kind_0.85_clu_movie.txt";

      // Write one "index:clusterNumber" entry per data point.
      int index = 0;
      while (index < km.getDataPoints().size()) {
        kMeansPoint point = (kMeansPoint) km.getDataPoints().get(index);
        write(outputPath, index + ":" + point.getClusterNumber());
        index++;
      }
    }
  } // end of main()
Пример #4
0
  /**
   * Recomputes the mean of every cluster from the points currently assigned to it and reports
   * whether any mean moved.
   *
   * <p>The number of values in the first data point selects the code path: exactly three values
   * means the x/y/z accessors and {@code kMeansPoint.distance} are used, anything else means the
   * per-point value lists and {@code kMeansPoint.comPointsDis} are used. Clusters with no assigned
   * points keep their previous mean.
   *
   * @return {@code true} if at least one recomputed mean is at a non-zero distance from the mean it
   *     replaced, {@code false} otherwise
   */
  private boolean updateMeans() {

    final int dimensions = ((kMeansPoint) this.kMeansPoints.get(0)).getListData().size();
    boolean changed = false;

    // Remember the current means so that movement can be detected after the update.
    int[] memberCount = new int[this.k];
    kMeansPoint[] previousMeans = new kMeansPoint[this.k];
    for (int c = 0; c < this.k; c++) {
      previousMeans[c] = this.clusters[c].getMean();
    }

    if (dimensions == 3) {
      // Per-cluster coordinate sums; Java zero-initialises new arrays.
      double[] sumX = new double[this.k];
      double[] sumY = new double[this.k];
      double[] sumZ = new double[this.k];

      for (Iterator it = this.kMeansPoints.iterator(); it.hasNext(); ) {
        kMeansPoint point = (kMeansPoint) it.next();
        int c = point.getClusterNumber();

        sumX[c] += point.getX();
        sumY[c] += point.getY();
        sumZ[c] += point.getZ();
        memberCount[c]++;
      }

      for (int c = 0; c < this.k; c++) {
        if (memberCount[c] == 0) {
          continue; // empty cluster: leave its mean untouched
        }

        kMeansPoint mean =
            new kMeansPoint(
                sumX[c] / memberCount[c], sumY[c] / memberCount[c], sumZ[c] / memberCount[c]);
        mean.assignToCluster(c);
        this.clusters[c].setMean(mean);
        if (kMeansPoint.distance(previousMeans[c], this.clusters[c].getMean()) != 0) {
          changed = true;
        }
      }
    } else {
      // sums[cluster][component]; Java zero-initialises new arrays.
      double[][] sums = new double[this.k][dimensions];

      for (Iterator it = this.kMeansPoints.iterator(); it.hasNext(); ) {
        kMeansPoint point = (kMeansPoint) it.next();
        int c = point.getClusterNumber();

        for (int d = 0; d < dimensions; d++) {
          sums[c][d] += point.getListData().get(d);
        }
        memberCount[c]++;
      }

      for (int c = 0; c < this.k; c++) {
        if (memberCount[c] == 0) {
          continue; // empty cluster: leave its mean untouched
        }

        List<Double> meanValues = new ArrayList<Double>();
        for (int d = 0; d < dimensions; d++) {
          meanValues.add(sums[c][d] / memberCount[c]);
        }

        kMeansPoint mean = new kMeansPoint(meanValues);
        mean.assignToCluster(c);
        this.clusters[c].setMean(mean);
        if (kMeansPoint.comPointsDis(previousMeans[c], this.clusters[c].getMean()) != 0) {
          changed = true;
        }
      }
    }

    return changed;
  } // end of updateMeans()