コード例 #1
0
    /**
     * Assigns one movie record to a centroid and emits it keyed by that centroid's index.
     *
     * <p>The input line is expected to look like {@code movieId:tok,tok,...}. For each token the
     * text after the first underscore (or the whole token when there is none) is parsed as an
     * integer rating. Only the first {@code attrNum} tokens are read, and each one is re-labelled
     * with its position, so the emitted line is {@code movieId:0_r0,1_r1,...}.
     *
     * <p>For every cluster the squared differences between the record's ratings and the centroid's
     * ratings at the same positions are summed; the similarity is {@code 250 - sum}. The record is
     * emitted under the cluster with the highest similarity (the first one wins a tie).
     *
     * <p>Lines with no colon, or with the colon as the first character, are skipped silently.
     *
     * @param key input key; not read by this method
     * @param value the text line describing one movie
     * @param output receives the {@code (clusterId, ClusterWritable)} pair
     * @param reporter used to increment {@code Counter.WORDS} once per emitted record
     * @throws IOException declared by the mapper contract (presumably raised by
     *     {@code output.collect} - confirm against the Hadoop API)
     * @throws NumberFormatException if the movie id or a rating is not a valid integer
     */
    public void map(
        LongWritable key,
        Text value,
        OutputCollector<IntWritable, ClusterWritable> output,
        Reporter reporter)
        throws IOException {

      String line = value.toString();
      int movieIndex = line.indexOf(":");
      if (movieIndex <= 0) {
        return;
      }

      String movieIdStr = line.substring(0, movieIndex);
      // The parsed value is not needed, but a non-numeric id must still be rejected.
      Long.parseLong(movieIdStr);

      // Java zero-initialises the array, so no explicit reset is required.
      float[] squaredDistance = new float[maxClusters];

      StringBuilder rewritten = new StringBuilder();
      rewritten.append(movieIdStr).append(":");

      StringTokenizer tokens = new StringTokenizer(line.substring(movieIndex + 1), ",");
      int attrCnt = 0;
      while (tokens.hasMoreTokens() && attrCnt < attrNum) {
        String tok = tokens.nextToken();
        String reviewStr = tok.substring(tok.indexOf("_") + 1);

        if (attrCnt > 0) {
          rewritten.append(",");
        }
        // The original identifier before the underscore is replaced by the token's position.
        rewritten.append(attrCnt).append("_").append(reviewStr);

        int review = Integer.parseInt(reviewStr);
        for (int r = 0; r < totalClusters; r++) {
          int rating = (int) centroids_ref[r].reviews.get(attrCnt).rating;
          int delta = review - rating;
          squaredDistance[r] += (float) (delta * delta);
        }
        attrCnt++;
      }

      // Seed the maximum from the first cluster instead of from 0: otherwise a record whose
      // squared distance to every centroid is >= 250 would always land in cluster 0, even when
      // another centroid is closer. With no clusters at all the defaults (0, 0.0f) are kept.
      int clusterId = 0;
      float maxSimilarity = 0.0f;
      for (int p = 0; p < totalClusters; p++) {
        float similarity = 250 - squaredDistance[p];
        if (p == 0 || similarity > maxSimilarity) {
          maxSimilarity = similarity;
          clusterId = p;
        }
      }

      ClusterWritable moviesArrl = new ClusterWritable();
      moviesArrl.movies.add(rewritten.toString());
      moviesArrl.similarities.add(maxSimilarity);
      moviesArrl.similarity = maxSimilarity;
      output.collect(new IntWritable(clusterId), moviesArrl);
      reporter.incrCounter(Counter.WORDS, 1);
    }
コード例 #2
0
ファイル: KMeans.java プロジェクト: Keraunos/TP_KMeans
  /**
   * Runs {@code nbTests} K-means runs over the points read from {@code fileName} and prints, for
   * each run, the iteration index at which it converged, followed by the mean and standard
   * deviation of those indices.
   *
   * <p>Each run creates {@code K} clusters, asks {@code setRandomCenters()} for initial centres,
   * then iterates up to {@code maxIter} times: every point is added to the cluster whose centre is
   * closest according to {@code measure.d}, and every cluster then recomputes its centre via
   * {@code makeGravityCenter()}. When {@code stopOnConverge} is set, a run stops as soon as the
   * summed absolute change of all centre coordinates is exactly zero.
   *
   * <p>A run that does not converge is reported on standard output; if {@code stopOnConverge} is
   * set the program then returns without printing the statistics.
   *
   * <p>The graphical display is only used when the data is two-dimensional.
   *
   * @param args command-line arguments; not read
   */
  public static void main(String[] args) {

    Double[][] data = FileHandler.readFile(fileName);

    // cannot display points if dimension is > 2
    if (data[0].length != 2) {
      canDisplay = false;
    }

    // build graphic points from coords' array
    buildPointsFromData(data);
    Config.computeBoundingRect(points);

    // init display
    if (canDisplay) {
      disp = new Display();
      disp.setVisible(true);
      for (Point p : points) {
        disp.addObject(p);
      }
    }

    testResults = new double[nbTests];

    for (int t = 0; t < nbTests; ++t) {

      // define K clusters and K temporary centres
      clusters = new ArrayList<Cluster>();
      for (int i = 0; i < K; ++i) {
        clusters.add(new Cluster());
      }
      setRandomCenters();
      for (Cluster c : clusters) {
        System.out.println("center for cluster " + c + ": " + c.getCenter());
      }

      if (canDisplay) {
        pause(1000);
      }

      // Tracked explicitly: testResults[t] == 0 cannot tell "never converged" apart from
      // "converged at iteration index 0".
      boolean converged = false;

      for (int i = 0; i < maxIter; ++i) {

        if (canDisplay) {
          disp.setLabel("[ iteration #" + (i + 1) + " ]");
        } else {
          System.out.println("------> iteration #" + (i + 1));
        }

        // allocate points to group which center is closest
        for (Point p : points) {
          double minDist = Config.MAX;
          Cluster alloc = clusters.get(0); // default initialization

          for (Cluster c : clusters) {
            double currDist = measure.d(p, c.getCenter());
            if (currDist < minDist) {
              minDist = currDist;
              alloc = c;
            }
          }

          alloc.addPoint(p);
        }

        // recenter: calculate gravity centers for formed groups
        double diff = 0;
        for (Cluster c : clusters) {

          // delete previous center if it is not a Point of the Cluster
          if (canDisplay && !c.getPoints().contains(c.getCenter())) {
            disp.removeObject(c.getCenter());
          }

          // The old coordinates must be captured before the centre is recomputed.
          Double[] prevCoords = null;
          if (stopOnConverge) {
            prevCoords = c.getCenter().getCoords();
          }

          Point newCenter = c.makeGravityCenter();

          if (stopOnConverge) {
            Double[] newCoords = c.getCenter().getCoords();
            for (int k = 0; k < prevCoords.length; ++k) {
              diff += Math.abs(prevCoords[k] - newCoords[k]);
            }
          }

          if (canDisplay) {
            disp.addObject(newCenter);
          } else {
            System.out.println(c.getCenter());
          }
        } // loop over clusters

        if (canDisplay) {
          disp.repaint();
        }

        // if Clusters' centers don't change anymore, then stop (algorithm converged)
        if (diff == 0 && stopOnConverge) {
          converged = true;
          testResults[t] = (double) i;
          if (canDisplay) {
            disp.setLabel("[ Converged at iteration #" + (i) + " ]");
            disp.repaint();
          } else {
            System.out.println("[ Converged at iteration #" + (i) + " ]");
          }
          break;
        }

        pause(100);
      } // loop over iterations

      if (!converged) {
        System.out.println("!!!!!!!!!! Test #" + t + " did not converge.");
        if (stopOnConverge) {
          return;
        }
      }

      // reset display
      if (canDisplay && t + 1 < nbTests) {
        for (Point p : points) {
          p.setCluster(null);
        }
        for (Cluster c : clusters) {
          disp.removeObject(c.getCenter());
        }
      }
    } // loop over tests

    // display test results and compute means
    DescriptiveStatistics stats = new DescriptiveStatistics(testResults);
    System.out.println("=========> Results:");
    for (int t = 0; t < nbTests; ++t) {
      System.out.println("--> [ " + testResults[t] + " ]");
    }
    System.out.println("=========> Means: " + stats.getMean());
    System.out.println("=========> Std dev: " + stats.getStandardDeviation());
  }