@Override public void reduce( IntWritable key, Iterator<Text> values, OutputCollector<IntWritable, Text> output, Reporter reporter) throws IOException { StringBuilder sb = new StringBuilder(); while (values.hasNext()) { sb.append("," + values.next()); } v.set(sb.toString().replaceFirst(",", "")); // 去掉第一个逗号 output.collect(key, v); // 键为itemID,值为其打过分的所有用户的偏好 System.out.println(key.toString() + " " + v.toString()); }
/**
 * Forwards the incoming pair as text: both the key and the value are converted with
 * {@code toString()} and written to the context as {@code Text} objects.
 *
 * @param key the input key, emitted in its string form
 * @param value the input value, emitted in its string form
 * @param context the context the converted pair is written to
 * @throws IOException propagated from {@code context.write}
 * @throws InterruptedException propagated from {@code context.write}
 */
@Override
protected void map(Centroid key, IntWritable value, Context context)
    throws IOException, InterruptedException {
  final Text keyAsText = new Text(key.toString());
  final Text valueAsText = new Text(value.toString());
  context.write(keyAsText, valueAsText);
}
/**
 * Renders this object as {@code <key: K, partition: P>}, where K and P are the string forms of
 * the {@code key} and {@code partition} fields.
 *
 * @return the formatted description
 */
@Override
public String toString() {
  StringBuilder text = new StringBuilder("<key: ");
  text.append(key.toString());
  text.append(", partition: ");
  text.append(partition.toString());
  text.append(">");
  return text.toString();
}
@Override public void reduce( IntWritable key, Iterator<ClusterWritable> values, OutputCollector<IntWritable, Text> output, Reporter reporter) throws IOException { float sumSimilarity = 0.0f; int numMovies = 0; float avgSimilarity = 0.0f; float similarity = 0.0f; int s = 0; int count; float diff = 0.0f; float minDiff = 1.0f; int candidate = 0; String data = new String(""); String shortline = new String(""); ArrayList<String> arrl = new ArrayList<String>(); ArrayList<Float> simArrl = new ArrayList<Float>(); String oneElm = new String(); int indexShort, index2; Text val = new Text(); while (values.hasNext()) { ClusterWritable cr = (ClusterWritable) values.next(); similarity = cr.similarity; simArrl.addAll(cr.similarities); for (int i = 0; i < cr.movies.size(); i++) { oneElm = cr.movies.get(i); indexShort = oneElm.indexOf( ",", 1000); // to avoid memory error caused by long arrays; it will results less // accurate if (indexShort == -1) { shortline = new String(oneElm); } else { shortline = new String(oneElm.substring(0, indexShort)); } arrl.add(shortline); output.collect(key, new Text(oneElm)); } numMovies += cr.movies.size(); sumSimilarity += similarity; } if (numMovies > 0) { avgSimilarity = sumSimilarity / (float) numMovies; } diff = 0.0f; minDiff = 1.0f; for (s = 0; s < numMovies; s++) { diff = (float) Math.abs(avgSimilarity - simArrl.get(s)); if (diff < minDiff) { minDiff = diff; candidate = s; } } data = arrl.get(candidate); index2 = data.indexOf(":"); String movieStr = data.substring(0, index2); String reviews = data.substring(index2 + 1); StringTokenizer token = new StringTokenizer(reviews, ","); count = 0; while (token.hasMoreTokens()) { token.nextToken(); count++; } System.out.println( "The key = " + key.toString() + " has members = " + numMovies + " simil = " + simArrl.get(candidate)); val = new Text(simArrl.get(candidate) + " " + movieStr + " " + count + " " + reviews); output.collect(key, val); reporter.incrCounter(Counter.VALUES, 1); }
/**
 * Renders this object as {@code first:second}, using the string forms of the {@code first} and
 * {@code second} fields.
 *
 * @return the two string forms joined by a colon
 */
@Override
public String toString() {
  final String head = first.toString();
  final String tail = second.toString();
  return head + ":" + tail;
}