/**
 * Reads the item-ID index mapping written as Hadoop {@code SequenceFile}s
 * ({@code VarIntWritable} index -> {@code VarLongWritable} item ID) from every
 * part file under the given path and loads it into an in-memory map.
 *
 * @param itemIDIndexPathStr directory containing the part files of the index
 * @param conf Hadoop configuration used to resolve the file system
 * @return map from int item index to the original long item ID
 * @throws IllegalStateException wrapping any {@link IOException} encountered
 */
public static OpenIntLongHashMap readItemIDIndexMap(String itemIDIndexPathStr, Configuration conf) {
  OpenIntLongHashMap indexItemIDMap = new OpenIntLongHashMap();
  try {
    Path unqualifiedItemIDIndexPath = new Path(itemIDIndexPathStr);
    FileSystem fs = FileSystem.get(unqualifiedItemIDIndexPath.toUri(), conf);
    Path itemIDIndexPath = new Path(itemIDIndexPathStr).makeQualified(fs);
    VarIntWritable index = new VarIntWritable();
    VarLongWritable id = new VarLongWritable();
    for (FileStatus status : fs.listStatus(itemIDIndexPath, PARTS_FILTER)) {
      // listStatus already returns qualified paths; no need to round-trip
      // through a String and re-qualify as the original code did
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, status.getPath(), conf);
      try {
        while (reader.next(index, id)) {
          indexItemIDMap.put(index.get(), id.get());
        }
      } finally {
        // close in finally so a corrupt part file (next() throwing) cannot leak the reader
        reader.close();
      }
    }
  } catch (IOException ioe) {
    throw new IllegalStateException(ioe);
  }
  return indexItemIDMap;
}
@Override protected void reduce(VarLongWritable itemID, Iterable<VarLongWritable> values, Context ctx) throws IOException, InterruptedException { int itemIDIndex = TasteHadoopUtils.idToIndex(itemID.get()); Vector vector = new RandomAccessSparseVector(Integer.MAX_VALUE, 1); /* artificial NaN summand to exclude this item from the recommendations for all users specified in userIDs */ vector.set(itemIDIndex, Double.NaN); // 这是过滤的trick // 从这里可以反推出来,AggregateAndRecommendReducer里面过滤了评分过的 user,item这种pair List<Long> userIDs = Lists.newArrayList(); List<Float> prefValues = Lists.newArrayList(); for (VarLongWritable userID : values) { userIDs.add(userID.get()); prefValues.add(1.0f); } itemIDIndexWritable.set(itemIDIndex); vectorAndPrefs.set(vector, userIDs, prefValues); ctx.write(itemIDIndexWritable, vectorAndPrefs); }