Beispiel #1
0
  /**
   * Reads a binary mapping file and loads it into an in-memory map.
   *
   * <p>Every entry under {@code itemIDIndexPathStr} that is accepted by {@code PARTS_FILTER} is
   * opened as a {@link SequenceFile} with {@link VarIntWritable} keys and {@link VarLongWritable}
   * values; each record is stored in the result as {@code key -> value}. If the same key occurs
   * more than once, the record read last wins.
   *
   * @param itemIDIndexPathStr location of the directory holding the mapping files
   * @param conf configuration used to resolve the file system and to open the sequence files
   * @return a map filled with all records read; empty when no file matched or all were empty
   * @throws IllegalStateException if an {@link IOException} occurs while listing or reading
   */
  public static OpenIntLongHashMap readItemIDIndexMap(
      String itemIDIndexPathStr, Configuration conf) {
    OpenIntLongHashMap indexItemIDMap = new OpenIntLongHashMap();
    try {
      Path unqualifiedItemIDIndexPath = new Path(itemIDIndexPathStr);
      FileSystem fs = FileSystem.get(unqualifiedItemIDIndexPath.toUri(), conf);
      Path itemIDIndexPath = unqualifiedItemIDIndexPath.makeQualified(fs);

      // The same two writables are refilled by every call to reader.next(...).
      VarIntWritable index = new VarIntWritable();
      VarLongWritable id = new VarLongWritable();
      for (FileStatus status : fs.listStatus(itemIDIndexPath, PARTS_FILTER)) {
        Path partPath = status.getPath().makeQualified(fs);
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, partPath, conf);
        try {
          while (reader.next(index, id)) {
            indexItemIDMap.put(index.get(), id.get());
          }
        } finally {
          // Close even when reading fails, so the underlying stream is never leaked.
          reader.close();
        }
      }
    } catch (IOException ioe) {
      throw new IllegalStateException(
          "Unable to read item ID index from " + itemIDIndexPathStr, ioe);
    }
    return indexItemIDMap;
  }
  /**
   * Emits, for one item, a single-entry vector together with the users that were grouped under
   * that item.
   *
   * <p>The output key is the index computed by {@code TasteHadoopUtils.idToIndex} for the item ID.
   * The output value bundles a sparse vector whose only entry (at that index) is {@code NaN}, the
   * list of user IDs found in {@code values}, and a parallel list holding {@code 1.0f} for each
   * of those users.
   *
   * @param itemID ID of the item being processed
   * @param values IDs of the users associated with this item
   * @param ctx context that receives the emitted pair
   * @throws IOException if writing to the context fails
   * @throws InterruptedException if writing to the context is interrupted
   */
  @Override
  protected void reduce(VarLongWritable itemID, Iterable<VarLongWritable> values, Context ctx)
      throws IOException, InterruptedException {

    final int index = TasteHadoopUtils.idToIndex(itemID.get());

    // Gather the users for this item; each one gets a constant preference value of 1.
    List<Long> users = Lists.newArrayList();
    List<Float> prefs = Lists.newArrayList();
    for (VarLongWritable user : values) {
      users.add(user.get());
      prefs.add(1.0f);
    }

    // Artificial NaN summand to exclude this item from the recommendations for all users
    // collected above. This is the filtering trick: presumably AggregateAndRecommendReducer
    // relies on it to drop (user, item) pairs that were already rated -- confirm in that class.
    Vector nanMarker = new RandomAccessSparseVector(Integer.MAX_VALUE, 1);
    nanMarker.set(index, Double.NaN);

    vectorAndPrefs.set(nanMarker, users, prefs);
    itemIDIndexWritable.set(index);
    ctx.write(itemIDIndexWritable, vectorAndPrefs);
  }