예제 #1
0
  /**
   * tests {@link UserVectorSplitterMapper} in the special case that some userIDs shall be excluded
   */
  @Test
  public void testUserVectorSplitterMapperUserExclusion() throws Exception {
    Mapper<VarLongWritable, VectorWritable, VarIntWritable, VectorOrPrefWritable>.Context context =
        EasyMock.createMock(Mapper.Context.class);

    context.write(
        EasyMock.eq(new VarIntWritable(34)), prefOfVectorOrPrefWritableMatches(123L, 0.5f));
    context.write(
        EasyMock.eq(new VarIntWritable(56)), prefOfVectorOrPrefWritableMatches(123L, 0.7f));

    EasyMock.replay(context);

    FastIDSet usersToRecommendFor = new FastIDSet();
    usersToRecommendFor.add(123L);

    UserVectorSplitterMapper mapper = new UserVectorSplitterMapper();
    setField(mapper, "maxPrefsPerUserConsidered", 10);
    setField(mapper, "usersToRecommendFor", usersToRecommendFor);

    RandomAccessSparseVector vector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
    vector.set(34, 0.5);
    vector.set(56, 0.7);

    mapper.map(new VarLongWritable(123L), new VectorWritable(vector), context);
    mapper.map(new VarLongWritable(456L), new VectorWritable(vector), context);

    EasyMock.verify(context);
  }
    @Override
    protected void map(IntWritable row, VectorWritable vectorWritable, Context ctx)
        throws IOException, InterruptedException {

      Vector rowVector = similarity.normalize(vectorWritable.get());

      int numNonZeroEntries = 0;
      double maxValue = Double.MIN_VALUE;

      Iterator<Vector.Element> nonZeroElements = rowVector.iterateNonZero();
      while (nonZeroElements.hasNext()) {
        Vector.Element element = nonZeroElements.next();
        RandomAccessSparseVector partialColumnVector =
            new RandomAccessSparseVector(Integer.MAX_VALUE);
        partialColumnVector.setQuick(row.get(), element.get());
        ctx.write(new IntWritable(element.index()), new VectorWritable(partialColumnVector));

        numNonZeroEntries++;
        if (maxValue < element.get()) {
          maxValue = element.get();
        }
      }

      if (threshold != NO_THRESHOLD) {
        nonZeroEntries.setQuick(row.get(), numNonZeroEntries);
        maxValues.setQuick(row.get(), maxValue);
      }
      norms.setQuick(row.get(), similarity.norm(rowVector));

      ctx.getCounter(Counters.ROWS).increment(1);
    }
예제 #3
0
  /** tests {@link AggregateAndRecommendReducer} */
  @Test
  public void testAggregateAndRecommendReducer() throws Exception {
    Reducer<
                VarLongWritable,
                PrefAndSimilarityColumnWritable,
                VarLongWritable,
                RecommendedItemsWritable>
            .Context
        context = EasyMock.createMock(Reducer.Context.class);

    context.write(
        EasyMock.eq(new VarLongWritable(123L)),
        recommendationsMatch(
            new GenericRecommendedItem(1L, 2.8f), new GenericRecommendedItem(2L, 2.0f)));

    EasyMock.replay(context);

    RandomAccessSparseVector similarityColumnOne =
        new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
    similarityColumnOne.set(1, 0.1);
    similarityColumnOne.set(2, 0.5);

    RandomAccessSparseVector similarityColumnTwo =
        new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
    similarityColumnTwo.set(1, 0.9);
    similarityColumnTwo.set(2, 0.5);

    List<PrefAndSimilarityColumnWritable> values =
        Arrays.asList(
            new PrefAndSimilarityColumnWritable(1.0f, similarityColumnOne),
            new PrefAndSimilarityColumnWritable(3.0f, similarityColumnTwo));

    OpenIntLongHashMap indexItemIDMap = new OpenIntLongHashMap();
    indexItemIDMap.put(1, 1L);
    indexItemIDMap.put(2, 2L);

    AggregateAndRecommendReducer reducer = new AggregateAndRecommendReducer();

    setField(reducer, "indexItemIDMap", indexItemIDMap);
    setField(reducer, "recommendationsPerUser", 3);

    reducer.reduce(new VarLongWritable(123L), values, context);

    EasyMock.verify(context);
  }
예제 #4
0
  /** tests {@link SimilarityMatrixRowWrapperMapper} */
  @Test
  public void testSimilarityMatrixRowWrapperMapper() throws Exception {
    Mapper<IntWritable, VectorWritable, VarIntWritable, VectorOrPrefWritable>.Context context =
        EasyMock.createMock(Mapper.Context.class);

    context.write(
        EasyMock.eq(new VarIntWritable(12)),
        vectorOfVectorOrPrefWritableMatches(MathHelper.elem(34, 0.5), MathHelper.elem(56, 0.7)));

    EasyMock.replay(context);

    RandomAccessSparseVector vector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
    vector.set(12, 1.0);
    vector.set(34, 0.5);
    vector.set(56, 0.7);

    new SimilarityMatrixRowWrapperMapper()
        .map(new IntWritable(12), new VectorWritable(vector), context);

    EasyMock.verify(context);
  }
예제 #5
0
  @Test
  public void testMatrixDiagonalizeMapper() throws Exception {
    MatrixDiagonalizeMapper mapper = new MatrixDiagonalizeMapper();
    Configuration conf = getConfiguration();
    conf.setInt(Keys.AFFINITY_DIMENSIONS, RAW_DIMENSIONS);

    // set up the dummy writers
    DummyRecordWriter<NullWritable, IntDoublePairWritable> writer = new DummyRecordWriter<>();
    Mapper<IntWritable, VectorWritable, NullWritable, IntDoublePairWritable>.Context context =
        DummyRecordWriter.build(mapper, conf, writer);

    // perform the mapping
    for (int i = 0; i < RAW_DIMENSIONS; i++) {
      RandomAccessSparseVector toAdd = new RandomAccessSparseVector(RAW_DIMENSIONS);
      toAdd.assign(RAW[i]);
      mapper.map(new IntWritable(i), new VectorWritable(toAdd), context);
    }

    // check the number of the results
    assertEquals(
        "Number of map results", RAW_DIMENSIONS, writer.getValue(NullWritable.get()).size());
  }
예제 #6
0
  @Test
  public void testMatrixDiagonalizeReducer() throws Exception {
    MatrixDiagonalizeMapper mapper = new MatrixDiagonalizeMapper();
    Configuration conf = getConfiguration();
    conf.setInt(Keys.AFFINITY_DIMENSIONS, RAW_DIMENSIONS);

    // set up the dummy writers
    DummyRecordWriter<NullWritable, IntDoublePairWritable> mapWriter = new DummyRecordWriter<>();
    Mapper<IntWritable, VectorWritable, NullWritable, IntDoublePairWritable>.Context mapContext =
        DummyRecordWriter.build(mapper, conf, mapWriter);

    // perform the mapping
    for (int i = 0; i < RAW_DIMENSIONS; i++) {
      RandomAccessSparseVector toAdd = new RandomAccessSparseVector(RAW_DIMENSIONS);
      toAdd.assign(RAW[i]);
      mapper.map(new IntWritable(i), new VectorWritable(toAdd), mapContext);
    }

    // now perform the reduction
    MatrixDiagonalizeReducer reducer = new MatrixDiagonalizeReducer();
    DummyRecordWriter<NullWritable, VectorWritable> redWriter = new DummyRecordWriter<>();
    Reducer<NullWritable, IntDoublePairWritable, NullWritable, VectorWritable>.Context redContext =
        DummyRecordWriter.build(
            reducer, conf, redWriter, NullWritable.class, IntDoublePairWritable.class);

    // only need one reduction
    reducer.reduce(NullWritable.get(), mapWriter.getValue(NullWritable.get()), redContext);

    // first, make sure there's only one result
    List<VectorWritable> list = redWriter.getValue(NullWritable.get());
    assertEquals("Only a single resulting vector", 1, list.size());
    Vector v = list.get(0).get();
    for (int i = 0; i < v.size(); i++) {
      assertEquals("Element sum is correct", rowSum(RAW[i]), v.get(i), 0.01);
    }
  }