Beispiel #1
0
  /**
   * Merges {@link Rating}s that share a user/item key into a single {@link Rating} per key.
   *
   * <p>When {@code implicit} is set, the scores for a key are combined with
   * {@code Functions.SUM_DOUBLE}; otherwise they are folded with {@code Functions.last()}
   * starting from {@code NaN}, so that the last score wins rather than a sum.
   *
   * @param original ratings, possibly containing several entries per user/item key
   * @return ratings mapped back from the aggregated per-key scores
   */
  private JavaRDD<Rating> aggregateScores(JavaRDD<Rating> original) {
    JavaPairRDD<Tuple2<Integer, Integer>, Double> scoresByKey =
        original.mapToPair(new RatingToTupleDouble());

    if (implicit) {
      // Implicit data: each value is a score contribution, so add them up.
      return scoresByKey.reduceByKey(Functions.SUM_DOUBLE).map(new TupleToRatingFn());
    }

    // Non-implicit data: the last value wins.
    return scoresByKey
        .foldByKey(Double.NaN, Functions.<Double>last())
        .map(new TupleToRatingFn());
  }
Beispiel #2
0
 /**
  * Adds the keys of {@code features}, rendered via {@code Functions.toStringValue()}, to
  * {@code pmml} as extension content under {@code key}.
  *
  * <p>NOTE(review): {@code collect()} materializes every ID in a local list, presumably on the
  * driver; confirm the feature RDD is small enough for that.
  *
  * @param pmml model passed to {@code PMMLUtils.addExtensionContent}
  * @param key name under which the IDs are added
  * @param features pairs whose first element is the ID; the {@code double[]} part is not used here
  */
 private static void addIDsExtension(
     PMML pmml, String key, RDD<Tuple2<Object, double[]>> features) {
   PMMLUtils.addExtensionContent(
       pmml,
       key,
       fromRDD(features)
           .keys()
           .map(Functions.toStringValue())
           .collect());
 }