コード例 #1
0
  // @Override
  /**
   * Runs the map / group / reduce pipeline over {@code pInputs} and stores the resulting RDD in
   * {@code output}.
   *
   * <p>Stages, in order: map the inputs to keyed tuples via {@code performMappingPart}, group them
   * per key with {@code KeyAndValues.combineByKey}, partition with the configured partitioner,
   * sort by key, then flat-map every group through a {@code ReduceFunctionAdaptor} wrapping the
   * reducer returned by {@code getReduce()}. Progress markers are written to {@code System.err}.
   *
   * @param pInputs key/value records fed into the mapping stage
   */
  public void performSourceMapReduce(JavaRDD<KeyValueObject<KEYIN, VALUEIN>> pInputs) {
    JavaSparkContext sparkContext = SparkUtilities.getCurrentContext();

    System.err.println("Starting Score Mapping");
    JavaPairRDD<K, Tuple2<K, V>> keyedTuples = performMappingPart(pInputs);

    System.err.println("Starting Score Reduce");
    IReducerFunction reducer = getReduce();

    // The explicit cast is unchecked; an earlier note here said the compiler would not accept
    // K / V as Serializable without it.
    //noinspection unchecked
    JavaPairRDD<K, KeyAndValues<K, V>> grouped =
        (JavaPairRDD<K, KeyAndValues<K, V>>) KeyAndValues.combineByKey(keyedTuples);

    // Presumably forces the grouped RDD to be realized at this point (per the helper's name and
    // earlier notes in this method) - confirm against SparkUtilities.
    grouped = SparkUtilities.realizeAndReturn(grouped);

    // NOTE(review): Spark's sortByKey() normally performs its own range-partitioned shuffle, so
    // the layout produced by partitionBy(...) is probably not preserved - confirm whether both
    // steps are really needed.
    grouped = grouped.partitionBy(new PartitionAdaptor<K>(getPartitioner())).sortByKey();

    // Second realization step, after partitioning and sorting (same caveat as above).
    grouped = SparkUtilities.realizeAndReturn(grouped);

    ReduceFunctionAdaptor reduceAdaptor = new ReduceFunctionAdaptor(sparkContext, reducer);
    JavaRDD<KeyValueObject<KOUT, VOUT>> reducedRecords = grouped.flatMap(reduceAdaptor);

    output = reducedRecords;
  }
コード例 #2
0
  // @Override
  /**
   * Map/reduce variant for reducers that are cast to {@code ISingleOutputReducerFunction}: the
   * reduce stage is applied with {@code map} rather than {@code flatMap}, and the result is
   * stored in {@code output}.
   *
   * <p>Stages, in order: map the inputs to keyed tuples via {@code performMappingPart}, partition
   * them with the configured partitioner, then map each tuple through a {@code
   * SingleOutputReduceFunctionAdaptor} wrapping the reducer returned by {@code getReduce()}.
   *
   * @param pInputs key/value records fed into the mapping stage
   */
  public void performSingleReturnMapReduce(JavaRDD<KeyValueObject<KEYIN, VALUEIN>> pInputs) {
    JavaPairRDD<K, Tuple2<K, V>> keyedTuples = performMappingPart(pInputs);

    // Presumably forces the mapped RDD to be realized here (per the helper's name and the
    // earlier notes in this method) - confirm against SparkUtilities.
    keyedTuples = SparkUtilities.realizeAndReturn(keyedTuples);

    keyedTuples = keyedTuples.partitionBy(new PartitionAdaptor<K>(getPartitioner()));

    // The reducer is cast to the single-output interface: one output per input is guaranteed,
    // which is why a plain map (not flatMap) is used below.
    IReducerFunction reducer = getReduce();
    SingleOutputReduceFunctionAdaptor<K, V, KOUT, VOUT> singleOutputAdaptor =
        new SingleOutputReduceFunctionAdaptor((ISingleOutputReducerFunction) reducer);
    JavaRDD<KeyValueObject<KOUT, VOUT>> mapped = keyedTuples.map(singleOutputAdaptor);

    // Realize the reduced records before publishing them (same caveat as above).
    JavaRDD<KeyValueObject<KOUT, VOUT>> realized = SparkUtilities.realizeAndReturn(mapped);

    output = realized;
  }