// @Override public void performSourceMapReduce(JavaRDD<KeyValueObject<KEYIN, VALUEIN>> pInputs) { // if not commented out this line forces mappedKeys to be realized // pInputs = SparkUtilities.realizeAndReturn(pInputs,getCtx()); JavaSparkContext ctx2 = SparkUtilities.getCurrentContext(); System.err.println("Starting Score Mapping"); JavaPairRDD<K, Tuple2<K, V>> kkv = performMappingPart(pInputs); // kkv = SparkUtilities.realizeAndReturn(kkv, ctx2); // mappedKeys = mappedKeys.persist(StorageLevel.MEMORY_AND_DISK_2()); // // if not commented out this line forces mappedKeys to be realized // mappedKeys = SparkUtilities.realizeAndReturn(mappedKeys, ctx2); // // // convert to tuples // // JavaPairRDD<K, Tuple2<K, V>> kkv = mappedKeys.mapToPair(new KeyValuePairFunction<K, // V>()); // // kkv = kkv.persist(StorageLevel.MEMORY_AND_DISK_2()); // // if not commented out this line forces mappedKeys to be realized // kkv = SparkUtilities.realizeAndReturn(kkv, ctx2); // if not commented out this line forces kvJavaPairRDD to be realized // kkv = SparkUtilities.realizeAndReturn(kkv ); System.err.println("Starting Score Reduce"); IReducerFunction reduce = getReduce(); // for some reason the compiler thnks K or V is not Serializable JavaPairRDD<K, Tuple2<K, V>> kkv1 = kkv; // JavaPairRDD<? extends Serializable, Tuple2<? extends Serializable, ? extends Serializable>> // kkv1 = (JavaPairRDD<? extends Serializable, Tuple2<? extends Serializable, ? 
extends // Serializable>>)kkv; //noinspection unchecked JavaPairRDD<K, KeyAndValues<K, V>> reducedSets = (JavaPairRDD<K, KeyAndValues<K, V>>) KeyAndValues.combineByKey(kkv1); // if not commented out this line forces kvJavaPairRDD to be realized reducedSets = SparkUtilities.realizeAndReturn(reducedSets); PartitionAdaptor<K> prt = new PartitionAdaptor<K>(getPartitioner()); reducedSets = reducedSets.partitionBy(prt); reducedSets = reducedSets.sortByKey(); // if not commented out this line forces kvJavaPairRDD to be realized reducedSets = SparkUtilities.realizeAndReturn(reducedSets); ReduceFunctionAdaptor f = new ReduceFunctionAdaptor(ctx2, reduce); JavaRDD<KeyValueObject<KOUT, VOUT>> reducedOutput = reducedSets.flatMap(f); // JavaPairRDD<K, V> kvJavaPairRDD = asTuples.partitionBy(sparkPartitioner); // if not commented out this line forces kvJavaPairRDD to be realized // kvJavaPairRDD = SparkUtilities.realizeAndReturn(kvJavaPairRDD,getCtx()); // if not commented out this line forces kvJavaPairRDD to be realized // reducedOutput = SparkUtilities.realizeAndReturn(reducedOutput, ctx2); output = reducedOutput; }
// @Override public void performSingleReturnMapReduce(JavaRDD<KeyValueObject<KEYIN, VALUEIN>> pInputs) { // if not commented out this line forces mappedKeys to be realized // pInputs = SparkUtilities.realizeAndReturn(pInputs,getCtx()); JavaPairRDD<K, Tuple2<K, V>> kkv = performMappingPart(pInputs); // if not commented out this line forces kvJavaPairRDD to be realized kkv = SparkUtilities.realizeAndReturn(kkv); PartitionAdaptor<K> prt = new PartitionAdaptor<K>(getPartitioner()); kkv = kkv.partitionBy(prt); IReducerFunction reduce = getReduce(); /** we can guarantee one output per input */ SingleOutputReduceFunctionAdaptor<K, V, KOUT, VOUT> f = new SingleOutputReduceFunctionAdaptor((ISingleOutputReducerFunction) reduce); JavaRDD<KeyValueObject<KOUT, VOUT>> reduced = kkv.map(f); // if not commented out this line forces kvJavaPairRDD to be realized reduced = SparkUtilities.realizeAndReturn(reduced); output = reduced; }
/**
 * Dispatches a map/reduce run based on the runtime type of the data source.
 * Sources may be very implementation specific: a Spark RDD, a hadoop Path, or any
 * Iterable are accepted; anything else is rejected.
 *
 * @param source some source of data - might be a hadoop directory or a Spark RDD -
 *               this will be cast internally
 * @param otherData currently unused extra arguments
 * @throws IllegalArgumentException when the source type is not supported
 */
@Override
public void mapReduceSource(@Nonnull final Object source, final Object... otherData) {
    // NOTE: branch order matters - check the most specific type first
    if (source instanceof JavaRDD) {
        performSourceMapReduce((JavaRDD) source);
    }
    else if (source instanceof Path) {
        performMapReduce((Path) source);
    }
    else if (source instanceof java.lang.Iterable) {
        performSourceMapReduce(SparkUtilities.fromIterable((Iterable) source));
    }
    else {
        throw new IllegalArgumentException("cannot handle source of class " + source.getClass());
    }
}