Example #1
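Writes a text file to an OpenStack Swift object store: the Hadoop configuration registers the SwiftNativeFileSystem implementation and Keystone credentials for a service named "test", then saves the RDD under the "testfile" container.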
  public static void main(String[] args) {

    // Master URL and app name are expected from spark-submit / system properties.
    JavaSparkContext sc = new JavaSparkContext();

    // Register the hadoop-openstack Swift filesystem and the Keystone v2
    // credentials for a Swift service named "test".
    Configuration conf = sc.hadoopConfiguration();
    conf.set("fs.swift.impl", "org.apache.hadoop.fs.swift.snative.SwiftNativeFileSystem");
    conf.set("fs.swift.service.test.auth.url", "http://163.17.136.246:5000/v2.0/tokens");
    conf.set("fs.swift.service.test.auth.endpoint.prefix", "endpoints");
    conf.set("fs.swift.service.test.http.port", "8080");
    conf.set("fs.swift.service.test.region", "RegionOne");
    conf.set("fs.swift.service.test.public", "true");
    conf.set("fs.swift.service.test.tenant", "big-data");
    conf.set("fs.swift.service.test.username", "k753357");
    conf.set("fs.swift.service.test.password", "k753357");

    // Read the input path passed on the command line and write it to the
    // "testfile" container of the "test" service (swift://container.service/path).
    JavaRDD<String> rawRDD = sc.textFile(args[0]);
    rawRDD.saveAsTextFile("swift://testfile.test/file/");

    sc.stop();
  }
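To check the upload, the same context can read the data back before stopping; a minimal sketch, reusing the swift:// path written above:

    // Read back what saveAsTextFile wrote and count the lines (assumes the
    // Swift settings above are still on sc.hadoopConfiguration()).
    JavaRDD<String> readBack = sc.textFile("swift://testfile.test/file/");
    System.out.println("Lines stored in Swift: " + readBack.count());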
Example #2
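Runs a job against a standalone cluster that counts the entries of a MATLAB .mat vector file, read through the third-party JMATFileInputFormat (from the jmatrw/jmatrw4spark jars) via newAPIHadoopFile.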
  public static void main(String[] args) {
    SparkConf sparkconf =
        new SparkConf()
            .setAppName("Simple Application")
            .setMaster("spark://1.245.77.10:7077")
            .set(
                "spark.driver.extraClassPath",
                "E:/installprogram/spark-1.5.2-bin-hadoop2.4/libthirdparty/*")
            .set(
                "spark.executor.extraClassPath",
                "E:/installprogram/spark-1.5.2-bin-hadoop2.4/libthirdparty/*");
    JavaSparkContext sc = new JavaSparkContext(sparkconf);

    // Filesystem settings belong on the Hadoop Configuration, not on SparkConf
    // (a bare "fs.default.name" set on SparkConf is silently ignored). Default to
    // the local filesystem so the e:/ input path below resolves without HDFS.
    Configuration hadoopConfig = sc.hadoopConfiguration();
    hadoopConfig.set("fs.defaultFS", "file:///");
    hadoopConfig.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
    hadoopConfig.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
    // sc.addJar("e:/installprogram/spark-1.5.2-bin-hadoop2.4/libthirdparty/jmatrw-0.2.jar");
    // sc.addJar("e:/installprogram/spark-1.5.2-bin-hadoop2.4/libthirdparty/jmatrw4spark-0.2.jar");

    /*JavaRDD<Double> matrdd2 = sc.parallelize(Arrays.asList(1.0, 3.0, 2.0));
    System.out.println("Start counting parallelize...");
    long values = matrdd2.count();
    System.out.println("Value count of parallelize is " + values);*/

    // Read a MATLAB .mat vector file as (index, value) records through the
    // JMATFileInputFormat shipped in the jmatrw4spark jars on the classpath above.
    JavaPairRDD<Long, Double> matrdd =
        sc.newAPIHadoopFile(
            "e:/tmp/vecRow03_x256.mat",
            JMATFileInputFormat.class,
            Long.class,
            Double.class,
            hadoopConfig);
    System.out.println("Start job...");
    long values = matrdd.count();
    System.out.println("Value count of hadoop is " + values);

    // JavaSparkContext.close() stops the context, so one call is enough.
    sc.close();
  }
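JMATFileInputFormat itself lives in the jmatrw4spark library referenced by the addJar comments, so its internals are not shown here. As a rough orientation, a minimal new-API Hadoop InputFormat producing (Long, Double) pairs could look like the sketch below; every name in it (DoubleVectorInputFormat, the flat big-endian double layout) is an illustrative assumption, not the jmatrw4spark implementation:

import java.io.IOException;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

// Hypothetical stand-in for JMATFileInputFormat: treats the input file as a
// flat sequence of big-endian doubles and emits (position, value) pairs.
public class DoubleVectorInputFormat extends FileInputFormat<Long, Double> {

  @Override
  protected boolean isSplitable(JobContext context, Path file) {
    return false; // one split per file keeps the reader simple
  }

  @Override
  public RecordReader<Long, Double> createRecordReader(InputSplit split, TaskAttemptContext ctx) {
    return new RecordReader<Long, Double>() {
      private FSDataInputStream in;
      private long length;
      private long index = -1;
      private double value;

      @Override
      public void initialize(InputSplit genericSplit, TaskAttemptContext context)
          throws IOException {
        FileSplit fileSplit = (FileSplit) genericSplit;
        length = fileSplit.getLength();
        in = fileSplit.getPath().getFileSystem(context.getConfiguration())
            .open(fileSplit.getPath());
      }

      @Override
      public boolean nextKeyValue() throws IOException {
        if (in.getPos() + 8 > length) {
          return false; // fewer than 8 bytes left: no complete double remains
        }
        value = in.readDouble();
        index++;
        return true;
      }

      @Override
      public Long getCurrentKey() {
        return index;
      }

      @Override
      public Double getCurrentValue() {
        return value;
      }

      @Override
      public float getProgress() throws IOException {
        return length == 0 ? 1.0f : (float) in.getPos() / length;
      }

      @Override
      public void close() throws IOException {
        if (in != null) {
          in.close();
        }
      }
    };
  }
}

With such a class on the classpath, it would be passed to newAPIHadoopFile exactly where JMATFileInputFormat.class appears above.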