/**
 * Entry point of the "KMeans MP" Spark job.
 *
 * <p>Reads the text file named by the first argument, maps every line to a feature
 * {@code Vector} (via {@code ParsePoint}) and to a title string (via {@code ParseTitle}),
 * trains a k-means model on the vectors, then pairs each title with its vector, maps the
 * pairs through {@code ClusterCars}, groups them by their {@code Integer} key and writes
 * the grouped result as text to the path named by the second argument.
 *
 * <p>NOTE(review): the {@code Integer} key is presumably the index of the cluster the
 * model assigns to the point; confirm against {@code ClusterCars}.
 *
 * @param args {@code args[0]} is the input file, {@code args[1]} is the output path;
 *             when fewer than two arguments are given a usage message is printed to
 *             standard error and the JVM exits with status 1
 */
public static void main(String[] args) {
    if (args.length < 2) {
        System.err.println("Usage: KMeansMP <input_file> <results>");
        System.exit(1);
    }
    final String inputFile = args[0];
    final String resultsPath = args[1];

    // Fixed training parameters for this job.
    final int k = 4;
    final int iterations = 100;
    final int runs = 1;
    final long seed = 0;

    SparkConf sparkConf = new SparkConf().setAppName("KMeans MP");
    JavaSparkContext sc = new JavaSparkContext(sparkConf);
    try {
        JavaRDD<String> lines = sc.textFile(inputFile);

        // The vectors are consumed by the iterative trainer and again by zip() below,
        // so persist them instead of re-reading and re-parsing the input every time.
        JavaRDD<Vector> points = lines.map(new ParsePoint()).cache();
        JavaRDD<String> titles = lines.map(new ParseTitle());

        // Pass the declared seed (previously a stray literal 0 was passed and the
        // 'seed' local was unused); the value is unchanged.
        final KMeansModel model =
                KMeans.train(points.rdd(), k, iterations, runs, KMeans.RANDOM(), seed);

        // titles and points are both 1:1 maps of the same 'lines' RDD, which is what
        // allows them to be zipped element by element.
        JavaPairRDD<Integer, Iterable<String>> results =
                titles.zip(points).mapToPair(new ClusterCars(model)).groupByKey();

        results.saveAsTextFile(resultsPath);
    } finally {
        // Always release the Spark context, even when reading, training or saving fails.
        sc.stop();
    }
}