public static void trainModel(String filteredDataPath, String modelpath) throws IOException { String line = ""; String combline = ""; // read and process raw data BufferedReader br = new BufferedReader(new FileReader(filteredDataPath)); while ((line = br.readLine()) != null) combline = combline + " " + line; List<String> words = Lists.newArrayList(combline.split(" ")); List<List<String>> localDoc = Lists.newArrayList(words, words); // build a context object JavaSparkContext sc = new JavaSparkContext("local", "Word2VecSuite"); JavaRDD<List<String>> doc = sc.parallelize(localDoc); // training settings Word2Vec word2vec = new Word2Vec().setVectorSize(100).setMinCount(50).setSeed(42L); // train Word2VecModel model = word2vec.fit(doc); // save model SparkContext sc1 = sc.toSparkContext(sc); model.save(sc1, modelpath); System.out.println("Model has been saved in folder: " + modelpath); }
/**
 * Static factory that builds a {@link GemFireJavaSparkContextFunctions} from an existing
 * {@link JavaSparkContext}.
 *
 * <p>The {@code SparkContext} handed to the new instance is the one returned by
 * {@link JavaSparkContext#toSparkContext(JavaSparkContext)} for {@code jsc}.
 *
 * @param jsc the Java Spark context to convert
 * @return a new {@link GemFireJavaSparkContextFunctions} constructed from that context
 */
public static GemFireJavaSparkContextFunctions javaFunctions(JavaSparkContext jsc) {
    SparkContext sparkContext = JavaSparkContext.toSparkContext(jsc);
    return new GemFireJavaSparkContextFunctions(sparkContext);
}