Beispiel #1
0
  public static void trainModel(String filteredDataPath, String modelpath) throws IOException {

    String line = "";
    String combline = "";
    // read and process raw data
    BufferedReader br = new BufferedReader(new FileReader(filteredDataPath));

    while ((line = br.readLine()) != null) combline = combline + " " + line;

    List<String> words = Lists.newArrayList(combline.split(" "));
    List<List<String>> localDoc = Lists.newArrayList(words, words);

    // build a context object
    JavaSparkContext sc = new JavaSparkContext("local", "Word2VecSuite");
    JavaRDD<List<String>> doc = sc.parallelize(localDoc);

    // training settings
    Word2Vec word2vec = new Word2Vec().setVectorSize(100).setMinCount(50).setSeed(42L);

    // train
    Word2VecModel model = word2vec.fit(doc);

    // save model
    SparkContext sc1 = sc.toSparkContext(sc);
    model.save(sc1, modelpath);
    System.out.println("Model has been saved in folder: " + modelpath);
  }
 /**
  * A static factory method to create a {@link GemFireJavaSparkContextFunctions} based on an
  * existing {@link JavaSparkContext} instance.
  */
 public static GemFireJavaSparkContextFunctions javaFunctions(JavaSparkContext jsc) {
   return new GemFireJavaSparkContextFunctions(JavaSparkContext.toSparkContext(jsc));
 }