public static void main(String[] args) {
  if (args.length == 0) {
    System.err.println("Usage: JavaTC <host> [<slices>]");
    System.exit(1);
  }

  JavaSparkContext sc = new JavaSparkContext(
      args[0], "JavaTC", System.getenv("SPARK_HOME"),
      JavaSparkContext.jarOfClass(JavaTC.class));
  int slices = (args.length > 1) ? Integer.parseInt(args[1]) : 2;
  JavaPairRDD<Integer, Integer> tc = sc.parallelizePairs(generateGraph(), slices).cache();

  // Linear transitive closure: each round grows paths by one edge,
  // by joining the graph's edges with the already-discovered paths.
  // e.g. join the path (y, z) from the TC with the edge (x, y) from
  // the graph to obtain the path (x, z).

  // Because join() joins on keys, the edges are stored in reversed order.
  JavaPairRDD<Integer, Integer> edges = tc.mapToPair(
      new PairFunction<Tuple2<Integer, Integer>, Integer, Integer>() {
        @Override
        public Tuple2<Integer, Integer> call(Tuple2<Integer, Integer> e) {
          return new Tuple2<Integer, Integer>(e._2(), e._1());
        }
      });

  long oldCount = 0;
  long nextCount = tc.count();
  do {
    oldCount = nextCount;
    // Perform the join, obtaining an RDD of (y, (z, x)) pairs,
    // then project the result to obtain the new (x, z) paths.
    tc = tc.union(tc.join(edges).mapToPair(ProjectFn.INSTANCE)).distinct().cache();
    nextCount = tc.count();
  } while (nextCount != oldCount);

  System.out.println("TC has " + tc.count() + " edges.");
  System.exit(0);
}
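// The method above references two helpers it does not define. A minimal sketch, assuming
// generateGraph() simply produces a small random edge list and ProjectFn turns the join
// output (y, (z, x)) into the new path (x, z) as described in the comments; the sizes and
// the random seed are illustrative assumptions, and the usual imports (java.util.*,
// scala.Tuple2, org.apache.spark.api.java.function.PairFunction) are presumed present.

static List<Tuple2<Integer, Integer>> generateGraph() {
  Random rand = new Random(42);
  Set<Tuple2<Integer, Integer>> edges = new HashSet<Tuple2<Integer, Integer>>();
  // Keep adding random (from, to) pairs until we have 200 distinct edges.
  while (edges.size() < 200) {
    int from = rand.nextInt(100);
    int to = rand.nextInt(100);
    if (from != to) {
      edges.add(new Tuple2<Integer, Integer>(from, to));
    }
  }
  return new ArrayList<Tuple2<Integer, Integer>>(edges);
}

static final class ProjectFn
    implements PairFunction<Tuple2<Integer, Tuple2<Integer, Integer>>, Integer, Integer> {
  static final ProjectFn INSTANCE = new ProjectFn();

  @Override
  public Tuple2<Integer, Integer> call(Tuple2<Integer, Tuple2<Integer, Integer>> triple) {
    // Project (y, (z, x)) down to the new path (x, z).
    return new Tuple2<Integer, Integer>(triple._2()._2(), triple._2()._1());
  }
}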
/**
 * Creates an analyser for theme life cycles.
 *
 * @param hmmInput The parsed input whose background model, lexicon, and word stream are used.
 *     Themes must be added before any analytics can be done.
 */
public LifeCycleAnalyserSpark(HmmInputFromParser hmmInput) {
  this.wordStream = hmmInput.wordStream;
  this.lexicon = hmmInput.lexicon;
  this.lexiconAsMap = lexicon.collectAsMap();
  getInvertedLexicon();
  numberOfThemes = 0L;
  numberOfWords = lexicon.count();
  // themes = new ArrayList<double[]>();
  setBackgroundModelAsThemebyId(hmmInput.backgroundModelById);
}
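// A hypothetical sketch of the getInvertedLexicon() helper called in the constructor above,
// assuming the lexicon maps words to numeric ids and the inverted lexicon is the id -> word
// map used to translate model output back into words. The field name invertedLexicon and the
// key/value types are assumptions for illustration, not taken from the original class.
private void getInvertedLexicon() {
  this.invertedLexicon = new HashMap<Long, String>();
  // Flip each (word, id) entry of the collected lexicon into an (id, word) entry.
  for (Map.Entry<String, Long> entry : lexiconAsMap.entrySet()) {
    invertedLexicon.put(entry.getValue(), entry.getKey());
  }
}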
public static void main(String[] args) {
  SparkConf sparkconf = new SparkConf()
      .setAppName("Simple Application")
      .setMaster("spark://1.245.77.10:7077")
      .set("spark.driver.extraClassPath",
          "E:/installprogram/spark-1.5.2-bin-hadoop2.4/libthirdparty/*")
      .set("spark.executor.extraClassPath",
          "E:/installprogram/spark-1.5.2-bin-hadoop2.4/libthirdparty/*")
      .set("fs.default.name", "file:///");
  JavaSparkContext sc = new JavaSparkContext(sparkconf);

  Configuration hadoopConfig = sc.hadoopConfiguration();
  hadoopConfig.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
  hadoopConfig.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());

  // sc.addJar("e:/installprogram/spark-1.5.2-bin-hadoop2.4/libthirdparty/jmatrw-0.2.jar");
  // sc.addJar("e:/installprogram/spark-1.5.2-bin-hadoop2.4/libthirdparty/jmatrw4spark-0.2.jar");

  /*JavaRDD<Double> matrdd2 = sc.parallelize(Arrays.asList(1.0, 3.0, 2.0));
  System.out.println("Start counting parallelize...");
  long values = matrdd2.count();
  System.out.println("Value count of parallelize is " + values);*/

  JavaPairRDD<Long, Double> matrdd = sc.newAPIHadoopFile(
      "e:/tmp/vecRow03_x256.mat",
      JMATFileInputFormat.class,
      Long.class,
      Double.class,
      hadoopConfig);

  System.out.println("Start job...");
  long values = matrdd.count();
  System.out.println("Value count of hadoop is " + values);

  sc.stop();
  sc.close();
}
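// A small follow-up sketch, not part of the original job: once the .mat file has been loaded
// as (index, value) pairs, the first few entries can be inspected with plain Spark calls.
// Only the matrdd variable from above is assumed; this snippet would sit before sc.stop().
for (Tuple2<Long, Double> entry : matrdd.take(5)) {
  System.out.println("index " + entry._1() + " -> value " + entry._2());
}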
/**
 * A utility method to generate a class classification model summary.
 *
 * @param sparkContext Spark context used to parallelize the sampled results
 * @param testingData Test data containing the feature vectors
 * @param predictionsAndLabels Predictions and actual labels
 * @return Class classification model summary
 */
public static ClassClassificationAndRegressionModelSummary getClassClassificationModelSummary(
    JavaSparkContext sparkContext,
    JavaRDD<LabeledPoint> testingData,
    JavaPairRDD<Double, Double> predictionsAndLabels) {
  ClassClassificationAndRegressionModelSummary classClassificationModelSummary =
      new ClassClassificationAndRegressionModelSummary();

  // store predictions and actuals
  List<PredictedVsActual> predictedVsActuals = new ArrayList<PredictedVsActual>();
  for (Tuple2<Double, Double> scoreAndLabel : predictionsAndLabels.collect()) {
    PredictedVsActual predictedVsActual = new PredictedVsActual();
    predictedVsActual.setPredicted(scoreAndLabel._1());
    predictedVsActual.setActual(scoreAndLabel._2());
    predictedVsActuals.add(predictedVsActual);
  }

  // create a list of feature values
  List<double[]> features = new ArrayList<double[]>();
  for (LabeledPoint labeledPoint : testingData.collect()) {
    if (labeledPoint != null && labeledPoint.features() != null) {
      double[] rowFeatures = labeledPoint.features().toArray();
      features.add(rowFeatures);
    }
  }

  // create a list of feature values with predicted vs. actuals
  List<TestResultDataPoint> testResultDataPoints = new ArrayList<TestResultDataPoint>();
  for (int i = 0; i < features.size(); i++) {
    TestResultDataPoint testResultDataPoint = new TestResultDataPoint();
    testResultDataPoint.setPredictedVsActual(predictedVsActuals.get(i));
    testResultDataPoint.setFeatureValues(features.get(i));
    testResultDataPoints.add(testResultDataPoint);
  }

  // convert the List to a JavaRDD
  JavaRDD<TestResultDataPoint> testResultDataPointsJavaRDD =
      sparkContext.parallelize(testResultDataPoints);

  // collect the RDD as a sampled list
  List<TestResultDataPoint> testResultDataPointsSample;
  if (testResultDataPointsJavaRDD.count()
      > MLCoreServiceValueHolder.getInstance().getSummaryStatSettings().getSampleSize()) {
    testResultDataPointsSample = testResultDataPointsJavaRDD.takeSample(
        true, MLCoreServiceValueHolder.getInstance().getSummaryStatSettings().getSampleSize());
  } else {
    testResultDataPointsSample = testResultDataPointsJavaRDD.collect();
  }
  classClassificationModelSummary.setTestResultDataPointsSample(testResultDataPointsSample);
  classClassificationModelSummary.setPredictedVsActuals(predictedVsActuals);

  // calculate test error: the fraction of predictions that do not match the actual label
  double error = 1.0 * predictionsAndLabels
      .filter(
          new Function<Tuple2<Double, Double>, Boolean>() {
            private static final long serialVersionUID = -3063364114286182333L;

            @Override
            public Boolean call(Tuple2<Double, Double> pl) {
              return !pl._1().equals(pl._2());
            }
          })
      .count()
      / predictionsAndLabels.count();
  classClassificationModelSummary.setError(error);

  return classClassificationModelSummary;
}
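// A hypothetical sketch of how the predictionsAndLabels argument is typically built before
// calling the summary method above: score each test point with a trained MLlib model and pair
// the prediction with the known label. The trainingData variable, the choice of NaiveBayes,
// and the surrounding sparkContext/testingData variables are assumptions for illustration;
// any MLlib classifier exposing predict(Vector) would fit the same pattern.
final NaiveBayesModel model = NaiveBayes.train(trainingData.rdd());
JavaPairRDD<Double, Double> predictionsAndLabels = testingData.mapToPair(
    new PairFunction<LabeledPoint, Double, Double>() {
      @Override
      public Tuple2<Double, Double> call(LabeledPoint point) {
        // Pair the model's prediction with the actual label for this test point.
        return new Tuple2<Double, Double>(model.predict(point.features()), point.label());
      }
    });
ClassClassificationAndRegressionModelSummary summary =
    getClassClassificationModelSummary(sparkContext, testingData, predictionsAndLabels);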
public static void main(String[] args) {
  // Path to the results (the saved model)
  String pathResults = "results";
  String pathToCategories = "values.txt";
  String pathToWords = "words.txt";
  File file = new File(pathToWords);

  HashMap<Double, String> categoriesDict = new HashMap<>();
  HashMap<String, String> resultado = new HashMap<>();

  FileInputStream fis = null;
  try {
    fis = new FileInputStream(pathToCategories);
    // Construct BufferedReader from InputStreamReader
    BufferedReader br = new BufferedReader(new InputStreamReader(fis));
    String line = null;
    while ((line = br.readLine()) != null) {
      String[] words = line.split(" ");
      categoriesDict.put(Double.valueOf(words[0]), words[1]);
    }
    br.close();
  } catch (FileNotFoundException e) {
    e.printStackTrace();
  } catch (IOException e) {
    e.printStackTrace();
  }

  // Path where the category files are located
  String pathCategories = "src/main/resources/categoriestest/";

  // Basic application configuration
  SparkConf sparkConf = new SparkConf().setAppName("NaiveBayesTest").setMaster("local[*]");
  // Create the context
  JavaSparkContext jsc = new JavaSparkContext(sparkConf);

  NaiveBayesModel model = NaiveBayesModel.load(jsc.sc(), pathResults);
  HashMap<String, String> dictionary = loadDictionary();

  JavaRDD<String> fileWords = null;
  if (file.exists()) {
    JavaRDD<String> input = jsc.textFile(pathToWords);
    fileWords = input.flatMap(
        new FlatMapFunction<String, String>() {
          @Override
          public Iterable<String> call(String s) throws Exception {
            return Arrays.asList(s.split(" "));
          }
        });
  } else {
    System.out.println("Error: the words file does not exist");
    System.exit(-1);
  }
  ArrayList<String> aFileWords = (ArrayList<String>) fileWords.collect();

  // Read each file in the directory that contains the categories
  File dir = new File(pathCategories);
  for (File f : dir.listFiles()) {
    JavaRDD<String> input = jsc.textFile(f.getPath());
    JavaRDD<String> words = input.flatMap(
        new FlatMapFunction<String, String>() {
          @Override
          public Iterable<String> call(String s) throws Exception {
            return Arrays.asList(s.split(" "));
          }
        });
    JavaPairRDD<String, Double> wordCount = Reducer.parseWords(words, dictionary);
    List<Tuple2<String, Double>> total = wordCount.collect();
    List<Tuple2<String, Double>> elementsRemoved = new ArrayList<>();
    for (Tuple2<String, Double> t : total) {
      if (!t._1.equals("")) {
        elementsRemoved.add(new Tuple2<>(t._1, t._2 / wordCount.count()));
      }
    }
    ArrayList<Tuple2<String, Double>> freqFinal = new ArrayList<>();
    for (String s : aFileWords) {
      boolean found = false;
      for (Tuple2<String, Double> t : elementsRemoved) {
        if (t._1.equals(s)) {
          found = true;
          freqFinal.add(t);
          break;
        }
      }
      if (!found) {
        freqFinal.add(new Tuple2<String, Double>(s, 0.0));
      }
    }
    double[] v = new double[freqFinal.size()];
    for (int i = 0; i < freqFinal.size(); i++) {
      Tuple2<String, Double> t = freqFinal.get(i);
      v[i] = t._2;
    }
    org.apache.spark.mllib.linalg.Vector vector = Vectors.dense(v);
    double d = model.predict(vector);
    System.out.println(categoriesDict.get(d));
    resultado.put(f.getName(), categoriesDict.get(d));
  }

  jsc.stop();

  try {
    Thread.sleep(2000);
  } catch (InterruptedException e) {
    e.printStackTrace();
  }

  for (String key : resultado.keySet()) {
    System.out.println(key + " - " + resultado.get(key));
  }
}
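// A hypothetical sketch of the loadDictionary() helper used above. The file name and its
// two-column "key value" layout are assumptions for illustration; the real project may load
// the dictionary differently. It only needs to return the word mapping that
// Reducer.parseWords expects.
private static HashMap<String, String> loadDictionary() {
  HashMap<String, String> dictionary = new HashMap<>();
  try (BufferedReader br = new BufferedReader(new FileReader("dictionary.txt"))) {
    String line;
    while ((line = br.readLine()) != null) {
      String[] parts = line.split(" ");
      if (parts.length >= 2) {
        // First column is the raw word, second column is the normalized form.
        dictionary.put(parts[0], parts[1]);
      }
    }
  } catch (IOException e) {
    e.printStackTrace();
  }
  return dictionary;
}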