コード例 #1
0
 /**
  * Submits a {@code g.V()} traversal to {@code SparkGraphComputer} on a {@code local[4]} master,
  * reading the "tinkerpop-modern.kryo" input through {@code GryoInputFormat} and naming {@code
  * ExampleOutputRDD} as the graph output RDD.
  *
  * <p>There are no explicit assertions: the test passes when the submitted job completes without
  * throwing.
  */
 @Test
 public void shouldWriteToArbitraryRDD() throws Exception {
   final String inputFormat = GryoInputFormat.class.getCanonicalName();
   final String outputRdd = ExampleOutputRDD.class.getCanonicalName();
   final String scriptEngine = "gremlin-groovy";
   final String script = "g.V()";

   final Configuration conf = new BaseConfiguration();
   // Spark runtime settings.
   conf.setProperty("spark.master", "local[4]");
   conf.setProperty("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
   // Graph implementation plus its input and output wiring.
   conf.setProperty(Graph.GRAPH, HadoopGraph.class.getName());
   conf.setProperty(
       Constants.GREMLIN_HADOOP_INPUT_LOCATION,
       HadoopGraphProvider.PATHS.get("tinkerpop-modern.kryo"));
   conf.setProperty(Constants.GREMLIN_HADOOP_GRAPH_INPUT_FORMAT, inputFormat);
   conf.setProperty(Constants.GREMLIN_HADOOP_GRAPH_OUTPUT_RDD, outputRdd);
   conf.setProperty(
       Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, "hadoop-gremlin/target/test-output");
   conf.setProperty(Constants.GREMLIN_HADOOP_JARS_IN_DISTRIBUTED_CACHE, false);

   final Graph graph = GraphFactory.open(conf);
   final GraphComputer computer =
       graph
           .compute(SparkGraphComputer.class)
           .result(GraphComputer.ResultGraph.NEW)
           .persist(GraphComputer.Persist.EDGES);
   // Block until the job finishes so that any failure surfaces as a test error.
   computer
       .program(
           TraversalVertexProgram.build()
               .traversal(
                   GraphTraversalSource.build()
                       .engine(ComputerTraversalEngine.build().computer(SparkGraphComputer.class)),
                   scriptEngine,
                   script)
               .create(graph))
       .submit()
       .get();
 }
コード例 #2
0
 /**
  * Submits a {@code g.V()} traversal to {@code SparkGraphComputer} on a {@code local[4]} master,
  * reading the "tinkerpop-modern.kryo" input through {@code GryoInputFormat} and naming {@code
  * ExampleOutputRDD} as the Spark graph output RDD. The output location is a per-test directory
  * obtained from {@code TestHelper}.
  *
  * <p>There are no explicit assertions: the test passes when the submitted job completes without
  * throwing.
  */
 @Test
 public void shouldWriteToArbitraryRDD() throws Exception {
   final String serializer = GryoSerializer.class.getCanonicalName();
   final String inputFormat = GryoInputFormat.class.getCanonicalName();
   final String outputRdd = ExampleOutputRDD.class.getCanonicalName();
   final String scriptEngine = "gremlin-groovy";
   final String script = "g.V()";

   final Configuration conf = new BaseConfiguration();
   // Spark runtime settings.
   conf.setProperty("spark.master", "local[4]");
   conf.setProperty("spark.serializer", serializer);
   // Graph implementation plus its input and output wiring.
   conf.setProperty(Graph.GRAPH, HadoopGraph.class.getName());
   conf.setProperty(
       Constants.GREMLIN_HADOOP_INPUT_LOCATION,
       SparkHadoopGraphProvider.PATHS.get("tinkerpop-modern.kryo"));
   conf.setProperty(Constants.GREMLIN_HADOOP_GRAPH_INPUT_FORMAT, inputFormat);
   conf.setProperty(Constants.GREMLIN_SPARK_GRAPH_OUTPUT_RDD, outputRdd);
   final String outputLocation =
       TestHelper.makeTestDataDirectory(this.getClass(), "shouldWriteToArbitraryRDD");
   conf.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, outputLocation);
   conf.setProperty(Constants.GREMLIN_HADOOP_JARS_IN_DISTRIBUTED_CACHE, false);

   final Graph graph = GraphFactory.open(conf);
   final GraphComputer computer =
       graph
           .compute(SparkGraphComputer.class)
           .result(GraphComputer.ResultGraph.NEW)
           .persist(GraphComputer.Persist.EDGES);
   // Block until the job finishes so that any failure surfaces as a test error.
   computer
       .program(
           TraversalVertexProgram.build()
               .traversal(
                   graph
                       .traversal()
                       .withComputer(sourceGraph -> sourceGraph.compute(SparkGraphComputer.class)),
                   scriptEngine,
                   script)
               .create(graph))
       .submit()
       .get();
 }
 /**
  * Runs one Spark job per storage level and checks that each job leaves behind a persisted RDD
  * with the requested level.
  *
  * <p>Before each job the test asserts that the number of RDDs reported by {@code Spark} and by
  * the context's persistent-RDD map equals the number of jobs already run; after the job both
  * counts must have grown by one, the RDD must be registered under the graph location derived
  * from the output name, and its storage level must equal the configured one.
  *
  * <p>The Spark context is closed in a {@code finally} block so that a failing assertion or job
  * does not skip the close call.
  */
 @Test
 public void shouldPersistRDDBasedOnStorageLevel() throws Exception {
   Spark.create("local[4]");
   try {
     int counter = 0;
     for (final String storageLevel :
         Arrays.asList("MEMORY_ONLY", "DISK_ONLY", "MEMORY_ONLY_SER", "MEMORY_AND_DISK_SER")) {
       // Nothing beyond the RDDs persisted by previous iterations should be registered yet.
       assertEquals(counter, Spark.getRDDs().size());
       assertEquals(counter, Spark.getContext().getPersistentRDDs().size());
       counter++;
       // A random name gives every iteration its own output location / RDD name.
       final String rddName =
           TestHelper.makeTestDataDirectory(
               PersistedInputOutputRDDIntegrateTest.class, UUID.randomUUID().toString());
       final Configuration configuration = super.getBaseConfiguration();
       configuration.setProperty(
           Constants.GREMLIN_HADOOP_INPUT_LOCATION,
           SparkHadoopGraphProvider.PATHS.get("tinkerpop-modern.kryo"));
       configuration.setProperty(
           Constants.GREMLIN_HADOOP_GRAPH_READER, GryoInputFormat.class.getCanonicalName());
       configuration.setProperty(
           Constants.GREMLIN_HADOOP_GRAPH_WRITER, PersistedOutputRDD.class.getCanonicalName());
       configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_STORAGE_LEVEL, storageLevel);
       configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, rddName);
       configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true);
       final Graph graph = GraphFactory.open(configuration);
       graph
           .compute(SparkGraphComputer.class)
           .result(GraphComputer.ResultGraph.NEW)
           .persist(GraphComputer.Persist.EDGES)
           .program(
               TraversalVertexProgram.build()
                   .traversal(
                       graph.traversal().withComputer(SparkGraphComputer.class),
                       "gremlin-groovy",
                       "g.V().groupCount('m').by('name').out()")
                   .create(graph))
           .submit()
           .get();
       ////////
       assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName)));
       assertEquals(
           StorageLevel.fromString(storageLevel),
           Spark.getRDD(Constants.getGraphLocation(rddName)).getStorageLevel());
       assertEquals(counter, Spark.getRDDs().size());
       assertEquals(counter, Spark.getContext().getPersistentRDDs().size());
     }
   } finally {
     // Always release the context, even when an assertion or the job itself fails.
     Spark.close();
   }
 }
 /**
  * Runs a {@code g.V()} job that writes through {@code PersistedOutputRDD} with {@code
  * GREMLIN_SPARK_PERSIST_CONTEXT} set to {@code false}, then checks that the output RDD is not
  * available afterwards.
  *
  * <p>After the job, {@code Spark.create} is called again and the test asserts that no RDD is
  * registered under the graph location and that the context reports zero persistent RDDs.
  *
  * <p>The Spark context is closed in a {@code finally} block so that a failing assertion or job
  * does not skip the close call.
  */
 @Test
 public void shouldNotPersistRDDAcrossJobs() throws Exception {
   Spark.create("local[4]");
   try {
     final String rddName =
         TestHelper.makeTestDataDirectory(
             PersistedInputOutputRDDIntegrateTest.class, UUID.randomUUID().toString());
     final Configuration configuration = super.getBaseConfiguration();
     configuration.setProperty(
         Constants.GREMLIN_HADOOP_INPUT_LOCATION,
         SparkHadoopGraphProvider.PATHS.get("tinkerpop-modern.kryo"));
     configuration.setProperty(
         Constants.GREMLIN_HADOOP_GRAPH_READER, GryoInputFormat.class.getCanonicalName());
     configuration.setProperty(
         Constants.GREMLIN_HADOOP_GRAPH_WRITER, PersistedOutputRDD.class.getCanonicalName());
     configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, rddName);
     configuration.setProperty(
         Constants.GREMLIN_SPARK_PERSIST_CONTEXT,
         false); // because the spark context is NOT persisted, neither is the RDD
     final Graph graph = GraphFactory.open(configuration);
     graph
         .compute(SparkGraphComputer.class)
         .result(GraphComputer.ResultGraph.NEW)
         .persist(GraphComputer.Persist.EDGES)
         .program(
             TraversalVertexProgram.build()
                 .traversal(
                     graph.traversal().withComputer(SparkGraphComputer.class),
                     "gremlin-groovy",
                     "g.V()")
                 .create(graph))
         .submit()
         .get();
     ////////
     // Obtain a context again before inspecting RDD state after the job.
     Spark.create("local[4]");
     assertFalse(Spark.hasRDD(Constants.getGraphLocation(rddName)));
     assertEquals(0, Spark.getContext().getPersistentRDDs().size());
   } finally {
     // Always release the context, even when an assertion or the job itself fails.
     // NOTE(review): assumes Spark.close() tolerates an already-stopped context - confirm.
     Spark.close();
   }
 }