@Test
public void shouldWriteToArbitraryRDD() throws Exception {
    final Configuration configuration = new BaseConfiguration();
    configuration.setProperty("spark.master", "local[4]");
    configuration.setProperty("spark.serializer", GryoSerializer.class.getCanonicalName());
    configuration.setProperty(Graph.GRAPH, HadoopGraph.class.getName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION,
            SparkHadoopGraphProvider.PATHS.get("tinkerpop-modern.kryo"));
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_INPUT_FORMAT, GryoInputFormat.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_SPARK_GRAPH_OUTPUT_RDD, ExampleOutputRDD.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION,
            TestHelper.makeTestDataDirectory(this.getClass(), "shouldWriteToArbitraryRDD"));
    configuration.setProperty(Constants.GREMLIN_HADOOP_JARS_IN_DISTRIBUTED_CACHE, false);
    ////////
    Graph graph = GraphFactory.open(configuration);
    graph.compute(SparkGraphComputer.class)
            .result(GraphComputer.ResultGraph.NEW)
            .persist(GraphComputer.Persist.EDGES)
            .program(TraversalVertexProgram.build()
                    .traversal(graph.traversal().withComputer(g -> g.compute(SparkGraphComputer.class)),
                            "gremlin-groovy", "g.V()")
                    .create(graph))
            .submit()
            .get();
}
@Test
public void shouldWriteToArbitraryRDD() throws Exception {
    final Configuration configuration = new BaseConfiguration();
    configuration.setProperty("spark.master", "local[4]");
    configuration.setProperty("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
    configuration.setProperty(Graph.GRAPH, HadoopGraph.class.getName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION,
            HadoopGraphProvider.PATHS.get("tinkerpop-modern.kryo"));
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_INPUT_FORMAT, GryoInputFormat.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_OUTPUT_RDD, ExampleOutputRDD.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, "hadoop-gremlin/target/test-output");
    configuration.setProperty(Constants.GREMLIN_HADOOP_JARS_IN_DISTRIBUTED_CACHE, false);
    ////////
    Graph graph = GraphFactory.open(configuration);
    graph.compute(SparkGraphComputer.class)
            .result(GraphComputer.ResultGraph.NEW)
            .persist(GraphComputer.Persist.EDGES)
            .program(TraversalVertexProgram.build()
                    .traversal(GraphTraversalSource.build()
                                    .engine(ComputerTraversalEngine.build().computer(SparkGraphComputer.class)),
                            "gremlin-groovy", "g.V()")
                    .create(graph))
            .submit()
            .get();
}
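// Both variants above register a custom OutputRDD (ExampleOutputRDD) as the graph
// writer. Its source is not shown in this section; the following is a minimal,
// hypothetical sketch of such an implementation -- assuming TinkerPop's OutputRDD
// interface -- to illustrate the contract these tests exercise. The class name
// CountingOutputRDD is illustrative, not from the original code.
import org.apache.commons.configuration.Configuration;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.tinkerpop.gremlin.hadoop.structure.io.VertexWritable;
import org.apache.tinkerpop.gremlin.spark.structure.io.OutputRDD;

public final class CountingOutputRDD implements OutputRDD {
    @Override
    public void writeGraphRDD(final Configuration configuration, final JavaPairRDD<Object, VertexWritable> graphRDD) {
        // "write" the graph by materializing it and counting vertices -- a
        // stand-in for pushing the RDD to an arbitrary sink (database, queue, etc.)
        System.out.println("persisted " + graphRDD.count() + " vertices");
    }
}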
@Test
public void polygonToPolygonsIntersectionTest() throws IOException {
    // create two disjoint polygons: the first spans x = 8..10, the second x = 12..14
    List<Point> firstPolygonPoints = new ArrayList<Point>();
    firstPolygonPoints.add(new PointImpl(10, 10, SpatialContext.GEO));
    firstPolygonPoints.add(new PointImpl(8, 10, SpatialContext.GEO));
    firstPolygonPoints.add(new PointImpl(8, 8, SpatialContext.GEO));
    firstPolygonPoints.add(new PointImpl(10, 8, SpatialContext.GEO));
    firstPolygonPoints.add(new PointImpl(10, 10, SpatialContext.GEO));
    Map<String, Object> firstPolygon = buildGeoJsonPolygon(firstPolygonPoints);
    List<Point> secondPolygonPoints = new ArrayList<Point>();
    secondPolygonPoints.add(new PointImpl(14, 10, SpatialContext.GEO));
    secondPolygonPoints.add(new PointImpl(12, 10, SpatialContext.GEO));
    secondPolygonPoints.add(new PointImpl(12, 8, SpatialContext.GEO));
    secondPolygonPoints.add(new PointImpl(14, 8, SpatialContext.GEO));
    secondPolygonPoints.add(new PointImpl(14, 10, SpatialContext.GEO));
    Map<String, Object> secondPolygon = buildGeoJsonPolygon(secondPolygonPoints);
    // add the vertices to the graph
    graph.addVertex(T.label, DOCUMENT_TYPE, T.id, "1", "location", firstPolygon);
    graph.addVertex(T.label, DOCUMENT_TYPE, T.id, "2", "location", secondPolygon);
    GraphTraversalSource g = graph.traversal();
    // the query polygon spans x = 9..11, so it intersects only the first polygon
    String geoJsonPolygon =
            "{ \"type\": \"Polygon\",\"coordinates\": [[[9, 10],[11, 10],[11, 8],[9, 8],[9, 10]]]}";
    long intersectionCounter = g.V().has("location", Geo.intersercts(geoJsonPolygon)).count().next();
    assertEquals(1L, intersectionCounter);
    Element location = g.V().has("location", Geo.intersercts(geoJsonPolygon)).next();
    assertEquals("1", location.id().toString());
}
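// The buildGeoJsonPolygon(...) helper is referenced above but not defined in this
// section. A plausible sketch follows, under two assumptions: the "location"
// property stores a GeoJSON-style Map, and the Point type is spatial4j's (getX()
// and getY() accessors). The import path may differ by spatial4j version.
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.spatial4j.core.shape.Point; // or org.locationtech.spatial4j.shape.Point in newer releases

private static Map<String, Object> buildGeoJsonPolygon(final List<Point> points) {
    final List<List<Double>> exteriorRing = new ArrayList<>();
    for (final Point point : points) {
        exteriorRing.add(Arrays.asList(point.getX(), point.getY()));
    }
    final Map<String, Object> polygon = new HashMap<>();
    polygon.put("type", "Polygon");
    // GeoJSON coordinates are a list of linear rings; the first (and here only)
    // ring is the exterior boundary, closed by repeating the first point
    polygon.put("coordinates", Collections.singletonList(exteriorRing));
    return polygon;
}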
public static void createRandomGraph(
        final Graph graph, final int numberOfVertices, final int maxNumberOfEdgesPerVertex) {
    final Random random = new Random();
    for (int i = 0; i < numberOfVertices; i++) {
        graph.addVertex(T.id, i);
    }
    graph.vertices().forEachRemaining(vertex -> {
        // sample this vertex's out-degree once; nextInt(bound) excludes the bound,
        // so each vertex gets between 0 and maxNumberOfEdgesPerVertex - 1 "link" edges
        final int numberOfEdges = random.nextInt(maxNumberOfEdgesPerVertex);
        for (int i = 0; i < numberOfEdges; i++) {
            final Vertex other = graph.vertices(random.nextInt(numberOfVertices)).next();
            vertex.addEdge("link", other);
        }
    });
}
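// Example invocation of the helper above (assumes TinkerGraph on the classpath,
// which supports the user-supplied integer ids the helper relies on): a
// 100-vertex random graph with at most 4 outgoing "link" edges per vertex.
final Graph graph = TinkerGraph.open();
createRandomGraph(graph, 100, 5);
System.out.println("edges created: " + IteratorUtils.count(graph.edges()));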
@Test
public void shouldNotHaveDanglingPersistedComputeRDDs() throws Exception {
    Spark.create("local[4]");
    final String rddName = TestHelper.makeTestDataDirectory(
            PersistedInputOutputRDDIntegrateTest.class, UUID.randomUUID().toString());
    final Configuration configuration = super.getBaseConfiguration();
    configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION,
            SparkHadoopGraphProvider.PATHS.get("tinkerpop-modern.kryo"));
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, GryoInputFormat.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, GryoOutputFormat.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, rddName);
    configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true);
    Graph graph = GraphFactory.open(configuration);
    ///
    assertEquals(6, graph.traversal().withComputer(Computer.compute(SparkGraphComputer.class))
            .V().out().count().next().longValue());
    assertFalse(Spark.hasRDD(Constants.getGraphLocation(rddName)));
    assertEquals(0, Spark.getContext().getPersistentRDDs().size());
    //
    assertEquals(2, graph.traversal().withComputer(Computer.compute(SparkGraphComputer.class))
            .V().out().out().count().next().longValue());
    assertFalse(Spark.hasRDD(Constants.getGraphLocation(rddName)));
    assertEquals(0, Spark.getContext().getPersistentRDDs().size());
    ///////
    Spark.close();
}
@Test
public void shouldPersistRDDBasedOnStorageLevel() throws Exception {
    Spark.create("local[4]");
    int counter = 0;
    for (final String storageLevel :
            Arrays.asList("MEMORY_ONLY", "DISK_ONLY", "MEMORY_ONLY_SER", "MEMORY_AND_DISK_SER")) {
        assertEquals(counter, Spark.getRDDs().size());
        assertEquals(counter, Spark.getContext().getPersistentRDDs().size());
        counter++;
        final String rddName = TestHelper.makeTestDataDirectory(
                PersistedInputOutputRDDIntegrateTest.class, UUID.randomUUID().toString());
        final Configuration configuration = super.getBaseConfiguration();
        configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION,
                SparkHadoopGraphProvider.PATHS.get("tinkerpop-modern.kryo"));
        configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, GryoInputFormat.class.getCanonicalName());
        configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, PersistedOutputRDD.class.getCanonicalName());
        configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_STORAGE_LEVEL, storageLevel);
        configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, rddName);
        configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true);
        Graph graph = GraphFactory.open(configuration);
        graph.compute(SparkGraphComputer.class)
                .result(GraphComputer.ResultGraph.NEW)
                .persist(GraphComputer.Persist.EDGES)
                .program(TraversalVertexProgram.build()
                        .traversal(graph.traversal().withComputer(SparkGraphComputer.class),
                                "gremlin-groovy", "g.V().groupCount('m').by('name').out()")
                        .create(graph))
                .submit()
                .get();
        ////////
        assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName)));
        assertEquals(StorageLevel.fromString(storageLevel),
                Spark.getRDD(Constants.getGraphLocation(rddName)).getStorageLevel());
        assertEquals(counter, Spark.getRDDs().size());
        assertEquals(counter, Spark.getContext().getPersistentRDDs().size());
    }
    Spark.close();
}
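// A hedged cleanup sketch related to the test above: every graph that a
// PersistedOutputRDD pins is addressable by its location name, so it can be
// unpersisted explicitly rather than waiting for Spark.close(). This assumes the
// Spark helper exposes a removeRDD(String) method alongside hasRDD/getRDD.
final String graphLocation = Constants.getGraphLocation(rddName);
if (Spark.hasRDD(graphLocation)) {
    Spark.removeRDD(graphLocation); // unpersists and drops the named RDD
}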
@Test
@LoadGraphWith(MODERN)
@FeatureRequirement(featureClass = Graph.Features.VertexFeatures.class, feature = FEATURE_ADD_VERTICES)
@FeatureRequirement(featureClass = Graph.Features.EdgeFeatures.class, feature = FEATURE_ADD_EDGES)
@FeatureRequirement(featureClass = Graph.Features.VertexFeatures.class, feature = FEATURE_USER_SUPPLIED_IDS)
@FeatureRequirement(featureClass = Graph.Features.EdgeFeatures.class, feature = FEATURE_USER_SUPPLIED_IDS)
public void g_V_withSideEffectXsgX_outEXknowsX_subgraphXsgX_name_capXsgX() throws Exception {
    final Configuration config = graphProvider.newGraphConfiguration(
            "subgraph", this.getClass(), name.getMethodName(), MODERN);
    graphProvider.clear(config);
    Graph subgraph = graphProvider.openTestGraph(config);
    /////
    final Traversal<Vertex, Graph> traversal = get_g_V_withSideEffectXsgX_outEXknowsX_subgraphXsgX_name_capXsgX(
            convertToVertexId("marko"), subgraph);
    printTraversalForm(traversal);
    subgraph = traversal.next();
    assertVertexEdgeCounts(3, 2).accept(subgraph);
    subgraph.edges().forEachRemaining(e -> {
        assertEquals("knows", e.label());
        assertEquals("marko", e.outVertex().values("name").next());
        assertEquals(Integer.valueOf(29), e.outVertex().<Integer>values("age").next());
        assertEquals("person", e.outVertex().label());
        final String name = e.inVertex().<String>values("name").next();
        if (name.equals("vadas"))
            assertEquals(0.5d, e.value("weight"), 0.0001d);
        else if (name.equals("josh"))
            assertEquals(1.0d, e.value("weight"), 0.0001d);
        else
            fail("There's a vertex present that should not be in the subgraph");
    });
    graphProvider.clear(subgraph, config);
}
@Override
public void configure(final Map<String, Object> config, final Map<String, Graph> graphs) {
    final GryoMapper.Builder builder;
    final Object graphToUseForMapper = config.get(TOKEN_USE_MAPPER_FROM_GRAPH);
    if (graphToUseForMapper != null) {
        if (null == graphs)
            throw new IllegalStateException(String.format(
                    "No graphs have been provided to the serializer and therefore %s is not a valid configuration",
                    TOKEN_USE_MAPPER_FROM_GRAPH));

        final Graph g = graphs.get(graphToUseForMapper.toString());
        if (null == g)
            throw new IllegalStateException(String.format(
                    "There is no graph named [%s] configured to be used in the %s setting",
                    graphToUseForMapper, TOKEN_USE_MAPPER_FROM_GRAPH));

        // a graph was found so use the mapper it constructs. this allows gryo to be
        // auto-configured with any custom classes that the implementation allows for
        builder = g.io(GryoIo.build()).mapper();
    } else {
        // no graph was supplied so just use the default - this will likely be the case
        // when using a graph with no custom classes or a situation where the user needs
        // complete control like when using two distinct implementations each with their
        // own custom classes.
        builder = GryoMapper.build();
    }

    addIoRegistries(config, builder);
    addCustomClasses(config, builder);

    this.serializeToString = Boolean.parseBoolean(
            config.getOrDefault(TOKEN_SERIALIZE_RESULT_TO_STRING, "false").toString());
    this.gryoMapper = builder.create();
}
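// A usage sketch for configure(...) above. The string keys are assumed to match
// the TOKEN_* constants referenced in the method (presumably "useMapperFromGraph"
// and "serializeResultToString", as in Gremlin Server serializer settings); the
// alias "graph", the TinkerGraph instance, and the serializer variable are
// illustrative assumptions, not from the original code.
final Map<String, Object> config = new HashMap<>();
config.put("useMapperFromGraph", "graph");      // pull the GryoMapper from this graph
config.put("serializeResultToString", "false"); // keep binary Gryo output

final Map<String, Graph> graphs = new HashMap<>();
graphs.put("graph", TinkerGraph.open());

serializer.configure(config, graphs); // serializer: an instance of the class above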
@Test
public void shouldNotPersistRDDAcrossJobs() throws Exception {
    Spark.create("local[4]");
    final String rddName = TestHelper.makeTestDataDirectory(
            PersistedInputOutputRDDIntegrateTest.class, UUID.randomUUID().toString());
    final Configuration configuration = super.getBaseConfiguration();
    configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION,
            SparkHadoopGraphProvider.PATHS.get("tinkerpop-modern.kryo"));
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, GryoInputFormat.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, PersistedOutputRDD.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, rddName);
    // because the spark context is NOT persisted, neither is the RDD
    configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, false);
    Graph graph = GraphFactory.open(configuration);
    graph.compute(SparkGraphComputer.class)
            .result(GraphComputer.ResultGraph.NEW)
            .persist(GraphComputer.Persist.EDGES)
            .program(TraversalVertexProgram.build()
                    .traversal(graph.traversal().withComputer(SparkGraphComputer.class),
                            "gremlin-groovy", "g.V()")
                    .create(graph))
            .submit()
            .get();
    ////////
    Spark.create("local[4]");
    assertFalse(Spark.hasRDD(Constants.getGraphLocation(rddName)));
    assertEquals(0, Spark.getContext().getPersistentRDDs().size());
    Spark.close();
}
@Test
public void testComplexChain() throws Exception {
    Spark.create("local[4]");
    final String rddName = TestHelper.makeTestDataDirectory(
            PersistedInputOutputRDDIntegrateTest.class, "testComplexChain", "graphRDD");
    final String rddName2 = TestHelper.makeTestDataDirectory(
            PersistedInputOutputRDDIntegrateTest.class, "testComplexChain", "graphRDD2");
    final Configuration configuration = super.getBaseConfiguration();
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, GryoInputFormat.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION,
            SparkHadoopGraphProvider.PATHS.get("tinkerpop-modern.kryo"));
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, PersistedOutputRDD.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, rddName);
    configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true);

    assertFalse(Spark.hasRDD(Constants.getGraphLocation(rddName)));
    assertEquals(0, Spark.getContext().getPersistentRDDs().size());

    Graph graph = GraphFactory.open(configuration);
    graph = graph.compute(SparkGraphComputer.class)
            .persist(GraphComputer.Persist.EDGES)
            .program(PageRankVertexProgram.build().iterations(2).create(graph))
            .submit()
            .get()
            .graph();
    GraphTraversalSource g = graph.traversal();
    assertEquals(6L, g.V().count().next().longValue());
    assertEquals(6L, g.E().count().next().longValue());
    assertEquals(6L, g.V().values(PageRankVertexProgram.PAGE_RANK).count().next().longValue());
    ////
    assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName)));
    assertEquals(1, Spark.getContext().getPersistentRDDs().size());
    ////
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, PersistedInputRDD.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, rddName);
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, PersistedOutputRDD.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, rddName2);
    ////
    graph = GraphFactory.open(configuration);
    graph = graph.compute(SparkGraphComputer.class)
            .persist(GraphComputer.Persist.EDGES)
            .mapReduce(PageRankMapReduce.build().create())
            .program(PageRankVertexProgram.build().iterations(2).create(graph))
            .submit()
            .get()
            .graph();
    g = graph.traversal();
    assertEquals(6L, g.V().count().next().longValue());
    assertEquals(6L, g.E().count().next().longValue());
    assertEquals(6L, g.V().values(PageRankVertexProgram.PAGE_RANK).count().next().longValue());
    ////
    assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName)));
    assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName2)));
    assertTrue(Spark.hasRDD(Constants.getMemoryLocation(rddName2, PageRankMapReduce.DEFAULT_MEMORY_KEY)));
    assertEquals(3, Spark.getContext().getPersistentRDDs().size());
    ////
    graph = GraphFactory.open(configuration);
    graph = graph.compute(SparkGraphComputer.class)
            .persist(GraphComputer.Persist.VERTEX_PROPERTIES)
            .program(PageRankVertexProgram.build().iterations(2).create(graph))
            .submit()
            .get()
            .graph();
    g = graph.traversal();
    assertEquals(6L, g.V().count().next().longValue());
    assertEquals(0L, g.E().count().next().longValue());
    assertEquals(6L, g.V().values(PageRankVertexProgram.PAGE_RANK).count().next().longValue());
    ////
    assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName)));
    assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName2)));
    assertFalse(Spark.hasRDD(Constants.getMemoryLocation(rddName2, PageRankMapReduce.DEFAULT_MEMORY_KEY)));
    assertEquals(2, Spark.getContext().getPersistentRDDs().size());
    ////
    graph = GraphFactory.open(configuration);
    graph = graph.compute(SparkGraphComputer.class)
            .persist(GraphComputer.Persist.NOTHING)
            .program(PageRankVertexProgram.build().iterations(2).create(graph))
            .submit()
            .get()
            .graph();
    assertFalse(Spark.hasRDD(Constants.getGraphLocation(rddName2)));
    g = graph.traversal();
    assertEquals(0L, g.V().count().next().longValue());
    assertEquals(0L, g.E().count().next().longValue());
    assertEquals(0L, g.V().values(PageRankVertexProgram.PAGE_RANK).count().next().longValue());
    ////
    assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName)));
    assertFalse(Spark.hasRDD(Constants.getGraphLocation(rddName2)));
    assertFalse(Spark.hasRDD(Constants.getMemoryLocation(rddName2, PageRankMapReduce.DEFAULT_MEMORY_KEY)));
    assertEquals(1, Spark.getContext().getPersistentRDDs().size());
    Spark.close();
}
@Test
public void testBulkLoaderVertexProgramChainWithInputOutputHelperMapping() throws Exception {
    Spark.create("local[4]");
    final String rddName = TestHelper.makeTestDataDirectory(
            PersistedInputOutputRDDIntegrateTest.class, UUID.randomUUID().toString());
    final Configuration readConfiguration = super.getBaseConfiguration();
    readConfiguration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, GryoInputFormat.class.getCanonicalName());
    readConfiguration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION,
            SparkHadoopGraphProvider.PATHS.get("tinkerpop-modern.kryo"));
    readConfiguration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, PersistedOutputRDD.class.getCanonicalName());
    readConfiguration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, rddName);
    readConfiguration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true);
    Graph pageRankGraph = GraphFactory.open(readConfiguration);
    ///////////////
    final Configuration writeConfiguration = new BaseConfiguration();
    writeConfiguration.setProperty(Graph.GRAPH, TinkerGraph.class.getCanonicalName());
    writeConfiguration.setProperty(TinkerGraph.GREMLIN_TINKERGRAPH_GRAPH_FORMAT, "gryo");
    writeConfiguration.setProperty(TinkerGraph.GREMLIN_TINKERGRAPH_GRAPH_LOCATION,
            TestHelper.makeTestDataDirectory(PersistedInputOutputRDDIntegrateTest.class)
                    + "testBulkLoaderVertexProgramChainWithInputOutputHelperMapping.kryo");
    final Graph bulkLoaderGraph = pageRankGraph.compute(SparkGraphComputer.class)
            .persist(GraphComputer.Persist.EDGES)
            .program(PageRankVertexProgram.build().create(pageRankGraph))
            .submit()
            .get()
            .graph();
    bulkLoaderGraph.compute(SparkGraphComputer.class)
            .persist(GraphComputer.Persist.NOTHING)
            .workers(1)
            .program(BulkLoaderVertexProgram.build()
                    .userSuppliedIds(true)
                    .writeGraph(writeConfiguration)
                    .create(bulkLoaderGraph))
            .submit()
            .get();
    ////
    Spark.create(readConfiguration);
    assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName)));
    assertEquals(1, Spark.getContext().getPersistentRDDs().size());
    ////
    final Graph graph = TinkerGraph.open();
    final GraphTraversalSource g = graph.traversal();
    graph.io(IoCore.gryo())
            .readGraph(TestHelper.makeTestDataDirectory(PersistedInputOutputRDDIntegrateTest.class)
                    + "testBulkLoaderVertexProgramChainWithInputOutputHelperMapping.kryo");
    assertEquals(6L, g.V().count().next().longValue());
    assertEquals(6L, g.E().count().next().longValue());
    assertEquals("marko", g.V().has("name", "marko").values("name").next());
    assertEquals(6L, g.V().values(PageRankVertexProgram.PAGE_RANK).count().next().longValue());
    ////
    Spark.close();
}