@Test
 public void shouldWriteToArbitraryRDD() throws Exception {
   final Configuration configuration = new BaseConfiguration();
   configuration.setProperty("spark.master", "local[4]");
   configuration.setProperty("spark.serializer", GryoSerializer.class.getCanonicalName());
   configuration.setProperty(Graph.GRAPH, HadoopGraph.class.getName());
   configuration.setProperty(
       Constants.GREMLIN_HADOOP_INPUT_LOCATION,
       SparkHadoopGraphProvider.PATHS.get("tinkerpop-modern.kryo"));
   configuration.setProperty(
       Constants.GREMLIN_HADOOP_GRAPH_INPUT_FORMAT, GryoInputFormat.class.getCanonicalName());
   configuration.setProperty(
       Constants.GREMLIN_SPARK_GRAPH_OUTPUT_RDD, ExampleOutputRDD.class.getCanonicalName());
   configuration.setProperty(
       Constants.GREMLIN_HADOOP_OUTPUT_LOCATION,
       TestHelper.makeTestDataDirectory(this.getClass(), "shouldWriteToArbitraryRDD"));
   configuration.setProperty(Constants.GREMLIN_HADOOP_JARS_IN_DISTRIBUTED_CACHE, false);
   ////////
   Graph graph = GraphFactory.open(configuration);
   graph
       .compute(SparkGraphComputer.class)
       .result(GraphComputer.ResultGraph.NEW)
       .persist(GraphComputer.Persist.EDGES)
       .program(
           TraversalVertexProgram.build()
               .traversal(
                   graph.traversal().withComputer(g -> g.compute(SparkGraphComputer.class)),
                   "gremlin-groovy",
                   "g.V()")
               .create(graph))
       .submit()
       .get();
 }
 @Test
 public void shouldWriteToArbitraryRDD() throws Exception {
   final Configuration configuration = new BaseConfiguration();
   configuration.setProperty("spark.master", "local[4]");
   configuration.setProperty("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
   configuration.setProperty(Graph.GRAPH, HadoopGraph.class.getName());
   configuration.setProperty(
       Constants.GREMLIN_HADOOP_INPUT_LOCATION,
       HadoopGraphProvider.PATHS.get("tinkerpop-modern.kryo"));
   configuration.setProperty(
       Constants.GREMLIN_HADOOP_GRAPH_INPUT_FORMAT, GryoInputFormat.class.getCanonicalName());
   configuration.setProperty(
       Constants.GREMLIN_HADOOP_GRAPH_OUTPUT_RDD, ExampleOutputRDD.class.getCanonicalName());
   configuration.setProperty(
       Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, "hadoop-gremlin/target/test-output");
   configuration.setProperty(Constants.GREMLIN_HADOOP_JARS_IN_DISTRIBUTED_CACHE, false);
   ////////
   Graph graph = GraphFactory.open(configuration);
   graph
       .compute(SparkGraphComputer.class)
       .result(GraphComputer.ResultGraph.NEW)
       .persist(GraphComputer.Persist.EDGES)
       .program(
           TraversalVertexProgram.build()
               .traversal(
                   GraphTraversalSource.build()
                       .engine(ComputerTraversalEngine.build().computer(SparkGraphComputer.class)),
                   "gremlin-groovy",
                   "g.V()")
               .create(graph))
       .submit()
       .get();
 }
  @Test
  public void polygonToPolygonsIntersectionTest() throws IOException {
    // create polygons
    List<Point> firstPolygonPoints = new ArrayList<Point>();
    firstPolygonPoints.add(new PointImpl(10, 10, SpatialContext.GEO));
    firstPolygonPoints.add(new PointImpl(8, 10, SpatialContext.GEO));
    firstPolygonPoints.add(new PointImpl(8, 8, SpatialContext.GEO));
    firstPolygonPoints.add(new PointImpl(10, 8, SpatialContext.GEO));
    firstPolygonPoints.add(new PointImpl(10, 10, SpatialContext.GEO));
    Map<String, Object> firstPolygon = buildGeoJsonPolygon(firstPolygonPoints);
    List<Point> secondPolygonPoints = new ArrayList<Point>();
    secondPolygonPoints.add(new PointImpl(14, 10, SpatialContext.GEO));
    secondPolygonPoints.add(new PointImpl(12, 10, SpatialContext.GEO));
    secondPolygonPoints.add(new PointImpl(12, 8, SpatialContext.GEO));
    secondPolygonPoints.add(new PointImpl(14, 8, SpatialContext.GEO));
    secondPolygonPoints.add(new PointImpl(14, 10, SpatialContext.GEO));
    Map<String, Object> secondPolygon = buildGeoJsonPolygon(secondPolygonPoints);

    // add the vertices to graph
    graph.addVertex(T.label, DOCUMENT_TYPE, T.id, "1", "location", firstPolygon);
    graph.addVertex(T.label, DOCUMENT_TYPE, T.id, "2", "location", secondPolygon);

    GraphTraversalSource g = graph.traversal();

    String geoJsonPolygon =
        "{ \"type\": \"Polygon\",\"coordinates\": [[[9, 10],[11, 10],[11, 8],[9, 8],[9, 10]]]}";
    long intersectionCounter = g.V().has("location", Geo.intersercts(geoJsonPolygon)).count().next();
    assertEquals(1L, intersectionCounter);
    Element location = g.V().has("location", Geo.intersercts(geoJsonPolygon)).next();
    assertEquals("1", location.id().toString());
  }
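The test above relies on a buildGeoJsonPolygon helper that is not shown here. A minimal sketch of what such a helper might look like, assuming it only wraps the ring of spatial4j points in a GeoJSON-style map (the name, signature, and structure are assumptions, not the project's actual implementation; java.util imports are implied):
  // Hypothetical helper assumed by the test above - the real implementation may differ.
  private static Map<String, Object> buildGeoJsonPolygon(final List<Point> points) {
    final List<List<Double>> ring = new ArrayList<>();
    for (final Point point : points) {
      // GeoJSON coordinates use [longitude, latitude] ordering
      ring.add(Arrays.asList(point.getX(), point.getY()));
    }
    final Map<String, Object> polygon = new HashMap<>();
    polygon.put("type", "Polygon");
    polygon.put("coordinates", Collections.singletonList(ring));
    return polygon;
  }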
 public static void createRandomGraph(
     final Graph graph, final int numberOfVertices, final int maxNumberOfEdgesPerVertex) {
   final Random random = new Random();
   for (int i = 0; i < numberOfVertices; i++) {
     graph.addVertex(T.id, i);
   }
   graph
       .vertices()
       .forEachRemaining(
           vertex -> {
             for (int i = 0; i < random.nextInt(maxNumberOfEdgesPerVertex); i++) {
               final Vertex other = graph.vertices(random.nextInt(numberOfVertices)).next();
               vertex.addEdge("link", other);
             }
           });
 }
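A brief usage sketch for the helper above; TinkerGraph is assumed purely for illustration (any Graph implementation that accepts numeric user-supplied ids would work):
  // Illustrative only: build a small random graph in memory and count the vertices.
  final Graph graph = TinkerGraph.open();
  createRandomGraph(graph, 100, 5); // 100 vertices, up to 4 outgoing "link" edges per vertex
  System.out.println(IteratorUtils.count(graph.vertices()) + " vertices created");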
 @Test
 public void shouldNotHaveDanglingPersistedComputeRDDs() throws Exception {
   Spark.create("local[4]");
   final String rddName =
       TestHelper.makeTestDataDirectory(
           PersistedInputOutputRDDIntegrateTest.class, UUID.randomUUID().toString());
   final Configuration configuration = super.getBaseConfiguration();
   configuration.setProperty(
       Constants.GREMLIN_HADOOP_INPUT_LOCATION,
       SparkHadoopGraphProvider.PATHS.get("tinkerpop-modern.kryo"));
   configuration.setProperty(
       Constants.GREMLIN_HADOOP_GRAPH_READER, GryoInputFormat.class.getCanonicalName());
   configuration.setProperty(
       Constants.GREMLIN_HADOOP_GRAPH_WRITER, GryoOutputFormat.class.getCanonicalName());
   configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, rddName);
   configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true);
   Graph graph = GraphFactory.open(configuration);
   ///
   assertEquals(
       6,
       graph
           .traversal()
           .withComputer(Computer.compute(SparkGraphComputer.class))
           .V()
           .out()
           .count()
           .next()
           .longValue());
   assertFalse(Spark.hasRDD(Constants.getGraphLocation(rddName)));
   assertEquals(0, Spark.getContext().getPersistentRDDs().size());
   //
   assertEquals(
       2,
       graph
           .traversal()
           .withComputer(Computer.compute(SparkGraphComputer.class))
           .V()
           .out()
           .out()
           .count()
           .next()
           .longValue());
   assertFalse(Spark.hasRDD(Constants.getGraphLocation(rddName)));
   assertEquals(0, Spark.getContext().getPersistentRDDs().size());
   ///////
   Spark.close();
 }
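The PersistedInputOutputRDD tests above and below build on a super.getBaseConfiguration() that is not shown in this section. A minimal sketch of what that base configuration is assumed to provide, mirroring the properties the self-contained tests at the top of this section set explicitly (the actual base class may set additional properties):
  // Assumed shape of the inherited base configuration; not the project's actual base-class source.
  protected Configuration getBaseConfiguration() {
    final BaseConfiguration configuration = new BaseConfiguration();
    configuration.setProperty("spark.master", "local[4]");
    configuration.setProperty("spark.serializer", GryoSerializer.class.getCanonicalName());
    configuration.setProperty(Graph.GRAPH, HadoopGraph.class.getName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_JARS_IN_DISTRIBUTED_CACHE, false);
    return configuration;
  }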
 @Test
 public void shouldPersistRDDBasedOnStorageLevel() throws Exception {
   Spark.create("local[4]");
   int counter = 0;
   for (final String storageLevel :
       Arrays.asList("MEMORY_ONLY", "DISK_ONLY", "MEMORY_ONLY_SER", "MEMORY_AND_DISK_SER")) {
     assertEquals(counter, Spark.getRDDs().size());
     assertEquals(counter, Spark.getContext().getPersistentRDDs().size());
     counter++;
     final String rddName =
         TestHelper.makeTestDataDirectory(
             PersistedInputOutputRDDIntegrateTest.class, UUID.randomUUID().toString());
     final Configuration configuration = super.getBaseConfiguration();
     configuration.setProperty(
         Constants.GREMLIN_HADOOP_INPUT_LOCATION,
         SparkHadoopGraphProvider.PATHS.get("tinkerpop-modern.kryo"));
     configuration.setProperty(
         Constants.GREMLIN_HADOOP_GRAPH_READER, GryoInputFormat.class.getCanonicalName());
     configuration.setProperty(
         Constants.GREMLIN_HADOOP_GRAPH_WRITER, PersistedOutputRDD.class.getCanonicalName());
     configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_STORAGE_LEVEL, storageLevel);
     configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, rddName);
     configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true);
     Graph graph = GraphFactory.open(configuration);
     graph
         .compute(SparkGraphComputer.class)
         .result(GraphComputer.ResultGraph.NEW)
         .persist(GraphComputer.Persist.EDGES)
         .program(
             TraversalVertexProgram.build()
                 .traversal(
                     graph.traversal().withComputer(SparkGraphComputer.class),
                     "gremlin-groovy",
                     "g.V().groupCount('m').by('name').out()")
                 .create(graph))
         .submit()
         .get();
     ////////
     assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName)));
     assertEquals(
         StorageLevel.fromString(storageLevel),
         Spark.getRDD(Constants.getGraphLocation(rddName)).getStorageLevel());
     assertEquals(counter, Spark.getRDDs().size());
     assertEquals(counter, Spark.getContext().getPersistentRDDs().size());
   }
   Spark.close();
 }
  @Test
  @LoadGraphWith(MODERN)
  @FeatureRequirement(
      featureClass = Graph.Features.VertexFeatures.class,
      feature = FEATURE_ADD_VERTICES)
  @FeatureRequirement(featureClass = Graph.Features.EdgeFeatures.class, feature = FEATURE_ADD_EDGES)
  @FeatureRequirement(
      featureClass = Graph.Features.VertexFeatures.class,
      feature = FEATURE_USER_SUPPLIED_IDS)
  @FeatureRequirement(
      featureClass = Graph.Features.EdgeFeatures.class,
      feature = FEATURE_USER_SUPPLIED_IDS)
  public void g_V_withSideEffectXsgX_outEXknowsX_subgraphXsgX_name_capXsgX() throws Exception {
    final Configuration config =
        graphProvider.newGraphConfiguration(
            "subgraph", this.getClass(), name.getMethodName(), MODERN);
    graphProvider.clear(config);
    Graph subgraph = graphProvider.openTestGraph(config);
    /////
    final Traversal<Vertex, Graph> traversal =
        get_g_V_withSideEffectXsgX_outEXknowsX_subgraphXsgX_name_capXsgX(
            convertToVertexId("marko"), subgraph);
    printTraversalForm(traversal);
    subgraph = traversal.next();
    assertVertexEdgeCounts(3, 2).accept(subgraph);
    subgraph
        .edges()
        .forEachRemaining(
            e -> {
              assertEquals("knows", e.label());
              assertEquals("marko", e.outVertex().values("name").next());
              assertEquals(Integer.valueOf(29), e.outVertex().<Integer>values("age").next());
              assertEquals("person", e.outVertex().label());

              final String name = e.inVertex().<String>values("name").next();
              if (name.equals("vadas")) assertEquals(0.5d, e.value("weight"), 0.0001d);
              else if (name.equals("josh")) assertEquals(1.0d, e.value("weight"), 0.0001d);
              else fail("There's a vertex present that should not be in the subgraph");
            });
    graphProvider.clear(subgraph, config);
  }
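The assertions above delegate to an abstract factory method supplied by the test base class. A sketch of the traversal that method conventionally builds, assuming a g field of type GraphTraversalSource is available (the method body here is illustrative, not the authoritative implementation):
  // Hypothetical implementation of the abstract factory method used by the test above.
  public Traversal<Vertex, Graph> get_g_V_withSideEffectXsgX_outEXknowsX_subgraphXsgX_name_capXsgX(
      final Object v1Id, final Graph subgraph) {
    return g.withSideEffect("sg", subgraph)
        .V(v1Id)
        .outE("knows")
        .subgraph("sg")
        .values("name")
        .cap("sg");
  }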
  @Override
  public void configure(final Map<String, Object> config, final Map<String, Graph> graphs) {
    final GryoMapper.Builder builder;
    final Object graphToUseForMapper = config.get(TOKEN_USE_MAPPER_FROM_GRAPH);
    if (graphToUseForMapper != null) {
      if (null == graphs)
        throw new IllegalStateException(
            String.format(
                "No graphs have been provided to the serializer and therefore %s is not a valid configuration",
                TOKEN_USE_MAPPER_FROM_GRAPH));

      final Graph g = graphs.get(graphToUseForMapper.toString());
      if (null == g)
        throw new IllegalStateException(
            String.format(
                "There is no graph named [%s] configured to be used in the %s setting",
                graphToUseForMapper, TOKEN_USE_MAPPER_FROM_GRAPH));

      // a graph was found so use the mapper it constructs - this allows gryo to be
      // auto-configured with any custom classes that the implementation allows for
      builder = g.io(GryoIo.build()).mapper();
    } else {
      // no graph was supplied so just use the default - this will likely be the case when using
      // a graph with no custom classes, or a situation where the user needs complete control,
      // like when using two distinct implementations each with their own custom classes.
      builder = GryoMapper.build();
    }

    addIoRegistries(config, builder);
    addCustomClasses(config, builder);

    this.serializeToString =
        Boolean.parseBoolean(
            config.getOrDefault(TOKEN_SERIALIZE_RESULT_TO_STRING, "false").toString());

    this.gryoMapper = builder.create();
  }
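A hedged sketch of how this configure method could be driven; the graph name "graph", the someOpenGraph variable, and the serializer instance are illustrative assumptions rather than part of the snippet above:
  // Illustrative wiring only: point TOKEN_USE_MAPPER_FROM_GRAPH at a named, already-opened graph
  // so the resulting Gryo mapper picks up that graph's custom classes.
  final Map<String, Graph> graphs = new HashMap<>();
  graphs.put("graph", someOpenGraph); // someOpenGraph: any previously opened Graph (assumption)
  final Map<String, Object> config = new HashMap<>();
  config.put(TOKEN_USE_MAPPER_FROM_GRAPH, "graph");
  config.put(TOKEN_SERIALIZE_RESULT_TO_STRING, "true");
  serializer.configure(config, graphs);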
 @Test
 public void shouldNotPersistRDDAcrossJobs() throws Exception {
   Spark.create("local[4]");
   final String rddName =
       TestHelper.makeTestDataDirectory(
           PersistedInputOutputRDDIntegrateTest.class, UUID.randomUUID().toString());
   final Configuration configuration = super.getBaseConfiguration();
   configuration.setProperty(
       Constants.GREMLIN_HADOOP_INPUT_LOCATION,
       SparkHadoopGraphProvider.PATHS.get("tinkerpop-modern.kryo"));
   configuration.setProperty(
       Constants.GREMLIN_HADOOP_GRAPH_READER, GryoInputFormat.class.getCanonicalName());
   configuration.setProperty(
       Constants.GREMLIN_HADOOP_GRAPH_WRITER, PersistedOutputRDD.class.getCanonicalName());
   configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, rddName);
   configuration.setProperty(
       Constants.GREMLIN_SPARK_PERSIST_CONTEXT,
       false); // because the spark context is NOT persisted, neither is the RDD
   Graph graph = GraphFactory.open(configuration);
   graph
       .compute(SparkGraphComputer.class)
       .result(GraphComputer.ResultGraph.NEW)
       .persist(GraphComputer.Persist.EDGES)
       .program(
           TraversalVertexProgram.build()
               .traversal(
                   graph.traversal().withComputer(SparkGraphComputer.class),
                   "gremlin-groovy",
                   "g.V()")
               .create(graph))
       .submit()
       .get();
   ////////
   Spark.create("local[4]");
   assertFalse(Spark.hasRDD(Constants.getGraphLocation(rddName)));
   assertEquals(0, Spark.getContext().getPersistentRDDs().size());
   Spark.close();
 }
  @Test
  public void testComplexChain() throws Exception {
    Spark.create("local[4]");

    final String rddName =
        TestHelper.makeTestDataDirectory(
            PersistedInputOutputRDDIntegrateTest.class, "testComplexChain", "graphRDD");
    final String rddName2 =
        TestHelper.makeTestDataDirectory(
            PersistedInputOutputRDDIntegrateTest.class, "testComplexChain", "graphRDD2");
    final Configuration configuration = super.getBaseConfiguration();
    configuration.setProperty(
        Constants.GREMLIN_HADOOP_GRAPH_READER, GryoInputFormat.class.getCanonicalName());
    configuration.setProperty(
        Constants.GREMLIN_HADOOP_INPUT_LOCATION,
        SparkHadoopGraphProvider.PATHS.get("tinkerpop-modern.kryo"));
    configuration.setProperty(
        Constants.GREMLIN_HADOOP_GRAPH_WRITER, PersistedOutputRDD.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, rddName);
    configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true);

    assertFalse(Spark.hasRDD(Constants.getGraphLocation(rddName)));
    assertEquals(0, Spark.getContext().getPersistentRDDs().size());
    Graph graph = GraphFactory.open(configuration);
    graph =
        graph
            .compute(SparkGraphComputer.class)
            .persist(GraphComputer.Persist.EDGES)
            .program(PageRankVertexProgram.build().iterations(2).create(graph))
            .submit()
            .get()
            .graph();
    GraphTraversalSource g = graph.traversal();
    assertEquals(6L, g.V().count().next().longValue());
    assertEquals(6L, g.E().count().next().longValue());
    assertEquals(6L, g.V().values(PageRankVertexProgram.PAGE_RANK).count().next().longValue());
    ////
    assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName)));
    assertEquals(1, Spark.getContext().getPersistentRDDs().size());
    ////
    configuration.setProperty(
        Constants.GREMLIN_HADOOP_GRAPH_READER, PersistedInputRDD.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, rddName);
    configuration.setProperty(
        Constants.GREMLIN_HADOOP_GRAPH_WRITER, PersistedOutputRDD.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, rddName2);
    ////
    graph = GraphFactory.open(configuration);
    graph =
        graph
            .compute(SparkGraphComputer.class)
            .persist(GraphComputer.Persist.EDGES)
            .mapReduce(PageRankMapReduce.build().create())
            .program(PageRankVertexProgram.build().iterations(2).create(graph))
            .submit()
            .get()
            .graph();
    g = graph.traversal();
    assertEquals(6L, g.V().count().next().longValue());
    assertEquals(6L, g.E().count().next().longValue());
    assertEquals(6L, g.V().values(PageRankVertexProgram.PAGE_RANK).count().next().longValue());
    ////
    assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName)));
    assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName2)));
    assertTrue(
        Spark.hasRDD(Constants.getMemoryLocation(rddName2, PageRankMapReduce.DEFAULT_MEMORY_KEY)));
    assertEquals(3, Spark.getContext().getPersistentRDDs().size());
    ////
    graph = GraphFactory.open(configuration);
    graph =
        graph
            .compute(SparkGraphComputer.class)
            .persist(GraphComputer.Persist.VERTEX_PROPERTIES)
            .program(PageRankVertexProgram.build().iterations(2).create(graph))
            .submit()
            .get()
            .graph();
    g = graph.traversal();
    assertEquals(6L, g.V().count().next().longValue());
    assertEquals(0L, g.E().count().next().longValue());
    assertEquals(6L, g.V().values(PageRankVertexProgram.PAGE_RANK).count().next().longValue());
    ////
    assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName)));
    assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName2)));
    assertFalse(
        Spark.hasRDD(Constants.getMemoryLocation(rddName2, PageRankMapReduce.DEFAULT_MEMORY_KEY)));
    assertEquals(2, Spark.getContext().getPersistentRDDs().size());
    ////
    graph = GraphFactory.open(configuration);
    graph =
        graph
            .compute(SparkGraphComputer.class)
            .persist(GraphComputer.Persist.NOTHING)
            .program(PageRankVertexProgram.build().iterations(2).create(graph))
            .submit()
            .get()
            .graph();
    assertFalse(Spark.hasRDD(Constants.getGraphLocation(rddName2)));
    g = graph.traversal();
    assertEquals(0L, g.V().count().next().longValue());
    assertEquals(0L, g.E().count().next().longValue());
    assertEquals(0L, g.V().values(PageRankVertexProgram.PAGE_RANK).count().next().longValue());
    ////
    assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName)));
    assertFalse(Spark.hasRDD(Constants.getGraphLocation(rddName2)));
    assertFalse(
        Spark.hasRDD(Constants.getMemoryLocation(rddName2, PageRankMapReduce.DEFAULT_MEMORY_KEY)));
    assertEquals(1, Spark.getContext().getPersistentRDDs().size());
    Spark.close();
  }
  @Test
  public void testBulkLoaderVertexProgramChainWithInputOutputHelperMapping() throws Exception {
    Spark.create("local[4]");

    final String rddName =
        TestHelper.makeTestDataDirectory(
            PersistedInputOutputRDDIntegrateTest.class, UUID.randomUUID().toString());
    final Configuration readConfiguration = super.getBaseConfiguration();
    readConfiguration.setProperty(
        Constants.GREMLIN_HADOOP_GRAPH_READER, GryoInputFormat.class.getCanonicalName());
    readConfiguration.setProperty(
        Constants.GREMLIN_HADOOP_INPUT_LOCATION,
        SparkHadoopGraphProvider.PATHS.get("tinkerpop-modern.kryo"));
    readConfiguration.setProperty(
        Constants.GREMLIN_HADOOP_GRAPH_WRITER, PersistedOutputRDD.class.getCanonicalName());
    readConfiguration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, rddName);
    readConfiguration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true);
    Graph pageRankGraph = GraphFactory.open(readConfiguration);
    ///////////////
    final Configuration writeConfiguration = new BaseConfiguration();
    writeConfiguration.setProperty(Graph.GRAPH, TinkerGraph.class.getCanonicalName());
    writeConfiguration.setProperty(TinkerGraph.GREMLIN_TINKERGRAPH_GRAPH_FORMAT, "gryo");
    writeConfiguration.setProperty(
        TinkerGraph.GREMLIN_TINKERGRAPH_GRAPH_LOCATION,
        TestHelper.makeTestDataDirectory(PersistedInputOutputRDDIntegrateTest.class)
            + "testBulkLoaderVertexProgramChainWithInputOutputHelperMapping.kryo");
    final Graph bulkLoaderGraph =
        pageRankGraph
            .compute(SparkGraphComputer.class)
            .persist(GraphComputer.Persist.EDGES)
            .program(PageRankVertexProgram.build().create(pageRankGraph))
            .submit()
            .get()
            .graph();
    bulkLoaderGraph
        .compute(SparkGraphComputer.class)
        .persist(GraphComputer.Persist.NOTHING)
        .workers(1)
        .program(
            BulkLoaderVertexProgram.build()
                .userSuppliedIds(true)
                .writeGraph(writeConfiguration)
                .create(bulkLoaderGraph))
        .submit()
        .get();
    ////
    Spark.create(readConfiguration);
    assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName)));
    assertEquals(1, Spark.getContext().getPersistentRDDs().size());
    ////
    final Graph graph = TinkerGraph.open();
    final GraphTraversalSource g = graph.traversal();
    graph
        .io(IoCore.gryo())
        .readGraph(
            TestHelper.makeTestDataDirectory(PersistedInputOutputRDDIntegrateTest.class)
                + "testBulkLoaderVertexProgramChainWithInputOutputHelperMapping.kryo");
    assertEquals(6L, g.V().count().next().longValue());
    assertEquals(6L, g.E().count().next().longValue());
    assertEquals("marko", g.V().has("name", "marko").values("name").next());
    assertEquals(6L, g.V().values(PageRankVertexProgram.PAGE_RANK).count().next().longValue());
    ////
    Spark.close();
  }