// Verifies that updating a FEED entity (new tags, new group, extra cluster) rewires
// the lineage graph edges. Builds on the absolute vertex/edge counts accumulated by
// the earlier lifecycle tests via dependsOnMethods.
@Test(dependsOnMethods = "testOnChange")
public void testOnFeedEntityChange() throws Exception {
    Feed oldFeed = inputFeeds.get(0);
    // Rebuild the feed under the same name but with a changed tag set
    // ("Secured" + new "source=data-warehouse") and a different group ("reporting").
    Feed newFeed =
        EntityBuilderTestUtil.buildFeed(
            oldFeed.getName(),
            clusterEntity,
            "classified-as=Secured,source=data-warehouse",
            "reporting");
    addStorage(
        newFeed,
        Storage.TYPE.FILESYSTEM,
        "jail://global:00/falcon/impression-feed/20140101");

    try {
        configStore.initiateUpdate(newFeed);

        // add cluster
        org.apache.falcon.entity.v0.feed.Cluster feedCluster =
            new org.apache.falcon.entity.v0.feed.Cluster();
        feedCluster.setName(anotherCluster.getName());
        newFeed.getClusters().getClusters().add(feedCluster);

        configStore.update(EntityType.FEED, newFeed);
    } finally {
        // Always release the update lock taken by initiateUpdate, even on failure.
        configStore.cleanupUpdateInit();
    }

    verifyUpdatedEdges(newFeed);
    Assert.assertEquals(getVerticesCount(service.getGraph()), 22); // +2 = 2 new tags
    Assert.assertEquals(getEdgesCount(service.getGraph()), 35); // +2 = 1 new cluster, 1 new tag
}
@Test public void testLineageForNoDateInFeedPath() throws Exception { setupForNoDateInFeedPath(); WorkflowExecutionContext context = WorkflowExecutionContext.create( getTestMessageArgs( EntityOperations.GENERATE, GENERATE_WORKFLOW_NAME, null, OUTPUT_INSTANCE_PATHS_NO_DATE, INPUT_INSTANCE_PATHS_NO_DATE, null), WorkflowExecutionContext.Type.POST_PROCESSING); service.onSuccess(context); debug(service.getGraph()); GraphUtils.dump(service.getGraph()); // Verify if instance name has nominal time List<String> feedNamesOwnedByUser = getFeedsOwnedByAUser(RelationshipType.FEED_INSTANCE.getName()); List<String> expected = Arrays.asList( "impression-feed/2014-01-01T01:00Z", "clicks-feed/2014-01-01T01:00Z", "imp-click-join1/2014-01-01T01:00Z", "imp-click-join2/2014-01-01T01:00Z"); Assert.assertTrue(feedNamesOwnedByUser.containsAll(expected)); // +5 = 1 process, 2 inputs, 2 outputs Assert.assertEquals(getVerticesCount(service.getGraph()), 22); // +34 = +26 for feed instances + 8 for process instance Assert.assertEquals(getEdgesCount(service.getGraph()), 65); }
@Test public void testOnAddClusterEntity() throws Exception { clusterEntity = addClusterEntity(CLUSTER_ENTITY_NAME, COLO_NAME, "classification=production"); verifyEntityWasAddedToGraph(CLUSTER_ENTITY_NAME, RelationshipType.CLUSTER_ENTITY); verifyClusterEntityEdges(); Assert.assertEquals(getVerticesCount(service.getGraph()), 3); // +3 = cluster, colo, tag Assert.assertEquals(getEdgesCount(service.getGraph()), 2); // +2 = cluster to colo and tag }
@Test public void testLineageForRetentionWithNoFeedsEvicted() throws Exception { cleanUp(); service.init(); long beforeVerticesCount = getVerticesCount(service.getGraph()); long beforeEdgesCount = getEdgesCount(service.getGraph()); WorkflowExecutionContext context = WorkflowExecutionContext.create( getTestMessageArgs( EntityOperations.DELETE, EVICTION_WORKFLOW_NAME, EVICTED_FEED, "IGNORE", "IGNORE", EVICTED_FEED), WorkflowExecutionContext.Type.POST_PROCESSING); service.onSuccess(context); debug(service.getGraph()); GraphUtils.dump(service.getGraph()); // No new vertices added Assert.assertEquals(getVerticesCount(service.getGraph()), beforeVerticesCount); // No new edges added Assert.assertEquals(getEdgesCount(service.getGraph()), beforeEdgesCount); }
@Test(dependsOnMethods = "testOnAdd") public void testOnChange() throws Exception { // shutdown the graph and resurrect for testing service.destroy(); service.init(); // cannot modify cluster, adding a new cluster anotherCluster = addClusterEntity("another-cluster", "east-coast", "classification=another"); verifyEntityWasAddedToGraph("another-cluster", RelationshipType.CLUSTER_ENTITY); Assert.assertEquals(getVerticesCount(service.getGraph()), 20); // +3 = cluster, colo, tag // +2 edges to above, no user but only to colo and new tag Assert.assertEquals(getEdgesCount(service.getGraph()), 33); }
// Adds two input feeds and two output feeds, asserting the exact vertex/edge growth
// after each addition. Tags and groups are shared vertices, so later feeds add fewer
// vertices than the first one. Counts are cumulative on top of testOnAddClusterEntity.
@Test(dependsOnMethods = "testOnAddClusterEntity")
public void testOnAddFeedEntity() throws Exception {
    // Input feed #1: one tag ("Secure"), one group ("analytics").
    Feed impressionsFeed =
        addFeedEntity(
            "impression-feed",
            clusterEntity,
            "classified-as=Secure",
            "analytics",
            Storage.TYPE.FILESYSTEM,
            "/falcon/impression-feed/${YEAR}/${MONTH}/${DAY}");
    inputFeeds.add(impressionsFeed);
    verifyEntityWasAddedToGraph(impressionsFeed.getName(), RelationshipType.FEED_ENTITY);
    verifyFeedEntityEdges(impressionsFeed.getName(), "Secure", "analytics");
    Assert.assertEquals(getVerticesCount(service.getGraph()), 7); // +4 = feed, tag, group, user
    Assert.assertEquals(getEdgesCount(service.getGraph()), 6); // +4 = cluster, tag, group, user

    // Input feed #2: "Secure" tag and "analytics" group already exist; "Financial" is new.
    Feed clicksFeed =
        addFeedEntity(
            "clicks-feed",
            clusterEntity,
            "classified-as=Secure,classified-as=Financial",
            "analytics",
            Storage.TYPE.FILESYSTEM,
            "/falcon/clicks-feed/${YEAR}-${MONTH}-${DAY}");
    inputFeeds.add(clicksFeed);
    verifyEntityWasAddedToGraph(clicksFeed.getName(), RelationshipType.FEED_ENTITY);
    Assert.assertEquals(getVerticesCount(service.getGraph()), 9); // feed and financial vertex
    Assert.assertEquals(
        getEdgesCount(service.getGraph()), 11); // +5 = cluster + user + 2Group + Tag

    // Output feed #1: two new groups ("reporting", "bi"); "Financial" tag already exists.
    Feed join1Feed =
        addFeedEntity(
            "imp-click-join1",
            clusterEntity,
            "classified-as=Financial",
            "reporting,bi",
            Storage.TYPE.FILESYSTEM,
            "/falcon/imp-click-join1/${YEAR}${MONTH}${DAY}");
    outputFeeds.add(join1Feed);
    verifyEntityWasAddedToGraph(join1Feed.getName(), RelationshipType.FEED_ENTITY);
    Assert.assertEquals(getVerticesCount(service.getGraph()), 12); // + 3 = 1 feed and 2 groups
    Assert.assertEquals(getEdgesCount(service.getGraph()), 16); // +5 = cluster + user +
    // Group + 2Tags

    // Output feed #2: all its tags and groups exist already, so only the feed vertex is new.
    Feed join2Feed =
        addFeedEntity(
            "imp-click-join2",
            clusterEntity,
            "classified-as=Secure,classified-as=Financial",
            "reporting,bi",
            Storage.TYPE.FILESYSTEM,
            "/falcon/imp-click-join2/${YEAR}${MONTH}${DAY}");
    outputFeeds.add(join2Feed);
    verifyEntityWasAddedToGraph(join2Feed.getName(), RelationshipType.FEED_ENTITY);

    Assert.assertEquals(getVerticesCount(service.getGraph()), 13); // +1 feed
    // +6 = user + 2tags + 2Groups + Cluster
    Assert.assertEquals(getEdgesCount(service.getGraph()), 22);
}
// Final teardown: dump the graph for post-mortem inspection, wipe both stores,
// and restore the history property so later test classes see the default.
@AfterClass
public void tearDown() throws Exception {
    GraphUtils.dump(service.getGraph(), System.out);
    cleanUp();
    StartupProperties.get().setProperty("falcon.graph.preserve.history", "false");
}
private void verifyLineageGraph( String feedType, List<String> expectedFeeds, List<String> secureFeeds, List<String> ownedAndSecureFeeds) { // feeds owned by a user List<String> feedNamesOwnedByUser = getFeedsOwnedByAUser(feedType); Assert.assertTrue(feedNamesOwnedByUser.containsAll(expectedFeeds)); Graph graph = service.getGraph(); Iterator<Vertex> vertices = graph.getVertices("name", "impression-feed/2014-01-01T00:00Z").iterator(); Assert.assertTrue(vertices.hasNext()); Vertex feedInstanceVertex = vertices.next(); Assert.assertEquals( feedInstanceVertex.getProperty(RelationshipProperty.TYPE.getName()), RelationshipType.FEED_INSTANCE.getName()); Object vertexId = feedInstanceVertex.getId(); Vertex vertexById = graph.getVertex(vertexId); Assert.assertEquals(vertexById, feedInstanceVertex); // feeds classified as secure verifyFeedsClassifiedAsSecure(feedType, secureFeeds); // feeds owned by a user and classified as secure verifyFeedsOwnedByUserAndClassification(feedType, "Financial", ownedAndSecureFeeds); }
// Verifies retention/eviction lineage: after an eviction run, each evicted instance
// path gains an evicted-from edge to its cluster while no new vertices appear.
@Test
public void testLineageForRetention() throws Exception {
    setupForLineageEviction();

    WorkflowExecutionContext context =
        WorkflowExecutionContext.create(
            getTestMessageArgs(
                EntityOperations.DELETE,
                EVICTION_WORKFLOW_NAME,
                EVICTED_FEED,
                EVICTED_INSTANCE_PATHS,
                "IGNORE",
                EVICTED_FEED),
            WorkflowExecutionContext.Type.POST_PROCESSING);
    service.onSuccess(context);

    debug(service.getGraph());
    GraphUtils.dump(service.getGraph());

    // Note: imp-click-join1 appears with two nominal times because setupForLineageEviction
    // feeds it twice (in place of imp-click-join2); see the edge-count comment below.
    List<String> expectedFeeds =
        Arrays.asList(
            "impression-feed/2014-01-01T00:00Z",
            "clicks-feed/2014-01-01T00:00Z",
            "imp-click-join1/2014-01-01T00:00Z",
            "imp-click-join1/2014-01-02T00:00Z");
    List<String> secureFeeds =
        Arrays.asList("impression-feed/2014-01-01T00:00Z", "clicks-feed/2014-01-01T00:00Z");
    List<String> ownedAndSecureFeeds =
        Arrays.asList(
            "clicks-feed/2014-01-01T00:00Z",
            "imp-click-join1/2014-01-01T00:00Z",
            "imp-click-join1/2014-01-02T00:00Z");
    verifyLineageGraph(
        RelationshipType.FEED_INSTANCE.getName(), expectedFeeds, secureFeeds, ownedAndSecureFeeds);

    // Each evicted instance path must have an evicted-from edge to the cluster.
    String[] paths = EVICTED_INSTANCE_PATHS.split(EvictedInstanceSerDe.INSTANCEPATH_SEPARATOR);
    for (String feedInstanceDataPath : paths) {
        verifyLineageGraphForReplicationOrEviction(
            EVICTED_FEED,
            feedInstanceDataPath,
            context,
            RelationshipLabel.FEED_CLUSTER_EVICTED_EDGE);
    }

    // No new vertices added
    Assert.assertEquals(getVerticesCount(service.getGraph()), 23);

    // +1 = +2 for evicted-from edge from Feed Instance vertex to cluster.
    // -1 imp-click-join1 is added twice instead of imp-click-join2 so there is one less edge as
    // there is no
    // classified-as -> Secure edge.
    Assert.assertEquals(getEdgesCount(service.getGraph()), 72);
}
private void setup() throws Exception { cleanUp(); service.init(); // Add cluster clusterEntity = addClusterEntity(CLUSTER_ENTITY_NAME, COLO_NAME, "classification=production"); addFeedsAndProcess(clusterEntity); }
@Test(dependsOnMethods = "testOnAddFeedEntity") public void testOnAddProcessEntity() throws Exception { processEntity = addProcessEntity( PROCESS_ENTITY_NAME, clusterEntity, "classified-as=Critical", "testPipeline,dataReplication_Pipeline", GENERATE_WORKFLOW_NAME, WORKFLOW_VERSION); verifyEntityWasAddedToGraph(processEntity.getName(), RelationshipType.PROCESS_ENTITY); verifyProcessEntityEdges(); // +4 = 1 process + 1 tag + 2 pipeline Assert.assertEquals(getVerticesCount(service.getGraph()), 17); // +9 = user,tag,cluster, 2 inputs,2 outputs, 2 pipelines Assert.assertEquals(getEdgesCount(service.getGraph()), 31); }
// One-time fixture: authenticate, register required services, point the graph store
// at a unique per-run directory, enable history preservation (asserted on by the
// change tests), and start the mapping service.
@BeforeClass
public void setUp() throws Exception {
    CurrentUser.authenticate(FALCON_USER);
    configStore = ConfigurationStore.get();
    Services.get().register(new WorkflowJobEndNotificationService());

    StartupProperties.get()
        .setProperty(
            "falcon.graph.storage.directory",
            "target/graphdb-" + System.currentTimeMillis());
    StartupProperties.get().setProperty("falcon.graph.preserve.history", "true");

    service = new MetadataMappingService();
    service.init();

    Set<String> indexedVertexKeys = service.getVertexIndexedKeys();
    System.out.println("Got vertex property keys: " + indexedVertexKeys);
    Set<String> indexedEdgeKeys = service.getEdgeIndexedKeys();
    System.out.println("Got edge property keys: " + indexedEdgeKeys);
}
// Verifies that updating a PROCESS entity prunes lineage edges: the rebuilt process
// drops its tags, pipelines, both outputs, and the original cluster, keeping one input.
@Test(dependsOnMethods = "testOnFeedEntityChange")
public void testOnProcessEntityChange() throws Exception {
    Process oldProcess = processEntity;
    // Rebuild the process on the other cluster with no tags/pipelines (null, null),
    // a bumped workflow version, and only the first input feed.
    Process newProcess =
        EntityBuilderTestUtil.buildProcess(oldProcess.getName(), anotherCluster, null, null);
    EntityBuilderTestUtil.addProcessWorkflow(newProcess, GENERATE_WORKFLOW_NAME, "2.0.0");
    EntityBuilderTestUtil.addInput(newProcess, inputFeeds.get(0));

    try {
        configStore.initiateUpdate(newProcess);
        configStore.update(EntityType.PROCESS, newProcess);
    } finally {
        // Always release the update lock taken by initiateUpdate, even on failure.
        configStore.cleanupUpdateInit();
    }

    verifyUpdatedEdges(newProcess);
    Assert.assertEquals(getVerticesCount(service.getGraph()), 22); // +0, no net new
    Assert.assertEquals(
        getEdgesCount(service.getGraph()),
        29); // -6 = -2 outputs, -1 tag, -1 cluster, -2 pipelines
}
@Test public void testMapLineage() throws Exception { setup(); WorkflowExecutionContext context = WorkflowExecutionContext.create( getTestMessageArgs( EntityOperations.GENERATE, GENERATE_WORKFLOW_NAME, null, null, null, null), WorkflowExecutionContext.Type.POST_PROCESSING); service.onSuccess(context); debug(service.getGraph()); GraphUtils.dump(service.getGraph()); verifyLineageGraph(RelationshipType.FEED_INSTANCE.getName()); // +6 = 1 process, 2 inputs = 3 instances,2 outputs Assert.assertEquals(getVerticesCount(service.getGraph()), 23); // +40 = +26 for feed instances + 8 for process instance + 6 for second feed instance Assert.assertEquals(getEdgesCount(service.getGraph()), 71); }
private void setupForNoDateInFeedPath() throws Exception { cleanUp(); service.init(); // Add cluster clusterEntity = addClusterEntity(CLUSTER_ENTITY_NAME, COLO_NAME, "classification=production"); // Add input and output feeds Feed impressionsFeed = addFeedEntity( "impression-feed", clusterEntity, "classified-as=Secure", "analytics", Storage.TYPE.FILESYSTEM, "/falcon/impression-feed"); inputFeeds.add(impressionsFeed); Feed clicksFeed = addFeedEntity( "clicks-feed", clusterEntity, "classified-as=Secure,classified-as=Financial", "analytics", Storage.TYPE.FILESYSTEM, "/falcon/clicks-feed"); inputFeeds.add(clicksFeed); Feed join1Feed = addFeedEntity( "imp-click-join1", clusterEntity, "classified-as=Financial", "reporting,bi", Storage.TYPE.FILESYSTEM, "/falcon/imp-click-join1"); outputFeeds.add(join1Feed); Feed join2Feed = addFeedEntity( "imp-click-join2", clusterEntity, "classified-as=Secure,classified-as=Financial", "reporting,bi", Storage.TYPE.FILESYSTEM, "/falcon/imp-click-join2"); outputFeeds.add(join2Feed); processEntity = addProcessEntity( PROCESS_ENTITY_NAME, clusterEntity, "classified-as=Critical", "testPipeline,dataReplication_Pipeline", GENERATE_WORKFLOW_NAME, WORKFLOW_VERSION); }
private void setupForLineageReplication() throws Exception { cleanUp(); service.init(); addClusterAndFeedForReplication(); // Add output feed Feed join1Feed = addFeedEntity( "imp-click-join1", clusterEntity, "classified-as=Financial", "reporting,bi", Storage.TYPE.FILESYSTEM, "/falcon/imp-click-join1/${YEAR}${MONTH}${DAY}"); outputFeeds.add(join1Feed); processEntity = addProcessEntity( PROCESS_ENTITY_NAME, clusterEntity, "classified-as=Critical", "testPipeline,dataReplication_Pipeline", GENERATE_WORKFLOW_NAME, WORKFLOW_VERSION); // GENERATE WF should have run before this to create all instance related vertices WorkflowExecutionContext context = WorkflowExecutionContext.create( getTestMessageArgs( EntityOperations.GENERATE, GENERATE_WORKFLOW_NAME, "imp-click-join1", "jail://global:00/falcon/imp-click-join1/20140101", "jail://global:00/falcon/raw-click/primary/20140101", REPLICATED_FEED), WorkflowExecutionContext.Type.POST_PROCESSING); service.onSuccess(context); }
// Verifies that a REPLICATE run adds a replicated-to edge from the replicated feed
// instance to the target (bcp) cluster. The count comments below itemize every
// vertex/edge contributed by setupForLineageReplication plus this replication run.
@Test
public void testLineageForReplication() throws Exception {
    setupForLineageReplication();

    WorkflowExecutionContext context =
        WorkflowExecutionContext.create(
            getTestMessageArgs(
                EntityOperations.REPLICATE,
                REPLICATION_WORKFLOW_NAME,
                REPLICATED_FEED,
                "jail://global:00/falcon/raw-click/bcp/20140101",
                "jail://global:00/falcon/raw-click/primary/20140101",
                REPLICATED_FEED),
            WorkflowExecutionContext.Type.POST_PROCESSING);
    service.onSuccess(context);

    debug(service.getGraph());
    GraphUtils.dump(service.getGraph());

    verifyLineageGraphForReplicationOrEviction(
        REPLICATED_FEED,
        "jail://global:00/falcon/raw-click/bcp/20140101",
        context,
        RelationshipLabel.FEED_CLUSTER_REPLICATED_EDGE);

    // +6 [primary, bcp cluster] = cluster, colo, tag,
    // +4 [input feed] = feed, tag, group, user
    // +4 [output feed] = 1 feed + 1 tag + 2 groups
    // +4 [process] = 1 process + 1 tag + 2 pipeline
    // +3 = 1 process, 1 input, 1 output
    Assert.assertEquals(getVerticesCount(service.getGraph()), 21);

    // +4 [cluster] = cluster to colo and tag [primary and bcp],
    // +4 [input feed] = cluster, tag, group, user
    // +5 [output feed] = cluster + user + Group + 2Tags
    // +7 = user,tag,cluster, 1 input,1 output, 2 pipelines
    // +19 = +6 for output feed instances + 7 for process instance + 6 for input feed instance
    // +1 for replicated-to edge to target cluster for each output feed instance
    Assert.assertEquals(getEdgesCount(service.getGraph()), 40);
}
// Seeds the graph for eviction tests by running the standard setup followed by one
// completed GENERATE run over the instances that will later be evicted.
private void setupForLineageEviction() throws Exception {
    setup();

    // GENERATE WF should have run before this to create all instance related vertices
    // NOTE(review): the feed-names argument lists "imp-click-join1" twice rather than
    // imp-click-join1,imp-click-join2 — testLineageForRetention's edge-count comment
    // treats this duplication as intentional; confirm before changing.
    WorkflowExecutionContext context =
        WorkflowExecutionContext.create(
            getTestMessageArgs(
                EntityOperations.GENERATE,
                GENERATE_WORKFLOW_NAME,
                "imp-click-join1,imp-click-join1",
                EVICTED_INSTANCE_PATHS,
                null,
                null),
            WorkflowExecutionContext.Type.POST_PROCESSING);
    service.onSuccess(context);
}
@Test public void testLineageForReplicationForNonGeneratedInstances() throws Exception { cleanUp(); service.init(); addClusterAndFeedForReplication(); // Get the vertices before running replication WF long beforeVerticesCount = getVerticesCount(service.getGraph()); long beforeEdgesCount = getEdgesCount(service.getGraph()); WorkflowExecutionContext context = WorkflowExecutionContext.create( getTestMessageArgs( EntityOperations.REPLICATE, REPLICATION_WORKFLOW_NAME, REPLICATED_FEED, "jail://global:00/falcon/raw-click/bcp/20140101", "jail://global:00/falcon/raw-click/primary/20140101", REPLICATED_FEED), WorkflowExecutionContext.Type.POST_PROCESSING); service.onSuccess(context); debug(service.getGraph()); GraphUtils.dump(service.getGraph()); verifyFeedEntityEdges(REPLICATED_FEED, "Secure", "analytics"); verifyLineageGraphForReplicationOrEviction( REPLICATED_FEED, "jail://global:00/falcon/raw-click/bcp/20140101", context, RelationshipLabel.FEED_CLUSTER_REPLICATED_EDGE); // +1 for the new instance vertex added Assert.assertEquals(getVerticesCount(service.getGraph()), beforeVerticesCount + 1); // +6 = instance-of, stored-in, owned-by, classification, group, replicated-to Assert.assertEquals(getEdgesCount(service.getGraph()), beforeEdgesCount + 6); }
// Wipes the graph and configuration stores, then stops the service so a
// subsequent init() starts from an empty state.
private void cleanUp() throws Exception {
    cleanupGraphStore(service.getGraph());
    cleanupConfigurationStore(configStore);
    service.destroy();
}
// Dumps the full graph after every test method to aid debugging of the
// hard-coded vertex/edge count assertions.
@AfterMethod
public void printGraph() throws Exception {
    GraphUtils.dump(service.getGraph());
}
// The service must report its canonical registration name.
@Test
public void testGetName() throws Exception {
    Assert.assertEquals(service.getName(), MetadataMappingService.SERVICE_NAME);
}
// Convenience accessor: a fresh query over the current lineage graph.
private GraphQuery getQuery() {
    Graph lineageGraph = service.getGraph();
    return lineageGraph.query();
}