@Test public void testLineageForRetentionWithNoFeedsEvicted() throws Exception { cleanUp(); service.init(); long beforeVerticesCount = getVerticesCount(service.getGraph()); long beforeEdgesCount = getEdgesCount(service.getGraph()); WorkflowExecutionContext context = WorkflowExecutionContext.create( getTestMessageArgs( EntityOperations.DELETE, EVICTION_WORKFLOW_NAME, EVICTED_FEED, "IGNORE", "IGNORE", EVICTED_FEED), WorkflowExecutionContext.Type.POST_PROCESSING); service.onSuccess(context); debug(service.getGraph()); GraphUtils.dump(service.getGraph()); // No new vertices added Assert.assertEquals(getVerticesCount(service.getGraph()), beforeVerticesCount); // No new edges added Assert.assertEquals(getEdgesCount(service.getGraph()), beforeEdgesCount); }
@Test public void testLineageForNoDateInFeedPath() throws Exception { setupForNoDateInFeedPath(); WorkflowExecutionContext context = WorkflowExecutionContext.create( getTestMessageArgs( EntityOperations.GENERATE, GENERATE_WORKFLOW_NAME, null, OUTPUT_INSTANCE_PATHS_NO_DATE, INPUT_INSTANCE_PATHS_NO_DATE, null), WorkflowExecutionContext.Type.POST_PROCESSING); service.onSuccess(context); debug(service.getGraph()); GraphUtils.dump(service.getGraph()); // Verify if instance name has nominal time List<String> feedNamesOwnedByUser = getFeedsOwnedByAUser(RelationshipType.FEED_INSTANCE.getName()); List<String> expected = Arrays.asList( "impression-feed/2014-01-01T01:00Z", "clicks-feed/2014-01-01T01:00Z", "imp-click-join1/2014-01-01T01:00Z", "imp-click-join2/2014-01-01T01:00Z"); Assert.assertTrue(feedNamesOwnedByUser.containsAll(expected)); // +5 = 1 process, 2 inputs, 2 outputs Assert.assertEquals(getVerticesCount(service.getGraph()), 22); // +34 = +26 for feed instances + 8 for process instance Assert.assertEquals(getEdgesCount(service.getGraph()), 65); }
private void setupForLineageEviction() throws Exception { setup(); // GENERATE WF should have run before this to create all instance related vertices WorkflowExecutionContext context = WorkflowExecutionContext.create( getTestMessageArgs( EntityOperations.GENERATE, GENERATE_WORKFLOW_NAME, "imp-click-join1,imp-click-join1", EVICTED_INSTANCE_PATHS, null, null), WorkflowExecutionContext.Type.POST_PROCESSING); service.onSuccess(context); }
@Test public void testLineageForRetention() throws Exception { setupForLineageEviction(); WorkflowExecutionContext context = WorkflowExecutionContext.create( getTestMessageArgs( EntityOperations.DELETE, EVICTION_WORKFLOW_NAME, EVICTED_FEED, EVICTED_INSTANCE_PATHS, "IGNORE", EVICTED_FEED), WorkflowExecutionContext.Type.POST_PROCESSING); service.onSuccess(context); debug(service.getGraph()); GraphUtils.dump(service.getGraph()); List<String> expectedFeeds = Arrays.asList( "impression-feed/2014-01-01T00:00Z", "clicks-feed/2014-01-01T00:00Z", "imp-click-join1/2014-01-01T00:00Z", "imp-click-join1/2014-01-02T00:00Z"); List<String> secureFeeds = Arrays.asList("impression-feed/2014-01-01T00:00Z", "clicks-feed/2014-01-01T00:00Z"); List<String> ownedAndSecureFeeds = Arrays.asList( "clicks-feed/2014-01-01T00:00Z", "imp-click-join1/2014-01-01T00:00Z", "imp-click-join1/2014-01-02T00:00Z"); verifyLineageGraph( RelationshipType.FEED_INSTANCE.getName(), expectedFeeds, secureFeeds, ownedAndSecureFeeds); String[] paths = EVICTED_INSTANCE_PATHS.split(EvictedInstanceSerDe.INSTANCEPATH_SEPARATOR); for (String feedInstanceDataPath : paths) { verifyLineageGraphForReplicationOrEviction( EVICTED_FEED, feedInstanceDataPath, context, RelationshipLabel.FEED_CLUSTER_EVICTED_EDGE); } // No new vertices added Assert.assertEquals(getVerticesCount(service.getGraph()), 23); // +1 = +2 for evicted-from edge from Feed Instance vertex to cluster. // -1 imp-click-join1 is added twice instead of imp-click-join2 so there is one less edge as // there is no // classified-as -> Secure edge. Assert.assertEquals(getEdgesCount(service.getGraph()), 72); }
@Test public void testMapLineage() throws Exception { setup(); WorkflowExecutionContext context = WorkflowExecutionContext.create( getTestMessageArgs( EntityOperations.GENERATE, GENERATE_WORKFLOW_NAME, null, null, null, null), WorkflowExecutionContext.Type.POST_PROCESSING); service.onSuccess(context); debug(service.getGraph()); GraphUtils.dump(service.getGraph()); verifyLineageGraph(RelationshipType.FEED_INSTANCE.getName()); // +6 = 1 process, 2 inputs = 3 instances,2 outputs Assert.assertEquals(getVerticesCount(service.getGraph()), 23); // +40 = +26 for feed instances + 8 for process instance + 6 for second feed instance Assert.assertEquals(getEdgesCount(service.getGraph()), 71); }
@Test public void testLineageForReplication() throws Exception { setupForLineageReplication(); WorkflowExecutionContext context = WorkflowExecutionContext.create( getTestMessageArgs( EntityOperations.REPLICATE, REPLICATION_WORKFLOW_NAME, REPLICATED_FEED, "jail://global:00/falcon/raw-click/bcp/20140101", "jail://global:00/falcon/raw-click/primary/20140101", REPLICATED_FEED), WorkflowExecutionContext.Type.POST_PROCESSING); service.onSuccess(context); debug(service.getGraph()); GraphUtils.dump(service.getGraph()); verifyLineageGraphForReplicationOrEviction( REPLICATED_FEED, "jail://global:00/falcon/raw-click/bcp/20140101", context, RelationshipLabel.FEED_CLUSTER_REPLICATED_EDGE); // +6 [primary, bcp cluster] = cluster, colo, tag, // +4 [input feed] = feed, tag, group, user // +4 [output feed] = 1 feed + 1 tag + 2 groups // +4 [process] = 1 process + 1 tag + 2 pipeline // +3 = 1 process, 1 input, 1 output Assert.assertEquals(getVerticesCount(service.getGraph()), 21); // +4 [cluster] = cluster to colo and tag [primary and bcp], // +4 [input feed] = cluster, tag, group, user // +5 [output feed] = cluster + user + Group + 2Tags // +7 = user,tag,cluster, 1 input,1 output, 2 pipelines // +19 = +6 for output feed instances + 7 for process instance + 6 for input feed instance // +1 for replicated-to edge to target cluster for each output feed instance Assert.assertEquals(getEdgesCount(service.getGraph()), 40); }
private void setupForLineageReplication() throws Exception { cleanUp(); service.init(); addClusterAndFeedForReplication(); // Add output feed Feed join1Feed = addFeedEntity( "imp-click-join1", clusterEntity, "classified-as=Financial", "reporting,bi", Storage.TYPE.FILESYSTEM, "/falcon/imp-click-join1/${YEAR}${MONTH}${DAY}"); outputFeeds.add(join1Feed); processEntity = addProcessEntity( PROCESS_ENTITY_NAME, clusterEntity, "classified-as=Critical", "testPipeline,dataReplication_Pipeline", GENERATE_WORKFLOW_NAME, WORKFLOW_VERSION); // GENERATE WF should have run before this to create all instance related vertices WorkflowExecutionContext context = WorkflowExecutionContext.create( getTestMessageArgs( EntityOperations.GENERATE, GENERATE_WORKFLOW_NAME, "imp-click-join1", "jail://global:00/falcon/imp-click-join1/20140101", "jail://global:00/falcon/raw-click/primary/20140101", REPLICATED_FEED), WorkflowExecutionContext.Type.POST_PROCESSING); service.onSuccess(context); }
@Test public void testLineageForReplicationForNonGeneratedInstances() throws Exception { cleanUp(); service.init(); addClusterAndFeedForReplication(); // Get the vertices before running replication WF long beforeVerticesCount = getVerticesCount(service.getGraph()); long beforeEdgesCount = getEdgesCount(service.getGraph()); WorkflowExecutionContext context = WorkflowExecutionContext.create( getTestMessageArgs( EntityOperations.REPLICATE, REPLICATION_WORKFLOW_NAME, REPLICATED_FEED, "jail://global:00/falcon/raw-click/bcp/20140101", "jail://global:00/falcon/raw-click/primary/20140101", REPLICATED_FEED), WorkflowExecutionContext.Type.POST_PROCESSING); service.onSuccess(context); debug(service.getGraph()); GraphUtils.dump(service.getGraph()); verifyFeedEntityEdges(REPLICATED_FEED, "Secure", "analytics"); verifyLineageGraphForReplicationOrEviction( REPLICATED_FEED, "jail://global:00/falcon/raw-click/bcp/20140101", context, RelationshipLabel.FEED_CLUSTER_REPLICATED_EDGE); // +1 for the new instance vertex added Assert.assertEquals(getVerticesCount(service.getGraph()), beforeVerticesCount + 1); // +6 = instance-of, stored-in, owned-by, classification, group, replicated-to Assert.assertEquals(getEdgesCount(service.getGraph()), beforeEdgesCount + 6); }