@Test public void test_getStreamingTopology() throws UnsupportedFileSystemException, InterruptedException, ExecutionException { final DataBucketBean bucket = createBucket("test_tech_id_stream"); final String pathname1 = System.getProperty("user.dir") + "/misc_test_assets/simple-topology-example.jar"; final Path path1 = FileContext.getLocalFSFileContext().makeQualified(new Path(pathname1)); final String pathname2 = System.getProperty("user.dir") + "/misc_test_assets/simple-harvest-example2.jar"; final Path path2 = FileContext.getLocalFSFileContext().makeQualified(new Path(pathname2)); List<SharedLibraryBean> lib_elements = createSharedLibraryBeans(path1, path2); ////////////////////////////////////////////////////// // 1) Check - if called with an error, then just passes that error along final BasicMessageBean error = SharedErrorUtils.buildErrorMessage("test_source", "test_message", "test_error"); final Validation<BasicMessageBean, IEnrichmentStreamingTopology> test1 = DataBucketChangeActor.getStreamingTopology( bucket, new BucketActionMessage.BucketActionOfferMessage(bucket), "test_source2", Validation.fail(error)); assertTrue("Got error back", test1.isFail()); assertEquals("test_source", test1.fail().source()); assertEquals("test_message", test1.fail().command()); assertEquals("test_error", test1.fail().message()); ////////////////////////////////////////////////////// // 2) Check the error handling inside getStreamingTopology final ImmutableMap<String, Tuple2<SharedLibraryBean, String>> test2_input = ImmutableMap.<String, Tuple2<SharedLibraryBean, String>>builder() .put("test_tech_id_stream_2b", Tuples._2T(null, null)) .build(); final Validation<BasicMessageBean, IEnrichmentStreamingTopology> test2a = DataBucketChangeActor.getStreamingTopology( BeanTemplateUtils.clone(bucket) .with( DataBucketBean::streaming_enrichment_topology, BeanTemplateUtils.build(EnrichmentControlMetadataBean.class) .with( EnrichmentControlMetadataBean::library_names_or_ids, Arrays.asList("test_tech_id_stream_2a")) .done() .get()) .done(), new BucketActionMessage.BucketActionOfferMessage(bucket), "test_source2a", Validation.success(test2_input)); assertTrue("Got error back", test2a.isFail()); assertEquals("test_source2a", test2a.fail().source()); assertEquals("BucketActionOfferMessage", test2a.fail().command()); assertEquals( ErrorUtils.get( SharedErrorUtils.SHARED_LIBRARY_NAME_NOT_FOUND, bucket.full_name(), "(unknown)"), // (cloned bucket above) test2a.fail().message()); final Validation<BasicMessageBean, IEnrichmentStreamingTopology> test2b = DataBucketChangeActor.getStreamingTopology( BeanTemplateUtils.clone(bucket) .with( DataBucketBean::streaming_enrichment_topology, BeanTemplateUtils.build(EnrichmentControlMetadataBean.class) .with( EnrichmentControlMetadataBean::library_names_or_ids, Arrays.asList("test_tech_id_stream_2b")) .done() .get()) .done(), new BucketActionMessage.BucketActionOfferMessage(bucket), "test_source2b", Validation.success(test2_input)); assertTrue("Got error back", test2b.isFail()); assertEquals("test_source2b", test2b.fail().source()); assertEquals("BucketActionOfferMessage", test2b.fail().command()); assertEquals( ErrorUtils.get( SharedErrorUtils.SHARED_LIBRARY_NAME_NOT_FOUND, bucket.full_name(), "(unknown)"), // (cloned bucket above) test2a.fail().message()); ////////////////////////////////////////////////////// // 3) OK now it will actually do something final String java_name = _service_context.getGlobalProperties().local_cached_jar_dir() + File.separator + "test_tech_id_stream.cache.jar"; _logger.info( "Needed to delete locally cached file? " + java_name + ": " + new File(java_name).delete()); // Requires that the file has already been cached: final Validation<BasicMessageBean, String> cached_file = JarCacheUtils.getCachedJar( _service_context.getGlobalProperties().local_cached_jar_dir(), lib_elements.get(0), _service_context.getStorageService(), "test3", "test3") .get(); if (cached_file.isFail()) { fail("About to crash with: " + cached_file.fail().message()); } assertTrue("The cached file exists: " + java_name, new File(java_name).exists()); // OK the setup is done and validated now actually test the underlying call: final ImmutableMap<String, Tuple2<SharedLibraryBean, String>> test3_input = ImmutableMap.<String, Tuple2<SharedLibraryBean, String>>builder() .put("test_tech_id_stream", Tuples._2T(lib_elements.get(0), cached_file.success())) .build(); final Validation<BasicMessageBean, IEnrichmentStreamingTopology> test3 = DataBucketChangeActor.getStreamingTopology( BeanTemplateUtils.clone(bucket) .with( DataBucketBean::streaming_enrichment_topology, BeanTemplateUtils.build(EnrichmentControlMetadataBean.class) .with( EnrichmentControlMetadataBean::library_names_or_ids, Arrays.asList("test_tech_id_stream")) .done() .get()) .done(), new BucketActionMessage.BucketActionOfferMessage(bucket), "test_source3", Validation.success(test3_input)); if (test3.isFail()) { fail("About to crash with: " + test3.fail().message()); } assertTrue("getStreamingTopology call succeeded", test3.isSuccess()); assertTrue("topology created: ", test3.success() != null); assertEquals(lib_elements.get(0).misc_entry_point(), test3.success().getClass().getName()); // (Try again but with failing version, due to class not found) final ImmutableMap<String, Tuple2<SharedLibraryBean, String>> test3a_input = ImmutableMap.<String, Tuple2<SharedLibraryBean, String>>builder() .put("test_tech_id_stream_fail", Tuples._2T(lib_elements.get(3), cached_file.success())) .build(); final Validation<BasicMessageBean, IEnrichmentStreamingTopology> test3a = DataBucketChangeActor.getStreamingTopology( BeanTemplateUtils.clone(bucket) .with( DataBucketBean::streaming_enrichment_topology, BeanTemplateUtils.build(EnrichmentControlMetadataBean.class) .with( EnrichmentControlMetadataBean::library_names_or_ids, Arrays.asList("test_tech_id_stream_fail")) .done() .get()) .done(), new BucketActionMessage.BucketActionOfferMessage(bucket), "test_source3", Validation.success(test3a_input)); assertTrue("Got error back", test3a.isFail()); assertTrue( "Right error: " + test3a.fail().message(), test3a.fail().message().contains("com.ikanow.aleph2.test.example.ExampleStreamTopology")); // Now check with the "not just the harvest tech" flag set final String java_name2 = _service_context.getGlobalProperties().local_cached_jar_dir() + File.separator + "test_module_id.cache.jar"; _logger.info( "Needed to delete locally cached file? " + java_name2 + ": " + new File(java_name2).delete()); // Requires that the file has already been cached: final Validation<BasicMessageBean, String> cached_file2 = JarCacheUtils.getCachedJar( _service_context.getGlobalProperties().local_cached_jar_dir(), lib_elements.get(1), _service_context.getStorageService(), "test3b", "test3b") .get(); if (cached_file2.isFail()) { fail("About to crash with: " + cached_file2.fail().message()); } assertTrue("The cached file exists: " + java_name, new File(java_name2).exists()); final ImmutableMap<String, Tuple2<SharedLibraryBean, String>> test3b_input = ImmutableMap.<String, Tuple2<SharedLibraryBean, String>>builder() .put("test_tech_id_stream", Tuples._2T(lib_elements.get(0), cached_file.success())) .put("test_module_id", Tuples._2T(lib_elements.get(1), cached_file.success())) .build(); final EnrichmentControlMetadataBean enrichment_module = new EnrichmentControlMetadataBean( "test_tech_name", Collections.emptyList(), true, null, Arrays.asList("test_tech_id_stream", "test_module_id"), null, null); final Validation<BasicMessageBean, IEnrichmentStreamingTopology> test3b = DataBucketChangeActor.getStreamingTopology( BeanTemplateUtils.clone(bucket) .with(DataBucketBean::streaming_enrichment_topology, enrichment_module) .done(), new BucketActionMessage.BucketActionOfferMessage(bucket), "test_source3b", Validation.success(test3b_input)); if (test3b.isFail()) { fail("About to crash with: " + test3b.fail().message()); } assertTrue("getStreamingTopology call succeeded", test3b.isSuccess()); assertTrue("topology created: ", test3b.success() != null); assertEquals(lib_elements.get(0).misc_entry_point(), test3b.success().getClass().getName()); // TODO add a test for disabled streaming but config given (should default to passthrough top // and // ignore given topology }
/** * Talks to the topology module - this top level function just sets the classloader up and creates * the module, then calls talkToStream to do the talking * * @param bucket * @param libs * @param harvest_tech_only * @param m * @param source * @return */ protected static Validation<BasicMessageBean, IEnrichmentStreamingTopology> getStreamingTopology( final DataBucketBean bucket, final BucketActionMessage m, final String source, final Validation<BasicMessageBean, Map<String, Tuple2<SharedLibraryBean, String>>> err_or_libs // "pipeline element" ) { try { return err_or_libs.<Validation<BasicMessageBean, IEnrichmentStreamingTopology>>validation( // Error: error -> Validation.fail(error), // Normal libs -> { // Easy case, if streaming is turned off, just pass data through this layer if (!Optional.ofNullable(bucket.streaming_enrichment_topology().enabled()).orElse(true)) return Validation.success(new PassthroughTopology()); // Easy case, if libs is empty then use the default streaming topology if (libs.isEmpty()) { return Validation.success(new PassthroughTopology()); } final Tuple2<SharedLibraryBean, String> libbean_path = libs.values() .stream() .filter( t2 -> (null != t2._1()) && (null != Optional.ofNullable( t2._1().streaming_enrichment_entry_point()) .orElse(t2._1().misc_entry_point()))) .findFirst() .orElse(null); if ((null == libbean_path) || (null == libbean_path._2())) { // Nice easy error case, probably can't ever happen return Validation.fail( SharedErrorUtils.buildErrorMessage( source, m, SharedErrorUtils.SHARED_LIBRARY_NAME_NOT_FOUND, bucket.full_name(), "(unknown)")); } final Validation<BasicMessageBean, IEnrichmentStreamingTopology> ret_val = ClassloaderUtils.getFromCustomClasspath( IEnrichmentStreamingTopology.class, Optional.ofNullable(libbean_path._1().streaming_enrichment_entry_point()) .orElse(libbean_path._1().misc_entry_point()), Optional.of(libbean_path._2()), libs.values().stream().map(lp -> lp._2()).collect(Collectors.toList()), source, m); return ret_val; }); } catch (Throwable t) { return Validation.fail( SharedErrorUtils.buildErrorMessage( source, m, ErrorUtils.getLongForm( SharedErrorUtils.ERROR_LOADING_CLASS, t, bucket.harvest_technology_name_or_id()))); } }
/** * Given a bucket ...returns either - a future containing the first error encountered, _or_ a map * (both name and id as keys) of path names (and guarantee that the file has been cached when the * future completes) * * @param bucket * @param management_db * @param globals * @param fs * @param handler_for_errors * @param msg_for_errors * @return a future containing the first error encountered, _or_ a map (both name and id as keys) * of path names */ @SuppressWarnings("unchecked") protected static <M> CompletableFuture< Validation<BasicMessageBean, Map<String, Tuple2<SharedLibraryBean, String>>>> cacheJars( final DataBucketBean bucket, final IManagementDbService management_db, final GlobalPropertiesBean globals, final IStorageService fs, final IServiceContext context, final String handler_for_errors, final M msg_for_errors) { try { MethodNamingHelper<SharedLibraryBean> helper = BeanTemplateUtils.from(SharedLibraryBean.class); final Optional<QueryComponent<SharedLibraryBean>> spec = getQuery(bucket); if (!spec.isPresent()) { return CompletableFuture.completedFuture( Validation.<BasicMessageBean, Map<String, Tuple2<SharedLibraryBean, String>>>success( Collections.emptyMap())); } return management_db .getSharedLibraryStore() .secured(context, new AuthorizationBean(bucket.owner_id())) .getObjectsBySpec(spec.get()) .thenComposeAsync( cursor -> { // This is a map of futures from the cache call - either an error or the path name // note we use a tuple of (id, name) as the key and then flatten out later final Map< Tuple2<String, String>, Tuple2< SharedLibraryBean, CompletableFuture<Validation<BasicMessageBean, String>>>> map_of_futures = StreamSupport.stream(cursor.spliterator(), true) .filter( lib -> { return true; }) .collect( Collectors .<SharedLibraryBean, Tuple2<String, String>, Tuple2< SharedLibraryBean, CompletableFuture< Validation<BasicMessageBean, String>>>> toMap( // want to keep both the name and id versions - will // flatten out below lib -> Tuples._2T(lib.path_name(), lib._id()), // (key) // spin off a future in which the file is being copied - // save the shared library bean also lib -> Tuples._2T( lib, // (value) JarCacheUtils.getCachedJar( globals.local_cached_jar_dir(), lib, fs, handler_for_errors, msg_for_errors)))); // denest from map of futures to future of maps, also handle any errors here: // (some sort of "lift" function would be useful here - this are a somewhat // inelegant few steps) final CompletableFuture<Validation<BasicMessageBean, String>>[] futures = (CompletableFuture<Validation<BasicMessageBean, String>>[]) map_of_futures .values() .stream() .map(t2 -> t2._2()) .collect(Collectors.toList()) .toArray(new CompletableFuture[0]); // (have to embed this thenApply instead of bringing it outside as part of the // toCompose chain, because otherwise we'd lose map_of_futures scope) return CompletableFuture.allOf(futures) .<Validation<BasicMessageBean, Map<String, Tuple2<SharedLibraryBean, String>>>> thenApply( f -> { try { final Map<String, Tuple2<SharedLibraryBean, String>> almost_there = map_of_futures .entrySet() .stream() .flatMap( kv -> { final Validation<BasicMessageBean, String> ret = kv.getValue() ._2() .join(); // (must have already returned if // here return ret .<Stream< Tuple2< String, Tuple2<SharedLibraryBean, String>>>> validation( // Error: err -> { throw new RuntimeException( err.message()); } // (not ideal, but will do) , // Normal: s -> { return Arrays.asList( Tuples._2T( kv.getKey()._1(), Tuples._2T( kv.getValue()._1(), s)), // result object // with path_name Tuples._2T( kv.getKey()._2(), Tuples._2T( kv.getValue()._1(), s))) // result object // with id .stream(); }); }) .collect( Collectors .<Tuple2<String, Tuple2<SharedLibraryBean, String>>, String, Tuple2<SharedLibraryBean, String>> toMap( idname_path -> idname_path._1(), // (key) idname_path -> idname_path._2() // (value) )); return Validation .<BasicMessageBean, Map<String, Tuple2<SharedLibraryBean, String>>> success(almost_there); } catch ( Exception e) { // handle the exception thrown above containing the // message bean from whatever the original error was! return Validation .<BasicMessageBean, Map<String, Tuple2<SharedLibraryBean, String>>> fail( SharedErrorUtils.buildErrorMessage( handler_for_errors.toString(), msg_for_errors, e.getMessage())); } }); }); } catch (Throwable e) { // (can only occur if the DB call errors) return CompletableFuture.completedFuture( Validation.fail( SharedErrorUtils.buildErrorMessage( handler_for_errors.toString(), msg_for_errors, ErrorUtils.getLongForm( SharedErrorUtils.ERROR_CACHING_SHARED_LIBS, e, bucket.full_name())))); } }
/* (non-Javadoc) * @see akka.actor.AbstractActor#receive() */ @Override public PartialFunction<Object, BoxedUnit> receive() { return ReceiveBuilder.match( BucketActionMessage.class, m -> !m.handling_clients().isEmpty() && !m.handling_clients() .contains(_context.getInformationService().getHostname()), __ -> {}) // (do nothing if it's not for me) .match( BucketActionOfferMessage.class, m -> { _logger.info( ErrorUtils.get( "Actor {0} received message {1} from {2} bucket {3}", this.self(), m.getClass().getSimpleName(), this.sender(), m.bucket().full_name())); final ActorRef closing_sender = this.sender(); final ActorRef closing_self = this.self(); final String hostname = _context.getInformationService().getHostname(); // (this isn't async so doesn't require any futures) final boolean accept_or_ignore = new File(_globals.local_yarn_config_dir() + File.separator + "storm.yaml") .exists(); final BucketActionReplyMessage reply = accept_or_ignore ? new BucketActionReplyMessage.BucketActionWillAcceptMessage(hostname) : new BucketActionReplyMessage.BucketActionIgnoredMessage(hostname); closing_sender.tell(reply, closing_self); }) .match( BucketActionMessage.class, m -> { _logger.info( ErrorUtils.get( "Actor {0} received message {1} from {2} bucket={3}", this.self(), m.getClass().getSimpleName(), this.sender(), m.bucket().full_name())); final ActorRef closing_sender = this.sender(); final ActorRef closing_self = this.self(); final String hostname = _context.getInformationService().getHostname(); // (cacheJars can't throw checked or unchecked in this thread, only from within // exceptions) cacheJars( m.bucket(), _management_db, _globals, _fs, _context.getServiceContext(), hostname, m) .thenComposeAsync( err_or_map -> { final StreamingEnrichmentContext e_context = _context.getNewStreamingEnrichmentContext(); final Validation<BasicMessageBean, IEnrichmentStreamingTopology> err_or_tech_module = getStreamingTopology(m.bucket(), m, hostname, err_or_map); final CompletableFuture<BucketActionReplyMessage> ret = talkToStream( _storm_controller, m.bucket(), m, err_or_tech_module, err_or_map, hostname, e_context, _globals.local_yarn_config_dir(), _globals.local_cached_jar_dir()); return ret; }) .thenAccept( reply -> { // (reply can contain an error or successful reply, they're the // same bean type) // Some information logging: Patterns.match(reply) .andAct() .when( BucketActionHandlerMessage.class, msg -> _logger.info( ErrorUtils.get( "Standard reply to message={0}, bucket={1}, success={2}", m.getClass().getSimpleName(), m.bucket().full_name(), msg.reply().success()))) .when( BucketActionReplyMessage.BucketActionWillAcceptMessage.class, msg -> _logger.info( ErrorUtils.get( "Standard reply to message={0}, bucket={1}", m.getClass().getSimpleName(), m.bucket().full_name()))) .otherwise( msg -> _logger.info( ErrorUtils.get( "Unusual reply to message={0}, type={2}, bucket={1}", m.getClass().getSimpleName(), m.bucket().full_name(), msg.getClass().getSimpleName()))); closing_sender.tell(reply, closing_self); }) .exceptionally( e -> { // another bit of error handling that shouldn't ever be called but is a // useful backstop // Some information logging: _logger.warn( "Unexpected error replying to '{0}': error = {1}, bucket={2}", BeanTemplateUtils.toJson(m).toString(), ErrorUtils.getLongForm("{0}", e), m.bucket().full_name()); final BasicMessageBean error_bean = SharedErrorUtils.buildErrorMessage( hostname, m, ErrorUtils.getLongForm( StreamErrorUtils.STREAM_UNKNOWN_ERROR, e, m.bucket().full_name())); closing_sender.tell( new BucketActionHandlerMessage(hostname, error_bean), closing_self); return null; }); }) .build(); }