@Override
protected void configure() {
  final Config config = ModuleUtils.getStaticConfig();
  try {
    final DataImportConfigurationBean bean =
        BeanTemplateUtils.from(
            PropertiesUtils.getSubConfig(config, DataImportConfigurationBean.PROPERTIES_ROOT)
                .orElse(null),
            DataImportConfigurationBean.class);
    this.bind(DataImportConfigurationBean.class).toInstance(bean);
    this.bind(AnalyticStateTriggerCheckFactory.class).in(Scopes.SINGLETON);
  } catch (Exception e) {
    throw new RuntimeException(
        ErrorUtils.get(
            ErrorUtils.INVALID_CONFIG_ERROR,
            DataImportConfigurationBean.class.toString(),
            config.getConfig(DataImportConfigurationBean.PROPERTIES_ROOT)),
        e);
  }
}
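/*
 * Usage sketch (illustrative only; the enclosing module class name
 * "DataImportManagerModule" is an assumption, not from the source): once a
 * Guice module with the configure() above is installed, the bound instances
 * can be retrieved directly from the injector.
 *
 *   final Injector injector = Guice.createInjector(new DataImportManagerModule());
 *   final DataImportConfigurationBean cfg =
 *       injector.getInstance(DataImportConfigurationBean.class);
 *   final AnalyticStateTriggerCheckFactory factory =
 *       injector.getInstance(AnalyticStateTriggerCheckFactory.class); // singleton-scoped
 */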
@Test
public void test_ageOut() throws IOException, InterruptedException, ExecutionException {

  // Call test_endToEnd_autoTime to create 5 time-based indexes
  // 2015-01-01 -> 2015-05-01
  // How far is "now" from 2015-03-02 (the suffix used below)?
  final Date d = TimeUtils.getDateFromSuffix("2015-03-02").success();
  final long total_time_ms = new Date().getTime() - d.getTime();
  final long total_days = total_time_ms / (1000L * 3600L * 24L);
  final String age_out = ErrorUtils.get("{0} days", total_days);

  final DataBucketBean bucket =
      BeanTemplateUtils.build(DataBucketBean.class)
          .with("full_name", "/test/end-end/auto-time")
          .with(
              DataBucketBean::data_schema,
              BeanTemplateUtils.build(DataSchemaBean.class)
                  .with(
                      DataSchemaBean::temporal_schema,
                      BeanTemplateUtils.build(TemporalSchemaBean.class)
                          .with(TemporalSchemaBean::exist_age_max, age_out)
                          .done()
                          .get())
                  .done()
                  .get())
          .done()
          .get();

  final String template_name = ElasticsearchIndexUtils.getBaseIndexName(bucket);

  test_endToEnd_autoTime(false);

  _index_service
      ._crud_factory
      .getClient()
      .admin()
      .indices()
      .prepareCreate(template_name + "_2015-03-01_1")
      .execute()
      .actionGet();

  final GetMappingsResponse gmr =
      _index_service
          ._crud_factory
          .getClient()
          .admin()
          .indices()
          .prepareGetMappings(template_name + "*")
          .execute()
          .actionGet();
  assertEquals(6, gmr.getMappings().keys().size());

  CompletableFuture<BasicMessageBean> cf =
      _index_service.getDataService().get().handleAgeOutRequest(bucket);
  BasicMessageBean res = cf.get();
  assertEquals(true, res.success());
  assertTrue("sensible message: " + res.message(), res.message().contains(" 2 "));
  assertTrue(
      "Message marked as loggable: " + res.details(),
      Optional.ofNullable(res.details()).filter(m -> m.containsKey("loggable")).isPresent());

  System.out.println("Return from to delete: " + res.message());

  Thread.sleep(5000L); // give the indexes time to delete

  final GetMappingsResponse gmr2 =
      _index_service
          ._crud_factory
          .getClient()
          .admin()
          .indices()
          .prepareGetMappings(template_name + "*")
          .execute()
          .actionGet();
  assertEquals(3, gmr2.getMappings().keys().size());

  // Check some edge cases:

  // 1) Run it again, returns success but not loggable:
  CompletableFuture<BasicMessageBean> cf2 =
      _index_service.getDataService().get().handleAgeOutRequest(bucket);
  BasicMessageBean res2 = cf2.get();
  assertEquals(true, res2.success());
  assertTrue("sensible message: " + res2.message(), res2.message().contains(" 0 "));
  assertTrue(
      "Message _not_ marked as loggable: " + res2.details(),
      !Optional.ofNullable(res2.details()).map(m -> m.get("loggable")).isPresent());

  // 2) No temporal settings
  final DataBucketBean bucket3 =
      BeanTemplateUtils.build(DataBucketBean.class)
          .with("full_name", "/test/handle/age/out/delete/not/temporal")
          .with(
              DataBucketBean::data_schema,
              BeanTemplateUtils.build(DataSchemaBean.class).done().get())
          .done()
          .get();
  CompletableFuture<BasicMessageBean> cf3 =
      _index_service.getDataService().get().handleAgeOutRequest(bucket3);
  BasicMessageBean res3 = cf3.get();
  // no temporal settings => returns success
  assertEquals(true, res3.success());

  // 3) Unparseable temporal settings (in theory won't validate, but we can test it here)
  final DataBucketBean bucket4 =
      BeanTemplateUtils.build(DataBucketBean.class)
          .with("full_name", "/test/handle/age/out/delete/temporal/malformed")
          .with(
              DataBucketBean::data_schema,
              BeanTemplateUtils.build(DataSchemaBean.class)
                  .with(
                      DataSchemaBean::temporal_schema,
                      BeanTemplateUtils.build(TemporalSchemaBean.class)
                          .with(TemporalSchemaBean::exist_age_max, "bananas")
                          .done()
                          .get())
                  .done()
                  .get())
          .done()
          .get();
  CompletableFuture<BasicMessageBean> cf4 =
      _index_service.getDataService().get().handleAgeOutRequest(bucket4);
  BasicMessageBean res4 = cf4.get();
  // malformed temporal settings => returns failure
  assertEquals(false, res4.success());
}
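/*
 * A minimal sketch of the age-out arithmetic used above, in plain java.time
 * (variable names here are illustrative, not from the test): the test computes
 * the whole-day distance from "now" back to the index suffix date, so that
 * indexes with older suffixes fall outside exist_age_max and get deleted.
 *
 *   final LocalDate suffix_date = LocalDate.parse("2015-03-02");
 *   final long total_days = ChronoUnit.DAYS.between(suffix_date, LocalDate.now());
 *   final String age_out = total_days + " days"; // same format passed to exist_age_max
 */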
@Test
public void test_getStreamingTopology()
    throws UnsupportedFileSystemException, InterruptedException, ExecutionException {

  final DataBucketBean bucket = createBucket("test_tech_id_stream");

  final String pathname1 =
      System.getProperty("user.dir") + "/misc_test_assets/simple-topology-example.jar";
  final Path path1 = FileContext.getLocalFSFileContext().makeQualified(new Path(pathname1));
  final String pathname2 =
      System.getProperty("user.dir") + "/misc_test_assets/simple-harvest-example2.jar";
  final Path path2 = FileContext.getLocalFSFileContext().makeQualified(new Path(pathname2));

  List<SharedLibraryBean> lib_elements = createSharedLibraryBeans(path1, path2);

  //////////////////////////////////////////////////////
  // 1) Check - if called with an error, then just passes that error along

  final BasicMessageBean error =
      SharedErrorUtils.buildErrorMessage("test_source", "test_message", "test_error");
  final Validation<BasicMessageBean, IEnrichmentStreamingTopology> test1 =
      DataBucketChangeActor.getStreamingTopology(
          bucket,
          new BucketActionMessage.BucketActionOfferMessage(bucket),
          "test_source2",
          Validation.fail(error));
  assertTrue("Got error back", test1.isFail());
  assertEquals("test_source", test1.fail().source());
  assertEquals("test_message", test1.fail().command());
  assertEquals("test_error", test1.fail().message());

  //////////////////////////////////////////////////////
  // 2) Check the error handling inside getStreamingTopology

  final ImmutableMap<String, Tuple2<SharedLibraryBean, String>> test2_input =
      ImmutableMap.<String, Tuple2<SharedLibraryBean, String>>builder()
          .put("test_tech_id_stream_2b", Tuples._2T(null, null))
          .build();

  final Validation<BasicMessageBean, IEnrichmentStreamingTopology> test2a =
      DataBucketChangeActor.getStreamingTopology(
          BeanTemplateUtils.clone(bucket)
              .with(
                  DataBucketBean::streaming_enrichment_topology,
                  BeanTemplateUtils.build(EnrichmentControlMetadataBean.class)
                      .with(
                          EnrichmentControlMetadataBean::library_names_or_ids,
                          Arrays.asList("test_tech_id_stream_2a"))
                      .done()
                      .get())
              .done(),
          new BucketActionMessage.BucketActionOfferMessage(bucket),
          "test_source2a",
          Validation.success(test2_input));
  assertTrue("Got error back", test2a.isFail());
  assertEquals("test_source2a", test2a.fail().source());
  assertEquals("BucketActionOfferMessage", test2a.fail().command());
  assertEquals(
      ErrorUtils.get(
          SharedErrorUtils.SHARED_LIBRARY_NAME_NOT_FOUND, bucket.full_name(), "(unknown)"),
      // (cloned bucket above)
      test2a.fail().message());

  final Validation<BasicMessageBean, IEnrichmentStreamingTopology> test2b =
      DataBucketChangeActor.getStreamingTopology(
          BeanTemplateUtils.clone(bucket)
              .with(
                  DataBucketBean::streaming_enrichment_topology,
                  BeanTemplateUtils.build(EnrichmentControlMetadataBean.class)
                      .with(
                          EnrichmentControlMetadataBean::library_names_or_ids,
                          Arrays.asList("test_tech_id_stream_2b"))
                      .done()
                      .get())
              .done(),
          new BucketActionMessage.BucketActionOfferMessage(bucket),
          "test_source2b",
          Validation.success(test2_input));
  assertTrue("Got error back", test2b.isFail());
  assertEquals("test_source2b", test2b.fail().source());
  assertEquals("BucketActionOfferMessage", test2b.fail().command());
  assertEquals(
      ErrorUtils.get(
          SharedErrorUtils.SHARED_LIBRARY_NAME_NOT_FOUND, bucket.full_name(), "(unknown)"),
      // (cloned bucket above)
      test2b.fail().message());

  //////////////////////////////////////////////////////
  // 3) OK now it will actually do something

  final String java_name =
      _service_context.getGlobalProperties().local_cached_jar_dir()
          + File.separator
          + "test_tech_id_stream.cache.jar";

  _logger.info(
      "Needed to delete locally cached file? " + java_name + ": " + new File(java_name).delete());

  // Requires that the file has already been cached:
  final Validation<BasicMessageBean, String> cached_file =
      JarCacheUtils.getCachedJar(
              _service_context.getGlobalProperties().local_cached_jar_dir(),
              lib_elements.get(0),
              _service_context.getStorageService(),
              "test3",
              "test3")
          .get();

  if (cached_file.isFail()) {
    fail("About to crash with: " + cached_file.fail().message());
  }

  assertTrue("The cached file exists: " + java_name, new File(java_name).exists());

  // OK the setup is done and validated, now actually test the underlying call:

  final ImmutableMap<String, Tuple2<SharedLibraryBean, String>> test3_input =
      ImmutableMap.<String, Tuple2<SharedLibraryBean, String>>builder()
          .put("test_tech_id_stream", Tuples._2T(lib_elements.get(0), cached_file.success()))
          .build();

  final Validation<BasicMessageBean, IEnrichmentStreamingTopology> test3 =
      DataBucketChangeActor.getStreamingTopology(
          BeanTemplateUtils.clone(bucket)
              .with(
                  DataBucketBean::streaming_enrichment_topology,
                  BeanTemplateUtils.build(EnrichmentControlMetadataBean.class)
                      .with(
                          EnrichmentControlMetadataBean::library_names_or_ids,
                          Arrays.asList("test_tech_id_stream"))
                      .done()
                      .get())
              .done(),
          new BucketActionMessage.BucketActionOfferMessage(bucket),
          "test_source3",
          Validation.success(test3_input));

  if (test3.isFail()) {
    fail("About to crash with: " + test3.fail().message());
  }
  assertTrue("getStreamingTopology call succeeded", test3.isSuccess());
  assertTrue("topology created: ", test3.success() != null);
  assertEquals(lib_elements.get(0).misc_entry_point(), test3.success().getClass().getName());

  // (Try again but with a failing version, due to class not found)

  final ImmutableMap<String, Tuple2<SharedLibraryBean, String>> test3a_input =
      ImmutableMap.<String, Tuple2<SharedLibraryBean, String>>builder()
          .put("test_tech_id_stream_fail", Tuples._2T(lib_elements.get(3), cached_file.success()))
          .build();

  final Validation<BasicMessageBean, IEnrichmentStreamingTopology> test3a =
      DataBucketChangeActor.getStreamingTopology(
          BeanTemplateUtils.clone(bucket)
              .with(
                  DataBucketBean::streaming_enrichment_topology,
                  BeanTemplateUtils.build(EnrichmentControlMetadataBean.class)
                      .with(
                          EnrichmentControlMetadataBean::library_names_or_ids,
                          Arrays.asList("test_tech_id_stream_fail"))
                      .done()
                      .get())
              .done(),
          new BucketActionMessage.BucketActionOfferMessage(bucket),
          "test_source3",
          Validation.success(test3a_input));

  assertTrue("Got error back", test3a.isFail());
  assertTrue(
      "Right error: " + test3a.fail().message(),
      test3a.fail().message().contains("com.ikanow.aleph2.test.example.ExampleStreamTopology"));

  // Now check with the "not just the harvest tech" flag set

  final String java_name2 =
      _service_context.getGlobalProperties().local_cached_jar_dir()
          + File.separator
          + "test_module_id.cache.jar";

  _logger.info(
      "Needed to delete locally cached file? "
          + java_name2
          + ": "
          + new File(java_name2).delete());

  // Requires that the file has already been cached:
  final Validation<BasicMessageBean, String> cached_file2 =
      JarCacheUtils.getCachedJar(
              _service_context.getGlobalProperties().local_cached_jar_dir(),
              lib_elements.get(1),
              _service_context.getStorageService(),
              "test3b",
              "test3b")
          .get();

  if (cached_file2.isFail()) {
    fail("About to crash with: " + cached_file2.fail().message());
  }

  assertTrue("The cached file exists: " + java_name2, new File(java_name2).exists());

  final ImmutableMap<String, Tuple2<SharedLibraryBean, String>> test3b_input =
      ImmutableMap.<String, Tuple2<SharedLibraryBean, String>>builder()
          .put("test_tech_id_stream", Tuples._2T(lib_elements.get(0), cached_file.success()))
          .put("test_module_id", Tuples._2T(lib_elements.get(1), cached_file.success()))
          .build();

  final EnrichmentControlMetadataBean enrichment_module =
      new EnrichmentControlMetadataBean(
          "test_tech_name",
          Collections.emptyList(),
          true,
          null,
          Arrays.asList("test_tech_id_stream", "test_module_id"),
          null,
          null);

  final Validation<BasicMessageBean, IEnrichmentStreamingTopology> test3b =
      DataBucketChangeActor.getStreamingTopology(
          BeanTemplateUtils.clone(bucket)
              .with(DataBucketBean::streaming_enrichment_topology, enrichment_module)
              .done(),
          new BucketActionMessage.BucketActionOfferMessage(bucket),
          "test_source3b",
          Validation.success(test3b_input));

  if (test3b.isFail()) {
    fail("About to crash with: " + test3b.fail().message());
  }
  assertTrue("getStreamingTopology call succeeded", test3b.isSuccess());
  assertTrue("topology created: ", test3b.success() != null);
  assertEquals(lib_elements.get(0).misc_entry_point(), test3b.success().getClass().getName());

  // TODO: add a test for disabled streaming but config given
  // (should default to passthrough topology and ignore the given topology)
}
public void start() {
  final String hostname = _local_actor_context.getInformationService().getHostname();
  final int MAX_ZK_ATTEMPTS = 6;

  if (!_core_distributed_services.waitForAkkaJoin(
      Optional.of(Duration.create(60L, TimeUnit.SECONDS)))) {
    _core_distributed_services.getAkkaSystem().terminate(); // (last ditch attempt to recover)
    throw new RuntimeException("Problem with CDS/Akka, try to terminate");
  }

  ////////////////////////////////////////////////////////////////
  // HARVEST

  if (_service_config.harvest_enabled()) {
    // Create a bucket change actor and register it with the local message bus
    final ActorRef handler =
        _local_actor_context
            .getActorSystem()
            .actorOf(
                Props.create(
                    com.ikanow.aleph2.data_import_manager.harvest.actors
                        .DataBucketHarvestChangeActor.class),
                hostname + ActorNameUtils.HARVEST_BUCKET_CHANGE_SUFFIX);
    _logger.info(
        ErrorUtils.get(
            "Attaching harvest DataBucketHarvestChangeActor {0} to bus {1}",
            handler, ActorUtils.BUCKET_ACTION_EVENT_BUS));
    _db_actor_context
        .getBucketActionMessageBus()
        .subscribe(handler, ActorUtils.BUCKET_ACTION_EVENT_BUS);

    _logger.info(
        ErrorUtils.get("Registering {1} with {0}", ActorUtils.BUCKET_ACTION_ZOOKEEPER, hostname));
    for (int i = 0; i <= MAX_ZK_ATTEMPTS; ++i) {
      try {
        _core_distributed_services
            .getCuratorFramework()
            .create()
            .creatingParentsIfNeeded()
            .withMode(CreateMode.EPHEMERAL)
            .forPath(ActorUtils.BUCKET_ACTION_ZOOKEEPER + "/" + hostname);
        break;
      } catch (Exception e) {
        _logger.warn(
            ErrorUtils.getLongForm(
                "Failed to register with Zookeeper: {0}, retrying={1}", e, i < MAX_ZK_ATTEMPTS));
        try {
          Thread.sleep(10000L);
        } catch (Exception __) {
        }
      }
    }
    Runtime.getRuntime()
        .addShutdownHook(
            new Thread(
                Lambdas.wrap_runnable_u(
                    () -> {
                      _logger.info(
                          "Shutting down IkanowV1SynchronizationModule subservice=v1_sync_service");
                      _core_distributed_services
                          .getCuratorFramework()
                          .delete()
                          .deletingChildrenIfNeeded()
                          .forPath(ActorUtils.BUCKET_ACTION_ZOOKEEPER + "/" + hostname);
                    })));
    _logger.info("Starting IkanowV1SynchronizationModule subservice=v1_sync_service");
  }

  ////////////////////////////////////////////////////////////////
  // ANALYTICS

  if (_service_config.analytics_enabled()) {
    // Create an analytics bucket change actor and register it with the local message bus
    final ActorRef analytics_handler =
        _local_actor_context
            .getActorSystem()
            .actorOf(
                Props.create(
                    com.ikanow.aleph2.data_import_manager.analytics.actors
                        .DataBucketAnalyticsChangeActor.class),
                hostname + ActorNameUtils.ANALYTICS_BUCKET_CHANGE_SUFFIX);
    _logger.info(
        ErrorUtils.get(
            "Attaching analytics DataBucketAnalyticsChangeActor {0} to bus {1}",
            analytics_handler, ActorUtils.BUCKET_ANALYTICS_EVENT_BUS));
    _db_actor_context
        .getAnalyticsMessageBus()
        .subscribe(analytics_handler, ActorUtils.BUCKET_ANALYTICS_EVENT_BUS);

    // Create trigger supervisor and worker
    final Optional<ActorRef> trigger_supervisor =
        _core_distributed_services.createSingletonActor(
            hostname + ActorNameUtils.ANALYTICS_TRIGGER_SUPERVISOR_SUFFIX,
            ImmutableSet.<String>builder()
                .add(DistributedServicesPropertyBean.ApplicationNames.DataImportManager.toString())
                .build(),
            Props.create(
                com.ikanow.aleph2.data_import_manager.analytics.actors
                    .AnalyticsTriggerSupervisorActor.class));
    if (!trigger_supervisor.isPresent()) {
      _logger.error(
          "Analytic trigger supervisor didn't start, unknown reason (wrong CDS application_name?)");
    }
    final ActorRef trigger_worker =
        _local_actor_context
            .getActorSystem()
            .actorOf(
                Props.create(
                    com.ikanow.aleph2.data_import_manager.analytics.actors
                        .AnalyticsTriggerWorkerActor.class),
                hostname + ActorNameUtils.ANALYTICS_TRIGGER_WORKER_SUFFIX);
    _logger.info(
        ErrorUtils.get(
            "Attaching analytics AnalyticsTriggerWorkerActor {0} to bus {1}",
            trigger_worker, ActorUtils.ANALYTICS_TRIGGER_BUS));
    _db_actor_context
        .getAnalyticsTriggerBus()
        .subscribe(trigger_worker, ActorUtils.ANALYTICS_TRIGGER_BUS);

    _logger.info(
        ErrorUtils.get(
            "Registering {1} with {0}", ActorUtils.BUCKET_ANALYTICS_ZOOKEEPER, hostname));
    for (int i = 0; i <= MAX_ZK_ATTEMPTS; ++i) {
      try {
        _core_distributed_services
            .getCuratorFramework()
            .create()
            .creatingParentsIfNeeded()
            .withMode(CreateMode.EPHEMERAL)
            .forPath(ActorUtils.BUCKET_ANALYTICS_ZOOKEEPER + "/" + hostname);
        break;
      } catch (Exception e) {
        _logger.warn(
            ErrorUtils.getLongForm(
                "Failed to register with Zookeeper: {0}, retrying={1}", e, i < MAX_ZK_ATTEMPTS));
        try {
          Thread.sleep(10000L);
        } catch (Exception __) {
        }
      }
    }
    Runtime.getRuntime()
        .addShutdownHook(
            new Thread(
                Lambdas.wrap_runnable_u(
                    () -> {
                      _logger.info(
                          "Shutting down IkanowV1SynchronizationModule subservice=analytics");
                      _core_distributed_services
                          .getCuratorFramework()
                          .delete()
                          .deletingChildrenIfNeeded()
                          .forPath(ActorUtils.BUCKET_ANALYTICS_ZOOKEEPER + "/" + hostname);
                    })));
    _logger.info("Starting IkanowV1SynchronizationModule subservice=analytics");
  }

  ////////////////////////////////////////////////////////////////
  // GOVERNANCE

  if (_service_config.governance_enabled()) {
    _core_distributed_services.createSingletonActor(
        hostname + ".governance.actors.DataAgeOutSupervisor",
        ImmutableSet.<String>builder()
            .add(DistributedServicesPropertyBean.ApplicationNames.DataImportManager.toString())
            .build(),
        Props.create(DataAgeOutSupervisor.class));
    _logger.info("Starting IkanowV1SynchronizationModule subservice=governance");
  }

  // Block forever: the service runs until the process is killed
  for (; ; ) {
    try {
      Thread.sleep(10000);
    } catch (Exception e) {
    }
  }
}
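/*
 * A generic sketch of the bounded retry pattern used for both Zookeeper
 * registrations above (the helper name and signature are illustrative, not
 * part of the source): try up to max_attempts + 1 times, sleeping between
 * failures, and give up silently after the last attempt, as start() does.
 *
 *   static void retryWithSleep(int max_attempts, long sleep_ms, Callable<Void> attempt) {
 *     for (int i = 0; i <= max_attempts; ++i) {
 *       try {
 *         attempt.call();
 *         return; // succeeded
 *       } catch (Exception e) {
 *         try {
 *           Thread.sleep(sleep_ms);
 *         } catch (InterruptedException __) {
 *           Thread.currentThread().interrupt();
 *           return;
 *         }
 *       }
 *     }
 *   }
 */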
/* (non-Javadoc)
 * @see akka.actor.AbstractActor#receive()
 */
@Override
public PartialFunction<Object, BoxedUnit> receive() {
  return ReceiveBuilder.match(
          BucketActionMessage.class,
          m ->
              !m.handling_clients().isEmpty()
                  && !m.handling_clients()
                      .contains(_context.getInformationService().getHostname()),
          __ -> {}) // (do nothing if it's not for me)
      .match(
          BucketActionOfferMessage.class,
          m -> {
            _logger.info(
                ErrorUtils.get(
                    "Actor {0} received message {1} from {2} bucket {3}",
                    this.self(),
                    m.getClass().getSimpleName(),
                    this.sender(),
                    m.bucket().full_name()));
            final ActorRef closing_sender = this.sender();
            final ActorRef closing_self = this.self();
            final String hostname = _context.getInformationService().getHostname();
            // (this isn't async so doesn't require any futures)
            final boolean accept_or_ignore =
                new File(_globals.local_yarn_config_dir() + File.separator + "storm.yaml")
                    .exists();
            final BucketActionReplyMessage reply =
                accept_or_ignore
                    ? new BucketActionReplyMessage.BucketActionWillAcceptMessage(hostname)
                    : new BucketActionReplyMessage.BucketActionIgnoredMessage(hostname);
            closing_sender.tell(reply, closing_self);
          })
      .match(
          BucketActionMessage.class,
          m -> {
            _logger.info(
                ErrorUtils.get(
                    "Actor {0} received message {1} from {2} bucket={3}",
                    this.self(),
                    m.getClass().getSimpleName(),
                    this.sender(),
                    m.bucket().full_name()));
            final ActorRef closing_sender = this.sender();
            final ActorRef closing_self = this.self();
            final String hostname = _context.getInformationService().getHostname();
            // (cacheJars can't throw checked or unchecked in this thread, only from within
            // exceptions)
            cacheJars(
                    m.bucket(), _management_db, _globals, _fs, _context.getServiceContext(),
                    hostname, m)
                .thenComposeAsync(
                    err_or_map -> {
                      final StreamingEnrichmentContext e_context =
                          _context.getNewStreamingEnrichmentContext();
                      final Validation<BasicMessageBean, IEnrichmentStreamingTopology>
                          err_or_tech_module =
                              getStreamingTopology(m.bucket(), m, hostname, err_or_map);
                      final CompletableFuture<BucketActionReplyMessage> ret =
                          talkToStream(
                              _storm_controller,
                              m.bucket(),
                              m,
                              err_or_tech_module,
                              err_or_map,
                              hostname,
                              e_context,
                              _globals.local_yarn_config_dir(),
                              _globals.local_cached_jar_dir());
                      return ret;
                    })
                .thenAccept(
                    reply -> {
                      // (reply can contain an error or successful reply, they're the
                      // same bean type)
                      // Some information logging:
                      Patterns.match(reply)
                          .andAct()
                          .when(
                              BucketActionHandlerMessage.class,
                              msg ->
                                  _logger.info(
                                      ErrorUtils.get(
                                          "Standard reply to message={0}, bucket={1}, success={2}",
                                          m.getClass().getSimpleName(),
                                          m.bucket().full_name(),
                                          msg.reply().success())))
                          .when(
                              BucketActionReplyMessage.BucketActionWillAcceptMessage.class,
                              msg ->
                                  _logger.info(
                                      ErrorUtils.get(
                                          "Standard reply to message={0}, bucket={1}",
                                          m.getClass().getSimpleName(),
                                          m.bucket().full_name())))
                          .otherwise(
                              msg ->
                                  _logger.info(
                                      ErrorUtils.get(
                                          "Unusual reply to message={0}, type={2}, bucket={1}",
                                          m.getClass().getSimpleName(),
                                          m.bucket().full_name(),
                                          msg.getClass().getSimpleName())));
                      closing_sender.tell(reply, closing_self);
                    })
                .exceptionally(
                    e -> {
                      // another bit of error handling that shouldn't ever be called but is a
                      // useful backstop
                      // Some information logging:
                      _logger.warn(
                          "Unexpected error replying to '{0}': error = {1}, bucket={2}",
                          BeanTemplateUtils.toJson(m).toString(),
                          ErrorUtils.getLongForm("{0}", e),
                          m.bucket().full_name());
                      final BasicMessageBean error_bean =
                          SharedErrorUtils.buildErrorMessage(
                              hostname,
                              m,
                              ErrorUtils.getLongForm(
                                  StreamErrorUtils.STREAM_UNKNOWN_ERROR,
                                  e,
                                  m.bucket().full_name()));
                      closing_sender.tell(
                          new BucketActionHandlerMessage(hostname, error_bean), closing_self);
                      return null;
                    });
          })
      .build();
}
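// Note on match ordering: akka.japi.pf.ReceiveBuilder tries matchers in declaration
// order and stops at the first hit, so the guarded BucketActionMessage matcher above
// silently drops messages addressed to other hosts, the BucketActionOfferMessage
// matcher answers accept/ignore probes, and only then does the general
// BucketActionMessage matcher run the full cache-and-deploy path.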