/**
 * Initializes the storm instance
 *
 * @return a real storm controller if possible, otherwise a no-op controller
 */
@SuppressWarnings("unchecked")
public static IStormController getController() {
  final GlobalPropertiesBean globals =
      Lambdas.get(
          () -> {
            try {
              return BeanTemplateUtils.from(
                  PropertiesUtils.getSubConfig(
                          ModuleUtils.getStaticConfig(), GlobalPropertiesBean.PROPERTIES_ROOT)
                      .orElse(null),
                  GlobalPropertiesBean.class);
            } catch (IOException e) {
              _logger.error(
                  ErrorUtils.getLongForm(
                      "Couldn't set globals property bean in storm harvest tech onInit: {0}", e));
              return null;
            }
          });

  if (null == globals) {
    return new NoStormController();
  }

  _logger.info(
      "Loading storm config from: "
          + globals.local_yarn_config_dir()
          + File.separator
          + "storm.yaml");

  final Yaml yaml = new Yaml();
  Map<String, Object> object;
  // (try-with-resources so the config stream is always closed)
  try (InputStream input =
      new FileInputStream(
          new File(globals.local_yarn_config_dir() + File.separator + "storm.yaml"))) {
    object = (Map<String, Object>) yaml.load(input);
  } catch (IOException e) {
    _logger.error(
        ErrorUtils.getLongForm("Error reading storm.yaml in storm harvest tech onInit: {0}", e));
    object = new HashMap<String, Object>();
  }

  if (object.containsKey(backtype.storm.Config.NIMBUS_HOST)) {
    _logger.info("starting in remote mode v5");
    _logger.info(object.get(backtype.storm.Config.NIMBUS_HOST));
    // run in distributed mode
    return StormControllerUtil.getRemoteStormController(
        (String) object.get(backtype.storm.Config.NIMBUS_HOST),
        (int) object.get(backtype.storm.Config.NIMBUS_THRIFT_PORT),
        (String) object.get(backtype.storm.Config.STORM_THRIFT_TRANSPORT_PLUGIN));
  } else {
    return new NoStormController();
  }
}
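// Illustrative only (not from the source): a minimal storm.yaml that would send
// getController() down the remote path. The key names follow the backtype.storm.Config
// constants read above (Storm 0.9.x); the host, port, and plugin values are placeholder
// assumptions:
//
//   nimbus.host: "nimbus.internal.example"
//   nimbus.thrift.port: 6627
//   storm.thrift.transport: "backtype.storm.security.auth.SimpleTransportPlugin"
//
// If nimbus.host is absent (or the file is unreadable), the method falls back to
// NoStormController.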
@Override
protected void configure() {
  final Config config = ModuleUtils.getStaticConfig();
  try {
    final DataImportConfigurationBean bean =
        BeanTemplateUtils.from(
            PropertiesUtils.getSubConfig(config, DataImportConfigurationBean.PROPERTIES_ROOT)
                .orElse(null),
            DataImportConfigurationBean.class);
    this.bind(DataImportConfigurationBean.class).toInstance(bean);
    this.bind(AnalyticStateTriggerCheckFactory.class).in(Scopes.SINGLETON);
  } catch (Exception e) {
    throw new RuntimeException(
        ErrorUtils.get(
            ErrorUtils.INVALID_CONFIG_ERROR,
            DataImportConfigurationBean.class.toString(),
            config.getConfig(DataImportConfigurationBean.PROPERTIES_ROOT)),
        e);
  }
}
/**
 * Entry point
 *
 * @param args - config_file
 */
public static void main(final String[] args) {
  try {
    if (args.length < 1) {
      System.out.println("CLI: config_file");
      System.exit(-1);
    }
    System.out.println("Running with command line: " + Arrays.toString(args));
    final Config config = ConfigFactory.parseFile(new File(args[0]));

    final DataImportManagerModule app =
        ModuleUtils.initializeApplication(
            Arrays.asList(new Module()),
            Optional.of(config),
            Either.left(DataImportManagerModule.class));
    app.start();
  } catch (Throwable e) {
    _logger.error(ErrorUtils.getLongForm("Exception reached main(): {0}", e));
    try {
      e.printStackTrace();
    } catch (Exception e2) {
      // (ignore - even printing the stack trace failed)
    }
    System.exit(-1);
  }
}
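// Illustrative invocation (jar name, package, and config path are assumptions, not from
// the source):
//
//   java -cp aleph2-data-import-manager.jar \
//       com.ikanow.aleph2.data_import_manager.modules.DataImportManagerModule \
//       /opt/aleph2/etc/data_import_manager.conf
//
// The single argument is handed to ConfigFactory.parseFile(...) above, so it should be a
// Typesafe Config file (HOCON, .properties, or JSON).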
@Test
public void test_ageOut() throws IOException, InterruptedException, ExecutionException {
  // Call test_endToEnd_autoTime to create 5 time based indexes
  // 2015-01-01 -> 2015-05-01
  // How far is now from 2015-03-02
  final Date d = TimeUtils.getDateFromSuffix("2015-03-02").success();
  final long total_time_ms = new Date().getTime() - d.getTime();
  final long total_days = total_time_ms / (1000L * 3600L * 24L);
  final String age_out = ErrorUtils.get("{0} days", total_days);

  final DataBucketBean bucket =
      BeanTemplateUtils.build(DataBucketBean.class)
          .with("full_name", "/test/end-end/auto-time")
          .with(
              DataBucketBean::data_schema,
              BeanTemplateUtils.build(DataSchemaBean.class)
                  .with(
                      DataSchemaBean::temporal_schema,
                      BeanTemplateUtils.build(TemporalSchemaBean.class)
                          .with(TemporalSchemaBean::exist_age_max, age_out)
                          .done()
                          .get())
                  .done()
                  .get())
          .done()
          .get();

  final String template_name = ElasticsearchIndexUtils.getBaseIndexName(bucket);

  test_endToEnd_autoTime(false);

  _index_service
      ._crud_factory
      .getClient()
      .admin()
      .indices()
      .prepareCreate(template_name + "_2015-03-01_1")
      .execute()
      .actionGet();

  final GetMappingsResponse gmr =
      _index_service
          ._crud_factory
          .getClient()
          .admin()
          .indices()
          .prepareGetMappings(template_name + "*")
          .execute()
          .actionGet();
  assertEquals(6, gmr.getMappings().keys().size());

  CompletableFuture<BasicMessageBean> cf =
      _index_service.getDataService().get().handleAgeOutRequest(bucket);
  BasicMessageBean res = cf.get();

  assertEquals(true, res.success());
  assertTrue("sensible message: " + res.message(), res.message().contains(" 2 "));
  assertTrue(
      "Message marked as loggable: " + res.details(),
      Optional.ofNullable(res.details()).filter(m -> m.containsKey("loggable")).isPresent());

  System.out.println("Return from age-out request: " + res.message());

  Thread.sleep(5000L); // give the indexes time to delete

  final GetMappingsResponse gmr2 =
      _index_service
          ._crud_factory
          .getClient()
          .admin()
          .indices()
          .prepareGetMappings(template_name + "*")
          .execute()
          .actionGet();
  assertEquals(3, gmr2.getMappings().keys().size());

  // Check some edge cases:

  // 1) Run it again, returns success but not loggable:

  CompletableFuture<BasicMessageBean> cf2 =
      _index_service.getDataService().get().handleAgeOutRequest(bucket);
  BasicMessageBean res2 = cf2.get();

  assertEquals(true, res2.success());
  assertTrue("sensible message: " + res2.message(), res2.message().contains(" 0 "));
  assertTrue(
      "Message _not_ marked as loggable: " + res2.details(),
      !Optional.ofNullable(res2.details()).map(m -> m.get("loggable")).isPresent());

  // 2) No temporal settings

  final DataBucketBean bucket3 =
      BeanTemplateUtils.build(DataBucketBean.class)
          .with("full_name", "/test/handle/age/out/delete/not/temporal")
          .with(
              DataBucketBean::data_schema,
              BeanTemplateUtils.build(DataSchemaBean.class).done().get())
          .done()
          .get();

  CompletableFuture<BasicMessageBean> cf3 =
      _index_service.getDataService().get().handleAgeOutRequest(bucket3);
  BasicMessageBean res3 = cf3.get();
  // no temporal settings => returns success
  assertEquals(true, res3.success());

  // 3) Unparseable temporal settings (in theory won't validate but we can test here)

  final DataBucketBean bucket4 =
      BeanTemplateUtils.build(DataBucketBean.class)
          .with("full_name", "/test/handle/age/out/delete/temporal/malformed")
          .with(
              DataBucketBean::data_schema,
              BeanTemplateUtils.build(DataSchemaBean.class)
                  .with(
                      DataSchemaBean::temporal_schema,
                      BeanTemplateUtils.build(TemporalSchemaBean.class)
                          .with(TemporalSchemaBean::exist_age_max, "bananas")
                          .done()
                          .get())
                  .done()
                  .get())
          .done()
          .get();

  CompletableFuture<BasicMessageBean> cf4 =
      _index_service.getDataService().get().handleAgeOutRequest(bucket4);
  BasicMessageBean res4 = cf4.get();
  // malformed temporal settings => returns failure
  assertEquals(false, res4.success());
}
@Test
public void test_getStreamingTopology()
    throws UnsupportedFileSystemException, InterruptedException, ExecutionException {
  final DataBucketBean bucket = createBucket("test_tech_id_stream");

  final String pathname1 =
      System.getProperty("user.dir") + "/misc_test_assets/simple-topology-example.jar";
  final Path path1 = FileContext.getLocalFSFileContext().makeQualified(new Path(pathname1));
  final String pathname2 =
      System.getProperty("user.dir") + "/misc_test_assets/simple-harvest-example2.jar";
  final Path path2 = FileContext.getLocalFSFileContext().makeQualified(new Path(pathname2));

  List<SharedLibraryBean> lib_elements = createSharedLibraryBeans(path1, path2);

  //////////////////////////////////////////////////////
  // 1) Check - if called with an error, then just passes that error along

  final BasicMessageBean error =
      SharedErrorUtils.buildErrorMessage("test_source", "test_message", "test_error");
  final Validation<BasicMessageBean, IEnrichmentStreamingTopology> test1 =
      DataBucketChangeActor.getStreamingTopology(
          bucket,
          new BucketActionMessage.BucketActionOfferMessage(bucket),
          "test_source2",
          Validation.fail(error));
  assertTrue("Got error back", test1.isFail());
  assertEquals("test_source", test1.fail().source());
  assertEquals("test_message", test1.fail().command());
  assertEquals("test_error", test1.fail().message());

  //////////////////////////////////////////////////////
  // 2) Check the error handling inside getStreamingTopology

  final ImmutableMap<String, Tuple2<SharedLibraryBean, String>> test2_input =
      ImmutableMap.<String, Tuple2<SharedLibraryBean, String>>builder()
          .put("test_tech_id_stream_2b", Tuples._2T(null, null))
          .build();

  final Validation<BasicMessageBean, IEnrichmentStreamingTopology> test2a =
      DataBucketChangeActor.getStreamingTopology(
          BeanTemplateUtils.clone(bucket)
              .with(
                  DataBucketBean::streaming_enrichment_topology,
                  BeanTemplateUtils.build(EnrichmentControlMetadataBean.class)
                      .with(
                          EnrichmentControlMetadataBean::library_names_or_ids,
                          Arrays.asList("test_tech_id_stream_2a"))
                      .done()
                      .get())
              .done(),
          new BucketActionMessage.BucketActionOfferMessage(bucket),
          "test_source2a",
          Validation.success(test2_input));

  assertTrue("Got error back", test2a.isFail());
  assertEquals("test_source2a", test2a.fail().source());
  assertEquals("BucketActionOfferMessage", test2a.fail().command());
  assertEquals(
      ErrorUtils.get(
          SharedErrorUtils.SHARED_LIBRARY_NAME_NOT_FOUND,
          bucket.full_name(),
          "(unknown)"), // (cloned bucket above)
      test2a.fail().message());

  final Validation<BasicMessageBean, IEnrichmentStreamingTopology> test2b =
      DataBucketChangeActor.getStreamingTopology(
          BeanTemplateUtils.clone(bucket)
              .with(
                  DataBucketBean::streaming_enrichment_topology,
                  BeanTemplateUtils.build(EnrichmentControlMetadataBean.class)
                      .with(
                          EnrichmentControlMetadataBean::library_names_or_ids,
                          Arrays.asList("test_tech_id_stream_2b"))
                      .done()
                      .get())
              .done(),
          new BucketActionMessage.BucketActionOfferMessage(bucket),
          "test_source2b",
          Validation.success(test2_input));

  assertTrue("Got error back", test2b.isFail());
  assertEquals("test_source2b", test2b.fail().source());
  assertEquals("BucketActionOfferMessage", test2b.fail().command());
  assertEquals(
      ErrorUtils.get(
          SharedErrorUtils.SHARED_LIBRARY_NAME_NOT_FOUND,
          bucket.full_name(),
          "(unknown)"), // (cloned bucket above)
      test2b.fail().message());

  //////////////////////////////////////////////////////
  // 3) OK now it will actually do something

  final String java_name =
      _service_context.getGlobalProperties().local_cached_jar_dir()
          + File.separator
          + "test_tech_id_stream.cache.jar";

  _logger.info(
      "Needed to delete locally cached file? " + java_name + ": " + new File(java_name).delete());

  // Requires that the file has already been cached:
  final Validation<BasicMessageBean, String> cached_file =
      JarCacheUtils.getCachedJar(
              _service_context.getGlobalProperties().local_cached_jar_dir(),
              lib_elements.get(0),
              _service_context.getStorageService(),
              "test3",
              "test3")
          .get();

  if (cached_file.isFail()) {
    fail("About to crash with: " + cached_file.fail().message());
  }
  assertTrue("The cached file exists: " + java_name, new File(java_name).exists());

  // OK the setup is done and validated, now actually test the underlying call:

  final ImmutableMap<String, Tuple2<SharedLibraryBean, String>> test3_input =
      ImmutableMap.<String, Tuple2<SharedLibraryBean, String>>builder()
          .put("test_tech_id_stream", Tuples._2T(lib_elements.get(0), cached_file.success()))
          .build();

  final Validation<BasicMessageBean, IEnrichmentStreamingTopology> test3 =
      DataBucketChangeActor.getStreamingTopology(
          BeanTemplateUtils.clone(bucket)
              .with(
                  DataBucketBean::streaming_enrichment_topology,
                  BeanTemplateUtils.build(EnrichmentControlMetadataBean.class)
                      .with(
                          EnrichmentControlMetadataBean::library_names_or_ids,
                          Arrays.asList("test_tech_id_stream"))
                      .done()
                      .get())
              .done(),
          new BucketActionMessage.BucketActionOfferMessage(bucket),
          "test_source3",
          Validation.success(test3_input));

  if (test3.isFail()) {
    fail("About to crash with: " + test3.fail().message());
  }
  assertTrue("getStreamingTopology call succeeded", test3.isSuccess());
  assertTrue("topology created: ", test3.success() != null);
  assertEquals(lib_elements.get(0).misc_entry_point(), test3.success().getClass().getName());

  // (Try again but with failing version, due to class not found)

  final ImmutableMap<String, Tuple2<SharedLibraryBean, String>> test3a_input =
      ImmutableMap.<String, Tuple2<SharedLibraryBean, String>>builder()
          .put("test_tech_id_stream_fail", Tuples._2T(lib_elements.get(3), cached_file.success()))
          .build();

  final Validation<BasicMessageBean, IEnrichmentStreamingTopology> test3a =
      DataBucketChangeActor.getStreamingTopology(
          BeanTemplateUtils.clone(bucket)
              .with(
                  DataBucketBean::streaming_enrichment_topology,
                  BeanTemplateUtils.build(EnrichmentControlMetadataBean.class)
                      .with(
                          EnrichmentControlMetadataBean::library_names_or_ids,
                          Arrays.asList("test_tech_id_stream_fail"))
                      .done()
                      .get())
              .done(),
          new BucketActionMessage.BucketActionOfferMessage(bucket),
          "test_source3",
          Validation.success(test3a_input));

  assertTrue("Got error back", test3a.isFail());
  assertTrue(
      "Right error: " + test3a.fail().message(),
      test3a.fail().message().contains("com.ikanow.aleph2.test.example.ExampleStreamTopology"));

  // Now check with the "not just the harvest tech" flag set

  final String java_name2 =
      _service_context.getGlobalProperties().local_cached_jar_dir()
          + File.separator
          + "test_module_id.cache.jar";

  _logger.info(
      "Needed to delete locally cached file? "
          + java_name2
          + ": "
          + new File(java_name2).delete());

  // Requires that the file has already been cached:
  final Validation<BasicMessageBean, String> cached_file2 =
      JarCacheUtils.getCachedJar(
              _service_context.getGlobalProperties().local_cached_jar_dir(),
              lib_elements.get(1),
              _service_context.getStorageService(),
              "test3b",
              "test3b")
          .get();

  if (cached_file2.isFail()) {
    fail("About to crash with: " + cached_file2.fail().message());
  }
  assertTrue("The cached file exists: " + java_name2, new File(java_name2).exists());

  final ImmutableMap<String, Tuple2<SharedLibraryBean, String>> test3b_input =
      ImmutableMap.<String, Tuple2<SharedLibraryBean, String>>builder()
          .put("test_tech_id_stream", Tuples._2T(lib_elements.get(0), cached_file.success()))
          .put("test_module_id", Tuples._2T(lib_elements.get(1), cached_file.success()))
          .build();

  final EnrichmentControlMetadataBean enrichment_module =
      new EnrichmentControlMetadataBean(
          "test_tech_name",
          Collections.emptyList(),
          true,
          null,
          Arrays.asList("test_tech_id_stream", "test_module_id"),
          null,
          null);

  final Validation<BasicMessageBean, IEnrichmentStreamingTopology> test3b =
      DataBucketChangeActor.getStreamingTopology(
          BeanTemplateUtils.clone(bucket)
              .with(DataBucketBean::streaming_enrichment_topology, enrichment_module)
              .done(),
          new BucketActionMessage.BucketActionOfferMessage(bucket),
          "test_source3b",
          Validation.success(test3b_input));

  if (test3b.isFail()) {
    fail("About to crash with: " + test3b.fail().message());
  }
  assertTrue("getStreamingTopology call succeeded", test3b.isSuccess());
  assertTrue("topology created: ", test3b.success() != null);
  assertEquals(lib_elements.get(0).misc_entry_point(), test3b.success().getClass().getName());

  // TODO: add a test for "streaming disabled but config given" (should default to the
  // passthrough topology and ignore the given topology)
}
@Test
public void test_cacheJars()
    throws UnsupportedFileSystemException, InterruptedException, ExecutionException {
  try {
    // Preamble:
    // 0) Insert 2 library beans into the management db

    final DataBucketBean bucket = createBucket("test_tech_id_stream");

    final String pathname1 =
        System.getProperty("user.dir") + "/misc_test_assets/simple-harvest-example.jar";
    final Path path1 = FileContext.getLocalFSFileContext().makeQualified(new Path(pathname1));
    final String pathname2 =
        System.getProperty("user.dir") + "/misc_test_assets/simple-harvest-example2.jar";
    final Path path2 = FileContext.getLocalFSFileContext().makeQualified(new Path(pathname2));

    List<SharedLibraryBean> lib_elements = createSharedLibraryBeans(path1, path2);

    final IManagementDbService underlying_db =
        _service_context.getService(IManagementDbService.class, Optional.empty()).get();
    final IManagementCrudService<SharedLibraryBean> library_crud =
        underlying_db.getSharedLibraryStore();
    library_crud.deleteDatastore();
    assertEquals("Cleansed library store", 0L, (long) library_crud.countObjects().get());
    library_crud.storeObjects(lib_elements).get();

    assertEquals("Should have 4 library beans", 4L, (long) library_crud.countObjects().get());

    // 0a) Check with no streaming, gets nothing
    {
      CompletableFuture<
              Validation<BasicMessageBean, Map<String, Tuple2<SharedLibraryBean, String>>>>
          reply_structure =
              DataBucketChangeActor.cacheJars(
                  bucket,
                  _service_context.getCoreManagementDbService(),
                  _service_context.getGlobalProperties(),
                  _service_context.getStorageService(),
                  _service_context,
                  "test1_source",
                  "test1_command");

      if (reply_structure.get().isFail()) {
        fail("About to crash with: " + reply_structure.get().fail().message());
      }
      assertTrue("cacheJars should return valid reply", reply_structure.get().isSuccess());

      final Map<String, Tuple2<SharedLibraryBean, String>> reply_map =
          reply_structure.get().success();

      assertEquals(0L, reply_map.size()); // (no streaming topology => nothing to cache)
    }

    // 0b) Create the more complex bucket

    final EnrichmentControlMetadataBean enrichment_module =
        new EnrichmentControlMetadataBean(
            "test_name",
            Collections.emptyList(),
            true,
            null,
            Arrays.asList("test_tech_id_stream", "test_module_id"),
            null,
            new LinkedHashMap<>());

    final DataBucketBean bucket2 =
        BeanTemplateUtils.clone(bucket)
            .with(DataBucketBean::streaming_enrichment_topology, enrichment_module)
            .done();

    // 1) Normal operation

    CompletableFuture<
            Validation<BasicMessageBean, Map<String, Tuple2<SharedLibraryBean, String>>>>
        reply_structure =
            DataBucketChangeActor.cacheJars(
                bucket2,
                _service_context.getCoreManagementDbService(),
                _service_context.getGlobalProperties(),
                _service_context.getStorageService(),
                _service_context,
                "test1_source",
                "test1_command");

    if (reply_structure.get().isFail()) {
      fail("About to crash with: " + reply_structure.get().fail().message());
    }
    assertTrue("cacheJars should return valid reply", reply_structure.get().isSuccess());

    final Map<String, Tuple2<SharedLibraryBean, String>> reply_map =
        reply_structure.get().success();

    assertEquals(
        "Should have 4 beans: " + reply_map.toString(),
        4L,
        reply_map.size()); // (both modules, 1x for _id and 1x for name)

    // 2) Couple of error cases:

    final EnrichmentControlMetadataBean enrichment_module2 =
        new EnrichmentControlMetadataBean(
            "test_name",
            Collections.emptyList(),
            true,
            null,
            Arrays.asList("test_tech_id_stream", "test_module_id", "failtest"),
            null,
            new LinkedHashMap<>());

    final DataBucketBean bucket3 =
        BeanTemplateUtils.clone(bucket)
            .with(DataBucketBean::streaming_enrichment_topology, enrichment_module2)
            .done();

    CompletableFuture<
            Validation<BasicMessageBean, Map<String, Tuple2<SharedLibraryBean, String>>>>
        reply_structure3 =
            DataBucketChangeActor.cacheJars(
                bucket3,
                _service_context.getCoreManagementDbService(),
                _service_context.getGlobalProperties(),
                _service_context.getStorageService(),
                _service_context,
                "test2_source",
                "test2_command");

    assertTrue("cacheJars should return error", reply_structure3.get().isFail());
  } catch (Exception e) {
    System.out.println(ErrorUtils.getLongForm("guice? {0}", e));
    throw e;
  }
}
public void start() {
  final String hostname = _local_actor_context.getInformationService().getHostname();
  final int MAX_ZK_ATTEMPTS = 6;

  if (!_core_distributed_services.waitForAkkaJoin(
      Optional.of(Duration.create(60L, TimeUnit.SECONDS)))) {
    _core_distributed_services.getAkkaSystem().terminate(); // (last ditch attempt to recover)
    throw new RuntimeException("Problem with CDS/Akka, trying to terminate");
  }

  ////////////////////////////////////////////////////////////////
  // HARVEST

  if (_service_config.harvest_enabled()) {
    // Create a bucket change actor and register it vs the local message bus
    final ActorRef handler =
        _local_actor_context
            .getActorSystem()
            .actorOf(
                Props.create(
                    com.ikanow.aleph2.data_import_manager.harvest.actors
                        .DataBucketHarvestChangeActor.class),
                hostname + ActorNameUtils.HARVEST_BUCKET_CHANGE_SUFFIX);
    _logger.info(
        ErrorUtils.get(
            "Attaching harvest DataBucketHarvestChangeActor {0} to bus {1}",
            handler, ActorUtils.BUCKET_ACTION_EVENT_BUS));
    _db_actor_context
        .getBucketActionMessageBus()
        .subscribe(handler, ActorUtils.BUCKET_ACTION_EVENT_BUS);
    _logger.info(
        ErrorUtils.get("Registering {1} with {0}", ActorUtils.BUCKET_ACTION_ZOOKEEPER, hostname));
    for (int i = 0; i <= MAX_ZK_ATTEMPTS; ++i) {
      try {
        _core_distributed_services
            .getCuratorFramework()
            .create()
            .creatingParentsIfNeeded()
            .withMode(CreateMode.EPHEMERAL)
            .forPath(ActorUtils.BUCKET_ACTION_ZOOKEEPER + "/" + hostname);
        break;
      } catch (Exception e) {
        _logger.warn(
            ErrorUtils.getLongForm(
                "Failed to register with Zookeeper: {0}, retrying={1}", e, i < MAX_ZK_ATTEMPTS));
        try {
          Thread.sleep(10000L);
        } catch (Exception __) {
        }
      }
    }
    Runtime.getRuntime()
        .addShutdownHook(
            new Thread(
                Lambdas.wrap_runnable_u(
                    () -> {
                      _logger.info(
                          "Shutting down IkanowV1SynchronizationModule subservice=v1_sync_service");
                      _core_distributed_services
                          .getCuratorFramework()
                          .delete()
                          .deletingChildrenIfNeeded()
                          .forPath(ActorUtils.BUCKET_ACTION_ZOOKEEPER + "/" + hostname);
                    })));
    _logger.info("Starting IkanowV1SynchronizationModule subservice=v1_sync_service");
  }

  ////////////////////////////////////////////////////////////////
  // ANALYTICS

  if (_service_config.analytics_enabled()) {
    // Create an analytics bucket change actor and register it vs the local message bus
    final ActorRef analytics_handler =
        _local_actor_context
            .getActorSystem()
            .actorOf(
                Props.create(
                    com.ikanow.aleph2.data_import_manager.analytics.actors
                        .DataBucketAnalyticsChangeActor.class),
                hostname + ActorNameUtils.ANALYTICS_BUCKET_CHANGE_SUFFIX);
    _logger.info(
        ErrorUtils.get(
            "Attaching analytics DataBucketAnalyticsChangeActor {0} to bus {1}",
            analytics_handler, ActorUtils.BUCKET_ANALYTICS_EVENT_BUS));
    _db_actor_context
        .getAnalyticsMessageBus()
        .subscribe(analytics_handler, ActorUtils.BUCKET_ANALYTICS_EVENT_BUS);

    // Create trigger supervisor and worker
    final Optional<ActorRef> trigger_supervisor =
        _core_distributed_services.createSingletonActor(
            hostname + ActorNameUtils.ANALYTICS_TRIGGER_SUPERVISOR_SUFFIX,
            ImmutableSet.<String>builder()
                .add(
                    DistributedServicesPropertyBean.ApplicationNames.DataImportManager.toString())
                .build(),
            Props.create(
                com.ikanow.aleph2.data_import_manager.analytics.actors
                    .AnalyticsTriggerSupervisorActor.class));
    if (!trigger_supervisor.isPresent()) {
      _logger.error(
          "Analytic trigger supervisor didn't start, unknown reason (wrong CDS application_name?)");
    }
    final ActorRef trigger_worker =
        _local_actor_context
            .getActorSystem()
            .actorOf(
                Props.create(
                    com.ikanow.aleph2.data_import_manager.analytics.actors
                        .AnalyticsTriggerWorkerActor.class),
                hostname + ActorNameUtils.ANALYTICS_TRIGGER_WORKER_SUFFIX);
    _logger.info(
        ErrorUtils.get(
            "Attaching analytics AnalyticsTriggerWorkerActor {0} to bus {1}",
            trigger_worker, ActorUtils.ANALYTICS_TRIGGER_BUS));
    _db_actor_context
        .getAnalyticsTriggerBus()
        .subscribe(trigger_worker, ActorUtils.ANALYTICS_TRIGGER_BUS);
    _logger.info(
        ErrorUtils.get(
            "Registering {1} with {0}", ActorUtils.BUCKET_ANALYTICS_ZOOKEEPER, hostname));
    for (int i = 0; i <= MAX_ZK_ATTEMPTS; ++i) {
      try {
        _core_distributed_services
            .getCuratorFramework()
            .create()
            .creatingParentsIfNeeded()
            .withMode(CreateMode.EPHEMERAL)
            .forPath(ActorUtils.BUCKET_ANALYTICS_ZOOKEEPER + "/" + hostname);
        break;
      } catch (Exception e) {
        _logger.warn(
            ErrorUtils.getLongForm(
                "Failed to register with Zookeeper: {0}, retrying={1}", e, i < MAX_ZK_ATTEMPTS));
        try {
          Thread.sleep(10000L);
        } catch (Exception __) {
        }
      }
    }
    Runtime.getRuntime()
        .addShutdownHook(
            new Thread(
                Lambdas.wrap_runnable_u(
                    () -> {
                      _logger.info(
                          "Shutting down IkanowV1SynchronizationModule subservice=analytics");
                      _core_distributed_services
                          .getCuratorFramework()
                          .delete()
                          .deletingChildrenIfNeeded()
                          .forPath(ActorUtils.BUCKET_ANALYTICS_ZOOKEEPER + "/" + hostname);
                    })));
    _logger.info("Starting IkanowV1SynchronizationModule subservice=analytics");
  }

  ////////////////////////////////////////////////////////////////
  // GOVERNANCE

  if (_service_config.governance_enabled()) {
    _core_distributed_services.createSingletonActor(
        hostname + ".governance.actors.DataAgeOutSupervisor",
        ImmutableSet.<String>builder()
            .add(DistributedServicesPropertyBean.ApplicationNames.DataImportManager.toString())
            .build(),
        Props.create(DataAgeOutSupervisor.class));
    _logger.info("Starting IkanowV1SynchronizationModule subservice=governance");
  }

  // Block forever - the services run on their own threads
  for (; ; ) {
    try {
      Thread.sleep(10000);
    } catch (Exception e) {
    }
  }
}
/**
 * Given a bucket, returns either a future containing the first error encountered, _or_ a map
 * (with both name and id as keys) of path names - with a guarantee that the files have been
 * cached by the time the future completes
 *
 * @param bucket the bucket whose shared libraries should be cached
 * @param management_db the management db containing the shared library store
 * @param globals the global properties (for the local cached-jar directory)
 * @param fs the storage service from which the jars are copied
 * @param context the service context
 * @param handler_for_errors the source to use when building error messages
 * @param msg_for_errors the message to attach to any error messages
 * @return a future containing the first error encountered, _or_ a map (both name and id as keys)
 *     of path names
 */
@SuppressWarnings("unchecked")
protected static <M>
    CompletableFuture<Validation<BasicMessageBean, Map<String, Tuple2<SharedLibraryBean, String>>>>
        cacheJars(
            final DataBucketBean bucket,
            final IManagementDbService management_db,
            final GlobalPropertiesBean globals,
            final IStorageService fs,
            final IServiceContext context,
            final String handler_for_errors,
            final M msg_for_errors) {
  try {
    final Optional<QueryComponent<SharedLibraryBean>> spec = getQuery(bucket);
    if (!spec.isPresent()) {
      return CompletableFuture.completedFuture(
          Validation.<BasicMessageBean, Map<String, Tuple2<SharedLibraryBean, String>>>success(
              Collections.emptyMap()));
    }

    return management_db
        .getSharedLibraryStore()
        .secured(context, new AuthorizationBean(bucket.owner_id()))
        .getObjectsBySpec(spec.get())
        .thenComposeAsync(
            cursor -> {
              // This is a map of futures from the cache call - either an error or the path name
              // note we use a tuple of (id, name) as the key and then flatten out later
              final Map<
                      Tuple2<String, String>,
                      Tuple2<
                          SharedLibraryBean,
                          CompletableFuture<Validation<BasicMessageBean, String>>>>
                  map_of_futures =
                      StreamSupport.stream(cursor.spliterator(), true)
                          .filter(lib -> true) // (placeholder - no filtering)
                          .collect(
                              Collectors
                                  .<SharedLibraryBean,
                                      Tuple2<String, String>,
                                      Tuple2<
                                          SharedLibraryBean,
                                          CompletableFuture<
                                              Validation<BasicMessageBean, String>>>>
                                      toMap(
                                          // want to keep both the name and id versions - will
                                          // flatten out below
                                          lib -> Tuples._2T(lib.path_name(), lib._id()), // (key)
                                          // spin off a future in which the file is being copied -
                                          // save the shared library bean also
                                          lib ->
                                              Tuples._2T(
                                                  lib, // (value)
                                                  JarCacheUtils.getCachedJar(
                                                      globals.local_cached_jar_dir(),
                                                      lib,
                                                      fs,
                                                      handler_for_errors,
                                                      msg_for_errors))));

              // denest from map of futures to future of maps, also handle any errors here:
              // (some sort of "lift" function would be useful here - this is a somewhat
              // inelegant few steps)

              final CompletableFuture<Validation<BasicMessageBean, String>>[] futures =
                  (CompletableFuture<Validation<BasicMessageBean, String>>[])
                      map_of_futures
                          .values()
                          .stream()
                          .map(t2 -> t2._2())
                          .collect(Collectors.toList())
                          .toArray(new CompletableFuture[0]);

              // (have to embed this thenApply instead of bringing it outside as part of the
              // toCompose chain, because otherwise we'd lose map_of_futures scope)
              return CompletableFuture.allOf(futures)
                  .<Validation<BasicMessageBean, Map<String, Tuple2<SharedLibraryBean, String>>>>
                      thenApply(
                          f -> {
                            try {
                              final Map<String, Tuple2<SharedLibraryBean, String>> almost_there =
                                  map_of_futures
                                      .entrySet()
                                      .stream()
                                      .flatMap(
                                          kv -> {
                                            // (must have already returned if here)
                                            final Validation<BasicMessageBean, String> ret =
                                                kv.getValue()._2().join();
                                            return ret
                                                .<Stream<
                                                        Tuple2<
                                                            String,
                                                            Tuple2<SharedLibraryBean, String>>>>
                                                    validation(
                                                        // Error:
                                                        err -> {
                                                          // (not ideal, but will do)
                                                          throw new RuntimeException(
                                                              err.message());
                                                        },
                                                        // Normal:
                                                        s ->
                                                            Arrays.asList(
                                                                    // result object with path_name
                                                                    Tuples._2T(
                                                                        kv.getKey()._1(),
                                                                        Tuples._2T(
                                                                            kv.getValue()._1(),
                                                                            s)),
                                                                    // result object with id
                                                                    Tuples._2T(
                                                                        kv.getKey()._2(),
                                                                        Tuples._2T(
                                                                            kv.getValue()._1(),
                                                                            s)))
                                                                .stream());
                                          })
                                      .collect(
                                          Collectors
                                              .<Tuple2<String, Tuple2<SharedLibraryBean, String>>,
                                                  String, Tuple2<SharedLibraryBean, String>>
                                                  toMap(
                                                      idname_path -> idname_path._1(), // (key)
                                                      idname_path -> idname_path._2() // (value)
                                                      ));

                              return Validation
                                  .<BasicMessageBean,
                                      Map<String, Tuple2<SharedLibraryBean, String>>>
                                      success(almost_there);
                            } catch (Exception e) {
                              // handle the exception thrown above containing the message bean
                              // from whatever the original error was!
                              return Validation
                                  .<BasicMessageBean,
                                      Map<String, Tuple2<SharedLibraryBean, String>>>
                                      fail(
                                          SharedErrorUtils.buildErrorMessage(
                                              handler_for_errors,
                                              msg_for_errors,
                                              e.getMessage()));
                            }
                          });
            });
  } catch (Throwable e) { // (can only occur if the DB call errors)
    return CompletableFuture.completedFuture(
        Validation.fail(
            SharedErrorUtils.buildErrorMessage(
                handler_for_errors,
                msg_for_errors,
                ErrorUtils.getLongForm(
                    SharedErrorUtils.ERROR_CACHING_SHARED_LIBS, e, bucket.full_name()))));
  }
}
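// Illustrative caller sketch (not part of the original source - the method name and the
// "example_source"/"example_msg" strings are assumptions): shows how the Validation
// returned by cacheJars() is typically unwrapped, mirroring the receive() handler below.
private static CompletableFuture<String> example_unwrapCacheJars(
    final DataBucketBean bucket,
    final IManagementDbService management_db,
    final GlobalPropertiesBean globals,
    final IStorageService fs,
    final IServiceContext context) {
  return cacheJars(bucket, management_db, globals, fs, context, "example_source", "example_msg")
      .thenApply(
          err_or_map ->
              err_or_map.<String>validation(
                  // Error case: the first failure encountered while caching
                  err -> "FAIL: " + err.message(),
                  // Success case: keys are both _id and path_name, values are
                  // (shared library bean, local cached path)
                  map -> "OK: cached " + map.size() + " entries"));
}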
/**
 * Talks to the topology module - this top level function just sets the classloader up and creates
 * the module, then calls talkToStream to do the talking
 *
 * @param bucket the bucket being processed
 * @param m the original message (used for error reporting)
 * @param source the name of the actor, for error reporting
 * @param err_or_libs either an error, or the map of cached shared libraries ("pipeline element")
 * @return either an error, or the streaming enrichment topology to run
 */
protected static Validation<BasicMessageBean, IEnrichmentStreamingTopology> getStreamingTopology(
    final DataBucketBean bucket,
    final BucketActionMessage m,
    final String source,
    final Validation<BasicMessageBean, Map<String, Tuple2<SharedLibraryBean, String>>>
        err_or_libs) {
  try {
    return err_or_libs.<Validation<BasicMessageBean, IEnrichmentStreamingTopology>>validation(
        // Error:
        error -> Validation.fail(error),
        // Normal:
        libs -> {
          // Easy case, if streaming is turned off, just pass data through this layer
          if (!Optional.ofNullable(bucket.streaming_enrichment_topology().enabled())
              .orElse(true)) {
            return Validation.success(new PassthroughTopology());
          }
          // Easy case, if libs is empty then use the default streaming topology
          if (libs.isEmpty()) {
            return Validation.success(new PassthroughTopology());
          }

          final Tuple2<SharedLibraryBean, String> libbean_path =
              libs.values()
                  .stream()
                  .filter(
                      t2 ->
                          (null != t2._1())
                              && (null
                                  != Optional.ofNullable(
                                          t2._1().streaming_enrichment_entry_point())
                                      .orElse(t2._1().misc_entry_point())))
                  .findFirst()
                  .orElse(null);

          if ((null == libbean_path) || (null == libbean_path._2())) {
            // Nice easy error case, probably can't ever happen
            return Validation.fail(
                SharedErrorUtils.buildErrorMessage(
                    source,
                    m,
                    SharedErrorUtils.SHARED_LIBRARY_NAME_NOT_FOUND,
                    bucket.full_name(),
                    "(unknown)"));
          }

          final Validation<BasicMessageBean, IEnrichmentStreamingTopology> ret_val =
              ClassloaderUtils.getFromCustomClasspath(
                  IEnrichmentStreamingTopology.class,
                  Optional.ofNullable(libbean_path._1().streaming_enrichment_entry_point())
                      .orElse(libbean_path._1().misc_entry_point()),
                  Optional.of(libbean_path._2()),
                  libs.values().stream().map(lp -> lp._2()).collect(Collectors.toList()),
                  source,
                  m);

          return ret_val;
        });
  } catch (Throwable t) {
    return Validation.fail(
        SharedErrorUtils.buildErrorMessage(
            source,
            m,
            ErrorUtils.getLongForm(
                SharedErrorUtils.ERROR_LOADING_CLASS,
                t,
                bucket.harvest_technology_name_or_id())));
  }
}
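// Decision summary for getStreamingTopology (restating the logic above):
//   streaming enrichment disabled            -> PassthroughTopology
//   no shared libraries supplied             -> PassthroughTopology
//   no lib with a streaming/misc entry point -> SHARED_LIBRARY_NAME_NOT_FOUND error
//   otherwise                                -> load the entry point via ClassloaderUtils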
/**
 * Sends the appropriate start/stop/restart instruction to Storm, based on the incoming bucket
 * action message
 */
protected static CompletableFuture<BucketActionReplyMessage> talkToStream(
    final IStormController storm_controller,
    final DataBucketBean bucket,
    final BucketActionMessage m,
    final Validation<BasicMessageBean, IEnrichmentStreamingTopology> err_or_user_topology,
    final Validation<BasicMessageBean, Map<String, Tuple2<SharedLibraryBean, String>>> err_or_map,
    final String source,
    final StreamingEnrichmentContext context,
    final String yarn_config_dir,
    final String cached_jars_dir) {
  try {
    // handle getting the user libs
    final List<String> user_lib_paths =
        err_or_map.<List<String>>validation(
            fail -> Collections.emptyList(), // (going to die soon anyway)
            success ->
                success
                    .values()
                    .stream()
                    .map(tuple -> tuple._2.replaceFirst("file:", ""))
                    .collect(Collectors.toList()));

    return err_or_user_topology.<CompletableFuture<BucketActionReplyMessage>>validation(
        // ERROR getting enrichment topology
        error ->
            CompletableFuture.completedFuture(new BucketActionHandlerMessage(source, error)),
        // NORMAL grab enrichment topology
        enrichment_topology -> {
          final String entry_point = enrichment_topology.getClass().getName();
          context.setBucket(bucket);
          context.setUserTopologyEntryPoint(entry_point);
          // also set the library bean - note if here then it must have been set, else the
          // topology class wouldn't have loaded
          err_or_map.forEach(
              map -> {
                context.setLibraryConfig(
                    map.values()
                        .stream()
                        .map(t2 -> t2._1())
                        .filter(
                            lib ->
                                entry_point.equals(lib.misc_entry_point())
                                    || entry_point.equals(
                                        lib.streaming_enrichment_entry_point()))
                        .findFirst()
                        // (else this is a passthrough topology, so just use a dummy library bean)
                        .orElse(BeanTemplateUtils.build(SharedLibraryBean.class).done().get()));
              });
          _logger.info(
              "Set active class="
                  + enrichment_topology.getClass()
                  + " message="
                  + m.getClass().getSimpleName()
                  + " bucket="
                  + bucket.full_name());

          return Patterns.match(m)
              .<CompletableFuture<BucketActionReplyMessage>>andReturn()
              .when(
                  BucketActionMessage.DeleteBucketActionMessage.class,
                  msg -> StormControllerUtil.stopJob(storm_controller, bucket))
              .when(
                  BucketActionMessage.NewBucketActionMessage.class,
                  msg -> {
                    if (!msg.is_suspended())
                      return StormControllerUtil.startJob(
                          storm_controller,
                          bucket,
                          context,
                          user_lib_paths,
                          enrichment_topology,
                          cached_jars_dir);
                    else
                      // (nothing to do, but stop the job to return something sensible)
                      return StormControllerUtil.stopJob(storm_controller, bucket);
                  })
              .when(
                  BucketActionMessage.UpdateBucketActionMessage.class,
                  msg -> {
                    if (msg.is_enabled())
                      return StormControllerUtil.restartJob(
                          storm_controller,
                          bucket,
                          context,
                          user_lib_paths,
                          enrichment_topology,
                          cached_jars_dir);
                    else return StormControllerUtil.stopJob(storm_controller, bucket);
                  })
              .when(
                  BucketActionMessage.TestBucketActionMessage.class,
                  msg -> {
                    // TODO (ALEPH-25): in the future run this test with local storm rather than
                    // remote storm_controller
                    return StormControllerUtil.restartJob(
                        storm_controller,
                        bucket,
                        context,
                        user_lib_paths,
                        enrichment_topology,
                        cached_jars_dir);
                  })
              .otherwise(
                  msg ->
                      CompletableFuture.completedFuture(
                          new BucketActionHandlerMessage(
                              source,
                              new BasicMessageBean(
                                  new Date(),
                                  false,
                                  null,
                                  "Unknown message",
                                  0,
                                  "Unknown message",
                                  null))));
        });
  } catch (Throwable e) { // (trying to use Validation to avoid this, but just in case...)
    return CompletableFuture.completedFuture(
        new BucketActionHandlerMessage(
            source,
            new BasicMessageBean(
                new Date(),
                false,
                null,
                ErrorUtils.getLongForm("Error loading streaming class: {0}", e),
                0,
                ErrorUtils.getLongForm("Error loading streaming class: {0}", e),
                null)));
  }
}
/* (non-Javadoc)
 * @see akka.actor.AbstractActor#receive()
 */
@Override
public PartialFunction<Object, BoxedUnit> receive() {
  return ReceiveBuilder
      .match(
          BucketActionMessage.class,
          m ->
              !m.handling_clients().isEmpty()
                  && !m.handling_clients()
                      .contains(_context.getInformationService().getHostname()),
          __ -> {}) // (do nothing if it's not for me)
      .match(
          BucketActionOfferMessage.class,
          m -> {
            _logger.info(
                ErrorUtils.get(
                    "Actor {0} received message {1} from {2} bucket {3}",
                    this.self(),
                    m.getClass().getSimpleName(),
                    this.sender(),
                    m.bucket().full_name()));

            final ActorRef closing_sender = this.sender();
            final ActorRef closing_self = this.self();
            final String hostname = _context.getInformationService().getHostname();

            // (this isn't async so doesn't require any futures)
            final boolean accept_or_ignore =
                new File(_globals.local_yarn_config_dir() + File.separator + "storm.yaml")
                    .exists();

            final BucketActionReplyMessage reply =
                accept_or_ignore
                    ? new BucketActionReplyMessage.BucketActionWillAcceptMessage(hostname)
                    : new BucketActionReplyMessage.BucketActionIgnoredMessage(hostname);

            closing_sender.tell(reply, closing_self);
          })
      .match(
          BucketActionMessage.class,
          m -> {
            _logger.info(
                ErrorUtils.get(
                    "Actor {0} received message {1} from {2} bucket={3}",
                    this.self(),
                    m.getClass().getSimpleName(),
                    this.sender(),
                    m.bucket().full_name()));

            final ActorRef closing_sender = this.sender();
            final ActorRef closing_self = this.self();
            final String hostname = _context.getInformationService().getHostname();

            // (cacheJars can't throw checked or unchecked exceptions in this thread, only from
            // within the returned future)
            cacheJars(
                    m.bucket(),
                    _management_db,
                    _globals,
                    _fs,
                    _context.getServiceContext(),
                    hostname,
                    m)
                .thenComposeAsync(
                    err_or_map -> {
                      final StreamingEnrichmentContext e_context =
                          _context.getNewStreamingEnrichmentContext();

                      final Validation<BasicMessageBean, IEnrichmentStreamingTopology>
                          err_or_tech_module =
                              getStreamingTopology(m.bucket(), m, hostname, err_or_map);

                      final CompletableFuture<BucketActionReplyMessage> ret =
                          talkToStream(
                              _storm_controller,
                              m.bucket(),
                              m,
                              err_or_tech_module,
                              err_or_map,
                              hostname,
                              e_context,
                              _globals.local_yarn_config_dir(),
                              _globals.local_cached_jar_dir());
                      return ret;
                    })
                .thenAccept(
                    reply -> {
                      // (reply can contain an error or successful reply, they're the same bean
                      // type)
                      // Some information logging:
                      Patterns.match(reply)
                          .andAct()
                          .when(
                              BucketActionHandlerMessage.class,
                              msg ->
                                  _logger.info(
                                      ErrorUtils.get(
                                          "Standard reply to message={0}, bucket={1}, success={2}",
                                          m.getClass().getSimpleName(),
                                          m.bucket().full_name(),
                                          msg.reply().success())))
                          .when(
                              BucketActionReplyMessage.BucketActionWillAcceptMessage.class,
                              msg ->
                                  _logger.info(
                                      ErrorUtils.get(
                                          "Standard reply to message={0}, bucket={1}",
                                          m.getClass().getSimpleName(),
                                          m.bucket().full_name())))
                          .otherwise(
                              msg ->
                                  _logger.info(
                                      ErrorUtils.get(
                                          "Unusual reply to message={0}, type={2}, bucket={1}",
                                          m.getClass().getSimpleName(),
                                          m.bucket().full_name(),
                                          msg.getClass().getSimpleName())));

                      closing_sender.tell(reply, closing_self);
                    })
                .exceptionally(
                    e -> {
                      // another bit of error handling that shouldn't ever be called but is a
                      // useful backstop
                      // Some information logging:
                      _logger.warn(
                          ErrorUtils.get(
                              "Unexpected error replying to '{0}': error = {1}, bucket={2}",
                              BeanTemplateUtils.toJson(m).toString(),
                              ErrorUtils.getLongForm("{0}", e),
                              m.bucket().full_name()));

                      final BasicMessageBean error_bean =
                          SharedErrorUtils.buildErrorMessage(
                              hostname,
                              m,
                              ErrorUtils.getLongForm(
                                  StreamErrorUtils.STREAM_UNKNOWN_ERROR,
                                  e,
                                  m.bucket().full_name()));
                      closing_sender.tell(
                          new BucketActionHandlerMessage(hostname, error_bean), closing_self);
                      return null;
                    });
          })
      .build();
}
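// Message flow summary for receive() (restating the matchers above):
//   BucketActionMessage addressed to other hosts -> silently ignored
//   BucketActionOfferMessage                     -> accept iff <local yarn config dir>/storm.yaml
//                                                   exists on this node
//   any other BucketActionMessage                -> cacheJars -> getStreamingTopology ->
//                                                   talkToStream -> reply to sender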