/**
   * Initializes the storm instance.
   *
   * <p>Builds the {@code GlobalPropertiesBean} from the static config, then reads {@code
   * storm.yaml} from the bean's local YARN config directory. If the YAML contains a nimbus host
   * entry, a remote controller is created from the nimbus host, thrift port and thrift transport
   * plugin entries; in every other case (globals unavailable, file unreadable, file empty, no
   * nimbus host) a {@code NoStormController} is returned.
   *
   * @return a real storm controller if possible, else a no controller
   */
  @SuppressWarnings("unchecked")
  public static IStormController getController() {
    final GlobalPropertiesBean globals =
        Lambdas.get(
            () -> {
              try {
                return BeanTemplateUtils.from(
                    PropertiesUtils.getSubConfig(
                            ModuleUtils.getStaticConfig(), GlobalPropertiesBean.PROPERTIES_ROOT)
                        .orElse(null),
                    GlobalPropertiesBean.class);
              } catch (IOException e) {
                _logger.error(
                    ErrorUtils.getLongForm(
                        "Couldn't set globals property bean in storm harvest tech onInit: {0}", e));
                return null;
              }
            });
    if (null == globals) {
      return new NoStormController();
    }

    final String storm_yaml_path = globals.local_yarn_config_dir() + File.separator + "storm.yaml";
    _logger.info("Loading storm config from: " + storm_yaml_path);

    Map<String, Object> object;
    // try-with-resources so the stream is always closed (it was previously leaked)
    try (InputStream input = new FileInputStream(new File(storm_yaml_path))) {
      object = (Map<String, Object>) new Yaml().load(input);
    } catch (IOException e) { // (includes FileNotFoundException, plus any failure on close)
      _logger.error(
          ErrorUtils.getLongForm("Error reading storm.yaml in storm harvest tech onInit: {0}", e));
      object = new HashMap<String, Object>();
    }
    if (null == object) {
      // an empty YAML document loads as null - treat it the same as "no settings"
      object = new HashMap<String, Object>();
    }

    if (object.containsKey(backtype.storm.Config.NIMBUS_HOST)) {
      _logger.info("starting in remote mode v5");
      _logger.info(object.get(backtype.storm.Config.NIMBUS_HOST));
      // run in distributed mode
      // NOTE(review): the (int) cast throws if the thrift port entry is absent or not an Integer
      return StormControllerUtil.getRemoteStormController(
          (String) object.get(backtype.storm.Config.NIMBUS_HOST),
          (int) object.get(backtype.storm.Config.NIMBUS_THRIFT_PORT),
          (String) object.get(backtype.storm.Config.STORM_THRIFT_TRANSPORT_PLUGIN));
    } else {
      return new NoStormController();
    }
  }
Ejemplo n.º 2
0
    /**
     * Builds the {@code DataImportConfigurationBean} from the static config and binds it as an
     * instance, then binds {@code AnalyticStateTriggerCheckFactory} as a singleton.
     *
     * <p>Any exception raised while doing so is rethrown as a {@code RuntimeException} carrying an
     * invalid-config message and the original exception as its cause.
     */
    @Override
    protected void configure() {
      final Config static_config = ModuleUtils.getStaticConfig();
      try {
        final DataImportConfigurationBean import_config =
            BeanTemplateUtils.from(
                PropertiesUtils.getSubConfig(
                        static_config, DataImportConfigurationBean.PROPERTIES_ROOT)
                    .orElse(null),
                DataImportConfigurationBean.class);

        // Configuration bean first, then the trigger check factory
        this.bind(DataImportConfigurationBean.class).toInstance(import_config);
        this.bind(AnalyticStateTriggerCheckFactory.class).in(Scopes.SINGLETON);
      } catch (Exception cause) {
        final String bean_class_name = DataImportConfigurationBean.class.toString();
        throw new RuntimeException(
            ErrorUtils.get(
                ErrorUtils.INVALID_CONFIG_ERROR,
                bean_class_name,
                static_config.getConfig(DataImportConfigurationBean.PROPERTIES_ROOT)),
            cause);
      }
    }
Ejemplo n.º 3
0
  /**
   * Command-line entry point: parses the config file named by the first argument, initializes the
   * application from it and starts the resulting {@code DataImportManagerModule}.
   *
   * <p>Exits with status -1 if no argument is supplied, or if anything is thrown during startup
   * (the failure is logged and its stack trace printed first).
   *
   * @param args - args[0] is the config file to load
   */
  public static void main(final String[] args) {
    try {
      if (0 == args.length) {
        System.out.println("CLI: config_file");
        System.exit(-1);
      }
      System.out.println("Running with command line: " + Arrays.toString(args));
      final File config_file = new File(args[0]);
      final Config parsed_config = ConfigFactory.parseFile(config_file);

      final DataImportManagerModule application =
          ModuleUtils.initializeApplication(
              Arrays.asList(new Module()),
              Optional.of(parsed_config),
              Either.left(DataImportManagerModule.class));
      application.start();
    } catch (Throwable fatal) {
      _logger.error(ErrorUtils.getLongForm("Exception reached main(): {0}", fatal));
      try {
        fatal.printStackTrace();
      } catch (Exception ignored) { // printing the stack trace itself failed, nothing left to do
      }
      System.exit(-1);
    }
  }
  /**
   * Exercises {@code handleAgeOutRequest}: first against a bucket whose maximum age is the number
   * of whole days between now and 2015-03-02, then for three edge cases (nothing left to delete,
   * no temporal schema, unparseable maximum age).
   */
  @Test
  public void test_ageOut() throws IOException, InterruptedException, ExecutionException {

    // test_endToEnd_autoTime (called below) is relied on to create the time based indexes;
    // together with the extra index created here, 6 matching indexes are expected
    // The age-out threshold is how far now is from 2015-03-02, in whole days
    final Date d = TimeUtils.getDateFromSuffix("2015-03-02").success();
    final long total_time_ms = new Date().getTime() - d.getTime();
    final long total_days = total_time_ms / (1000L * 3600L * 24L);
    final String age_out = ErrorUtils.get("{0} days", total_days);

    final DataBucketBean bucket =
        BeanTemplateUtils.build(DataBucketBean.class)
            .with("full_name", "/test/end-end/auto-time")
            .with(
                DataBucketBean::data_schema,
                BeanTemplateUtils.build(DataSchemaBean.class)
                    .with(
                        DataSchemaBean::temporal_schema,
                        BeanTemplateUtils.build(TemporalSchemaBean.class)
                            .with(TemporalSchemaBean::exist_age_max, age_out)
                            .done()
                            .get())
                    .done()
                    .get())
            .done()
            .get();

    final String template_name = ElasticsearchIndexUtils.getBaseIndexName(bucket);

    test_endToEnd_autoTime(false);

    // Add one more index, dated 2015-03-01 with a "_1" suffix
    _index_service
        ._crud_factory
        .getClient()
        .admin()
        .indices()
        .prepareCreate(template_name + "_2015-03-01_1")
        .execute()
        .actionGet();

    final GetMappingsResponse gmr =
        _index_service
            ._crud_factory
            .getClient()
            .admin()
            .indices()
            .prepareGetMappings(template_name + "*")
            .execute()
            .actionGet();
    assertEquals(6, gmr.getMappings().keys().size());

    CompletableFuture<BasicMessageBean> cf =
        _index_service.getDataService().get().handleAgeOutRequest(bucket);

    BasicMessageBean res = cf.get();

    assertEquals(true, res.success());
    assertTrue("sensible message: " + res.message(), res.message().contains(" 2 "));

    assertTrue(
        "Message marked as loggable: " + res.details(),
        Optional.ofNullable(res.details()).filter(m -> m.containsKey("loggable")).isPresent());

    System.out.println("Return from to delete: " + res.message());

    Thread.sleep(5000L); // give the indexes time to delete

    final GetMappingsResponse gmr2 =
        _index_service
            ._crud_factory
            .getClient()
            .admin()
            .indices()
            .prepareGetMappings(template_name + "*")
            .execute()
            .actionGet();
    assertEquals(3, gmr2.getMappings().keys().size());

    // Check some edge cases:

    // 1) Run it again, returns success but not loggable:

    CompletableFuture<BasicMessageBean> cf2 =
        _index_service.getDataService().get().handleAgeOutRequest(bucket);

    BasicMessageBean res2 = cf2.get();

    assertEquals(true, res2.success());
    assertTrue("sensible message: " + res2.message(), res2.message().contains(" 0 "));
    assertTrue(
        "Message _not_ marked as loggable: " + res2.details(),
        !Optional.ofNullable(res2.details()).map(m -> m.get("loggable")).isPresent());

    // 2) No temporal settings

    final DataBucketBean bucket3 =
        BeanTemplateUtils.build(DataBucketBean.class)
            .with("full_name", "/test/handle/age/out/delete/not/temporal")
            .with(
                DataBucketBean::data_schema,
                BeanTemplateUtils.build(DataSchemaBean.class).done().get())
            .done()
            .get();

    CompletableFuture<BasicMessageBean> cf3 =
        _index_service.getDataService().get().handleAgeOutRequest(bucket3);
    BasicMessageBean res3 = cf3.get();
    // no temporal settings => returns success
    assertEquals(true, res3.success());

    // 3) Unparseable temporal settings (in theory won't validate but we can test here)

    final DataBucketBean bucket4 =
        BeanTemplateUtils.build(DataBucketBean.class)
            .with("full_name", "/test/handle/age/out/delete/temporal/malformed")
            .with(
                DataBucketBean::data_schema,
                BeanTemplateUtils.build(DataSchemaBean.class)
                    .with(
                        DataSchemaBean::temporal_schema,
                        BeanTemplateUtils.build(TemporalSchemaBean.class)
                            .with(TemporalSchemaBean::exist_age_max, "bananas")
                            .done()
                            .get())
                    .done()
                    .get())
            .done()
            .get();

    CompletableFuture<BasicMessageBean> cf4 =
        _index_service.getDataService().get().handleAgeOutRequest(bucket4);
    BasicMessageBean res4 = cf4.get();
    // unparseable temporal settings => returns failure
    assertEquals(false, res4.success());
  }
Ejemplo n.º 5
0
  /**
   * Exercises {@code DataBucketChangeActor.getStreamingTopology} in three stages: an incoming
   * error is passed straight through; library names that cannot be resolved produce a "shared
   * library name not found" error; and a correctly cached jar yields a topology instance whose
   * class name matches the library's {@code misc_entry_point}.
   */
  @Test
  public void test_getStreamingTopology()
      throws UnsupportedFileSystemException, InterruptedException, ExecutionException {
    final DataBucketBean bucket = createBucket("test_tech_id_stream");

    final String pathname1 =
        System.getProperty("user.dir") + "/misc_test_assets/simple-topology-example.jar";
    final Path path1 = FileContext.getLocalFSFileContext().makeQualified(new Path(pathname1));
    final String pathname2 =
        System.getProperty("user.dir") + "/misc_test_assets/simple-harvest-example2.jar";
    final Path path2 = FileContext.getLocalFSFileContext().makeQualified(new Path(pathname2));

    List<SharedLibraryBean> lib_elements = createSharedLibraryBeans(path1, path2);

    //////////////////////////////////////////////////////

    // 1) Check - if called with an error, then just passes that error along

    final BasicMessageBean error =
        SharedErrorUtils.buildErrorMessage("test_source", "test_message", "test_error");

    final Validation<BasicMessageBean, IEnrichmentStreamingTopology> test1 =
        DataBucketChangeActor.getStreamingTopology(
            bucket,
            new BucketActionMessage.BucketActionOfferMessage(bucket),
            "test_source2",
            Validation.fail(error));

    assertTrue("Got error back", test1.isFail());
    assertEquals("test_source", test1.fail().source());
    assertEquals("test_message", test1.fail().command());
    assertEquals("test_error", test1.fail().message());

    //////////////////////////////////////////////////////

    // 2) Check the error handling inside getStreamingTopology

    // (the only entry maps "test_tech_id_stream_2b" to an empty tuple)
    final ImmutableMap<String, Tuple2<SharedLibraryBean, String>> test2_input =
        ImmutableMap.<String, Tuple2<SharedLibraryBean, String>>builder()
            .put("test_tech_id_stream_2b", Tuples._2T(null, null))
            .build();

    // 2a) the requested library name is not in the input map at all
    final Validation<BasicMessageBean, IEnrichmentStreamingTopology> test2a =
        DataBucketChangeActor.getStreamingTopology(
            BeanTemplateUtils.clone(bucket)
                .with(
                    DataBucketBean::streaming_enrichment_topology,
                    BeanTemplateUtils.build(EnrichmentControlMetadataBean.class)
                        .with(
                            EnrichmentControlMetadataBean::library_names_or_ids,
                            Arrays.asList("test_tech_id_stream_2a"))
                        .done()
                        .get())
                .done(),
            new BucketActionMessage.BucketActionOfferMessage(bucket),
            "test_source2a",
            Validation.success(test2_input));

    assertTrue("Got error back", test2a.isFail());
    assertEquals("test_source2a", test2a.fail().source());
    assertEquals("BucketActionOfferMessage", test2a.fail().command());
    assertEquals(
        ErrorUtils.get(
            SharedErrorUtils.SHARED_LIBRARY_NAME_NOT_FOUND,
            bucket.full_name(),
            "(unknown)"), // (cloned bucket above)
        test2a.fail().message());

    // 2b) the requested library name is in the input map, but with an empty tuple
    final Validation<BasicMessageBean, IEnrichmentStreamingTopology> test2b =
        DataBucketChangeActor.getStreamingTopology(
            BeanTemplateUtils.clone(bucket)
                .with(
                    DataBucketBean::streaming_enrichment_topology,
                    BeanTemplateUtils.build(EnrichmentControlMetadataBean.class)
                        .with(
                            EnrichmentControlMetadataBean::library_names_or_ids,
                            Arrays.asList("test_tech_id_stream_2b"))
                        .done()
                        .get())
                .done(),
            new BucketActionMessage.BucketActionOfferMessage(bucket),
            "test_source2b",
            Validation.success(test2_input));

    assertTrue("Got error back", test2b.isFail());
    assertEquals("test_source2b", test2b.fail().source());
    assertEquals("BucketActionOfferMessage", test2b.fail().command());
    // (checks test2b's own message - this previously re-checked test2a's by mistake)
    assertEquals(
        ErrorUtils.get(
            SharedErrorUtils.SHARED_LIBRARY_NAME_NOT_FOUND,
            bucket.full_name(),
            "(unknown)"), // (cloned bucket above)
        test2b.fail().message());

    //////////////////////////////////////////////////////

    // 3) OK now it will actually do something

    final String java_name =
        _service_context.getGlobalProperties().local_cached_jar_dir()
            + File.separator
            + "test_tech_id_stream.cache.jar";

    _logger.info(
        "Needed to delete locally cached file? " + java_name + ": " + new File(java_name).delete());

    // Requires that the file has already been cached:
    final Validation<BasicMessageBean, String> cached_file =
        JarCacheUtils.getCachedJar(
                _service_context.getGlobalProperties().local_cached_jar_dir(),
                lib_elements.get(0),
                _service_context.getStorageService(),
                "test3",
                "test3")
            .get();

    if (cached_file.isFail()) {
      fail("About to crash with: " + cached_file.fail().message());
    }

    assertTrue("The cached file exists: " + java_name, new File(java_name).exists());

    // OK the setup is done and validated now actually test the underlying call:

    final ImmutableMap<String, Tuple2<SharedLibraryBean, String>> test3_input =
        ImmutableMap.<String, Tuple2<SharedLibraryBean, String>>builder()
            .put("test_tech_id_stream", Tuples._2T(lib_elements.get(0), cached_file.success()))
            .build();

    final Validation<BasicMessageBean, IEnrichmentStreamingTopology> test3 =
        DataBucketChangeActor.getStreamingTopology(
            BeanTemplateUtils.clone(bucket)
                .with(
                    DataBucketBean::streaming_enrichment_topology,
                    BeanTemplateUtils.build(EnrichmentControlMetadataBean.class)
                        .with(
                            EnrichmentControlMetadataBean::library_names_or_ids,
                            Arrays.asList("test_tech_id_stream"))
                        .done()
                        .get())
                .done(),
            new BucketActionMessage.BucketActionOfferMessage(bucket),
            "test_source3",
            Validation.success(test3_input));

    if (test3.isFail()) {
      fail("About to crash with: " + test3.fail().message());
    }
    assertTrue("getStreamingTopology call succeeded", test3.isSuccess());
    assertTrue("topology created: ", test3.success() != null);
    assertEquals(lib_elements.get(0).misc_entry_point(), test3.success().getClass().getName());

    // (Try again but with failing version, due to class not found)

    final ImmutableMap<String, Tuple2<SharedLibraryBean, String>> test3a_input =
        ImmutableMap.<String, Tuple2<SharedLibraryBean, String>>builder()
            .put("test_tech_id_stream_fail", Tuples._2T(lib_elements.get(3), cached_file.success()))
            .build();

    final Validation<BasicMessageBean, IEnrichmentStreamingTopology> test3a =
        DataBucketChangeActor.getStreamingTopology(
            BeanTemplateUtils.clone(bucket)
                .with(
                    DataBucketBean::streaming_enrichment_topology,
                    BeanTemplateUtils.build(EnrichmentControlMetadataBean.class)
                        .with(
                            EnrichmentControlMetadataBean::library_names_or_ids,
                            Arrays.asList("test_tech_id_stream_fail"))
                        .done()
                        .get())
                .done(),
            new BucketActionMessage.BucketActionOfferMessage(bucket),
            "test_source3",
            Validation.success(test3a_input));

    assertTrue("Got error back", test3a.isFail());
    assertTrue(
        "Right error: " + test3a.fail().message(),
        test3a.fail().message().contains("com.ikanow.aleph2.test.example.ExampleStreamTopology"));

    // Now check with the "not just the harvest tech" flag set

    final String java_name2 =
        _service_context.getGlobalProperties().local_cached_jar_dir()
            + File.separator
            + "test_module_id.cache.jar";

    _logger.info(
        "Needed to delete locally cached file? "
            + java_name2
            + ": "
            + new File(java_name2).delete());

    // Requires that the file has already been cached:
    final Validation<BasicMessageBean, String> cached_file2 =
        JarCacheUtils.getCachedJar(
                _service_context.getGlobalProperties().local_cached_jar_dir(),
                lib_elements.get(1),
                _service_context.getStorageService(),
                "test3b",
                "test3b")
            .get();

    if (cached_file2.isFail()) {
      fail("About to crash with: " + cached_file2.fail().message());
    }

    // (the failure message now names the file actually being checked)
    assertTrue("The cached file exists: " + java_name2, new File(java_name2).exists());

    // NOTE(review): the "test_module_id" entry below is paired with cached_file (the first jar)
    // rather than cached_file2 - left as-is, confirm whether that is intentional
    final ImmutableMap<String, Tuple2<SharedLibraryBean, String>> test3b_input =
        ImmutableMap.<String, Tuple2<SharedLibraryBean, String>>builder()
            .put("test_tech_id_stream", Tuples._2T(lib_elements.get(0), cached_file.success()))
            .put("test_module_id", Tuples._2T(lib_elements.get(1), cached_file.success()))
            .build();

    final EnrichmentControlMetadataBean enrichment_module =
        new EnrichmentControlMetadataBean(
            "test_tech_name",
            Collections.emptyList(),
            true,
            null,
            Arrays.asList("test_tech_id_stream", "test_module_id"),
            null,
            null);

    final Validation<BasicMessageBean, IEnrichmentStreamingTopology> test3b =
        DataBucketChangeActor.getStreamingTopology(
            BeanTemplateUtils.clone(bucket)
                .with(DataBucketBean::streaming_enrichment_topology, enrichment_module)
                .done(),
            new BucketActionMessage.BucketActionOfferMessage(bucket),
            "test_source3b",
            Validation.success(test3b_input));

    if (test3b.isFail()) {
      fail("About to crash with: " + test3b.fail().message());
    }
    assertTrue("getStreamingTopology call succeeded", test3b.isSuccess());
    assertTrue("topology created: ", test3b.success() != null);
    assertEquals(lib_elements.get(0).misc_entry_point(), test3b.success().getClass().getName());

    // TODO add a test for disabled streaming but config given (should default to passthrough top
    // and
    // ignore given topology
  }
Ejemplo n.º 6
0
  /**
   * Exercises {@code DataBucketChangeActor.cacheJars}: a bucket without a streaming enrichment
   * topology yields an empty map, a bucket naming two stored libraries yields four map entries,
   * and a bucket that additionally names "failtest" yields a failure.
   *
   * <p>Any exception is printed in long form before being rethrown.
   */
  @Test
  public void test_cacheJars()
      throws UnsupportedFileSystemException, InterruptedException, ExecutionException {
    try {
      // Preamble:
      // 0) Insert the library beans into the management db (the store is emptied first)

      final DataBucketBean bucket = createBucket("test_tech_id_stream");

      final String pathname1 =
          System.getProperty("user.dir") + "/misc_test_assets/simple-harvest-example.jar";
      final Path path1 = FileContext.getLocalFSFileContext().makeQualified(new Path(pathname1));
      final String pathname2 =
          System.getProperty("user.dir") + "/misc_test_assets/simple-harvest-example2.jar";
      final Path path2 = FileContext.getLocalFSFileContext().makeQualified(new Path(pathname2));

      List<SharedLibraryBean> lib_elements = createSharedLibraryBeans(path1, path2);

      final IManagementDbService underlying_db =
          _service_context.getService(IManagementDbService.class, Optional.empty()).get();
      final IManagementCrudService<SharedLibraryBean> library_crud =
          underlying_db.getSharedLibraryStore();
      library_crud.deleteDatastore();
      assertEquals("Cleansed library store", 0L, (long) library_crud.countObjects().get());
      library_crud.storeObjects(lib_elements).get();

      assertEquals("Should have 4 library beans", 4L, (long) library_crud.countObjects().get());

      // 0a) Check with no streaming, gets nothing
      {
        CompletableFuture<
                Validation<BasicMessageBean, Map<String, Tuple2<SharedLibraryBean, String>>>>
            reply_structure =
                DataBucketChangeActor.cacheJars(
                    bucket,
                    _service_context.getCoreManagementDbService(),
                    _service_context.getGlobalProperties(),
                    _service_context.getStorageService(),
                    _service_context,
                    "test1_source",
                    "test1_command");

        if (reply_structure.get().isFail()) {
          fail("About to crash with: " + reply_structure.get().fail().message());
        }
        assertTrue("cacheJars should return valid reply", reply_structure.get().isSuccess());

        final Map<String, Tuple2<SharedLibraryBean, String>> reply_map =
            reply_structure.get().success();

        assertEquals(0L, reply_map.size()); // (nothing is returned for the plain bucket)
      }

      // 0b) Create the more complex bucket

      final EnrichmentControlMetadataBean enrichment_module =
          new EnrichmentControlMetadataBean(
              "test_name",
              Collections.emptyList(),
              true,
              null,
              Arrays.asList("test_tech_id_stream", "test_module_id"),
              null,
              new LinkedHashMap<>());

      final DataBucketBean bucket2 =
          BeanTemplateUtils.clone(bucket)
              .with(DataBucketBean::streaming_enrichment_topology, enrichment_module)
              .done();

      // 1) Normal operation

      CompletableFuture<
              Validation<BasicMessageBean, Map<String, Tuple2<SharedLibraryBean, String>>>>
          reply_structure =
              DataBucketChangeActor.cacheJars(
                  bucket2,
                  _service_context.getCoreManagementDbService(),
                  _service_context.getGlobalProperties(),
                  _service_context.getStorageService(),
                  _service_context,
                  "test1_source",
                  "test1_command");

      if (reply_structure.get().isFail()) {
        fail("About to crash with: " + reply_structure.get().fail().message());
      }
      assertTrue("cacheJars should return valid reply", reply_structure.get().isSuccess());

      final Map<String, Tuple2<SharedLibraryBean, String>> reply_map =
          reply_structure.get().success();

      assertEquals(
          "Should have 4 beans: " + reply_map.toString(),
          4L,
          reply_map.size()); // (both modules, 1x for _id and 1x for name)

      // 2) Error case: the bucket also names a library ("failtest") expected to make the call fail

      final EnrichmentControlMetadataBean enrichment_module2 =
          new EnrichmentControlMetadataBean(
              "test_name",
              Collections.emptyList(),
              true,
              null,
              Arrays.asList("test_tech_id_stream", "test_module_id", "failtest"),
              null,
              new LinkedHashMap<>());

      final DataBucketBean bucket3 =
          BeanTemplateUtils.clone(bucket)
              .with(DataBucketBean::streaming_enrichment_topology, enrichment_module2)
              .done();

      CompletableFuture<
              Validation<BasicMessageBean, Map<String, Tuple2<SharedLibraryBean, String>>>>
          reply_structure3 =
              DataBucketChangeActor.cacheJars(
                  bucket3,
                  _service_context.getCoreManagementDbService(),
                  _service_context.getGlobalProperties(),
                  _service_context.getStorageService(),
                  _service_context,
                  "test2_source",
                  "test2_command");

      assertTrue("cacheJars should return error", reply_structure3.get().isFail());
    } catch (Exception e) {
      // Print the long form of the failure before rethrowing it unchanged
      System.out.println(ErrorUtils.getLongForm("guice? {0}", e));
      throw e;
    }
  }
Ejemplo n.º 7
0
  /**
   * Starts whichever subservices are enabled in the service config and then blocks forever.
   *
   * <ul>
   *   <li>Waits up to 60 seconds for the Akka join; if that fails, terminates the Akka system and
   *       throws.
   *   <li>Harvest: creates the harvest bucket change actor, subscribes it to the bucket action
   *       bus and registers this host in ZooKeeper.
   *   <li>Analytics: creates the analytics bucket change actor, the trigger supervisor
   *       (singleton) and the trigger worker, subscribes them to their buses and registers this
   *       host in ZooKeeper.
   *   <li>Governance: creates the data age-out supervisor singleton.
   * </ul>
   *
   * <p>This method never returns normally: it ends in an endless sleep loop.
   *
   * <p>NOTE(review): the log messages name "IkanowV1SynchronizationModule" - confirm that is the
   * intended module name (the strings are left unchanged here).
   *
   * @throws RuntimeException if the Akka join does not complete in time
   */
  public void start() {
    final String hostname = _local_actor_context.getInformationService().getHostname();

    if (!_core_distributed_services.waitForAkkaJoin(
        Optional.of(Duration.create(60L, TimeUnit.SECONDS)))) {
      _core_distributed_services.getAkkaSystem().terminate(); // (last ditch attempt to recover)
      throw new RuntimeException("Problem with CDS/Akka, try to terminate");
    }

    ////////////////////////////////////////////////////////////////

    // HARVEST

    if (_service_config.harvest_enabled()) {
      // Create a bucket change actor and register it vs the local message bus
      final ActorRef handler =
          _local_actor_context
              .getActorSystem()
              .actorOf(
                  Props.create(
                      com.ikanow.aleph2.data_import_manager.harvest.actors
                          .DataBucketHarvestChangeActor.class),
                  hostname + ActorNameUtils.HARVEST_BUCKET_CHANGE_SUFFIX);

      _logger.info(
          ErrorUtils.get(
              "Attaching harvest DataBucketHarvestChangeActor {0} to bus {1}",
              handler, ActorUtils.BUCKET_ACTION_EVENT_BUS));

      _db_actor_context
          .getBucketActionMessageBus()
          .subscribe(handler, ActorUtils.BUCKET_ACTION_EVENT_BUS);

      _logger.info(
          ErrorUtils.get("Registering {1} with {0}", ActorUtils.BUCKET_ACTION_ZOOKEEPER, hostname));

      registerEphemeralZookeeperNode(
          ActorUtils.BUCKET_ACTION_ZOOKEEPER + "/" + hostname,
          "Shutting down IkanowV1SynchronizationModule subservice=v1_sync_service");

      _logger.info("Starting IkanowV1SynchronizationModule subservice=v1_sync_service");
    }

    ////////////////////////////////////////////////////////////////

    // ANALYTICS

    if (_service_config.analytics_enabled()) {
      // Create a analytics bucket change actor and register it vs the local message bus
      final ActorRef analytics_handler =
          _local_actor_context
              .getActorSystem()
              .actorOf(
                  Props.create(
                      com.ikanow.aleph2.data_import_manager.analytics.actors
                          .DataBucketAnalyticsChangeActor.class),
                  hostname + ActorNameUtils.ANALYTICS_BUCKET_CHANGE_SUFFIX);

      _logger.info(
          ErrorUtils.get(
              "Attaching analytics DataBucketAnalyticsChangeActor {0} to bus {1}",
              analytics_handler, ActorUtils.BUCKET_ANALYTICS_EVENT_BUS));

      _db_actor_context
          .getAnalyticsMessageBus()
          .subscribe(analytics_handler, ActorUtils.BUCKET_ANALYTICS_EVENT_BUS);

      // Create trigger supervisor and worker

      final Optional<ActorRef> trigger_supervisor =
          _core_distributed_services.createSingletonActor(
              hostname + ActorNameUtils.ANALYTICS_TRIGGER_SUPERVISOR_SUFFIX,
              ImmutableSet.<String>builder()
                  .add(
                      DistributedServicesPropertyBean.ApplicationNames.DataImportManager.toString())
                  .build(),
              Props.create(
                  com.ikanow.aleph2.data_import_manager.analytics.actors
                      .AnalyticsTriggerSupervisorActor.class));

      if (!trigger_supervisor.isPresent()) {
        _logger.error(
            "Analytic trigger supervisor didn't start, unknown reason (wrong CDS application_name?)");
      }

      final ActorRef trigger_worker =
          _local_actor_context
              .getActorSystem()
              .actorOf(
                  Props.create(
                      com.ikanow.aleph2.data_import_manager.analytics.actors
                          .AnalyticsTriggerWorkerActor.class),
                  hostname + ActorNameUtils.ANALYTICS_TRIGGER_WORKER_SUFFIX);

      _logger.info(
          ErrorUtils.get(
              "Attaching analytics AnalyticsTriggerWorkerActor {0} to bus {1}",
              trigger_worker, ActorUtils.ANALYTICS_TRIGGER_BUS));

      _db_actor_context
          .getAnalyticsTriggerBus()
          .subscribe(trigger_worker, ActorUtils.ANALYTICS_TRIGGER_BUS);

      _logger.info(
          ErrorUtils.get(
              "Registering {1} with {0}", ActorUtils.BUCKET_ANALYTICS_ZOOKEEPER, hostname));

      registerEphemeralZookeeperNode(
          ActorUtils.BUCKET_ANALYTICS_ZOOKEEPER + "/" + hostname,
          "Shutting down IkanowV1SynchronizationModule subservice=analytics");

      _logger.info("Starting IkanowV1SynchronizationModule subservice=analytics");
    }

    ////////////////////////////////////////////////////////////////

    // GOVERNANCE

    if (_service_config.governance_enabled()) {
      _core_distributed_services.createSingletonActor(
          hostname + ".governance.actors.DataAgeOutSupervisor",
          ImmutableSet.<String>builder()
              .add(DistributedServicesPropertyBean.ApplicationNames.DataImportManager.toString())
              .build(),
          Props.create(DataAgeOutSupervisor.class));

      _logger.info("Starting IkanowV1SynchronizationModule subservice=governance");
    }

    // Keep the calling thread alive indefinitely; interrupts are deliberately ignored so that
    // this loop never exits
    for (; ; ) {
      try {
        Thread.sleep(10000);
      } catch (Exception e) {
      }
    }
  }

  /**
   * Creates an ephemeral ZooKeeper node at the given path (creating parents as needed), trying up
   * to 7 times with a 10 second pause between attempts, then installs a JVM shutdown hook that
   * logs the given message and deletes the node (and any children).
   *
   * <p>The shutdown hook is installed even if every creation attempt failed; each failure is
   * logged as a warning.
   *
   * @param zk_path the full ZooKeeper path of the node to create
   * @param shutdown_message the message logged by the shutdown hook before it deletes the node
   */
  private void registerEphemeralZookeeperNode(final String zk_path, final String shutdown_message) {
    final int MAX_ZK_ATTEMPTS = 6;

    for (int i = 0; i <= MAX_ZK_ATTEMPTS; ++i) {
      try {
        _core_distributed_services
            .getCuratorFramework()
            .create()
            .creatingParentsIfNeeded()
            .withMode(CreateMode.EPHEMERAL)
            .forPath(zk_path);
        break;
      } catch (Exception e) {
        final boolean retrying = i < MAX_ZK_ATTEMPTS;
        _logger.warn(
            ErrorUtils.getLongForm(
                "Failed to register with Zookeeper: {0}, retrying={1}", e, retrying));
        if (retrying) { // (no point pausing after the final attempt)
          try {
            Thread.sleep(10000L);
          } catch (Exception __) {
            // interrupted while pausing: just move on to the next attempt
          }
        }
      }
    }
    Runtime.getRuntime()
        .addShutdownHook(
            new Thread(
                Lambdas.wrap_runnable_u(
                    () -> {
                      _logger.info(shutdown_message);
                      _core_distributed_services
                          .getCuratorFramework()
                          .delete()
                          .deletingChildrenIfNeeded()
                          .forPath(zk_path);
                    })));
  }
Ejemplo n.º 8
0
  /**
   * Given a bucket, fetches the shared libraries matching {@code getQuery(bucket)} and spins off a
   * {@code JarCacheUtils.getCachedJar} call for each of them. Returns either - a future containing
   * the first error encountered, _or_ a map (both name and id as keys) of path names (and guarantee
   * that the file has been cached when the future completes)
   *
   * <p>If {@code getQuery(bucket)} returns nothing then the future completes immediately with an
   * empty map.
   *
   * @param bucket the bucket whose shared libraries are to be cached
   * @param management_db used to get at the shared library store
   * @param globals supplies the local directory passed to the jar cache
   * @param fs the storage service passed to the jar cache
   * @param context the service context used (together with the bucket's owner id) to secure the
   *     shared library store
   * @param handler_for_errors passed to {@code SharedErrorUtils.buildErrorMessage} (and to the jar
   *     cache) when an error needs to be reported
   * @param msg_for_errors passed to {@code SharedErrorUtils.buildErrorMessage} (and to the jar
   *     cache) when an error needs to be reported
   * @param <M> the type of the message being handled
   * @return a future containing the first error encountered, _or_ a map (both name and id as keys)
   *     of (library bean, path name) pairs
   */
  @SuppressWarnings("unchecked")
  protected static <M>
      CompletableFuture<
              Validation<BasicMessageBean, Map<String, Tuple2<SharedLibraryBean, String>>>>
          cacheJars(
              final DataBucketBean bucket,
              final IManagementDbService management_db,
              final GlobalPropertiesBean globals,
              final IStorageService fs,
              final IServiceContext context,
              final String handler_for_errors,
              final M msg_for_errors) {
    try {
      final Optional<QueryComponent<SharedLibraryBean>> spec = getQuery(bucket);
      if (!spec.isPresent()) {
        // Nothing to look up, so nothing to cache
        return CompletableFuture.completedFuture(
            Validation.<BasicMessageBean, Map<String, Tuple2<SharedLibraryBean, String>>>success(
                Collections.emptyMap()));
      }

      return management_db
          .getSharedLibraryStore()
          .secured(context, new AuthorizationBean(bucket.owner_id()))
          .getObjectsBySpec(spec.get())
          .thenComposeAsync(
              cursor -> {
                // This is a map of futures from the cache call - either an error or the path name
                // note we use a tuple of (name, id) as the key and then flatten out later
                final Map<
                        Tuple2<String, String>,
                        Tuple2<
                            SharedLibraryBean,
                            CompletableFuture<Validation<BasicMessageBean, String>>>>
                    map_of_futures =
                        StreamSupport.stream(cursor.spliterator(), true)
                            .collect(
                                Collectors
                                    .<SharedLibraryBean, Tuple2<String, String>,
                                        Tuple2<
                                            SharedLibraryBean,
                                            CompletableFuture<
                                                Validation<BasicMessageBean, String>>>>
                                        toMap(
                                            // want to keep both the name and id versions - will
                                            // flatten out below
                                            lib -> Tuples._2T(lib.path_name(), lib._id()), // (key)
                                            // spin off a future in which the file is being copied -
                                            // save the shared library bean also
                                            lib ->
                                                Tuples._2T(
                                                    lib, // (value)
                                                    JarCacheUtils.getCachedJar(
                                                        globals.local_cached_jar_dir(),
                                                        lib,
                                                        fs,
                                                        handler_for_errors,
                                                        msg_for_errors))));

                // denest from map of futures to future of maps, also handle any errors here:
                // (some sort of "lift" function would be useful here - these are a somewhat
                // inelegant few steps)

                // (generic array creation isn't possible, hence the raw array + unchecked cast)
                final CompletableFuture<Validation<BasicMessageBean, String>>[] futures =
                    (CompletableFuture<Validation<BasicMessageBean, String>>[])
                        map_of_futures
                            .values()
                            .stream()
                            .map(t2 -> t2._2())
                            .toArray(CompletableFuture[]::new);

                // (have to embed this thenApply instead of bringing it outside as part of the
                // toCompose chain, because otherwise we'd lose map_of_futures scope)
                return CompletableFuture.allOf(futures)
                    .<Validation<BasicMessageBean, Map<String, Tuple2<SharedLibraryBean, String>>>>
                        thenApply(
                            __ -> {
                              try {
                                // (a duplicate key, eg two libraries sharing a path_name, makes
                                // toMap throw - that gets reported as a failure by the catch below)
                                final Map<String, Tuple2<SharedLibraryBean, String>> almost_there =
                                    map_of_futures
                                        .entrySet()
                                        .stream()
                                        .flatMap(
                                            kv -> {
                                              // (must have already returned if here)
                                              final Validation<BasicMessageBean, String> ret =
                                                  kv.getValue()._2().join();
                                              return ret
                                                  .<Stream<
                                                          Tuple2<
                                                              String,
                                                              Tuple2<SharedLibraryBean, String>>>>
                                                      validation(
                                                          // Error:
                                                          err -> {
                                                            throw new RuntimeException(
                                                                err.message());
                                                          } // (not ideal, but will do)
                                                          ,
                                                          // Normal:
                                                          s -> {
                                                            return Arrays.asList(
                                                                    // result object with path_name
                                                                    Tuples._2T(
                                                                        kv.getKey()._1(),
                                                                        Tuples._2T(
                                                                            kv.getValue()._1(),
                                                                            s)),
                                                                    // result object with id
                                                                    Tuples._2T(
                                                                        kv.getKey()._2(),
                                                                        Tuples._2T(
                                                                            kv.getValue()._1(),
                                                                            s)))
                                                                .stream();
                                                          });
                                            })
                                        .collect(
                                            Collectors
                                                .<Tuple2<String, Tuple2<SharedLibraryBean, String>>,
                                                    String, Tuple2<SharedLibraryBean, String>>
                                                    toMap(
                                                        idname_path -> idname_path._1(), // (key)
                                                        idname_path -> idname_path._2() // (value)
                                                        ));
                                return Validation
                                    .<BasicMessageBean,
                                        Map<String, Tuple2<SharedLibraryBean, String>>>
                                        success(almost_there);
                              } catch (
                                  Exception e) { // handle the exception thrown above containing the
                                // message bean from whatever the original error was!
                                return Validation
                                    .<BasicMessageBean,
                                        Map<String, Tuple2<SharedLibraryBean, String>>>
                                        fail(
                                            SharedErrorUtils.buildErrorMessage(
                                                handler_for_errors,
                                                msg_for_errors,
                                                e.getMessage()));
                              }
                            });
              });
    } catch (Throwable e) { // (can only occur if the DB call errors)
      return CompletableFuture.completedFuture(
          Validation.fail(
              SharedErrorUtils.buildErrorMessage(
                  handler_for_errors,
                  msg_for_errors,
                  ErrorUtils.getLongForm(
                      SharedErrorUtils.ERROR_CACHING_SHARED_LIBS, e, bucket.full_name()))));
    }
  }
Ejemplo n.º 9
0
  /**
   * Works out which streaming enrichment topology to use for a bucket, given the outcome of caching
   * its shared libraries:
   *
   * <ul>
   *   <li>an upstream (library caching) error is passed straight through
   *   <li>if the bucket's streaming enrichment topology is explicitly disabled, or there are no
   *       libraries, a {@code PassthroughTopology} is returned
   *   <li>otherwise the first library that declares a streaming enrichment (or, failing that, misc)
   *       entry point is loaded via {@code ClassloaderUtils.getFromCustomClasspath}, which is also
   *       handed the paths of all the cached libraries
   * </ul>
   *
   * @param bucket the bucket being processed
   * @param m the message that triggered this call (used when building error messages)
   * @param source the source name used when building error messages
   * @param err_or_libs "pipeline element" - either the error from caching the libraries, or the map
   *     of (library bean, cached path) pairs
   * @return either an error message, or the topology to use
   */
  protected static Validation<BasicMessageBean, IEnrichmentStreamingTopology> getStreamingTopology(
      final DataBucketBean bucket,
      final BucketActionMessage m,
      final String source,
      final Validation<BasicMessageBean, Map<String, Tuple2<SharedLibraryBean, String>>>
          err_or_libs // "pipeline element"
      ) {
    try {
      return err_or_libs.<Validation<BasicMessageBean, IEnrichmentStreamingTopology>>validation(
          // Upstream error: nothing to do except hand it on
          upstream_error -> Validation.fail(upstream_error),
          // Libraries were cached OK
          cached_libs -> {
            // Two easy cases that need no user code: streaming switched off, or no libraries
            final boolean streaming_enabled =
                Optional.ofNullable(bucket.streaming_enrichment_topology().enabled()).orElse(true);
            if (!streaming_enabled || cached_libs.isEmpty()) {
              return Validation.success(new PassthroughTopology());
            }

            // Pick the first library that names an entry point of either flavour
            Tuple2<SharedLibraryBean, String> entry_lib_and_path = null;
            for (final Tuple2<SharedLibraryBean, String> candidate : cached_libs.values()) {
              final SharedLibraryBean lib = candidate._1();
              if (null == lib) {
                continue;
              }
              if ((null != lib.streaming_enrichment_entry_point())
                  || (null != lib.misc_entry_point())) {
                entry_lib_and_path = candidate;
                break;
              }
            }

            // Nice easy error case, probably can't ever happen
            if ((null == entry_lib_and_path) || (null == entry_lib_and_path._2())) {
              return Validation.fail(
                  SharedErrorUtils.buildErrorMessage(
                      source,
                      m,
                      SharedErrorUtils.SHARED_LIBRARY_NAME_NOT_FOUND,
                      bucket.full_name(),
                      "(unknown)"));
            }

            // Load the entry point class, with every cached library path made available to it
            return ClassloaderUtils.getFromCustomClasspath(
                IEnrichmentStreamingTopology.class,
                Optional.ofNullable(entry_lib_and_path._1().streaming_enrichment_entry_point())
                    .orElse(entry_lib_and_path._1().misc_entry_point()),
                Optional.of(entry_lib_and_path._2()),
                cached_libs
                    .values()
                    .stream()
                    .map(lib_and_path -> lib_and_path._2())
                    .collect(Collectors.toList()),
                source,
                m);
          });
    } catch (Throwable t) {
      // Anything thrown by the steps above is converted into an error message
      return Validation.fail(
          SharedErrorUtils.buildErrorMessage(
              source,
              m,
              ErrorUtils.getLongForm(
                  SharedErrorUtils.ERROR_LOADING_CLASS,
                  t,
                  bucket.harvest_technology_name_or_id())));
    }
  }
Ejemplo n.º 10
0
  /**
   * Applies a bucket action message to the bucket's streaming job via the storm controller.
   *
   * <p>If the topology could not be obtained, the error is returned wrapped in a {@code
   * BucketActionHandlerMessage}. Otherwise the context is pointed at the bucket, the topology's
   * class name and the matching library bean, and the message type decides what happens:
   *
   * <ul>
   *   <li>delete: the job is stopped
   *   <li>new: the job is started, unless the message says it is suspended (then stopped)
   *   <li>update: the job is restarted if the message says it is enabled (else stopped)
   *   <li>test: the job is restarted
   *   <li>anything else: an "Unknown message" failure reply
   * </ul>
   *
   * @param storm_controller the controller used to start/stop/restart the job
   * @param bucket the bucket being acted on
   * @param m the message describing the action
   * @param err_or_user_topology either an error, or the topology to run
   * @param err_or_map either an error, or the map of (library bean, cached path) pairs
   * @param source the source name used in replies
   * @param context the streaming enrichment context to configure
   * @param yarn_config_dir not used by this method
   * @param cached_jars_dir passed through to the start/restart job calls
   * @return a future containing the reply (which can describe success or an error)
   */
  protected static CompletableFuture<BucketActionReplyMessage> talkToStream(
      final IStormController storm_controller,
      final DataBucketBean bucket,
      final BucketActionMessage m,
      final Validation<BasicMessageBean, IEnrichmentStreamingTopology> err_or_user_topology,
      final Validation<BasicMessageBean, Map<String, Tuple2<SharedLibraryBean, String>>> err_or_map,
      final String source,
      final StreamingEnrichmentContext context,
      final String yarn_config_dir,
      final String cached_jars_dir) {
    try {
      // Paths of the cached user libraries, with the first "file:" stripped from each
      final List<String> user_lib_paths =
          err_or_map.<List<String>>validation(
              lib_error -> Collections.emptyList(), // (going to die soon anyway)
              lib_map ->
                  lib_map
                      .values()
                      .stream()
                      .map(lib_and_path -> lib_and_path._2.replaceFirst("file:", ""))
                      .collect(Collectors.toList()));

      return err_or_user_topology.<CompletableFuture<BucketActionReplyMessage>>validation(
          // ERROR getting enrichment topology
          topology_error ->
              CompletableFuture.completedFuture(
                  new BucketActionHandlerMessage(source, topology_error)),
          // NORMAL grab enrichment topology
          user_topology -> {
            final Class<?> topology_class = user_topology.getClass();
            final String topology_class_name = topology_class.getName();

            context.setBucket(bucket);
            context.setUserTopologyEntryPoint(topology_class_name);

            // also set the library bean - note if here then must have been set, else
            // IHarvestTechnologyModule wouldn't exist
            err_or_map.forEach(
                lib_map -> {
                  final Optional<SharedLibraryBean> entry_point_lib =
                      lib_map
                          .values()
                          .stream()
                          .map(lib_and_path -> lib_and_path._1())
                          .filter(
                              lib ->
                                  topology_class_name.equals(lib.misc_entry_point())
                                      || topology_class_name.equals(
                                          lib.streaming_enrichment_entry_point()))
                          .findFirst();
                  // (if nothing matches this is a passthrough topology, so just use a dummy
                  // library bean)
                  context.setLibraryConfig(
                      entry_point_lib.orElse(
                          BeanTemplateUtils.build(SharedLibraryBean.class).done().get()));
                });

            _logger.info(
                "Set active class="
                    + topology_class
                    + " message="
                    + m.getClass().getSimpleName()
                    + " bucket="
                    + bucket.full_name());

            return Patterns.match(m)
                .<CompletableFuture<BucketActionReplyMessage>>andReturn()
                .when(
                    BucketActionMessage.DeleteBucketActionMessage.class,
                    delete_msg -> {
                      return StormControllerUtil.stopJob(storm_controller, bucket);
                    })
                .when(
                    BucketActionMessage.NewBucketActionMessage.class,
                    new_msg -> {
                      if (new_msg.is_suspended()) {
                        // (nothing to do but just do this to return something sensible)
                        return StormControllerUtil.stopJob(storm_controller, bucket);
                      }
                      return StormControllerUtil.startJob(
                          storm_controller,
                          bucket,
                          context,
                          user_lib_paths,
                          user_topology,
                          cached_jars_dir);
                    })
                .when(
                    BucketActionMessage.UpdateBucketActionMessage.class,
                    update_msg -> {
                      if (!update_msg.is_enabled()) {
                        return StormControllerUtil.stopJob(storm_controller, bucket);
                      }
                      return StormControllerUtil.restartJob(
                          storm_controller,
                          bucket,
                          context,
                          user_lib_paths,
                          user_topology,
                          cached_jars_dir);
                    })
                .when(
                    BucketActionMessage.TestBucketActionMessage.class,
                    test_msg -> {
                      // TODO (ALEPH-25): in the future run this test with local storm rather than
                      // remote storm_controller
                      return StormControllerUtil.restartJob(
                          storm_controller,
                          bucket,
                          context,
                          user_lib_paths,
                          user_topology,
                          cached_jars_dir);
                    })
                .otherwise(
                    other_msg -> {
                      final BasicMessageBean unknown_message_bean =
                          new BasicMessageBean(
                              new Date(),
                              false,
                              null,
                              "Unknown message",
                              0,
                              "Unknown message",
                              null);
                      return CompletableFuture.completedFuture(
                          new BucketActionHandlerMessage(source, unknown_message_bean));
                    });
          });
    } catch (Throwable e) { // (trying to use Validation to avoid this, but just in case...)
      final String error_text = ErrorUtils.getLongForm("Error loading streaming class: {0}", e);
      final BasicMessageBean error_bean =
          new BasicMessageBean(new Date(), false, null, error_text, 0, error_text, null);
      return CompletableFuture.completedFuture(new BucketActionHandlerMessage(source, error_bean));
    }
  }
Ejemplo n.º 11
0
  /**
   * Builds this actor's message handling:
   *
   * <ul>
   *   <li>a {@code BucketActionMessage} that lists handling clients, none of which is this host, is
   *       ignored
   *   <li>a {@code BucketActionOfferMessage} is answered immediately: "will accept" if a {@code
   *       storm.yaml} exists in the local yarn config directory, "ignored" otherwise
   *   <li>any other {@code BucketActionMessage} has its libraries cached, its topology resolved
   *       and is then applied via {@code talkToStream}; the resulting reply (or, as a backstop, an
   *       error reply) is sent back to the original sender
   * </ul>
   *
   * @see akka.actor.AbstractActor#receive()
   */
  @Override
  public PartialFunction<Object, BoxedUnit> receive() {
    return ReceiveBuilder.match(
            BucketActionMessage.class,
            m ->
                !m.handling_clients().isEmpty()
                    && !m.handling_clients()
                        .contains(_context.getInformationService().getHostname()),
            __ -> {}) // (do nothing if it's not for me)
        .match(
            BucketActionOfferMessage.class,
            m -> {
              _logger.info(
                  ErrorUtils.get(
                      "Actor {0} received message {1} from {2} bucket {3}",
                      this.self(),
                      m.getClass().getSimpleName(),
                      this.sender(),
                      m.bucket().full_name()));

              final ActorRef closing_sender = this.sender();
              final ActorRef closing_self = this.self();

              final String hostname = _context.getInformationService().getHostname();

              // (this isn't async so doesn't require any futures)

              // Only offer to handle the bucket if this host has a storm configuration
              final boolean accept_or_ignore =
                  new File(_globals.local_yarn_config_dir() + File.separator + "storm.yaml")
                      .exists();

              final BucketActionReplyMessage reply =
                  accept_or_ignore
                      ? new BucketActionReplyMessage.BucketActionWillAcceptMessage(hostname)
                      : new BucketActionReplyMessage.BucketActionIgnoredMessage(hostname);

              closing_sender.tell(reply, closing_self);
            })
        .match(
            BucketActionMessage.class,
            m -> {
              _logger.info(
                  ErrorUtils.get(
                      "Actor {0} received message {1} from {2} bucket={3}",
                      this.self(),
                      m.getClass().getSimpleName(),
                      this.sender(),
                      m.bucket().full_name()));

              // (captured up front because they are used from the future callbacks below)
              final ActorRef closing_sender = this.sender();
              final ActorRef closing_self = this.self();

              final String hostname = _context.getInformationService().getHostname();

              // (cacheJars can't throw checked or unchecked in this thread, only from within
              // exceptions)
              cacheJars(
                      m.bucket(),
                      _management_db,
                      _globals,
                      _fs,
                      _context.getServiceContext(),
                      hostname,
                      m)
                  .thenComposeAsync(
                      err_or_map -> {
                        final StreamingEnrichmentContext e_context =
                            _context.getNewStreamingEnrichmentContext();

                        final Validation<BasicMessageBean, IEnrichmentStreamingTopology>
                            err_or_tech_module =
                                getStreamingTopology(m.bucket(), m, hostname, err_or_map);

                        final CompletableFuture<BucketActionReplyMessage> ret =
                            talkToStream(
                                _storm_controller,
                                m.bucket(),
                                m,
                                err_or_tech_module,
                                err_or_map,
                                hostname,
                                e_context,
                                _globals.local_yarn_config_dir(),
                                _globals.local_cached_jar_dir());
                        return ret;
                      })
                  .thenAccept(
                      reply -> { // (reply can contain an error or successful reply, they're the
                        // same bean type)
                        // Some information logging:
                        Patterns.match(reply)
                            .andAct()
                            .when(
                                BucketActionHandlerMessage.class,
                                msg ->
                                    _logger.info(
                                        ErrorUtils.get(
                                            "Standard reply to message={0}, bucket={1}, success={2}",
                                            m.getClass().getSimpleName(),
                                            m.bucket().full_name(),
                                            msg.reply().success())))
                            .when(
                                BucketActionReplyMessage.BucketActionWillAcceptMessage.class,
                                msg ->
                                    _logger.info(
                                        ErrorUtils.get(
                                            "Standard reply to message={0}, bucket={1}",
                                            m.getClass().getSimpleName(), m.bucket().full_name())))
                            .otherwise(
                                msg ->
                                    _logger.info(
                                        ErrorUtils.get(
                                            "Unusual reply to message={0}, type={2}, bucket={1}",
                                            m.getClass().getSimpleName(),
                                            m.bucket().full_name(),
                                            msg.getClass().getSimpleName())));

                        closing_sender.tell(reply, closing_self);
                      })
                  .exceptionally(
                      e -> { // another bit of error handling that shouldn't ever be called but is a
                        // useful backstop
                        // Some information logging:
                        // (the {0}-style template has to go through ErrorUtils.get, like the other
                        // log calls in this method - handed directly to the logger the
                        // placeholders were never filled in. No single quotes around {0}: they
                        // would act as escape characters if ErrorUtils.get is MessageFormat-based)
                        _logger.warn(
                            ErrorUtils.get(
                                "Unexpected error replying to {0}: error = {1}, bucket={2}",
                                BeanTemplateUtils.toJson(m).toString(),
                                ErrorUtils.getLongForm("{0}", e),
                                m.bucket().full_name()));

                        final BasicMessageBean error_bean =
                            SharedErrorUtils.buildErrorMessage(
                                hostname,
                                m,
                                ErrorUtils.getLongForm(
                                    StreamErrorUtils.STREAM_UNKNOWN_ERROR,
                                    e,
                                    m.bucket().full_name()));
                        closing_sender.tell(
                            new BucketActionHandlerMessage(hostname, error_bean), closing_self);
                        return null;
                      });
            })
        .build();
  }