/**
  * Reports whether streaming enrichment is active for the given bucket.
  *
  * <p>All of the following must hold: the bucket's master enrichment type is contained in
  * {@code _enrichment_types}, a streaming enrichment topology is configured, and that topology is
  * not explicitly disabled (an unset {@code enabled} flag is treated as enabled).
  *
  * @param bucket the bucket to inspect
  * @return true if streaming enrichment is both configured and enabled for the bucket
  */
 private static boolean streamingEnrichmentEnabled(final DataBucketBean bucket) {
   // Enrichment type does not involve streaming: nothing else to check
   if (!_enrichment_types.contains(bucket.master_enrichment_type())) {
     return false;
   }
   // No topology => not enabled; topology with no "enabled" flag => enabled by default
   return Optional.ofNullable(bucket.streaming_enrichment_topology())
       .map(topology -> Optional.ofNullable(topology.enabled()).orElse(true))
       .orElse(false);
 }
  /**
   * Applies the Storm-specific validation rules to one analytic job.
   *
   * <p>Only the temporary limitations of this technology are policed here (the generic
   * "boilerplate" checks are performed by the core validation):
   *
   * <ul>
   *   <li>every input must use the {@code "stream"} data service;
   *   <li>a transient output must be of the streaming type.
   * </ul>
   *
   * @param analytic_bucket - the bucket (used only to build error messages)
   * @param jobs - the entire list of jobs (not used by the current checks)
   * @param job - the job being validated
   * @return a message bean whose success flag is false if any rule was violated; the individual
   *     errors are joined with ";" in the message (reported under the "validateJobs" command)
   */
  public static BasicMessageBean validateJob(
      final DataBucketBean analytic_bucket,
      final Collection<AnalyticThreadJobBean> jobs,
      final AnalyticThreadJobBean job) {

    // Inputs: one error per input that is not streaming
    final LinkedList<String> problems =
        Optionals.ofNullable(job.inputs())
            .stream()
            .filter(input -> !"stream".equals(input.data_service()))
            .map(
                input ->
                    ErrorUtils.get(
                        ErrorUtils.TEMP_INPUTS_MUST_BE_STREAMING,
                        analytic_bucket.full_name(),
                        job.name(),
                        input.data_service()))
            .collect(Collectors.toCollection(LinkedList::new));

    // Output: only transient outputs are restricted (an unset is_transient counts as false)
    final boolean transient_output =
        (null != job.output()) && Optional.ofNullable(job.output().is_transient()).orElse(false);

    if (transient_output) {
      // An unset transient type is treated as "none", which is also rejected
      final MasterEnrichmentType output_type =
          Optional.ofNullable(job.output().transient_type()).orElse(MasterEnrichmentType.none);
      if (MasterEnrichmentType.streaming != output_type) {
        problems.add(
            ErrorUtils.get(
                ErrorUtils.TEMP_TRANSIENT_OUTPUTS_MUST_BE_STREAMING,
                analytic_bucket.full_name(),
                job.name(),
                output_type));
      }
    }

    return ErrorUtils.buildMessage(
        problems.isEmpty(),
        StormAnalyticTechnologyUtils.class,
        "validateJobs",
        problems.stream().collect(Collectors.joining(";")));
  }
  /**
   * Validates all jobs of a bucket for this analytic technology.
   *
   * <p>A bucket-wide check runs first: the bucket must not combine enabled streaming enrichment
   * with an analytic thread containing at least one enabled job (a job with an unset {@code
   * enabled} flag counts as enabled). If that check fails its error is returned immediately.
   * Otherwise every job is validated individually via {@code validateJob} and the results are
   * merged.
   *
   * @param analytic_bucket - the bucket (just for context)
   * @param jobs - the entire list of jobs
   * @return the validated bean (check for success:false); per-job messages are joined with
   *     newlines
   */
  public static BasicMessageBean validateJobs(
      final DataBucketBean analytic_bucket, final Collection<AnalyticThreadJobBean> jobs) {

    // Global validation:
    // streaming enrichment and analytic threads cannot currently be mixed in one bucket
    // (maybe later this can be allowed, but it gets complicated)

    final BasicMessageBean global_res =
        Lambdas.get(
            () -> {
              final boolean mixed_mode =
                  streamingEnrichmentEnabled(analytic_bucket) // all streaming options on...
                      && (null != analytic_bucket.analytic_thread()) // ...analytics present...
                      && Optionals.ofNullable(analytic_bucket.analytic_thread().jobs())
                          .stream()
                          // ...and at least one job is enabled
                          .anyMatch(j -> Optional.ofNullable(j.enabled()).orElse(true));

              if (mixed_mode) {
                return ErrorUtils.buildErrorMessage(
                    StormAnalyticTechnologyUtils.class,
                    "validateJobs",
                    ErrorUtils.get(
                        ErrorUtils.TEMP_MIXED_ANALYTICS_AND_ENRICHMENT,
                        analytic_bucket.full_name()));
              }
              return ErrorUtils.buildSuccessMessage(
                  StormAnalyticTechnologyUtils.class, "validateJobs", "");
            });

    if (!global_res.success()) {
      return global_res;
    }

    // Per-job validation (only reached when the global check passed)

    final List<BasicMessageBean> per_job_results =
        jobs.stream()
            .map(job -> validateJob(analytic_bucket, jobs, job))
            .collect(Collectors.toList());

    final boolean all_ok = per_job_results.stream().allMatch(BasicMessageBean::success);

    final String combined_message =
        per_job_results.stream().map(BasicMessageBean::message).collect(Collectors.joining("\n"));

    return ErrorUtils.buildMessage(
        all_ok, StormAnalyticTechnologyUtils.class, "validateJobs", combined_message);
  }
Exemple #4
0
  /**
   * Checks that converting a bucket to its test equivalent keeps the id, re-roots the full name
   * under {@code /aleph2_testing/<user_id>}, and that only the converted bucket is reported as a
   * test bucket.
   */
  @Test
  public void test_ConvertDataBucketBeanToTest() {
    // Fixture values
    final String user_id = "user12345";
    final String original_id = "id12345";
    final String original_full_name = "/my_bean/sample_path";
    final String expected_test_name = "/aleph2_testing/" + user_id + original_full_name;

    final DataBucketBean original_bean =
        BeanTemplateUtils.build(DataBucketBean.class)
            .with(DataBucketBean::_id, original_id)
            .with(DataBucketBean::full_name, original_full_name)
            .done()
            .get();

    final DataBucketBean test_bean =
        BucketUtils.convertDataBucketBeanToTest(original_bean, user_id);

    // The id is carried over unchanged, the path gains the testing prefix
    assertTrue(test_bean._id().equals(original_id));
    assertTrue(
        "Name is wrong: " + test_bean.full_name(),
        test_bean.full_name().equals(expected_test_name));

    // Only the converted bean counts as a test bucket
    assertTrue(BucketUtils.isTestBucket(test_bean));
    assertFalse(BucketUtils.isTestBucket(original_bean));
  }
  /**
   * Builds the query used to fetch the shared library beans referenced by a bucket's streaming
   * enrichment topology.
   *
   * <p>Each entry of the topology's {@code library_names_or_ids} is turned into an any-of
   * sub-query over {@code _id} and {@code path_name}, and the sub-queries are combined into a
   * single any-of query. A missing topology or a missing name list is treated as an empty list.
   *
   * @param bucket the bucket whose streaming enrichment libraries are required
   * @return the combined query, or {@code Optional.empty()} if the combined query has no elements
   */
  protected static Optional<QueryComponent<SharedLibraryBean>> getQuery(
      final DataBucketBean bucket) {
    // One "id or path name equals <name>" sub-query per referenced library
    final Stream<QueryComponent<SharedLibraryBean>> per_library_queries =
        Optionals.ofNullable(
                Optional.ofNullable(bucket.streaming_enrichment_topology())
                    .map(topology -> topology.library_names_or_ids())
                    .orElse(Collections.emptyList()))
            .stream()
            .map(
                name_or_id ->
                    CrudUtils.anyOf(SharedLibraryBean.class)
                        .when(SharedLibraryBean::_id, name_or_id)
                        .when(SharedLibraryBean::path_name, name_or_id));

    final CrudUtils.MultiQueryComponent<SharedLibraryBean> combined =
        CrudUtils.<SharedLibraryBean>anyOf(per_library_queries);

    // Nothing to look up => no query at all
    if (combined.getElements().isEmpty()) {
      return Optional.empty();
    }
    return Optional.of(combined);
  }
  /**
   * Exercises {@code handlePotentiallyNewIndex} against the index template store and the
   * service's {@code _bucket_template_cache}.
   *
   * <p>The steps share state (the template registered under {@code template_name} and the cache),
   * so they must run in the order written:
   *
   * <ol>
   *   <li>template is created when none exists;
   *   <li>a repeated call with the same bucket leaves a single template/cache entry;
   *   <li>a newer "modified" date with unchanged content updates the cached date;
   *   <li>a newer date with different content changes the stored mapping;
   *   <li>after the template is deleted, it is only recreated once the bucket's "modified" date
   *       moves forward.
   * </ol>
   */
  @Test
  public void test_indexCreation() throws IOException {

    // Fixed "modified" timestamps for the bucket (note: Calendar months are 0-based)
    final Calendar time_setter = GregorianCalendar.getInstance();
    time_setter.set(2015, 1, 1, 13, 0, 0);

    final String bucket_str =
        Resources.toString(
            Resources.getResource(
                "com/ikanow/aleph2/search_service/elasticsearch/services/test_bucket_validate_success.json"),
            Charsets.UTF_8);
    final DataBucketBean bucket =
        BeanTemplateUtils.build(bucket_str, DataBucketBean.class)
            .with("modified", time_setter.getTime())
            .done()
            .get();

    // The mapping the template is expected to contain for the bucket above
    final String mapping_str =
        Resources.toString(
            Resources.getResource(
                "com/ikanow/aleph2/search_service/elasticsearch/services/test_verbose_mapping_validate_results.json"),
            Charsets.UTF_8);
    final JsonNode mapping_json = _mapper.readTree(mapping_str.getBytes());

    final String template_name = ElasticsearchIndexUtils.getBaseIndexName(bucket);

    // Start from a clean slate: remove any template left over from a previous run
    try {
      _crud_factory
          .getClient()
          .admin()
          .indices()
          .prepareDeleteTemplate(template_name)
          .execute()
          .actionGet();
    } catch (Exception e) {
    } // (Deliberately ignored: a failure here just means the template doesn't exist)

    // Create index template from empty

    {
      final GetIndexTemplatesRequest gt = new GetIndexTemplatesRequest().names(template_name);
      final GetIndexTemplatesResponse gtr =
          _crud_factory.getClient().admin().indices().getTemplates(gt).actionGet();
      assertTrue("No templates to start with", gtr.getIndexTemplates().isEmpty());

      _index_service.handlePotentiallyNewIndex(
          bucket,
          Optional.empty(),
          ElasticsearchIndexConfigUtils.buildConfigBeanFromSchema(bucket, _config_bean, _mapper),
          "_default_");

      // Exactly one template and one cache entry should now exist
      final GetIndexTemplatesRequest gt2 = new GetIndexTemplatesRequest().names(template_name);
      final GetIndexTemplatesResponse gtr2 =
          _crud_factory.getClient().admin().indices().getTemplates(gt2).actionGet();
      assertEquals(1, _index_service._bucket_template_cache.size());
      assertEquals(1, gtr2.getIndexTemplates().size());

      assertTrue(
          "Mappings should be equivalent",
          ElasticsearchIndexService.mappingsAreEquivalent(
              gtr2.getIndexTemplates().get(0), mapping_json, _mapper));
    }

    // Check is ignored subsequently (same date, same content; same date, different content)
    {
      _index_service.handlePotentiallyNewIndex(
          bucket,
          Optional.empty(),
          ElasticsearchIndexConfigUtils.buildConfigBeanFromSchema(bucket, _config_bean, _mapper),
          "_default_");

      // Still a single template and a single cache entry
      final GetIndexTemplatesRequest gt2 = new GetIndexTemplatesRequest().names(template_name);
      final GetIndexTemplatesResponse gtr2 =
          _crud_factory.getClient().admin().indices().getTemplates(gt2).actionGet();
      assertEquals(1, _index_service._bucket_template_cache.size());
      assertEquals(1, gtr2.getIndexTemplates().size());
    }

    // Check is checked-but-left if time updated, content not
    {
      time_setter.set(2015, 1, 1, 14, 0, 0);
      final Date next_time = time_setter.getTime();
      final DataBucketBean bucket2 =
          BeanTemplateUtils.clone(bucket).with("modified", next_time).done();

      _index_service.handlePotentiallyNewIndex(
          bucket2,
          Optional.empty(),
          ElasticsearchIndexConfigUtils.buildConfigBeanFromSchema(bucket2, _config_bean, _mapper),
          "_default_");

      // The cache entry (keyed by bucket id) now carries the newer date
      final GetIndexTemplatesRequest gt2 = new GetIndexTemplatesRequest().names(template_name);
      final GetIndexTemplatesResponse gtr2 =
          _crud_factory.getClient().admin().indices().getTemplates(gt2).actionGet();
      assertEquals(1, _index_service._bucket_template_cache.size());
      assertEquals(next_time, _index_service._bucket_template_cache.get(bucket._id()));
      assertEquals(1, gtr2.getIndexTemplates().size());
    }

    // Check is updated if time-and-content is different
    {
      time_setter.set(2015, 1, 1, 15, 0, 0);
      // A second bucket definition, loaded from a different resource
      // NOTE(review): the cache lookup below uses bucket._id(), so this resource presumably
      // declares the same _id as the first bucket - confirm
      final String bucket_str2 =
          Resources.toString(
              Resources.getResource(
                  "com/ikanow/aleph2/search_service/elasticsearch/services/test_bucket2_validate_success.json"),
              Charsets.UTF_8);
      final DataBucketBean bucket2 =
          BeanTemplateUtils.build(bucket_str2, DataBucketBean.class)
              .with("modified", time_setter.getTime())
              .done()
              .get();

      _index_service.handlePotentiallyNewIndex(
          bucket2,
          Optional.empty(),
          ElasticsearchIndexConfigUtils.buildConfigBeanFromSchema(bucket2, _config_bean, _mapper),
          "_default_");

      final GetIndexTemplatesRequest gt2 = new GetIndexTemplatesRequest().names(template_name);
      final GetIndexTemplatesResponse gtr2 =
          _crud_factory.getClient().admin().indices().getTemplates(gt2).actionGet();
      assertEquals(1, _index_service._bucket_template_cache.size());
      assertEquals(time_setter.getTime(), _index_service._bucket_template_cache.get(bucket._id()));
      assertEquals(1, gtr2.getIndexTemplates().size());

      // The stored mapping must no longer match the original expected mapping
      assertFalse(
          ElasticsearchIndexService.mappingsAreEquivalent(
              gtr2.getIndexTemplates().get(0), mapping_json, _mapper)); // has changed
    }

    // Check if mapping is deleted then next time bucket modified is updated then the mapping is
    // recreated

    {
      // Remove the template behind the service's back (no try/catch: it must exist here)
      _crud_factory
          .getClient()
          .admin()
          .indices()
          .prepareDeleteTemplate(template_name)
          .execute()
          .actionGet();

      // (check with old date)

      final GetIndexTemplatesRequest gt = new GetIndexTemplatesRequest().names(template_name);
      final GetIndexTemplatesResponse gtr =
          _crud_factory.getClient().admin().indices().getTemplates(gt).actionGet();
      assertTrue("No templates to start with", gtr.getIndexTemplates().isEmpty());

      {
        // The original bucket carries the oldest "modified" date used in this test
        _index_service.handlePotentiallyNewIndex(
            bucket,
            Optional.empty(),
            ElasticsearchIndexConfigUtils.buildConfigBeanFromSchema(bucket, _config_bean, _mapper),
            "_default_");

        // The template is expected to stay absent
        // (presumably because the cached date is newer than the bucket's - confirm)
        final GetIndexTemplatesRequest gt2 = new GetIndexTemplatesRequest().names(template_name);
        final GetIndexTemplatesResponse gtr2 =
            _crud_factory.getClient().admin().indices().getTemplates(gt2).actionGet();
        assertTrue("Initially no change", gtr2.getIndexTemplates().isEmpty());
      }

      // Update date and retry

      {
        time_setter.set(2015, 1, 1, 16, 0, 0);
        final Date next_time = time_setter.getTime();
        final DataBucketBean bucket2 =
            BeanTemplateUtils.clone(bucket).with("modified", next_time).done();

        _index_service.handlePotentiallyNewIndex(
            bucket2,
            Optional.empty(),
            ElasticsearchIndexConfigUtils.buildConfigBeanFromSchema(bucket2, _config_bean, _mapper),
            "_default_");

        // With a newer date the template is recreated, matching the original expected mapping
        final GetIndexTemplatesRequest gt2 = new GetIndexTemplatesRequest().names(template_name);
        final GetIndexTemplatesResponse gtr2 =
            _crud_factory.getClient().admin().indices().getTemplates(gt2).actionGet();
        assertEquals(1, _index_service._bucket_template_cache.size());
        assertEquals(1, gtr2.getIndexTemplates().size());

        assertTrue(
            "Mappings should be equivalent",
            ElasticsearchIndexService.mappingsAreEquivalent(
                gtr2.getIndexTemplates().get(0), mapping_json, _mapper));
      }
    }
  }
  /**
   * Checks that schema validation reports failures: once for a bucket definition that is expected
   * to fail search-index validation, and once for a bucket whose search index schema requests a
   * target index size of 10 MB.
   */
  @Test
  public void test_validationFail() throws IOException {

    final String failing_bucket_json =
        Resources.toString(
            Resources.getResource(
                "com/ikanow/aleph2/search_service/elasticsearch/services/test_bucket_validate_fail.json"),
            Charsets.UTF_8);
    final DataBucketBean bucket =
        BeanTemplateUtils.build(failing_bucket_json, DataBucketBean.class).done().get();

    // 1) Verbose mode off: only the search index schema should produce a (failed) message
    {
      final Collection<BasicMessageBean> res_col =
          _index_service.validateSchema(bucket.data_schema().columnar_schema(), bucket)._2();
      final Collection<BasicMessageBean> res_search =
          _index_service.validateSchema(bucket.data_schema().search_index_schema(), bucket)._2();
      final Collection<BasicMessageBean> res_time =
          _index_service.validateSchema(bucket.data_schema().temporal_schema(), bucket)._2();

      assertEquals(0, res_col.size());
      assertEquals(0, res_time.size());
      assertEquals(1, res_search.size());

      assertEquals(false, res_search.iterator().next().success());
    }

    // 2) Check setting an invalid max index size
    {
      final String valid_bucket_json =
          Resources.toString(
              Resources.getResource(
                  "com/ikanow/aleph2/search_service/elasticsearch/services/test_bucket_validate_success.json"),
              Charsets.UTF_8);
      final DataBucketBean bucket2 =
          BeanTemplateUtils.build(valid_bucket_json, DataBucketBean.class).done().get();

      // Rebuild the otherwise valid bucket bottom-up with a 10 MB target index size
      final DataSchemaBean.SearchIndexSchemaBean tiny_index_schema =
          BeanTemplateUtils.clone(bucket2.data_schema().search_index_schema())
              .with(DataSchemaBean.SearchIndexSchemaBean::target_index_size_mb, 10L)
              .done();
      final DataSchemaBean tiny_data_schema =
          BeanTemplateUtils.clone(bucket2.data_schema())
              .with(DataSchemaBean::search_index_schema, tiny_index_schema)
              .done();
      final DataBucketBean bucket_too_small =
          BeanTemplateUtils.clone(bucket2)
              .with(DataBucketBean::data_schema, tiny_data_schema)
              .done();

      // NOTE(review): the schema argument comes from the failing "bucket", not from
      // bucket_too_small - confirm this is intended
      final Collection<BasicMessageBean> res_search =
          _index_service
              .validateSchema(bucket.data_schema().search_index_schema(), bucket_too_small)
              ._2();

      assertEquals(1, res_search.size());
      assertEquals(false, res_search.stream().allMatch(BasicMessageBean::success));

      final BasicMessageBean first_message = res_search.iterator().next();
      assertTrue(
          "Right message: " + first_message.message(),
          first_message.message().contains("10 MB"));
    }
  }
  /**
   * Checks schema validation for a valid bucket: silent by default, two successful messages
   * (the mapping and the max index size) when the "verbose" technology override is set, and the
   * index suffix returned for temporal schemas with and without a grouping period.
   */
  @Test
  public void test_validationSuccess() throws IOException {
    final String valid_bucket_json =
        Resources.toString(
            Resources.getResource(
                "com/ikanow/aleph2/search_service/elasticsearch/services/test_bucket_validate_success.json"),
            Charsets.UTF_8);
    final DataBucketBean bucket =
        BeanTemplateUtils.build(valid_bucket_json, DataBucketBean.class).done().get();

    // 1) Verbose mode off: no messages for any schema
    {
      final Collection<BasicMessageBean> res_col =
          _index_service.validateSchema(bucket.data_schema().columnar_schema(), bucket)._2();
      final Collection<BasicMessageBean> res_search =
          _index_service.validateSchema(bucket.data_schema().search_index_schema(), bucket)._2();
      final Collection<BasicMessageBean> res_time =
          _index_service.validateSchema(bucket.data_schema().temporal_schema(), bucket)._2();

      assertEquals(0, res_col.size());
      assertEquals(0, res_search.size());
      assertEquals(0, res_time.size());
    }

    // 2) Verbose mode on
    {
      // Same bucket, with "verbose": true added to the search index technology overrides
      final DataSchemaBean.SearchIndexSchemaBean verbose_search_schema =
          BeanTemplateUtils.clone(bucket.data_schema().search_index_schema())
              .with(
                  DataSchemaBean.SearchIndexSchemaBean::technology_override_schema,
                  ImmutableMap.builder()
                      .putAll(
                          bucket.data_schema().search_index_schema().technology_override_schema())
                      .put("verbose", true)
                      .build())
              .done();
      final DataSchemaBean verbose_data_schema =
          BeanTemplateUtils.clone(bucket.data_schema())
              .with(DataSchemaBean::search_index_schema, verbose_search_schema)
              .done();
      final DataBucketBean bucket_verbose =
          BeanTemplateUtils.clone(bucket)
              .with(DataBucketBean::data_schema, verbose_data_schema)
              .done();

      // NOTE(review): the schemas come from bucket_verbose but the original "bucket" is passed
      // as the second argument - confirm this is intended
      final Collection<BasicMessageBean> res_col =
          _index_service
              .validateSchema(bucket_verbose.data_schema().columnar_schema(), bucket)
              ._2();
      final Collection<BasicMessageBean> res_search =
          _index_service
              .validateSchema(bucket_verbose.data_schema().search_index_schema(), bucket)
              ._2();
      final Collection<BasicMessageBean> res_time =
          _index_service
              .validateSchema(bucket_verbose.data_schema().temporal_schema(), bucket)
              ._2();

      assertEquals(0, res_col.size());
      assertEquals(0, res_time.size());
      assertEquals(2, res_search.size());
      assertEquals(true, res_search.stream().allMatch(BasicMessageBean::success));

      final Iterator<BasicMessageBean> search_messages = res_search.iterator();

      // First message: the generated mapping, compared as normalised JSON
      final String expected_mapping_str =
          Resources.toString(
              Resources.getResource(
                  "com/ikanow/aleph2/search_service/elasticsearch/services/test_verbose_mapping_validate_results.json"),
              Charsets.UTF_8);
      final JsonNode expected_mapping = _mapper.readTree(expected_mapping_str.getBytes());
      assertEquals(
          expected_mapping.toString(),
          _mapper.readTree(search_messages.next().message()).toString());

      // Second message: the max index size in use
      final String failure_text =
          "Sets the max index override: "
              + res_search.stream().skip(1).map(m -> m.message()).collect(Collectors.joining());
      assertTrue(failure_text, search_messages.next().message().contains("1,000 MB"));
    }

    // 3) Temporal

    {
      // No grouping period => empty index suffix
      final DataSchemaBean.TemporalSchemaBean ungrouped_temporal_schema =
          BeanTemplateUtils.build(DataSchemaBean.TemporalSchemaBean.class).done().get();
      final DataBucketBean bucket_temporal_no_grouping =
          BeanTemplateUtils.clone(bucket)
              .with(
                  DataBucketBean::data_schema,
                  BeanTemplateUtils.clone(bucket.data_schema())
                      .with(DataSchemaBean::temporal_schema, ungrouped_temporal_schema)
                      .done())
              .done();

      assertEquals(
          "",
          _index_service
              .validateSchema(bucket_temporal_no_grouping.data_schema().temporal_schema(), bucket)
              ._1());

      // Daily grouping => date-pattern suffix
      final DataSchemaBean.TemporalSchemaBean daily_temporal_schema =
          BeanTemplateUtils.build(DataSchemaBean.TemporalSchemaBean.class)
              .with(DataSchemaBean.TemporalSchemaBean::grouping_time_period, "1d")
              .done()
              .get();
      final DataBucketBean bucket_temporal_grouping =
          BeanTemplateUtils.clone(bucket)
              .with(
                  DataBucketBean::data_schema,
                  BeanTemplateUtils.clone(bucket.data_schema())
                      .with(DataSchemaBean::temporal_schema, daily_temporal_schema)
                      .done())
              .done();

      assertEquals(
          "_{yyyy-MM-dd}",
          _index_service
              .validateSchema(bucket_temporal_grouping.data_schema().temporal_schema(), bucket)
              ._1());
    }
  }
  /**
   * Exercises {@code DataBucketChangeActor.getStreamingTopology}:
   *
   * <ol>
   *   <li>an incoming error is passed straight through;
   *   <li>a library name that is missing from the library map (2a), or mapped to a null
   *       bean/path (2b), produces a "shared library name not found" error;
   *   <li>with a cached jar available the topology is instantiated; a library whose entry point
   *       cannot be loaded fails with an error naming the class; and a topology listing several
   *       libraries still resolves.
   * </ol>
   */
  @Test
  public void test_getStreamingTopology()
      throws UnsupportedFileSystemException, InterruptedException, ExecutionException {
    final DataBucketBean bucket = createBucket("test_tech_id_stream");

    // Test jars, resolved relative to the working directory
    final String pathname1 =
        System.getProperty("user.dir") + "/misc_test_assets/simple-topology-example.jar";
    final Path path1 = FileContext.getLocalFSFileContext().makeQualified(new Path(pathname1));
    final String pathname2 =
        System.getProperty("user.dir") + "/misc_test_assets/simple-harvest-example2.jar";
    final Path path2 = FileContext.getLocalFSFileContext().makeQualified(new Path(pathname2));

    List<SharedLibraryBean> lib_elements = createSharedLibraryBeans(path1, path2);

    //////////////////////////////////////////////////////

    // 1) Check - if called with an error, then just passes that error along

    final BasicMessageBean error =
        SharedErrorUtils.buildErrorMessage("test_source", "test_message", "test_error");

    final Validation<BasicMessageBean, IEnrichmentStreamingTopology> test1 =
        DataBucketChangeActor.getStreamingTopology(
            bucket,
            new BucketActionMessage.BucketActionOfferMessage(bucket),
            "test_source2",
            Validation.fail(error));

    // The original error comes back untouched (source is NOT replaced by "test_source2")
    assertTrue("Got error back", test1.isFail());
    assertEquals("test_source", test1.fail().source());
    assertEquals("test_message", test1.fail().command());
    assertEquals("test_error", test1.fail().message());

    //////////////////////////////////////////////////////

    // 2) Check the error handling inside getStreamingTopology

    // Library map containing only the 2b name, mapped to a tuple of nulls
    final ImmutableMap<String, Tuple2<SharedLibraryBean, String>> test2_input =
        ImmutableMap.<String, Tuple2<SharedLibraryBean, String>>builder()
            .put("test_tech_id_stream_2b", Tuples._2T(null, null))
            .build();

    // 2a) the requested library is not in the map at all
    final Validation<BasicMessageBean, IEnrichmentStreamingTopology> test2a =
        DataBucketChangeActor.getStreamingTopology(
            BeanTemplateUtils.clone(bucket)
                .with(
                    DataBucketBean::streaming_enrichment_topology,
                    BeanTemplateUtils.build(EnrichmentControlMetadataBean.class)
                        .with(
                            EnrichmentControlMetadataBean::library_names_or_ids,
                            Arrays.asList("test_tech_id_stream_2a"))
                        .done()
                        .get())
                .done(),
            new BucketActionMessage.BucketActionOfferMessage(bucket),
            "test_source2a",
            Validation.success(test2_input));

    assertTrue("Got error back", test2a.isFail());
    assertEquals("test_source2a", test2a.fail().source());
    assertEquals("BucketActionOfferMessage", test2a.fail().command());
    assertEquals(
        ErrorUtils.get(
            SharedErrorUtils.SHARED_LIBRARY_NAME_NOT_FOUND,
            bucket.full_name(),
            "(unknown)"), // (cloned bucket above)
        test2a.fail().message());

    // 2b) the requested library is in the map, but with null bean and path
    final Validation<BasicMessageBean, IEnrichmentStreamingTopology> test2b =
        DataBucketChangeActor.getStreamingTopology(
            BeanTemplateUtils.clone(bucket)
                .with(
                    DataBucketBean::streaming_enrichment_topology,
                    BeanTemplateUtils.build(EnrichmentControlMetadataBean.class)
                        .with(
                            EnrichmentControlMetadataBean::library_names_or_ids,
                            Arrays.asList("test_tech_id_stream_2b"))
                        .done()
                        .get())
                .done(),
            new BucketActionMessage.BucketActionOfferMessage(bucket),
            "test_source2b",
            Validation.success(test2_input));

    assertTrue("Got error back", test2b.isFail());
    assertEquals("test_source2b", test2b.fail().source());
    assertEquals("BucketActionOfferMessage", test2b.fail().command());
    // (Fixed: this previously re-checked test2a's message, leaving test2b's message unverified)
    assertEquals(
        ErrorUtils.get(
            SharedErrorUtils.SHARED_LIBRARY_NAME_NOT_FOUND,
            bucket.full_name(),
            "(unknown)"), // (cloned bucket above)
        test2b.fail().message());

    //////////////////////////////////////////////////////

    // 3) OK now it will actually do something

    final String java_name =
        _service_context.getGlobalProperties().local_cached_jar_dir()
            + File.separator
            + "test_tech_id_stream.cache.jar";

    // Remove any stale copy so the caching call below has to produce the file
    _logger.info(
        "Needed to delete locally cached file? " + java_name + ": " + new File(java_name).delete());

    // Requires that the file has already been cached:
    final Validation<BasicMessageBean, String> cached_file =
        JarCacheUtils.getCachedJar(
                _service_context.getGlobalProperties().local_cached_jar_dir(),
                lib_elements.get(0),
                _service_context.getStorageService(),
                "test3",
                "test3")
            .get();

    if (cached_file.isFail()) {
      fail("About to crash with: " + cached_file.fail().message());
    }

    assertTrue("The cached file exists: " + java_name, new File(java_name).exists());

    // OK the setup is done and validated now actually test the underlying call:

    final ImmutableMap<String, Tuple2<SharedLibraryBean, String>> test3_input =
        ImmutableMap.<String, Tuple2<SharedLibraryBean, String>>builder()
            .put("test_tech_id_stream", Tuples._2T(lib_elements.get(0), cached_file.success()))
            .build();

    final Validation<BasicMessageBean, IEnrichmentStreamingTopology> test3 =
        DataBucketChangeActor.getStreamingTopology(
            BeanTemplateUtils.clone(bucket)
                .with(
                    DataBucketBean::streaming_enrichment_topology,
                    BeanTemplateUtils.build(EnrichmentControlMetadataBean.class)
                        .with(
                            EnrichmentControlMetadataBean::library_names_or_ids,
                            Arrays.asList("test_tech_id_stream"))
                        .done()
                        .get())
                .done(),
            new BucketActionMessage.BucketActionOfferMessage(bucket),
            "test_source3",
            Validation.success(test3_input));

    if (test3.isFail()) {
      fail("About to crash with: " + test3.fail().message());
    }
    // The instantiated topology must be the class named by the library's entry point
    assertTrue("getStreamingTopology call succeeded", test3.isSuccess());
    assertTrue("topology created: ", test3.success() != null);
    assertEquals(lib_elements.get(0).misc_entry_point(), test3.success().getClass().getName());

    // (Try again but with failing version, due to class not found)

    final ImmutableMap<String, Tuple2<SharedLibraryBean, String>> test3a_input =
        ImmutableMap.<String, Tuple2<SharedLibraryBean, String>>builder()
            .put("test_tech_id_stream_fail", Tuples._2T(lib_elements.get(3), cached_file.success()))
            .build();

    final Validation<BasicMessageBean, IEnrichmentStreamingTopology> test3a =
        DataBucketChangeActor.getStreamingTopology(
            BeanTemplateUtils.clone(bucket)
                .with(
                    DataBucketBean::streaming_enrichment_topology,
                    BeanTemplateUtils.build(EnrichmentControlMetadataBean.class)
                        .with(
                            EnrichmentControlMetadataBean::library_names_or_ids,
                            Arrays.asList("test_tech_id_stream_fail"))
                        .done()
                        .get())
                .done(),
            new BucketActionMessage.BucketActionOfferMessage(bucket),
            "test_source3",
            Validation.success(test3a_input));

    // The error text is expected to name the class involved
    assertTrue("Got error back", test3a.isFail());
    assertTrue(
        "Right error: " + test3a.fail().message(),
        test3a.fail().message().contains("com.ikanow.aleph2.test.example.ExampleStreamTopology"));

    // Now check with the "not just the harvest tech" flag set

    final String java_name2 =
        _service_context.getGlobalProperties().local_cached_jar_dir()
            + File.separator
            + "test_module_id.cache.jar";

    _logger.info(
        "Needed to delete locally cached file? "
            + java_name2
            + ": "
            + new File(java_name2).delete());

    // Requires that the file has already been cached:
    final Validation<BasicMessageBean, String> cached_file2 =
        JarCacheUtils.getCachedJar(
                _service_context.getGlobalProperties().local_cached_jar_dir(),
                lib_elements.get(1),
                _service_context.getStorageService(),
                "test3b",
                "test3b")
            .get();

    if (cached_file2.isFail()) {
      fail("About to crash with: " + cached_file2.fail().message());
    }

    // (Fixed: the failure message previously printed java_name while checking java_name2)
    assertTrue("The cached file exists: " + java_name2, new File(java_name2).exists());

    // NOTE(review): "test_module_id" is paired with cached_file (the first jar) rather than
    // cached_file2 - left unchanged, confirm whether this is intended
    final ImmutableMap<String, Tuple2<SharedLibraryBean, String>> test3b_input =
        ImmutableMap.<String, Tuple2<SharedLibraryBean, String>>builder()
            .put("test_tech_id_stream", Tuples._2T(lib_elements.get(0), cached_file.success()))
            .put("test_module_id", Tuples._2T(lib_elements.get(1), cached_file.success()))
            .build();

    // Topology config that lists both libraries
    final EnrichmentControlMetadataBean enrichment_module =
        new EnrichmentControlMetadataBean(
            "test_tech_name",
            Collections.emptyList(),
            true,
            null,
            Arrays.asList("test_tech_id_stream", "test_module_id"),
            null,
            null);

    final Validation<BasicMessageBean, IEnrichmentStreamingTopology> test3b =
        DataBucketChangeActor.getStreamingTopology(
            BeanTemplateUtils.clone(bucket)
                .with(DataBucketBean::streaming_enrichment_topology, enrichment_module)
                .done(),
            new BucketActionMessage.BucketActionOfferMessage(bucket),
            "test_source3b",
            Validation.success(test3b_input));

    if (test3b.isFail()) {
      fail("About to crash with: " + test3b.fail().message());
    }
    // The topology class is still the one named by the first library's entry point
    assertTrue("getStreamingTopology call succeeded", test3b.isSuccess());
    assertTrue("topology created: ", test3b.success() != null);
    assertEquals(lib_elements.get(0).misc_entry_point(), test3b.success().getClass().getName());

    // TODO add a test for disabled streaming but config given (should default to the passthrough
    // topology and ignore the given topology)
  }
  /**
   * Given a bucket, looks up the shared libraries selected by {@code getQuery(bucket)} and caches
   * each one's JAR locally.
   *
   * <p>Returns a future containing either the first error encountered, _or_ a map of (shared
   * library bean, cached path name) tuples. Every library appears under two keys - its path name
   * and its id - and all of the caching futures have completed by the time the returned future
   * completes. If {@code getQuery} yields no query, the result is a success holding an empty map.
   *
   * <p>Only a synchronous failure while setting up the DB call is converted into a failed
   * validation by this method. NOTE(review): a DB or caching future that completes exceptionally
   * appears to propagate as an exceptional completion of the returned future - confirm callers
   * handle that.
   *
   * @param bucket the bucket whose shared libraries are required
   * @param management_db used to obtain the shared library store
   * @param globals supplies the local directory into which JARs are cached
   * @param fs the storage service passed to the JAR cache
   * @param context service context used to secure the shared library store (the store is accessed
   *     with the authorization of the bucket's owner)
   * @param handler_for_errors the handler name inserted into any error message
   * @param msg_for_errors the originating message, inserted into any error message
   * @return a future containing the first error encountered, _or_ a map (both name and id as keys)
   *     of path names
   */
  @SuppressWarnings("unchecked")
  protected static <M>
      CompletableFuture<
              Validation<BasicMessageBean, Map<String, Tuple2<SharedLibraryBean, String>>>>
          cacheJars(
              final DataBucketBean bucket,
              final IManagementDbService management_db,
              final GlobalPropertiesBean globals,
              final IStorageService fs,
              final IServiceContext context,
              final String handler_for_errors,
              final M msg_for_errors) {
    try {
      final Optional<QueryComponent<SharedLibraryBean>> spec = getQuery(bucket);
      if (!spec.isPresent()) {
        // Nothing to look up, so nothing to cache
        return CompletableFuture.completedFuture(
            Validation.<BasicMessageBean, Map<String, Tuple2<SharedLibraryBean, String>>>success(
                Collections.emptyMap()));
      }

      return management_db
          .getSharedLibraryStore()
          .secured(context, new AuthorizationBean(bucket.owner_id()))
          .getObjectsBySpec(spec.get())
          .thenComposeAsync(
              cursor -> {
                // This is a map of futures from the cache call - either an error or the path name
                // note we use a tuple of (path name, id) as the key and then flatten out later
                final Map<
                        Tuple2<String, String>,
                        Tuple2<
                            SharedLibraryBean,
                            CompletableFuture<Validation<BasicMessageBean, String>>>>
                    map_of_futures =
                        StreamSupport.stream(cursor.spliterator(), true)
                            .collect(
                                Collectors
                                    .<SharedLibraryBean, Tuple2<String, String>,
                                        Tuple2<
                                            SharedLibraryBean,
                                            CompletableFuture<
                                                Validation<BasicMessageBean, String>>>>
                                        toMap(
                                            // want to keep both the name and id versions - will
                                            // flatten out below
                                            lib -> Tuples._2T(lib.path_name(), lib._id()), // (key)
                                            // spin off a future in which the file is being copied -
                                            // save the shared library bean also
                                            lib ->
                                                Tuples._2T(
                                                    lib, // (value)
                                                    JarCacheUtils.getCachedJar(
                                                        globals.local_cached_jar_dir(),
                                                        lib,
                                                        fs,
                                                        handler_for_errors,
                                                        msg_for_errors))));

                // denest from map of futures to future of maps, also handle any errors here:
                // (generic arrays cannot be created directly, hence the raw array + unchecked cast)
                final CompletableFuture<Validation<BasicMessageBean, String>>[] futures =
                    (CompletableFuture<Validation<BasicMessageBean, String>>[])
                        map_of_futures
                            .values()
                            .stream()
                            .map(t2 -> t2._2())
                            .toArray(CompletableFuture[]::new);

                // (have to embed this thenApply instead of bringing it outside as part of the
                // toCompose chain, because otherwise we'd lose map_of_futures scope)
                return CompletableFuture.allOf(futures)
                    .<Validation<BasicMessageBean, Map<String, Tuple2<SharedLibraryBean, String>>>>
                        thenApply(
                            f -> {
                              try {
                                final Map<String, Tuple2<SharedLibraryBean, String>> almost_there =
                                    map_of_futures
                                        .entrySet()
                                        .stream()
                                        .flatMap(
                                            kv -> {
                                              // (must have already returned if here, so the join
                                              // does not block)
                                              final Validation<BasicMessageBean, String> ret =
                                                  kv.getValue()._2().join();
                                              return ret
                                                  .<Stream<
                                                          Tuple2<
                                                              String,
                                                              Tuple2<SharedLibraryBean, String>>>>
                                                      validation(
                                                          // Error: abort the whole flatten, the
                                                          // catch below turns the message back into
                                                          // a failed validation
                                                          err -> {
                                                            throw new RuntimeException(
                                                                err.message());
                                                          } // (not ideal, but will do)
                                                          ,
                                                          // Normal: emit the same (bean, path)
                                                          // result under both keys
                                                          s -> {
                                                            final Tuple2<SharedLibraryBean, String>
                                                                lib_and_path =
                                                                    Tuples._2T(
                                                                        kv.getValue()._1(), s);
                                                            return Stream.of(
                                                                // keyed by path_name
                                                                Tuples._2T(
                                                                    kv.getKey()._1(), lib_and_path),
                                                                // keyed by id
                                                                Tuples._2T(
                                                                    kv.getKey()._2(),
                                                                    lib_and_path));
                                                          });
                                            })
                                        .collect(
                                            Collectors
                                                .<Tuple2<String, Tuple2<SharedLibraryBean, String>>,
                                                    String, Tuple2<SharedLibraryBean, String>>
                                                    toMap(
                                                        idname_path -> idname_path._1(), // (key)
                                                        idname_path -> idname_path._2() // (value)
                                                        ));
                                return Validation
                                    .<BasicMessageBean,
                                        Map<String, Tuple2<SharedLibraryBean, String>>>
                                        success(almost_there);
                              } catch (
                                  Exception e) { // handle the exception thrown above containing the
                                // message bean from whatever the original error was!
                                return Validation
                                    .<BasicMessageBean,
                                        Map<String, Tuple2<SharedLibraryBean, String>>>
                                        fail(
                                            SharedErrorUtils.buildErrorMessage(
                                                handler_for_errors,
                                                msg_for_errors,
                                                e.getMessage()));
                              }
                            });
              });
    } catch (Throwable e) { // (can only occur if the DB call errors)
      return CompletableFuture.completedFuture(
          Validation.fail(
              SharedErrorUtils.buildErrorMessage(
                  handler_for_errors,
                  msg_for_errors,
                  ErrorUtils.getLongForm(
                      SharedErrorUtils.ERROR_CACHING_SHARED_LIBS, e, bucket.full_name()))));
    }
  }
  /**
   * Resolves the streaming enrichment topology to use for a bucket.
   *
   * <p>If the library lookup failed, that error is passed straight through. Otherwise:
   *
   * <ul>
   *   <li>if the bucket's streaming topology is explicitly disabled, or no libraries were
   *       supplied, a {@link PassthroughTopology} is returned;
   *   <li>otherwise the first library (in map iteration order) that declares a streaming
   *       enrichment entry point - or, failing that, a misc entry point - is loaded via {@code
   *       ClassloaderUtils.getFromCustomClasspath}, with the paths of all supplied libraries
   *       passed alongside it.
   * </ul>
   *
   * <p>A bucket with no streaming topology object at all is treated like one whose {@code enabled}
   * flag is unset, ie as enabled.
   *
   * @param bucket the bucket whose topology is required
   * @param m the originating message (used when building error messages)
   * @param source the source name (used when building error messages)
   * @param err_or_libs either an earlier error, or the cached libraries keyed by name and id
   *     ("pipeline element")
   * @return the topology instance, or the error describing why it could not be created
   */
  protected static Validation<BasicMessageBean, IEnrichmentStreamingTopology> getStreamingTopology(
      final DataBucketBean bucket,
      final BucketActionMessage m,
      final String source,
      final Validation<BasicMessageBean, Map<String, Tuple2<SharedLibraryBean, String>>>
          err_or_libs // "pipeline element"
      ) {
    try {

      return err_or_libs.<Validation<BasicMessageBean, IEnrichmentStreamingTopology>>validation(
          // Error:
          error -> Validation.fail(error),
          // Normal
          libs -> {
            // Null-safe: neither a missing topology bean nor a missing "enabled" flag disables
            // streaming
            final boolean streaming_enabled =
                Optional.ofNullable(bucket.streaming_enrichment_topology())
                    .map(topology -> topology.enabled())
                    .orElse(true);

            // Easy cases: if streaming is turned off, or there are no libs to load a topology
            // from, just pass data through this layer
            if (!streaming_enabled || libs.isEmpty()) {
              return Validation.success(new PassthroughTopology());
            }

            // First library that declares any usable entry point
            final Tuple2<SharedLibraryBean, String> libbean_path =
                libs.values()
                    .stream()
                    .filter(
                        t2 ->
                            (null != t2._1())
                                && (null
                                    != Optional.ofNullable(
                                            t2._1().streaming_enrichment_entry_point())
                                        .orElse(t2._1().misc_entry_point())))
                    .findFirst()
                    .orElse(null);

            if ((null == libbean_path)
                || (null
                    == libbean_path._2())) { // Nice easy error case, probably can't ever happen
              return Validation.fail(
                  SharedErrorUtils.buildErrorMessage(
                      source,
                      m,
                      SharedErrorUtils.SHARED_LIBRARY_NAME_NOT_FOUND,
                      bucket.full_name(),
                      "(unknown)"));
            }

            // The streaming-specific entry point wins over the generic one
            final String entry_point =
                Optional.ofNullable(libbean_path._1().streaming_enrichment_entry_point())
                    .orElse(libbean_path._1().misc_entry_point());

            return ClassloaderUtils.getFromCustomClasspath(
                IEnrichmentStreamingTopology.class,
                entry_point,
                Optional.of(libbean_path._2()),
                libs.values().stream().map(lp -> lp._2()).collect(Collectors.toList()),
                source,
                m);
          });
    } catch (Throwable t) {
      // NOTE(review): the message is parameterized with the harvest technology name even though
      // this method loads an enrichment topology - confirm that is intended
      return Validation.fail(
          SharedErrorUtils.buildErrorMessage(
              source,
              m,
              ErrorUtils.getLongForm(
                  SharedErrorUtils.ERROR_LOADING_CLASS,
                  t,
                  bucket.harvest_technology_name_or_id())));
    }
  }
  /**
   * Applies a bucket action message to the bucket's Storm job, using the supplied topology.
   *
   * <p>If the topology could not be obtained, the error is wrapped in a reply and returned.
   * Otherwise the context is primed with the bucket, the topology's class name and the matching
   * library bean, and the message type decides what happens:
   *
   * <ul>
   *   <li>delete: the job is stopped;
   *   <li>new: the job is started, unless the message is suspended, in which case it is stopped;
   *   <li>update: the job is restarted if the message is enabled, otherwise stopped;
   *   <li>test: the job is restarted;
   *   <li>anything else: an "Unknown message" failure reply.
   * </ul>
   *
   * @param storm_controller the controller through which jobs are started and stopped
   * @param bucket the bucket the message applies to
   * @param m the message being handled
   * @param err_or_user_topology either an earlier error, or the topology to run
   * @param err_or_map either an earlier error, or the cached libraries keyed by name and id
   * @param source the source name placed in error replies
   * @param context the enrichment context, updated in place before the job call
   * @param yarn_config_dir not referenced by this method
   * @param cached_jars_dir passed through to the start/restart calls
   * @return a future holding the reply for the message
   */
  protected static CompletableFuture<BucketActionReplyMessage> talkToStream(
      final IStormController storm_controller,
      final DataBucketBean bucket,
      final BucketActionMessage m,
      final Validation<BasicMessageBean, IEnrichmentStreamingTopology> err_or_user_topology,
      final Validation<BasicMessageBean, Map<String, Tuple2<SharedLibraryBean, String>>> err_or_map,
      final String source,
      final StreamingEnrichmentContext context,
      final String yarn_config_dir,
      final String cached_jars_dir) {
    try {
      // Paths of the user libraries with the first "file:" stripped
      final List<String> user_lib_paths =
          err_or_map.<List<String>>validation(
              err -> Collections.emptyList(), // (going to die soon anyway)
              lib_map ->
                  lib_map
                      .values()
                      .stream()
                      .map(lib_and_path -> lib_and_path._2().replaceFirst("file:", ""))
                      .collect(Collectors.toList()));

      return err_or_user_topology.<CompletableFuture<BucketActionReplyMessage>>validation(
          // ERROR getting enrichment topology: just report it
          error -> CompletableFuture.completedFuture(new BucketActionHandlerMessage(source, error)),
          // NORMAL: prime the context, then dispatch on the message type
          enrichment_topology -> {
            final String entry_point = enrichment_topology.getClass().getName();
            context.setBucket(bucket);
            context.setUserTopologyEntryPoint(entry_point);

            // Attach the library bean whose entry point is the topology class; when none matches
            // (eg a passthrough topology) fall back to a dummy library bean
            err_or_map.forEach(
                lib_map -> {
                  final SharedLibraryBean topology_lib =
                      lib_map
                          .values()
                          .stream()
                          .map(lib_and_path -> lib_and_path._1())
                          .filter(
                              lib ->
                                  entry_point.equals(lib.misc_entry_point())
                                      || entry_point.equals(lib.streaming_enrichment_entry_point()))
                          .findFirst()
                          .orElse(BeanTemplateUtils.build(SharedLibraryBean.class).done().get());
                  context.setLibraryConfig(topology_lib);
                });

            _logger.info(
                "Set active class="
                    + enrichment_topology.getClass()
                    + " message="
                    + m.getClass().getSimpleName()
                    + " bucket="
                    + bucket.full_name());

            return Patterns.match(m)
                .<CompletableFuture<BucketActionReplyMessage>>andReturn()
                .when(
                    BucketActionMessage.DeleteBucketActionMessage.class,
                    msg -> StormControllerUtil.stopJob(storm_controller, bucket))
                .when(
                    BucketActionMessage.NewBucketActionMessage.class,
                    msg -> {
                      if (msg.is_suspended()) {
                        // (nothing to do but just do this to return something sensible)
                        return StormControllerUtil.stopJob(storm_controller, bucket);
                      }
                      return StormControllerUtil.startJob(
                          storm_controller,
                          bucket,
                          context,
                          user_lib_paths,
                          enrichment_topology,
                          cached_jars_dir);
                    })
                .when(
                    BucketActionMessage.UpdateBucketActionMessage.class,
                    msg -> {
                      if (!msg.is_enabled()) {
                        return StormControllerUtil.stopJob(storm_controller, bucket);
                      }
                      return StormControllerUtil.restartJob(
                          storm_controller,
                          bucket,
                          context,
                          user_lib_paths,
                          enrichment_topology,
                          cached_jars_dir);
                    })
                .when(
                    BucketActionMessage.TestBucketActionMessage.class,
                    // TODO (ALEPH-25): in the future run this test with local storm rather than
                    // remote storm_controller
                    msg ->
                        StormControllerUtil.restartJob(
                            storm_controller,
                            bucket,
                            context,
                            user_lib_paths,
                            enrichment_topology,
                            cached_jars_dir))
                .otherwise(
                    msg ->
                        CompletableFuture.completedFuture(
                            new BucketActionHandlerMessage(
                                source,
                                new BasicMessageBean(
                                    new Date(),
                                    false,
                                    null,
                                    "Unknown message",
                                    0,
                                    "Unknown message",
                                    null))));
          });
    } catch (Throwable e) { // (trying to use Validation to avoid this, but just in case...)
      // The same formatted text is used for both text arguments of the error bean
      final String error_text = ErrorUtils.getLongForm("Error loading streaming class: {0}", e);
      return CompletableFuture.completedFuture(
          new BucketActionHandlerMessage(
              source,
              new BasicMessageBean(new Date(), false, null, error_text, 0, error_text, null)));
    }
  }