protected DataBucketBean createBucket(final String harvest_tech_id) {
  // (Add streaming logic outside this via clone() - see cacheJars)
  return BeanTemplateUtils.build(DataBucketBean.class)
      .with(DataBucketBean::_id, "test1")
      .with(DataBucketBean::owner_id, "person_id")
      .with(DataBucketBean::full_name, "/test/path/")
      .with(DataBucketBean::harvest_technology_name_or_id, harvest_tech_id)
      .done()
      .get();
}
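A minimal sketch of the clone() pattern the comment above refers to: deriving a streaming variant of the test bucket without mutating the original. The master_enrichment_type field and its streaming value are assumed from the wider Aleph2 data model, not confirmed by this snippet:

// Hedged sketch: clone the test bucket and switch it to streaming enrichment.
// (master_enrichment_type / MasterEnrichmentType.streaming are assumed names.)
final DataBucketBean streaming_bucket =
    BeanTemplateUtils.clone(createBucket("test_tech_id_stream"))
        .with(DataBucketBean::master_enrichment_type,
            DataBucketBean.MasterEnrichmentType.streaming)
        .done();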
/**
 * Initializes the storm instance
 *
 * @return a real storm controller if possible, else a no-op controller
 */
@SuppressWarnings("unchecked")
public static IStormController getController() {
  final GlobalPropertiesBean globals =
      Lambdas.get(
          () -> {
            try {
              return BeanTemplateUtils.from(
                  PropertiesUtils.getSubConfig(
                          ModuleUtils.getStaticConfig(), GlobalPropertiesBean.PROPERTIES_ROOT)
                      .orElse(null),
                  GlobalPropertiesBean.class);
            } catch (IOException e) {
              _logger.error(
                  ErrorUtils.getLongForm(
                      "Couldn't set globals property bean in storm harvest tech onInit: {0}", e));
              return null;
            }
          });
  if (null == globals) {
    return new NoStormController();
  }

  _logger.info(
      "Loading storm config from: "
          + globals.local_yarn_config_dir() + File.separator + "storm.yaml");

  final Yaml yaml = new Yaml();
  Map<String, Object> object;
  // (try-with-resources so the config stream is always closed)
  try (InputStream input =
      new FileInputStream(
          new File(globals.local_yarn_config_dir() + File.separator + "storm.yaml"))) {
    object = (Map<String, Object>) yaml.load(input);
    if (null == object) { // (empty storm.yaml)
      object = new HashMap<String, Object>();
    }
  } catch (IOException e) {
    _logger.error(
        ErrorUtils.getLongForm("Error reading storm.yaml in storm harvest tech onInit: {0}", e));
    object = new HashMap<String, Object>();
  }

  if (object.containsKey(backtype.storm.Config.NIMBUS_HOST)) {
    _logger.info("starting in remote mode v5");
    _logger.info(object.get(backtype.storm.Config.NIMBUS_HOST));
    // run in distributed mode
    final IStormController storm_controller =
        StormControllerUtil.getRemoteStormController(
            (String) object.get(backtype.storm.Config.NIMBUS_HOST),
            (int) object.get(backtype.storm.Config.NIMBUS_THRIFT_PORT),
            (String) object.get(backtype.storm.Config.STORM_THRIFT_TRANSPORT_PLUGIN));
    return storm_controller;
  } else {
    return new NoStormController();
  }
}
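A brief usage sketch: callers only ever ask for a controller and transparently get the no-op implementation on hosts without a storm.yaml or nimbus host configured:

// Hedged usage sketch: obtain whichever controller the host supports.
final IStormController storm_controller = getController();
// On a developer box with no storm.yaml this is a NoStormController, so harvest
// code can call it unconditionally without special-casing local environments.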
protected List<SharedLibraryBean> createSharedLibraryBeans(Path path1, Path path2) {
  final SharedLibraryBean lib_element =
      BeanTemplateUtils.build(SharedLibraryBean.class)
          .with(SharedLibraryBean::_id, "test_tech_id_stream")
          .with(SharedLibraryBean::path_name, path1.toString())
          .with(
              SharedLibraryBean::misc_entry_point,
              "com.ikanow.aleph2.data_import.stream_enrichment.storm.PassthroughTopology")
          .done()
          .get();

  final SharedLibraryBean lib_element2 =
      BeanTemplateUtils.build(SharedLibraryBean.class)
          .with(SharedLibraryBean::_id, "test_module_id")
          .with(SharedLibraryBean::path_name, path2.toString())
          .done()
          .get();

  final SharedLibraryBean lib_element3 =
      BeanTemplateUtils.build(SharedLibraryBean.class)
          .with(SharedLibraryBean::_id, "failtest")
          .with(SharedLibraryBean::path_name, "/not_exist/here.fghgjhgjhg")
          .done()
          .get(); // (results in a classloader error)

  final SharedLibraryBean lib_element4 =
      BeanTemplateUtils.build(SharedLibraryBean.class)
          .with(SharedLibraryBean::_id, "test_tech_id_stream_fail")
          .with(SharedLibraryBean::path_name, path1.toString())
          .with(
              SharedLibraryBean::streaming_enrichment_entry_point,
              "com.ikanow.aleph2.test.example.ExampleStreamTopology")
          .done()
          .get();

  return Arrays.asList(lib_element, lib_element2, lib_element3, lib_element4);
}
@Override
public Tuple2<Long, IBatchRecord> getNextRecord(
    long currentFileIndex, String fileName, InputStream inStream) {
  final ObjectMapper object_mapper = BeanTemplateUtils.configureMapper(Optional.empty());
  Tuple2<Long, IBatchRecord> t2 = null;
  try {
    final JsonNode node = object_mapper.readTree(inStream);
    t2 =
        new Tuple2<Long, IBatchRecord>(
            currentFileIndex, new BeFileInputReader.BatchRecord(node, null));
  } catch (Exception e) {
    logger.error("JsonParser caught exception", e);
  }
  return t2; // (null if the stream could not be parsed)
}
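A quick usage sketch of the record reader above, feeding it an in-memory JSON stream; the getJson() accessor is assumed from IBatchRecord rather than shown in this snippet:

// Hedged usage sketch: parse one JSON record from an in-memory stream.
final InputStream in =
    new ByteArrayInputStream("{\"test\":\"value\"}".getBytes(StandardCharsets.UTF_8));
final Tuple2<Long, IBatchRecord> record = getNextRecord(0L, "test.json", in);
// record._1() is the file index passed in; record._2().getJson() would expose
// the parsed node (accessor name assumed); record is null on parse failure.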
@Override
protected void configure() {
  final Config config = ModuleUtils.getStaticConfig();
  try {
    final DataImportConfigurationBean bean =
        BeanTemplateUtils.from(
            PropertiesUtils.getSubConfig(config, DataImportConfigurationBean.PROPERTIES_ROOT)
                .orElse(null),
            DataImportConfigurationBean.class);
    this.bind(DataImportConfigurationBean.class).toInstance(bean);
    this.bind(AnalyticStateTriggerCheckFactory.class).in(Scopes.SINGLETON);
  } catch (Exception e) {
    throw new RuntimeException(
        ErrorUtils.get(
            ErrorUtils.INVALID_CONFIG_ERROR,
            DataImportConfigurationBean.class.toString(),
            config.getConfig(DataImportConfigurationBean.PROPERTIES_ROOT)),
        e);
  }
}
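For context, a minimal sketch of how a Guice module like this gets used; the module class name DataImportManagerModule is hypothetical:

// Hedged usage sketch (module name is hypothetical):
final Injector injector = Guice.createInjector(new DataImportManagerModule());
// The toInstance() binding in configure() means this returns the bean built
// from the static config:
final DataImportConfigurationBean config_bean =
    injector.getInstance(DataImportConfigurationBean.class);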
/* (non-Javadoc)
 * @see akka.actor.AbstractActor#receive()
 */
@Override
public PartialFunction<Object, BoxedUnit> receive() {
  return ReceiveBuilder.match(
          BucketActionMessage.class,
          m ->
              !m.handling_clients().isEmpty()
                  && !m.handling_clients()
                      .contains(_context.getInformationService().getHostname()),
          __ -> {}) // (do nothing if it's not for me)
      .match(
          BucketActionOfferMessage.class,
          m -> {
            _logger.info(
                ErrorUtils.get(
                    "Actor {0} received message {1} from {2} bucket {3}",
                    this.self(), m.getClass().getSimpleName(), this.sender(),
                    m.bucket().full_name()));

            final ActorRef closing_sender = this.sender();
            final ActorRef closing_self = this.self();
            final String hostname = _context.getInformationService().getHostname();

            // (this isn't async so doesn't require any futures)
            final boolean accept_or_ignore =
                new File(_globals.local_yarn_config_dir() + File.separator + "storm.yaml")
                    .exists();

            final BucketActionReplyMessage reply =
                accept_or_ignore
                    ? new BucketActionReplyMessage.BucketActionWillAcceptMessage(hostname)
                    : new BucketActionReplyMessage.BucketActionIgnoredMessage(hostname);
            closing_sender.tell(reply, closing_self);
          })
      .match(
          BucketActionMessage.class,
          m -> {
            _logger.info(
                ErrorUtils.get(
                    "Actor {0} received message {1} from {2} bucket={3}",
                    this.self(), m.getClass().getSimpleName(), this.sender(),
                    m.bucket().full_name()));

            final ActorRef closing_sender = this.sender();
            final ActorRef closing_self = this.self();
            final String hostname = _context.getInformationService().getHostname();

            // (cacheJars can't throw checked or unchecked in this thread, only from within
            // exceptions)
            cacheJars(m.bucket(), _management_db, _globals, _fs,
                    _context.getServiceContext(), hostname, m)
                .thenComposeAsync(
                    err_or_map -> {
                      final StreamingEnrichmentContext e_context =
                          _context.getNewStreamingEnrichmentContext();

                      final Validation<BasicMessageBean, IEnrichmentStreamingTopology>
                          err_or_tech_module =
                              getStreamingTopology(m.bucket(), m, hostname, err_or_map);

                      final CompletableFuture<BucketActionReplyMessage> ret =
                          talkToStream(
                              _storm_controller, m.bucket(), m, err_or_tech_module, err_or_map,
                              hostname, e_context, _globals.local_yarn_config_dir(),
                              _globals.local_cached_jar_dir());
                      return ret;
                    })
                .thenAccept(
                    reply -> {
                      // (reply can contain an error or successful reply, they're the same bean
                      // type)
                      // Some information logging:
                      Patterns.match(reply)
                          .andAct()
                          .when(
                              BucketActionHandlerMessage.class,
                              msg ->
                                  _logger.info(
                                      ErrorUtils.get(
                                          "Standard reply to message={0}, bucket={1}, success={2}",
                                          m.getClass().getSimpleName(),
                                          m.bucket().full_name(),
                                          msg.reply().success())))
                          .when(
                              BucketActionReplyMessage.BucketActionWillAcceptMessage.class,
                              msg ->
                                  _logger.info(
                                      ErrorUtils.get(
                                          "Standard reply to message={0}, bucket={1}",
                                          m.getClass().getSimpleName(),
                                          m.bucket().full_name())))
                          .otherwise(
                              msg ->
                                  _logger.info(
                                      ErrorUtils.get(
                                          "Unusual reply to message={0}, type={2}, bucket={1}",
                                          m.getClass().getSimpleName(),
                                          m.bucket().full_name(),
                                          msg.getClass().getSimpleName())));

                      closing_sender.tell(reply, closing_self);
                    })
                .exceptionally(
                    e -> {
                      // another bit of error handling that shouldn't ever be called but is a
                      // useful backstop
                      // Some information logging:
                      _logger.warn(
                          "Unexpected error replying to '{0}': error = {1}, bucket={2}",
                          BeanTemplateUtils.toJson(m).toString(),
                          ErrorUtils.getLongForm("{0}", e),
                          m.bucket().full_name());

                      final BasicMessageBean error_bean =
                          SharedErrorUtils.buildErrorMessage(
                              hostname,
                              m,
                              ErrorUtils.getLongForm(
                                  StreamErrorUtils.STREAM_UNKNOWN_ERROR, e,
                                  m.bucket().full_name()));
                      closing_sender.tell(
                          new BucketActionHandlerMessage(hostname, error_bean), closing_self);
                      return null;
                    });
          })
      .build();
}
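A hedged sketch of exercising the actor above from a test, using Akka's classic ask pattern; the actor class name and the BucketActionOfferMessage constructor signature are assumptions, not confirmed by this snippet:

// Hedged usage sketch (actor class name and message constructor assumed):
final ActorRef handler =
    actor_system.actorOf(Props.create(DataBucketChangeActor.class));
final scala.concurrent.Future<Object> reply =
    akka.pattern.Patterns.ask(handler, new BucketActionOfferMessage(bucket), 5000L);
// A BucketActionWillAcceptMessage reply means this node found a storm.yaml and
// will handle the bucket; BucketActionIgnoredMessage means it opted out.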
public class TestElasticsearchIndexService {
  public static ObjectMapper _mapper = BeanTemplateUtils.configureMapper(Optional.empty());

  protected MockElasticsearchIndexService _index_service;
  protected IElasticsearchCrudServiceFactory _crud_factory;
  protected ElasticsearchIndexServiceConfigBean _config_bean;

  // Set this string to connect vs a real DB
  private final String _connection_string = null;
  private final String _cluster_name = null;
  // private final String _connection_string = "localhost:4093";
  // private final String _cluster_name = "infinite-dev";

  @Before
  public void setupServices() {
    final Config full_config = ConfigFactory.empty();
    if (null == _connection_string) {
      _crud_factory = new MockElasticsearchCrudServiceFactory();
    } else {
      final ElasticsearchConfigurationBean config_bean =
          new ElasticsearchConfigurationBean(_connection_string, _cluster_name);
      _crud_factory = new ElasticsearchCrudServiceFactory(config_bean);
    }
    _config_bean = ElasticsearchIndexConfigUtils.buildConfigBean(full_config);
    _index_service = new MockElasticsearchIndexService(_crud_factory, _config_bean);
  }

  ////////////////////////////////////////////////////////////////////////////////////////////////
  // VALIDATION

  @Test
  public void test_verbosenessSettings() {
    final List<Object> l_true = Arrays.asList(1, "1", "true", true, "TRUE", "True");
    final List<Object> l_false = Arrays.asList(0, "0", "false", false, "FALSE", "False");

    for (Object o : l_true) {
      final DataSchemaBean.SearchIndexSchemaBean s =
          BeanTemplateUtils.build(DataSchemaBean.SearchIndexSchemaBean.class)
              .with(DataSchemaBean.SearchIndexSchemaBean::technology_override_schema,
                  ImmutableMap.builder().put("verbose", o).build())
              .done().get();
      assertEquals(true, ElasticsearchIndexService.is_verbose(s));
    }
    for (Object o : l_false) {
      final DataSchemaBean.SearchIndexSchemaBean s =
          BeanTemplateUtils.build(DataSchemaBean.SearchIndexSchemaBean.class)
              .with(DataSchemaBean.SearchIndexSchemaBean::technology_override_schema,
                  ImmutableMap.builder().put("verbose", o).build())
              .done().get();
      assertEquals(false, ElasticsearchIndexService.is_verbose(s));
    }
    // (not present)
    {
      final DataSchemaBean.SearchIndexSchemaBean s =
          BeanTemplateUtils.build(DataSchemaBean.SearchIndexSchemaBean.class)
              .with(DataSchemaBean.SearchIndexSchemaBean::technology_override_schema,
                  ImmutableMap.builder().build())
              .done().get();
      assertEquals(false, ElasticsearchIndexService.is_verbose(s));
    }
    {
      final DataSchemaBean.SearchIndexSchemaBean s =
          BeanTemplateUtils.build(DataSchemaBean.SearchIndexSchemaBean.class).done().get();
      assertEquals(false, ElasticsearchIndexService.is_verbose(s));
    }
  }

  @Test
  public void test_validationSuccess() throws IOException {
    final String bucket_str = Resources.toString(
        Resources.getResource(
            "com/ikanow/aleph2/search_service/elasticsearch/services/test_bucket_validate_success.json"),
        Charsets.UTF_8);
    final DataBucketBean bucket =
        BeanTemplateUtils.build(bucket_str, DataBucketBean.class).done().get();

    // 1) Verbose mode off
    {
      final Collection<BasicMessageBean> res_col =
          _index_service.validateSchema(bucket.data_schema().columnar_schema(), bucket)._2();
      final Collection<BasicMessageBean> res_search =
          _index_service.validateSchema(bucket.data_schema().search_index_schema(), bucket)._2();
      final Collection<BasicMessageBean> res_time =
          _index_service.validateSchema(bucket.data_schema().temporal_schema(), bucket)._2();

      assertEquals(0, res_col.size());
      assertEquals(0, res_search.size());
      assertEquals(0, res_time.size());
    }
    // 2) Verbose mode on
    {
      final DataBucketBean bucket_verbose =
          BeanTemplateUtils.clone(bucket)
              .with(DataBucketBean::data_schema,
                  BeanTemplateUtils.clone(bucket.data_schema())
                      .with(DataSchemaBean::search_index_schema,
                          BeanTemplateUtils.clone(bucket.data_schema().search_index_schema())
                              .with(
                                  DataSchemaBean.SearchIndexSchemaBean::technology_override_schema,
                                  ImmutableMap.builder()
                                      .putAll(bucket.data_schema().search_index_schema()
                                          .technology_override_schema())
                                      .put("verbose", true)
                                      .build())
                              .done())
                      .done())
              .done();

      final Collection<BasicMessageBean> res_col =
          _index_service
              .validateSchema(bucket_verbose.data_schema().columnar_schema(), bucket)._2();
      final Collection<BasicMessageBean> res_search =
          _index_service
              .validateSchema(bucket_verbose.data_schema().search_index_schema(), bucket)._2();
      final Collection<BasicMessageBean> res_time =
          _index_service
              .validateSchema(bucket_verbose.data_schema().temporal_schema(), bucket)._2();

      assertEquals(0, res_col.size());
      assertEquals(0, res_time.size());
      assertEquals(2, res_search.size());
      assertEquals(true, res_search.stream().allMatch(BasicMessageBean::success));

      Iterator<BasicMessageBean> res_search_message = res_search.iterator();

      final String mapping_str = Resources.toString(
          Resources.getResource(
              "com/ikanow/aleph2/search_service/elasticsearch/services/test_verbose_mapping_validate_results.json"),
          Charsets.UTF_8);
      final JsonNode mapping_json = _mapper.readTree(mapping_str.getBytes());

      assertEquals(mapping_json.toString(),
          _mapper.readTree(res_search_message.next().message()).toString());
      assertTrue(
          "Sets the max index override: "
              + res_search.stream().skip(1).map(m -> m.message()).collect(Collectors.joining()),
          res_search_message.next().message().contains("1,000 MB"));
    }
    // 3) Temporal
    {
      final DataBucketBean bucket_temporal_no_grouping =
          BeanTemplateUtils.clone(bucket)
              .with(DataBucketBean::data_schema,
                  BeanTemplateUtils.clone(bucket.data_schema())
                      .with(DataSchemaBean::temporal_schema,
                          BeanTemplateUtils.build(DataSchemaBean.TemporalSchemaBean.class)
                              .done().get())
                      .done())
              .done();

      assertEquals("",
          _index_service
              .validateSchema(bucket_temporal_no_grouping.data_schema().temporal_schema(), bucket)
              ._1());

      final DataBucketBean bucket_temporal_grouping =
          BeanTemplateUtils.clone(bucket)
              .with(DataBucketBean::data_schema,
                  BeanTemplateUtils.clone(bucket.data_schema())
                      .with(DataSchemaBean::temporal_schema,
                          BeanTemplateUtils.build(DataSchemaBean.TemporalSchemaBean.class)
                              .with(DataSchemaBean.TemporalSchemaBean::grouping_time_period, "1d")
                              .done().get())
                      .done())
              .done();

      assertEquals("_{yyyy-MM-dd}",
          _index_service
              .validateSchema(bucket_temporal_grouping.data_schema().temporal_schema(), bucket)
              ._1());
    }
  }

  @Test
  public void test_validationFail() throws IOException {
    final String bucket_str = Resources.toString(
        Resources.getResource(
            "com/ikanow/aleph2/search_service/elasticsearch/services/test_bucket_validate_fail.json"),
        Charsets.UTF_8);
    final DataBucketBean bucket =
        BeanTemplateUtils.build(bucket_str, DataBucketBean.class).done().get();

    // 1) Verbose mode off
    {
      final Collection<BasicMessageBean> res_col =
          _index_service.validateSchema(bucket.data_schema().columnar_schema(), bucket)._2();
      final Collection<BasicMessageBean> res_search =
          _index_service.validateSchema(bucket.data_schema().search_index_schema(), bucket)._2();
      final Collection<BasicMessageBean> res_time =
          _index_service.validateSchema(bucket.data_schema().temporal_schema(), bucket)._2();

      assertEquals(0, res_col.size());
      assertEquals(0, res_time.size());
      assertEquals(1, res_search.size());
      final BasicMessageBean res_search_message = res_search.iterator().next();
      assertEquals(false, res_search_message.success());
    }
    // 2) Check setting an invalid max index size
    {
      final String bucket_str_2 = Resources.toString(
          Resources.getResource(
              "com/ikanow/aleph2/search_service/elasticsearch/services/test_bucket_validate_success.json"),
          Charsets.UTF_8);
      final DataBucketBean bucket2 =
          BeanTemplateUtils.build(bucket_str_2, DataBucketBean.class).done().get();
      final DataBucketBean bucket_too_small =
          BeanTemplateUtils.clone(bucket2)
              .with(DataBucketBean::data_schema,
                  BeanTemplateUtils.clone(bucket2.data_schema())
                      .with(DataSchemaBean::search_index_schema,
                          BeanTemplateUtils.clone(bucket2.data_schema().search_index_schema())
                              .with(
                                  DataSchemaBean.SearchIndexSchemaBean::target_index_size_mb, 10L)
                              .done())
                      .done())
              .done();

      final Collection<BasicMessageBean> res_search =
          _index_service
              .validateSchema(bucket.data_schema().search_index_schema(), bucket_too_small)._2();

      assertEquals(1, res_search.size());
      assertEquals(false, res_search.stream().allMatch(BasicMessageBean::success));
      BasicMessageBean res_search_message = res_search.iterator().next();
      assertTrue("Right message: " + res_search_message.message(),
          res_search_message.message().contains("10 MB"));
    }
  }

  ////////////////////////////////////////////////////////////////////////////////////////////////
  // INDEX NOT ENABLED

  @Test
  public void test_indexNotEnabled() {
    final DataBucketBean db1 = BeanTemplateUtils.build(DataBucketBean.class).done().get();
    assertEquals(Optional.empty(),
        _index_service.getDataService()
            .flatMap(s -> s.getWritableDataService(
                JsonNode.class, db1, Optional.empty(), Optional.empty())));

    final DataBucketBean db2 =
        BeanTemplateUtils.build(DataBucketBean.class)
            .with("data_schema", BeanTemplateUtils.build(DataSchemaBean.class).done().get())
            .done().get();
    assertEquals(Optional.empty(),
        _index_service.getDataService()
            .flatMap(s -> s.getWritableDataService(
                JsonNode.class, db2, Optional.empty(), Optional.empty())));

    final DataBucketBean db3 =
        BeanTemplateUtils.build(DataBucketBean.class)
            .with("data_schema",
                BeanTemplateUtils.build(DataSchemaBean.class)
                    .with("search_index_schema",
                        BeanTemplateUtils.build(DataSchemaBean.SearchIndexSchemaBean.class)
                            .with("enabled", false)
                            .done().get())
                    .done().get())
            .done().get();
    assertEquals(Optional.empty(),
        _index_service.getDataService()
            .flatMap(s -> s.getWritableDataService(
                JsonNode.class, db3, Optional.empty(), Optional.empty())));
  }

  ////////////////////////////////////////////////////////////////////////////////////////////////
  // INDEX MANAGEMENT

  @Test
  public void test_indexCreation() throws IOException {
    final Calendar time_setter = GregorianCalendar.getInstance();
    time_setter.set(2015, 1, 1, 13, 0, 0);

    final String bucket_str = Resources.toString(
        Resources.getResource(
            "com/ikanow/aleph2/search_service/elasticsearch/services/test_bucket_validate_success.json"),
        Charsets.UTF_8);
    final DataBucketBean bucket =
        BeanTemplateUtils.build(bucket_str, DataBucketBean.class)
            .with("modified", time_setter.getTime())
            .done().get();

    final String mapping_str = Resources.toString(
        Resources.getResource(
            "com/ikanow/aleph2/search_service/elasticsearch/services/test_verbose_mapping_validate_results.json"),
        Charsets.UTF_8);
    final JsonNode mapping_json = _mapper.readTree(mapping_str.getBytes());

    final String template_name = ElasticsearchIndexUtils.getBaseIndexName(bucket);

    try {
      _crud_factory.getClient().admin().indices()
          .prepareDeleteTemplate(template_name).execute().actionGet();
    } catch (Exception e) {
    } // (This is fine, just means it doesn't exist)

    // Create index template from empty
    {
      final GetIndexTemplatesRequest gt = new GetIndexTemplatesRequest().names(template_name);
      final GetIndexTemplatesResponse gtr =
          _crud_factory.getClient().admin().indices().getTemplates(gt).actionGet();
      assertTrue("No templates to start with", gtr.getIndexTemplates().isEmpty());

      _index_service.handlePotentiallyNewIndex(bucket, Optional.empty(),
          ElasticsearchIndexConfigUtils.buildConfigBeanFromSchema(bucket, _config_bean, _mapper),
          "_default_");

      final GetIndexTemplatesRequest gt2 = new GetIndexTemplatesRequest().names(template_name);
      final GetIndexTemplatesResponse gtr2 =
          _crud_factory.getClient().admin().indices().getTemplates(gt2).actionGet();
      assertEquals(1, _index_service._bucket_template_cache.size());
      assertEquals(1, gtr2.getIndexTemplates().size());
      assertTrue("Mappings should be equivalent",
          ElasticsearchIndexService.mappingsAreEquivalent(
              gtr2.getIndexTemplates().get(0), mapping_json, _mapper));
    }
    // Check is ignored subsequently (same date, same content; same date, different content)
    {
      _index_service.handlePotentiallyNewIndex(bucket, Optional.empty(),
          ElasticsearchIndexConfigUtils.buildConfigBeanFromSchema(bucket, _config_bean, _mapper),
          "_default_");

      final GetIndexTemplatesRequest gt2 = new GetIndexTemplatesRequest().names(template_name);
      final GetIndexTemplatesResponse gtr2 =
          _crud_factory.getClient().admin().indices().getTemplates(gt2).actionGet();
      assertEquals(1, _index_service._bucket_template_cache.size());
      assertEquals(1, gtr2.getIndexTemplates().size());
    }
    // Check is checked-but-left if time updated, content not
    {
      time_setter.set(2015, 1, 1, 14, 0, 0);
      final Date next_time = time_setter.getTime();
      final DataBucketBean bucket2 =
          BeanTemplateUtils.clone(bucket).with("modified", next_time).done();

      _index_service.handlePotentiallyNewIndex(bucket2, Optional.empty(),
          ElasticsearchIndexConfigUtils.buildConfigBeanFromSchema(bucket2, _config_bean, _mapper),
          "_default_");

      final GetIndexTemplatesRequest gt2 = new GetIndexTemplatesRequest().names(template_name);
      final GetIndexTemplatesResponse gtr2 =
          _crud_factory.getClient().admin().indices().getTemplates(gt2).actionGet();
      assertEquals(1, _index_service._bucket_template_cache.size());
      assertEquals(next_time, _index_service._bucket_template_cache.get(bucket._id()));
      assertEquals(1, gtr2.getIndexTemplates().size());
    }
    // Check is updated if time-and-content is different
    {
      time_setter.set(2015, 1, 1, 15, 0, 0);
      final String bucket_str2 = Resources.toString(
          Resources.getResource(
              "com/ikanow/aleph2/search_service/elasticsearch/services/test_bucket2_validate_success.json"),
          Charsets.UTF_8);
      final DataBucketBean bucket2 =
          BeanTemplateUtils.build(bucket_str2, DataBucketBean.class)
              .with("modified", time_setter.getTime())
              .done().get();

      _index_service.handlePotentiallyNewIndex(bucket2, Optional.empty(),
          ElasticsearchIndexConfigUtils.buildConfigBeanFromSchema(bucket2, _config_bean, _mapper),
          "_default_");

      final GetIndexTemplatesRequest gt2 = new GetIndexTemplatesRequest().names(template_name);
      final GetIndexTemplatesResponse gtr2 =
          _crud_factory.getClient().admin().indices().getTemplates(gt2).actionGet();
      assertEquals(1, _index_service._bucket_template_cache.size());
      assertEquals(time_setter.getTime(),
          _index_service._bucket_template_cache.get(bucket._id()));
      assertEquals(1, gtr2.getIndexTemplates().size());
      assertFalse(
          ElasticsearchIndexService.mappingsAreEquivalent(
              gtr2.getIndexTemplates().get(0), mapping_json, _mapper)); // has changed
    }
    // Check if mapping is deleted then next time bucket modified is updated then the mapping is
    // recreated
    {
      _crud_factory.getClient().admin().indices()
          .prepareDeleteTemplate(template_name).execute().actionGet();

      // (check with old date)
      final GetIndexTemplatesRequest gt = new GetIndexTemplatesRequest().names(template_name);
      final GetIndexTemplatesResponse gtr =
          _crud_factory.getClient().admin().indices().getTemplates(gt).actionGet();
      assertTrue("No templates to start with", gtr.getIndexTemplates().isEmpty());

      {
        _index_service.handlePotentiallyNewIndex(bucket, Optional.empty(),
            ElasticsearchIndexConfigUtils.buildConfigBeanFromSchema(
                bucket, _config_bean, _mapper),
            "_default_");

        final GetIndexTemplatesRequest gt2 = new GetIndexTemplatesRequest().names(template_name);
        final GetIndexTemplatesResponse gtr2 =
            _crud_factory.getClient().admin().indices().getTemplates(gt2).actionGet();
        assertTrue("Initially no change", gtr2.getIndexTemplates().isEmpty());
      }
      // Update date and retry
      {
        time_setter.set(2015, 1, 1, 16, 0, 0);
        final Date next_time = time_setter.getTime();
        final DataBucketBean bucket2 =
            BeanTemplateUtils.clone(bucket).with("modified", next_time).done();

        _index_service.handlePotentiallyNewIndex(bucket2, Optional.empty(),
            ElasticsearchIndexConfigUtils.buildConfigBeanFromSchema(
                bucket2, _config_bean, _mapper),
            "_default_");

        final GetIndexTemplatesRequest gt2 = new GetIndexTemplatesRequest().names(template_name);
        final GetIndexTemplatesResponse gtr2 =
            _crud_factory.getClient().admin().indices().getTemplates(gt2).actionGet();
        assertEquals(1, _index_service._bucket_template_cache.size());
        assertEquals(1, gtr2.getIndexTemplates().size());
        assertTrue("Mappings should be equivalent",
            ElasticsearchIndexService.mappingsAreEquivalent(
                gtr2.getIndexTemplates().get(0), mapping_json, _mapper));
      }
    }
  }

  ////////////////////////////////////////////////////////////////////////////////////////////////
  // END-TO-END

  @Test
  public void test_handleMultiBucket() {
    // Will currently fail because read-only indices not yet supported
    final DataBucketBean multi_bucket =
        BeanTemplateUtils.build(DataBucketBean.class)
            .with("_id", "test_multi_bucket")
            .with("multi_bucket_children", ImmutableSet.builder().add("test1").build())
            .done().get();
    try {
      _index_service.getDataService()
          .flatMap(s -> s.getWritableDataService(
              JsonNode.class, multi_bucket, Optional.empty(), Optional.empty()));
      fail("Should have thrown an exception");
    } catch (Exception e) {
      // (don't care about anything else here, this is mostly just for coverage at this point)
    }
  }

  // (including getCrudService)

  @Test
  public void test_endToEnd_autoTime()
      throws IOException, InterruptedException, ExecutionException {
    test_endToEnd_autoTime(true);
  }

  public void test_endToEnd_autoTime(boolean test_not_create_mode)
      throws IOException, InterruptedException, ExecutionException {
    final Calendar time_setter = GregorianCalendar.getInstance();
    time_setter.set(2015, 1, 1, 13, 0, 0);
    final String bucket_str = Resources.toString(
        Resources.getResource(
            "com/ikanow/aleph2/search_service/elasticsearch/services/test_end_2_end_bucket.json"),
        Charsets.UTF_8);
    final DataBucketBean bucket =
        BeanTemplateUtils.build(bucket_str, DataBucketBean.class)
            .with("_id", "test_end_2_end")
            .with("full_name", "/test/end-end/auto-time")
            .with("modified", time_setter.getTime())
            .done().get();

    final String template_name = ElasticsearchIndexUtils.getBaseIndexName(bucket);

    // Check starting from clean
    {
      try {
        _crud_factory.getClient().admin().indices()
            .prepareDeleteTemplate(template_name).execute().actionGet();
      } catch (Exception e) {
      } // (This is fine, just means it doesn't exist)
      try {
        _crud_factory.getClient().admin().indices()
            .prepareDelete(template_name + "*").execute().actionGet();
      } catch (Exception e) {
      } // (This is fine, just means it doesn't exist)

      final GetIndexTemplatesRequest gt = new GetIndexTemplatesRequest().names(template_name);
      final GetIndexTemplatesResponse gtr =
          _crud_factory.getClient().admin().indices().getTemplates(gt).actionGet();
      assertTrue("No templates to start with", gtr.getIndexTemplates().isEmpty());
    }

    final ICrudService<JsonNode> index_service_crud =
        _index_service.getDataService()
            .flatMap(s -> s.getWritableDataService(
                JsonNode.class, bucket, Optional.empty(), Optional.empty()))
            .flatMap(IDataWriteService::getCrudService)
            .get();

    // Check template added:
    {
      final GetIndexTemplatesRequest gt2 = new GetIndexTemplatesRequest().names(template_name);
      final GetIndexTemplatesResponse gtr2 =
          _crud_factory.getClient().admin().indices().getTemplates(gt2).actionGet();
      assertEquals(1, _index_service._bucket_template_cache.size());
      assertEquals(1, gtr2.getIndexTemplates().size());
    }

    // Get batch sub-service
    @SuppressWarnings("unchecked")
    final Optional<ICrudService.IBatchSubservice<JsonNode>> batch_service =
        index_service_crud
            .getUnderlyingPlatformDriver(ICrudService.IBatchSubservice.class, Optional.empty())
            .map(t -> (IBatchSubservice<JsonNode>) t);
    {
      assertTrue("Batch service must exist", batch_service.isPresent());
    }

    // Get information about the crud service
    final ElasticsearchContext es_context =
        (ElasticsearchContext)
            index_service_crud
                .getUnderlyingPlatformDriver(ElasticsearchContext.class, Optional.empty())
                .get();
    {
      assertTrue("Read write index",
          es_context instanceof ElasticsearchContext.ReadWriteContext);
      assertTrue("Temporal index",
          es_context.indexContext()
              instanceof
              ElasticsearchContext.IndexContext.ReadWriteIndexContext.TimedRwIndexContext);
      assertTrue("Auto type",
          es_context.typeContext()
              instanceof ElasticsearchContext.TypeContext.ReadWriteTypeContext.AutoRwTypeContext);

      // Check that the context contains the fixed type fields
      final ElasticsearchContext.TypeContext.ReadWriteTypeContext.AutoRwTypeContext context =
          (ElasticsearchContext.TypeContext.ReadWriteTypeContext.AutoRwTypeContext)
              es_context.typeContext();
      assertEquals(Arrays.asList("@timestamp"),
          context.fixed_type_fields().stream().collect(Collectors.toList()));
    }

    // Write some docs out
    Arrays.asList(1, 2, 3, 4, 5).stream()
        .map(i -> {
          time_setter.set(2015, i, 1, 13, 0, 0);
          return time_setter.getTime();
        })
        .map(d -> (ObjectNode) _mapper.createObjectNode().put("@timestamp", d.getTime()))
        .forEach(o -> {
          ObjectNode o1 = o.deepCopy();
          o1.set("val1", _mapper.createObjectNode().put("val2", "test"));
          ObjectNode o2 = o.deepCopy();
          o2.put("val1", "test");
          batch_service.get().storeObject(o1, false);
          batch_service.get().storeObject(o2, false);
        });

    for (int i = 0; i < 30; ++i) {
      Thread.sleep(1000L);
      if (index_service_crud.countObjects().get() >= 10) {
        System.out.println("Test end 2 end: (Got all the records)");
        break;
      }
    }

    final GetMappingsResponse gmr =
        es_context.client().admin().indices()
            .prepareGetMappings(template_name + "*").execute().actionGet();

    // Should have 5 different indexes, each with 2 types + _default_
    assertEquals(5, gmr.getMappings().keys().size());
    final Set<String> expected_keys =
        Arrays.asList(1, 2, 3, 4, 5).stream()
            .map(i -> template_name + "_2015-0" + (i + 1) + "-01")
            .collect(Collectors.toSet());
    final Set<String> expected_types =
        Arrays.asList("_default_", "type_1", "type_2").stream().collect(Collectors.toSet());

    if (test_not_create_mode) {
      StreamSupport.stream(gmr.getMappings().spliterator(), false)
          .forEach(x -> {
            assertTrue(
                "Is one of the expected keys: " + x.key + " vs "
                    + expected_keys.stream().collect(Collectors.joining(":")),
                expected_keys.contains(x.key));
            // DEBUG
            // System.out.println(" ? " + x.key);
            StreamSupport.stream(x.value.spliterator(), false)
                .forEach(Lambdas.wrap_consumer_u(y -> {
                  // DEBUG
                  // System.out.println("?? " + y.key + " --- " + y.value.sourceAsMap().toString());
                  assertTrue("Is expected type: " + y.key, expected_types.contains(y.key));
                }));
            // Size 3: _default_, type_1, type_2
            assertEquals("Should have 3 types: " + x.value.toString(), 3, x.value.size());
          });
    }

    // TEST DELETION:
    if (test_not_create_mode) test_handleDeleteOrPurge(bucket, true);
  }

  @Test
  public void test_endToEnd_fixedFixed()
      throws IOException, InterruptedException, ExecutionException {
    final Calendar time_setter = GregorianCalendar.getInstance();
    time_setter.set(2015, 1, 1, 13, 0, 0);
    final String bucket_str = Resources.toString(
        Resources.getResource(
            "com/ikanow/aleph2/search_service/elasticsearch/services/test_end_2_end_bucket2.json"),
        Charsets.UTF_8);
    final DataBucketBean bucket =
        BeanTemplateUtils.build(bucket_str, DataBucketBean.class)
            .with("_id", "2b_test_end_2_end")
            .with("full_name", "/test/end-end/fixed/fixed")
            .with("modified", time_setter.getTime())
            .done().get();

    final String template_name = ElasticsearchIndexUtils.getBaseIndexName(bucket);

    // Check starting from clean
    {
      try {
        _crud_factory.getClient().admin().indices()
            .prepareDeleteTemplate(template_name).execute().actionGet();
      } catch (Exception e) {
      } // (This is fine, just means it doesn't exist)
      try {
        _crud_factory.getClient().admin().indices()
            .prepareDelete(template_name + "*").execute().actionGet();
      } catch (Exception e) {
      } // (This is fine, just means it doesn't exist)

      final GetIndexTemplatesRequest gt = new GetIndexTemplatesRequest().names(template_name);
      final GetIndexTemplatesResponse gtr =
          _crud_factory.getClient().admin().indices().getTemplates(gt).actionGet();
      assertTrue("No templates to start with", gtr.getIndexTemplates().isEmpty());
    }

    final ICrudService<JsonNode> index_service_crud =
        _index_service.getDataService()
            .flatMap(s -> s.getWritableDataService(
                JsonNode.class, bucket, Optional.empty(), Optional.empty()))
            .flatMap(IDataWriteService::getCrudService)
            .get();

    // Check template added:
    {
      final GetIndexTemplatesRequest gt2 = new GetIndexTemplatesRequest().names(template_name);
      final GetIndexTemplatesResponse gtr2 =
          _crud_factory.getClient().admin().indices().getTemplates(gt2).actionGet();
      assertEquals(1, _index_service._bucket_template_cache.size());
      assertEquals(1, gtr2.getIndexTemplates().size());
    }

    // Get batch sub-service
    @SuppressWarnings("unchecked")
    final Optional<ICrudService.IBatchSubservice<JsonNode>> batch_service =
        index_service_crud
            .getUnderlyingPlatformDriver(ICrudService.IBatchSubservice.class, Optional.empty())
            .map(t -> (IBatchSubservice<JsonNode>) t);
    {
      assertTrue("Batch service must exist", batch_service.isPresent());
    }

    // Get information about the crud service
    final ElasticsearchContext es_context =
        (ElasticsearchContext)
            index_service_crud
                .getUnderlyingPlatformDriver(ElasticsearchContext.class, Optional.empty())
                .get();
    {
      assertTrue("Read write index",
          es_context instanceof ElasticsearchContext.ReadWriteContext);
      assertTrue("Temporal index",
          es_context.indexContext()
              instanceof
              ElasticsearchContext.IndexContext.ReadWriteIndexContext.FixedRwIndexContext);
      assertTrue("Auto type",
          es_context.typeContext()
              instanceof ElasticsearchContext.TypeContext.ReadWriteTypeContext.FixedRwTypeContext);
    }

    // Write some docs out
    Arrays.asList(1, 2, 3, 4, 5).stream()
        .map(i -> {
          time_setter.set(2015, i, 1, 13, 0, 0);
          return time_setter.getTime();
        })
        .map(d -> (ObjectNode) _mapper.createObjectNode().put("@timestamp", d.getTime()))
        .forEach(o -> {
          ObjectNode o1 = o.deepCopy();
          o1.put("val1", 10);
          ObjectNode o2 = o.deepCopy();
          o2.put("val1", "test");
          batch_service.get().storeObject(o1, false);
          batch_service.get().storeObject(o2, false);
        });

    // (give it a chance to run)
    Thread.sleep(5000L);

    final GetMappingsResponse gmr =
        es_context.client().admin().indices()
            .prepareGetMappings(template_name + "*").execute().actionGet();

    // Should have 1 index with a single "data_object" type (fixed index/fixed type mode)
    assertEquals(1, gmr.getMappings().keys().size());
    final Set<String> expected_keys =
        Arrays.asList("test_fixed_fixed__1cb6bdcdf44f").stream().collect(Collectors.toSet());
    final Set<String> expected_types =
        Arrays.asList("data_object").stream().collect(Collectors.toSet());

    StreamSupport.stream(gmr.getMappings().spliterator(), false)
        .forEach(x -> {
          assertTrue(
              "Is one of the expected keys: " + x.key + " vs "
                  + expected_keys.stream().collect(Collectors.joining(":")),
              expected_keys.contains(x.key));
          // Size 1: data_object
          assertEquals(1, x.value.size());
          // DEBUG
          // System.out.println(" ? " + x.key);
          StreamSupport.stream(x.value.spliterator(), false)
              .forEach(Lambdas.wrap_consumer_u(y -> {
                // DEBUG
                // System.out.println("?? " + y.key + " --- " + y.value.sourceAsMap().toString());
                assertTrue("Is expected type: " + y.key, expected_types.contains(y.key));
              }));
        });

    // TEST DELETION:
    test_handleDeleteOrPurge(bucket, false);
  }

  ////////////////////////////////////////////////////////////////////////////////////////////////
  // TEST AGE OUT

  @Test
  public void test_ageOut() throws IOException, InterruptedException, ExecutionException {
    // Call test_endToEnd_autoTime to create 5 time-based indexes: 2015-01-01 -> 2015-05-01
    // How far is now from 2015-03-02
    final Date d = TimeUtils.getDateFromSuffix("2015-03-02").success();
    final long total_time_ms = new Date().getTime() - d.getTime();
    final long total_days = total_time_ms / (1000L * 3600L * 24L);
    final String age_out = ErrorUtils.get("{0} days", total_days);

    final DataBucketBean bucket =
        BeanTemplateUtils.build(DataBucketBean.class)
            .with("full_name", "/test/end-end/auto-time")
            .with(DataBucketBean::data_schema,
                BeanTemplateUtils.build(DataSchemaBean.class)
                    .with(DataSchemaBean::temporal_schema,
                        BeanTemplateUtils.build(TemporalSchemaBean.class)
                            .with(TemporalSchemaBean::exist_age_max, age_out)
                            .done().get())
                    .done().get())
            .done().get();

    final String template_name = ElasticsearchIndexUtils.getBaseIndexName(bucket);

    test_endToEnd_autoTime(false);

    _index_service._crud_factory.getClient().admin().indices()
        .prepareCreate(template_name + "_2015-03-01_1").execute().actionGet();

    final GetMappingsResponse gmr =
        _index_service._crud_factory.getClient().admin().indices()
            .prepareGetMappings(template_name + "*").execute().actionGet();
    assertEquals(6, gmr.getMappings().keys().size());

    CompletableFuture<BasicMessageBean> cf =
        _index_service.getDataService().get().handleAgeOutRequest(bucket);
    BasicMessageBean res = cf.get();

    assertEquals(true, res.success());
    assertTrue("sensible message: " + res.message(), res.message().contains(" 2 "));
    assertTrue("Message marked as loggable: " + res.details(),
        Optional.ofNullable(res.details()).filter(m -> m.containsKey("loggable")).isPresent());

    System.out.println("Return from to delete: " + res.message());

    Thread.sleep(5000L); // give the indexes time to delete

    final GetMappingsResponse gmr2 =
        _index_service._crud_factory.getClient().admin().indices()
            .prepareGetMappings(template_name + "*").execute().actionGet();
    assertEquals(3, gmr2.getMappings().keys().size());

    // Check some edge cases:

    // 1) Run it again, returns success but not loggable:
    CompletableFuture<BasicMessageBean> cf2 =
        _index_service.getDataService().get().handleAgeOutRequest(bucket);
    BasicMessageBean res2 = cf2.get();

    assertEquals(true, res2.success());
    assertTrue("sensible message: " + res2.message(), res2.message().contains(" 0 "));
    assertTrue("Message _not_ marked as loggable: " + res2.details(),
        !Optional.ofNullable(res2.details()).map(m -> m.get("loggable")).isPresent());

    // 2) No temporal settings
    final DataBucketBean bucket3 =
        BeanTemplateUtils.build(DataBucketBean.class)
            .with("full_name", "/test/handle/age/out/delete/not/temporal")
            .with(DataBucketBean::data_schema,
                BeanTemplateUtils.build(DataSchemaBean.class).done().get())
            .done().get();

    CompletableFuture<BasicMessageBean> cf3 =
        _index_service.getDataService().get().handleAgeOutRequest(bucket3);
    BasicMessageBean res3 = cf3.get();
    // no temporal settings => returns success
    assertEquals(true, res3.success());

    // 3) Unparseable temporal settings (in theory won't validate but we can test here)
    final DataBucketBean bucket4 =
        BeanTemplateUtils.build(DataBucketBean.class)
            .with("full_name", "/test/handle/age/out/delete/temporal/malformed")
            .with(DataBucketBean::data_schema,
                BeanTemplateUtils.build(DataSchemaBean.class)
                    .with(DataSchemaBean::temporal_schema,
                        BeanTemplateUtils.build(TemporalSchemaBean.class)
                            .with(TemporalSchemaBean::exist_age_max, "bananas")
                            .done().get())
                    .done().get())
            .done().get();

    CompletableFuture<BasicMessageBean> cf4 =
        _index_service.getDataService().get().handleAgeOutRequest(bucket4);
    BasicMessageBean res4 = cf4.get();
    // malformed temporal settings => returns failure
    assertEquals(false, res4.success());
  }

  ////////////////////////////////////////////////////////////////////////////////////////////////
  // TEST DELETION

  // (these are called from the code above)

  public void test_handleDeleteOrPurge(final DataBucketBean to_handle, boolean delete_not_purge)
      throws InterruptedException, ExecutionException {
    System.out.println("****** Checking delete/purge");

    final String template_name = ElasticsearchIndexUtils.getBaseIndexName(to_handle);
    final ICrudService<JsonNode> index_service_crud =
        _index_service.getDataService()
            .flatMap(s -> s.getWritableDataService(
                JsonNode.class, to_handle, Optional.empty(), Optional.empty()))
            .flatMap(IDataWriteService::getCrudService)
            .get();
    final ElasticsearchContext es_context =
        (ElasticsearchContext)
            index_service_crud
                .getUnderlyingPlatformDriver(ElasticsearchContext.class, Optional.empty())
                .get();

    // (Actually first off, check there's data and templates)
    // Data:
    {
      final GetMappingsResponse gmr =
          es_context.client().admin().indices()
              .prepareGetMappings(template_name + "*").execute().actionGet();
      assertTrue("There are indexes", gmr.getMappings().keys().size() > 0);
    }
    // Templates:
    {
      final GetIndexTemplatesRequest gt_pre =
          new GetIndexTemplatesRequest().names(template_name);
      final GetIndexTemplatesResponse gtr_pre =
          _crud_factory.getClient().admin().indices().getTemplates(gt_pre).actionGet();
      assertEquals(1, _index_service._bucket_template_cache.size());
      assertEquals(1, gtr_pre.getIndexTemplates().size());
    }

    // Then, perform request
    final BasicMessageBean result =
        _index_service.getDataService().get()
            .handleBucketDeletionRequest(to_handle, Optional.empty(), delete_not_purge)
            .get();
    assertEquals("Deletion should succeed: " + result.message(), true, result.success());

    // Check templates gone iff deleting not purging
    if (delete_not_purge) {
      final GetIndexTemplatesRequest gt = new GetIndexTemplatesRequest().names(template_name);
      final GetIndexTemplatesResponse gtr =
          _crud_factory.getClient().admin().indices().getTemplates(gt).actionGet();
      assertTrue("No templates after deletion", gtr.getIndexTemplates().isEmpty());
    } else {
      final GetIndexTemplatesRequest gt2 = new GetIndexTemplatesRequest().names(template_name);
      final GetIndexTemplatesResponse gtr2 =
          _crud_factory.getClient().admin().indices().getTemplates(gt2).actionGet();
      assertEquals(1, _index_service._bucket_template_cache.size());
      assertEquals(1, gtr2.getIndexTemplates().size());
    }

    // Check all files deleted
    // Check via mappings
    {
      final GetMappingsResponse gmr =
          es_context.client().admin().indices()
              .prepareGetMappings(template_name + "*").execute().actionGet();
      assertEquals(0, gmr.getMappings().keys().size());
    }
    // Check via index size (recreates templates)
    final ICrudService<JsonNode> index_service_crud_2 =
        _index_service.getDataService()
            .flatMap(s -> s.getWritableDataService(
                JsonNode.class, to_handle, Optional.empty(), Optional.empty()))
            .flatMap(IDataWriteService::getCrudService)
            .get();
    assertEquals(0, index_service_crud_2.countObjects().get().intValue());
  }

  @Test
  public void test_deleteNonexistantBucket()
      throws JsonParseException, JsonMappingException, IOException, InterruptedException,
          ExecutionException {
    final DataBucketBean bucket =
        BeanTemplateUtils.build(DataBucketBean.class)
            .with("_id", "2b_test_end_2_end_not_exist")
            .with("full_name", "/test/end-end/fixed/fixed/not/exist")
            .done().get();

    final BasicMessageBean result =
        _index_service.getDataService().get()
            .handleBucketDeletionRequest(bucket, Optional.empty(), true)
            .get();
    assertEquals("Deletion should succeed: " + result.message(), true, result.success());
  }
}
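The verboseness tests above rely on is_verbose coercing several spellings of true/false out of the technology_override_schema map. A minimal sketch of that kind of coercion, consistent with the test data but not the real implementation:

// Hedged sketch (not the real ElasticsearchIndexService.is_verbose): coerce a
// "verbose" override of mixed type (Boolean, String, Integer) to a boolean,
// defaulting to false when absent.
static boolean isVerboseSketch(final Map<String, Object> technology_override_schema) {
  return Optional.ofNullable(technology_override_schema)
      .map(m -> m.get("verbose"))
      .map(v -> v.toString().trim())
      .map(s -> s.equalsIgnoreCase("true") || s.equals("1"))
      .orElse(false);
}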
/**
 * Given a bucket, returns either a future containing the first error encountered, _or_ a map
 * (with both name and id as keys) of path names - with the guarantee that the files have been
 * cached by the time the future completes.
 *
 * @param bucket
 * @param management_db
 * @param globals
 * @param fs
 * @param context
 * @param handler_for_errors
 * @param msg_for_errors
 * @return a future containing the first error encountered, _or_ a map (with both name and id as
 *     keys) of path names
 */
@SuppressWarnings("unchecked")
protected static <M> CompletableFuture<Validation<BasicMessageBean, Map<String, Tuple2<SharedLibraryBean, String>>>> cacheJars(
    final DataBucketBean bucket, final IManagementDbService management_db, final GlobalPropertiesBean globals,
    final IStorageService fs, final IServiceContext context, final String handler_for_errors, final M msg_for_errors) {
  try {
    MethodNamingHelper<SharedLibraryBean> helper = BeanTemplateUtils.from(SharedLibraryBean.class);
    final Optional<QueryComponent<SharedLibraryBean>> spec = getQuery(bucket);
    if (!spec.isPresent()) {
      return CompletableFuture.completedFuture(
          Validation.<BasicMessageBean, Map<String, Tuple2<SharedLibraryBean, String>>>success(Collections.emptyMap()));
    }
    return management_db
        .getSharedLibraryStore()
        .secured(context, new AuthorizationBean(bucket.owner_id()))
        .getObjectsBySpec(spec.get())
        .thenComposeAsync(cursor -> {
          // This is a map of futures from the cache call - either an error or the path name.
          // Note we use a tuple of (name, id) as the key and then flatten it out later
          final Map<Tuple2<String, String>, Tuple2<SharedLibraryBean, CompletableFuture<Validation<BasicMessageBean, String>>>> map_of_futures =
              StreamSupport.stream(cursor.spliterator(), true)
                  .filter(lib -> { return true; }) // (currently a no-op filter)
                  .collect(Collectors.<SharedLibraryBean, Tuple2<String, String>, Tuple2<SharedLibraryBean, CompletableFuture<Validation<BasicMessageBean, String>>>>toMap(
                      // want to keep both the name and id versions - will flatten out below
                      lib -> Tuples._2T(lib.path_name(), lib._id()), // (key)
                      // spin off a future in which the file is being copied - save the shared library bean also
                      lib -> Tuples._2T(lib, // (value)
                          JarCacheUtils.getCachedJar(globals.local_cached_jar_dir(), lib, fs, handler_for_errors, msg_for_errors))));
          // Denest from a map of futures to a future of maps, also handling any errors here:
          // (some sort of "lift" function would be useful here - these are somewhat inelegant steps)
          final CompletableFuture<Validation<BasicMessageBean, String>>[] futures =
              (CompletableFuture<Validation<BasicMessageBean, String>>[]) map_of_futures
                  .values().stream().map(t2 -> t2._2()).collect(Collectors.toList()).toArray(new CompletableFuture[0]);
          // (have to embed this thenApply instead of bringing it outside as part of the thenCompose
          // chain, because otherwise we'd lose the map_of_futures scope)
          return CompletableFuture.allOf(futures)
              .<Validation<BasicMessageBean, Map<String, Tuple2<SharedLibraryBean, String>>>>thenApply(f -> {
                try {
                  final Map<String, Tuple2<SharedLibraryBean, String>> almost_there =
                      map_of_futures.entrySet().stream()
                          .flatMap(kv -> {
                            final Validation<BasicMessageBean, String> ret = kv.getValue()._2().join(); // (must have already returned if we get here)
                            return ret.<Stream<Tuple2<String, Tuple2<SharedLibraryBean, String>>>>validation(
                                // Error:
                                err -> { throw new RuntimeException(err.message()); } // (not ideal, but will do)
                                ,
                                // Normal:
                                s -> {
                                  return Arrays.asList(
                                      Tuples._2T(kv.getKey()._1(), Tuples._2T(kv.getValue()._1(), s)), // (result object keyed by path_name)
                                      Tuples._2T(kv.getKey()._2(), Tuples._2T(kv.getValue()._1(), s))) // (result object keyed by _id)
                                      .stream();
                                });
                          })
                          .collect(Collectors.<Tuple2<String, Tuple2<SharedLibraryBean, String>>, String, Tuple2<SharedLibraryBean, String>>toMap(
                              idname_path -> idname_path._1(), // (key)
                              idname_path -> idname_path._2() // (value)
                              ));
                  return Validation.<BasicMessageBean, Map<String, Tuple2<SharedLibraryBean, String>>>success(almost_there);
                } catch (Exception e) {
                  // handle the exception thrown above, which contains the message bean from whatever the original error was
                  return Validation.<BasicMessageBean, Map<String, Tuple2<SharedLibraryBean, String>>>fail(
                      SharedErrorUtils.buildErrorMessage(handler_for_errors.toString(), msg_for_errors, e.getMessage()));
                }
              });
        });
  } catch (Throwable e) { // (can only occur if the DB call errors)
    return CompletableFuture.completedFuture(
        Validation.fail(
            SharedErrorUtils.buildErrorMessage(handler_for_errors.toString(), msg_for_errors,
                ErrorUtils.getLongForm(SharedErrorUtils.ERROR_CACHING_SHARED_LIBS, e, bucket.full_name()))));
  }
}
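// (For reference) A sketch of how a caller consumes the future returned by cacheJars - this
// mirrors the pattern used in test_cacheJars further below; the surrounding variables and the
// "my_source"/"my_command" labels are illustrative, not part of the actual API surface:
final Validation<BasicMessageBean, Map<String, Tuple2<SharedLibraryBean, String>>> cache_result =
    cacheJars(bucket, management_db, globals, fs, context, "my_source", "my_command").join();
if (cache_result.isSuccess()) {
  // every library appears twice - once keyed by its _id and once by its path_name - so either works:
  final Tuple2<SharedLibraryBean, String> lib_and_path = cache_result.success().get("test_tech_id_stream");
  _logger.info("Cached jar for " + lib_and_path._1()._id() + " at " + lib_and_path._2());
} else {
  _logger.error("Failed to cache jars: " + cache_result.fail().message());
}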
@Test public void test_validationFail() throws IOException { final String bucket_str = Resources.toString( Resources.getResource( "com/ikanow/aleph2/search_service/elasticsearch/services/test_bucket_validate_fail.json"), Charsets.UTF_8); final DataBucketBean bucket = BeanTemplateUtils.build(bucket_str, DataBucketBean.class).done().get(); // 1) Verbose mode off { final Collection<BasicMessageBean> res_col = _index_service.validateSchema(bucket.data_schema().columnar_schema(), bucket)._2(); final Collection<BasicMessageBean> res_search = _index_service.validateSchema(bucket.data_schema().search_index_schema(), bucket)._2(); final Collection<BasicMessageBean> res_time = _index_service.validateSchema(bucket.data_schema().temporal_schema(), bucket)._2(); assertEquals(0, res_col.size()); assertEquals(0, res_time.size()); assertEquals(1, res_search.size()); final BasicMessageBean res_search_message = res_search.iterator().next(); assertEquals(false, res_search_message.success()); } // 2) Check setting an invalid max index size { final String bucket_str_2 = Resources.toString( Resources.getResource( "com/ikanow/aleph2/search_service/elasticsearch/services/test_bucket_validate_success.json"), Charsets.UTF_8); final DataBucketBean bucket2 = BeanTemplateUtils.build(bucket_str_2, DataBucketBean.class).done().get(); final DataBucketBean bucket_too_small = BeanTemplateUtils.clone(bucket2) .with( DataBucketBean::data_schema, BeanTemplateUtils.clone(bucket2.data_schema()) .with( DataSchemaBean::search_index_schema, BeanTemplateUtils.clone(bucket2.data_schema().search_index_schema()) .with(DataSchemaBean.SearchIndexSchemaBean::target_index_size_mb, 10L) .done()) .done()) .done(); final Collection<BasicMessageBean> res_search = _index_service .validateSchema(bucket.data_schema().search_index_schema(), bucket_too_small) ._2(); assertEquals(1, res_search.size()); assertEquals(false, res_search.stream().allMatch(BasicMessageBean::success)); BasicMessageBean res_search_message = res_search.iterator().next(); assertTrue( "Right message: " + res_search_message.message(), res_search_message.message().contains("10 MB")); } }
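// (For reference) validateSchema returns a 2-tuple: _1() is the time-based index suffix template
// (see the temporal checks in test_validationSuccess below, eg "_{yyyy-MM-dd}"), and _2() is the
// collection of validation messages asserted on above. A minimal, illustrative consumption of both
// halves (the exact generic types are inferred from the usages in these tests):
final Tuple2<String, Collection<BasicMessageBean>> validate_result =
    _index_service.validateSchema(bucket.data_schema().temporal_schema(), bucket);
final String index_suffix = validate_result._1(); // "" unless a grouping_time_period is configured
final boolean all_valid = validate_result._2().stream().allMatch(BasicMessageBean::success);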
@Test public void test_validationSuccess() throws IOException { final String bucket_str = Resources.toString( Resources.getResource( "com/ikanow/aleph2/search_service/elasticsearch/services/test_bucket_validate_success.json"), Charsets.UTF_8); final DataBucketBean bucket = BeanTemplateUtils.build(bucket_str, DataBucketBean.class).done().get(); // 1) Verbose mode off { final Collection<BasicMessageBean> res_col = _index_service.validateSchema(bucket.data_schema().columnar_schema(), bucket)._2(); final Collection<BasicMessageBean> res_search = _index_service.validateSchema(bucket.data_schema().search_index_schema(), bucket)._2(); final Collection<BasicMessageBean> res_time = _index_service.validateSchema(bucket.data_schema().temporal_schema(), bucket)._2(); assertEquals(0, res_col.size()); assertEquals(0, res_search.size()); assertEquals(0, res_time.size()); } // 2) Verbose mode on { final DataBucketBean bucket_verbose = BeanTemplateUtils.clone(bucket) .with( DataBucketBean::data_schema, BeanTemplateUtils.clone(bucket.data_schema()) .with( DataSchemaBean::search_index_schema, BeanTemplateUtils.clone(bucket.data_schema().search_index_schema()) .with( DataSchemaBean.SearchIndexSchemaBean::technology_override_schema, ImmutableMap.builder() .putAll( bucket .data_schema() .search_index_schema() .technology_override_schema()) .put("verbose", true) .build()) .done()) .done()) .done(); final Collection<BasicMessageBean> res_col = _index_service .validateSchema(bucket_verbose.data_schema().columnar_schema(), bucket) ._2(); final Collection<BasicMessageBean> res_search = _index_service .validateSchema(bucket_verbose.data_schema().search_index_schema(), bucket) ._2(); final Collection<BasicMessageBean> res_time = _index_service .validateSchema(bucket_verbose.data_schema().temporal_schema(), bucket) ._2(); assertEquals(0, res_col.size()); assertEquals(0, res_time.size()); assertEquals(2, res_search.size()); assertEquals(true, res_search.stream().allMatch(BasicMessageBean::success)); Iterator<BasicMessageBean> res_search_message = res_search.iterator(); final String mapping_str = Resources.toString( Resources.getResource( "com/ikanow/aleph2/search_service/elasticsearch/services/test_verbose_mapping_validate_results.json"), Charsets.UTF_8); final JsonNode mapping_json = _mapper.readTree(mapping_str.getBytes()); assertEquals( mapping_json.toString(), _mapper.readTree(res_search_message.next().message()).toString()); assertTrue( "Sets the max index override: " + res_search.stream().skip(1).map(m -> m.message()).collect(Collectors.joining()), res_search_message.next().message().contains("1,000 MB")); } // 3) Temporal { final DataBucketBean bucket_temporal_no_grouping = BeanTemplateUtils.clone(bucket) .with( DataBucketBean::data_schema, BeanTemplateUtils.clone(bucket.data_schema()) .with( DataSchemaBean::temporal_schema, BeanTemplateUtils.build(DataSchemaBean.TemporalSchemaBean.class) .done() .get()) .done()) .done(); assertEquals( "", _index_service .validateSchema(bucket_temporal_no_grouping.data_schema().temporal_schema(), bucket) ._1()); final DataBucketBean bucket_temporal_grouping = BeanTemplateUtils.clone(bucket) .with( DataBucketBean::data_schema, BeanTemplateUtils.clone(bucket.data_schema()) .with( DataSchemaBean::temporal_schema, BeanTemplateUtils.build(DataSchemaBean.TemporalSchemaBean.class) .with(DataSchemaBean.TemporalSchemaBean::grouping_time_period, "1d") .done() .get()) .done()) .done(); assertEquals( "_{yyyy-MM-dd}", _index_service 
.validateSchema(bucket_temporal_grouping.data_schema().temporal_schema(), bucket) ._1()); } }
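// (For reference) A self-contained illustration of what the "_{yyyy-MM-dd}" suffix asserted above
// implies at runtime: a "1d" grouping_time_period yields one index per day, with the formatted date
// appended to the base index name. (The date handling below is illustrative, not the service's code.)
class TimeSuffixSketch {
  public static void main(String[] args) {
    final java.text.SimpleDateFormat format = new java.text.SimpleDateFormat("yyyy-MM-dd");
    // eg a write on 2015-02-01 to a bucket with base index "test_bucket" would target
    // something like "test_bucket_2015-02-01":
    System.out.println("_" + format.format(new java.util.Date()));
  }
}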
protected static CompletableFuture<BucketActionReplyMessage> talkToStream(
    final IStormController storm_controller, final DataBucketBean bucket, final BucketActionMessage m,
    final Validation<BasicMessageBean, IEnrichmentStreamingTopology> err_or_user_topology,
    final Validation<BasicMessageBean, Map<String, Tuple2<SharedLibraryBean, String>>> err_or_map,
    final String source, final StreamingEnrichmentContext context, final String yarn_config_dir, final String cached_jars_dir) {
  try {
    // handle getting the user libs
    final List<String> user_lib_paths = err_or_map.<List<String>>validation(
        fail -> Collections.emptyList() // (going to die soon anyway)
        ,
        success -> success.values().stream().map(tuple -> tuple._2.replaceFirst("file:", "")).collect(Collectors.toList()));
    return err_or_user_topology.<CompletableFuture<BucketActionReplyMessage>>validation(
        // ERROR getting enrichment topology
        error -> {
          return CompletableFuture.completedFuture(new BucketActionHandlerMessage(source, error));
        },
        // NORMAL grab enrichment topology
        enrichment_topology -> {
          final String entry_point = enrichment_topology.getClass().getName();
          context.setBucket(bucket);
          context.setUserTopologyEntryPoint(entry_point);
          // also set the library bean - note: if we've reached this point then it must have been set,
          // otherwise the IHarvestTechnologyModule wouldn't exist
          err_or_map.forEach(map -> {
            context.setLibraryConfig(
                map.values().stream()
                    .map(t2 -> t2._1())
                    .filter(lib -> entry_point.equals(lib.misc_entry_point())
                        || entry_point.equals(lib.streaming_enrichment_entry_point()))
                    .findFirst()
                    .orElse(BeanTemplateUtils.build(SharedLibraryBean.class).done().get()));
            // (else this is a passthrough topology, so just use a dummy library bean)
          });
          _logger.info("Set active class=" + enrichment_topology.getClass() + " message=" + m.getClass().getSimpleName() + " bucket=" + bucket.full_name());
          return Patterns.match(m)
              .<CompletableFuture<BucketActionReplyMessage>>andReturn()
              .when(BucketActionMessage.DeleteBucketActionMessage.class, msg -> {
                return StormControllerUtil.stopJob(storm_controller, bucket);
              })
              .when(BucketActionMessage.NewBucketActionMessage.class, msg -> {
                if (!msg.is_suspended())
                  return StormControllerUtil.startJob(storm_controller, bucket, context, user_lib_paths, enrichment_topology, cached_jars_dir);
                else
                  return StormControllerUtil.stopJob(storm_controller, bucket); // (nothing to do, but call this so something sensible is returned)
              })
              .when(BucketActionMessage.UpdateBucketActionMessage.class, msg -> {
                if (msg.is_enabled())
                  return StormControllerUtil.restartJob(storm_controller, bucket, context, user_lib_paths, enrichment_topology, cached_jars_dir);
                else
                  return StormControllerUtil.stopJob(storm_controller, bucket);
              })
              .when(BucketActionMessage.TestBucketActionMessage.class, msg -> {
                // TODO (ALEPH-25): in the future, run this test with local storm rather than the remote storm_controller
                return StormControllerUtil.restartJob(storm_controller, bucket, context, user_lib_paths, enrichment_topology, cached_jars_dir);
              })
              .otherwise(msg -> {
                return CompletableFuture.completedFuture(
                    new BucketActionHandlerMessage(source,
                        new BasicMessageBean(new Date(), false, null, "Unknown message", 0, "Unknown message", null)));
              });
        });
  } catch (Throwable e) { // (trying to use Validation to avoid this, but just in case...)
return CompletableFuture.completedFuture( new BucketActionHandlerMessage( source, new BasicMessageBean( new Date(), false, null, ErrorUtils.getLongForm("Error loading streaming class: {0}", e), 0, ErrorUtils.getLongForm("Error loading streaming class: {0}", e), null))); } }
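// Summary of the message-to-Storm-action mapping implemented by talkToStream above:
//   DeleteBucketActionMessage              -> stopJob
//   NewBucketActionMessage (not suspended) -> startJob
//   NewBucketActionMessage (suspended)     -> stopJob (no-op, just returns something sensible)
//   UpdateBucketActionMessage (enabled)    -> restartJob
//   UpdateBucketActionMessage (disabled)   -> stopJob
//   TestBucketActionMessage                -> restartJob (TODO ALEPH-25: use local storm)
//   anything else                          -> error reply ("Unknown message")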
@Test
public void test_cacheJars() throws UnsupportedFileSystemException, InterruptedException, ExecutionException {
  try {
    // Preamble:
    // 0) Insert the 4 library beans into the management db
    final DataBucketBean bucket = createBucket("test_tech_id_stream");
    final String pathname1 = System.getProperty("user.dir") + "/misc_test_assets/simple-harvest-example.jar";
    final Path path1 = FileContext.getLocalFSFileContext().makeQualified(new Path(pathname1));
    final String pathname2 = System.getProperty("user.dir") + "/misc_test_assets/simple-harvest-example2.jar";
    final Path path2 = FileContext.getLocalFSFileContext().makeQualified(new Path(pathname2));
    List<SharedLibraryBean> lib_elements = createSharedLibraryBeans(path1, path2);
    final IManagementDbService underlying_db = _service_context.getService(IManagementDbService.class, Optional.empty()).get();
    final IManagementCrudService<SharedLibraryBean> library_crud = underlying_db.getSharedLibraryStore();
    library_crud.deleteDatastore();
    assertEquals("Cleansed library store", 0L, (long) library_crud.countObjects().get());
    library_crud.storeObjects(lib_elements).get();
    assertEquals("Should have 4 library beans", 4L, (long) library_crud.countObjects().get());
    // 0a) Check with no streaming - gets nothing
    {
      CompletableFuture<Validation<BasicMessageBean, Map<String, Tuple2<SharedLibraryBean, String>>>> reply_structure =
          DataBucketChangeActor.cacheJars(bucket, _service_context.getCoreManagementDbService(), _service_context.getGlobalProperties(),
              _service_context.getStorageService(), _service_context, "test1_source", "test1_command");
      if (reply_structure.get().isFail()) {
        fail("About to crash with: " + reply_structure.get().fail().message());
      }
      assertTrue("cacheJars should return valid reply", reply_structure.get().isSuccess());
      final Map<String, Tuple2<SharedLibraryBean, String>> reply_map = reply_structure.get().success();
      assertEquals(0L, reply_map.size()); // (no streaming enrichment topology is configured, so nothing is cached)
    }
    // 0b) Create the more complex bucket
    final EnrichmentControlMetadataBean enrichment_module = new EnrichmentControlMetadataBean(
        "test_name", Collections.emptyList(), true, null,
        Arrays.asList("test_tech_id_stream", "test_module_id"), null, new LinkedHashMap<>());
    final DataBucketBean bucket2 = BeanTemplateUtils.clone(bucket)
        .with(DataBucketBean::streaming_enrichment_topology, enrichment_module)
        .done();
    // 1) Normal operation
    CompletableFuture<Validation<BasicMessageBean, Map<String, Tuple2<SharedLibraryBean, String>>>> reply_structure =
        DataBucketChangeActor.cacheJars(bucket2, _service_context.getCoreManagementDbService(), _service_context.getGlobalProperties(),
            _service_context.getStorageService(), _service_context, "test1_source", "test1_command");
    if (reply_structure.get().isFail()) {
      fail("About to crash with: " + reply_structure.get().fail().message());
    }
    assertTrue("cacheJars should return valid reply", reply_structure.get().isSuccess());
    final Map<String, Tuple2<SharedLibraryBean, String>> reply_map = reply_structure.get().success();
    assertEquals("Should have 4 beans: " + reply_map.toString(), 4L, reply_map.size()); // (both modules, 1x for _id and 1x for name)
    // 2) A couple of error cases:
    final EnrichmentControlMetadataBean enrichment_module2 = new EnrichmentControlMetadataBean(
        "test_name", Collections.emptyList(), true, null,
        Arrays.asList("test_tech_id_stream", "test_module_id", "failtest"), null, new LinkedHashMap<>());
    final DataBucketBean bucket3 = BeanTemplateUtils.clone(bucket)
        .with(DataBucketBean::streaming_enrichment_topology,
enrichment_module2) .done(); CompletableFuture< Validation<BasicMessageBean, Map<String, Tuple2<SharedLibraryBean, String>>>> reply_structure3 = DataBucketChangeActor.cacheJars( bucket3, _service_context.getCoreManagementDbService(), _service_context.getGlobalProperties(), _service_context.getStorageService(), _service_context, "test2_source", "test2_command"); assertTrue("cacheJars should return error", reply_structure3.get().isFail()); } catch (Exception e) { System.out.println(ErrorUtils.getLongForm("guice? {0}", e)); throw e; } }
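// (For reference) Why the "4 beans" above for only 2 modules: cacheJars registers every cached
// library under both its _id and its path_name. A self-contained illustration of that key-doubling
// (the id/name/path values below are hypothetical):
class KeyDoublingSketch {
  public static void main(String[] args) {
    final java.util.Map<String, String> cached = new java.util.HashMap<>();
    cached.put("test_tech_id_stream", "file:/cache/jar1"); // first module, keyed by _id
    cached.put("/path/to/jar1", "file:/cache/jar1"); // ... and again by path_name
    cached.put("test_module_id", "file:/cache/jar2"); // second module likewise
    cached.put("/path/to/jar2", "file:/cache/jar2");
    System.out.println(cached.size()); // prints 4
  }
}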
@Test
public void test_getStreamingTopology() throws UnsupportedFileSystemException, InterruptedException, ExecutionException {
  final DataBucketBean bucket = createBucket("test_tech_id_stream");
  final String pathname1 = System.getProperty("user.dir") + "/misc_test_assets/simple-topology-example.jar";
  final Path path1 = FileContext.getLocalFSFileContext().makeQualified(new Path(pathname1));
  final String pathname2 = System.getProperty("user.dir") + "/misc_test_assets/simple-harvest-example2.jar";
  final Path path2 = FileContext.getLocalFSFileContext().makeQualified(new Path(pathname2));
  List<SharedLibraryBean> lib_elements = createSharedLibraryBeans(path1, path2);
  //////////////////////////////////////////////////////
  // 1) Check - if called with an error, then just passes that error along
  final BasicMessageBean error = SharedErrorUtils.buildErrorMessage("test_source", "test_message", "test_error");
  final Validation<BasicMessageBean, IEnrichmentStreamingTopology> test1 =
      DataBucketChangeActor.getStreamingTopology(bucket, new BucketActionMessage.BucketActionOfferMessage(bucket), "test_source2", Validation.fail(error));
  assertTrue("Got error back", test1.isFail());
  assertEquals("test_source", test1.fail().source());
  assertEquals("test_message", test1.fail().command());
  assertEquals("test_error", test1.fail().message());
  //////////////////////////////////////////////////////
  // 2) Check the error handling inside getStreamingTopology
  final ImmutableMap<String, Tuple2<SharedLibraryBean, String>> test2_input =
      ImmutableMap.<String, Tuple2<SharedLibraryBean, String>>builder()
          .put("test_tech_id_stream_2b", Tuples._2T(null, null))
          .build();
  final Validation<BasicMessageBean, IEnrichmentStreamingTopology> test2a =
      DataBucketChangeActor.getStreamingTopology(
          BeanTemplateUtils.clone(bucket)
              .with(DataBucketBean::streaming_enrichment_topology,
                  BeanTemplateUtils.build(EnrichmentControlMetadataBean.class)
                      .with(EnrichmentControlMetadataBean::library_names_or_ids, Arrays.asList("test_tech_id_stream_2a"))
                      .done()
                      .get())
              .done(),
          new BucketActionMessage.BucketActionOfferMessage(bucket), "test_source2a", Validation.success(test2_input));
  assertTrue("Got error back", test2a.isFail());
  assertEquals("test_source2a", test2a.fail().source());
  assertEquals("BucketActionOfferMessage", test2a.fail().command());
  assertEquals(
      ErrorUtils.get(SharedErrorUtils.SHARED_LIBRARY_NAME_NOT_FOUND, bucket.full_name(), "(unknown)"), // (cloned bucket above)
      test2a.fail().message());
  final Validation<BasicMessageBean, IEnrichmentStreamingTopology> test2b =
      DataBucketChangeActor.getStreamingTopology(
          BeanTemplateUtils.clone(bucket)
              .with(DataBucketBean::streaming_enrichment_topology,
                  BeanTemplateUtils.build(EnrichmentControlMetadataBean.class)
                      .with(EnrichmentControlMetadataBean::library_names_or_ids, Arrays.asList("test_tech_id_stream_2b"))
                      .done()
                      .get())
              .done(),
          new BucketActionMessage.BucketActionOfferMessage(bucket), "test_source2b", Validation.success(test2_input));
  assertTrue("Got error back", test2b.isFail());
  assertEquals("test_source2b", test2b.fail().source());
  assertEquals("BucketActionOfferMessage", test2b.fail().command());
  assertEquals(
      ErrorUtils.get(SharedErrorUtils.SHARED_LIBRARY_NAME_NOT_FOUND, bucket.full_name(), "(unknown)"), // (cloned bucket above)
      test2b.fail().message());
  //////////////////////////////////////////////////////
  // 3) OK now it will actually do something
  final String java_name = _service_context.getGlobalProperties().local_cached_jar_dir() + File.separator + "test_tech_id_stream.cache.jar";
  _logger.info("Needed to delete locally cached file? " + java_name + ": " + new File(java_name).delete());
  // Requires that the file has already been cached:
  final Validation<BasicMessageBean, String> cached_file =
      JarCacheUtils.getCachedJar(_service_context.getGlobalProperties().local_cached_jar_dir(), lib_elements.get(0),
          _service_context.getStorageService(), "test3", "test3").get();
  if (cached_file.isFail()) {
    fail("About to crash with: " + cached_file.fail().message());
  }
  assertTrue("The cached file exists: " + java_name, new File(java_name).exists());
  // OK the setup is done and validated, now actually test the underlying call:
  final ImmutableMap<String, Tuple2<SharedLibraryBean, String>> test3_input =
      ImmutableMap.<String, Tuple2<SharedLibraryBean, String>>builder()
          .put("test_tech_id_stream", Tuples._2T(lib_elements.get(0), cached_file.success()))
          .build();
  final Validation<BasicMessageBean, IEnrichmentStreamingTopology> test3 =
      DataBucketChangeActor.getStreamingTopology(
          BeanTemplateUtils.clone(bucket)
              .with(DataBucketBean::streaming_enrichment_topology,
                  BeanTemplateUtils.build(EnrichmentControlMetadataBean.class)
                      .with(EnrichmentControlMetadataBean::library_names_or_ids, Arrays.asList("test_tech_id_stream"))
                      .done()
                      .get())
              .done(),
          new BucketActionMessage.BucketActionOfferMessage(bucket), "test_source3", Validation.success(test3_input));
  if (test3.isFail()) {
    fail("About to crash with: " + test3.fail().message());
  }
  assertTrue("getStreamingTopology call succeeded", test3.isSuccess());
  assertTrue("topology created: ", test3.success() != null);
  assertEquals(lib_elements.get(0).misc_entry_point(), test3.success().getClass().getName());
  // (Try again but with a failing version, due to class not found)
  final ImmutableMap<String, Tuple2<SharedLibraryBean, String>> test3a_input =
      ImmutableMap.<String, Tuple2<SharedLibraryBean, String>>builder()
          .put("test_tech_id_stream_fail", Tuples._2T(lib_elements.get(3), cached_file.success()))
          .build();
  final Validation<BasicMessageBean, IEnrichmentStreamingTopology> test3a =
      DataBucketChangeActor.getStreamingTopology(
          BeanTemplateUtils.clone(bucket)
              .with(DataBucketBean::streaming_enrichment_topology,
                  BeanTemplateUtils.build(EnrichmentControlMetadataBean.class)
                      .with(EnrichmentControlMetadataBean::library_names_or_ids, Arrays.asList("test_tech_id_stream_fail"))
                      .done()
                      .get())
              .done(),
          new BucketActionMessage.BucketActionOfferMessage(bucket), "test_source3", Validation.success(test3a_input));
  assertTrue("Got error back", test3a.isFail());
  assertTrue("Right error: " + test3a.fail().message(),
      test3a.fail().message().contains("com.ikanow.aleph2.test.example.ExampleStreamTopology"));
  // Now check with the "not just the harvest tech" flag set
  final String java_name2 = _service_context.getGlobalProperties().local_cached_jar_dir() + File.separator + "test_module_id.cache.jar";
  _logger.info("Needed to delete locally cached file? " + java_name2 + ": " + new File(java_name2).delete());
  // Requires that the file has already been cached:
  final Validation<BasicMessageBean, String> cached_file2 =
      JarCacheUtils.getCachedJar(_service_context.getGlobalProperties().local_cached_jar_dir(), lib_elements.get(1),
          _service_context.getStorageService(), "test3b", "test3b").get();
  if (cached_file2.isFail()) {
    fail("About to crash with: " + cached_file2.fail().message());
  }
  assertTrue("The cached file exists: " + java_name2, new File(java_name2).exists());
  final ImmutableMap<String, Tuple2<SharedLibraryBean, String>> test3b_input =
      ImmutableMap.<String, Tuple2<SharedLibraryBean, String>>builder()
          .put("test_tech_id_stream", Tuples._2T(lib_elements.get(0), cached_file.success()))
          .put("test_module_id", Tuples._2T(lib_elements.get(1), cached_file2.success()))
          .build();
  final EnrichmentControlMetadataBean enrichment_module =
      new EnrichmentControlMetadataBean("test_tech_name", Collections.emptyList(), true, null,
          Arrays.asList("test_tech_id_stream", "test_module_id"), null, null);
  final Validation<BasicMessageBean, IEnrichmentStreamingTopology> test3b =
      DataBucketChangeActor.getStreamingTopology(
          BeanTemplateUtils.clone(bucket)
              .with(DataBucketBean::streaming_enrichment_topology, enrichment_module)
              .done(),
          new BucketActionMessage.BucketActionOfferMessage(bucket), "test_source3b", Validation.success(test3b_input));
  if (test3b.isFail()) {
    fail("About to crash with: " + test3b.fail().message());
  }
  assertTrue("getStreamingTopology call succeeded", test3b.isSuccess());
  assertTrue("topology created: ", test3b.success() != null);
  assertEquals(lib_elements.get(0).misc_entry_point(), test3b.success().getClass().getName());
  // TODO: add a test for disabled streaming but with a config given (should default to the passthrough topology and ignore the given topology)
}