public InsertIntoTableJobBuilderPresenter( AnalyzerComponentBuilder<InsertIntoTableAnalyzer> analyzerJobBuilder, WindowContext windowContext, PropertyWidgetFactory propertyWidgetFactory, DataCleanerConfiguration configuration, DCModule dcModule) { super(analyzerJobBuilder, propertyWidgetFactory); _overriddenPropertyWidgets = new HashMap<ConfiguredPropertyDescriptor, PropertyWidget<?>>(); final AnalyzerDescriptor<InsertIntoTableAnalyzer> descriptor = analyzerJobBuilder.getDescriptor(); assert descriptor.getComponentClass() == InsertIntoTableAnalyzer.class; _datastoreProperty = descriptor.getConfiguredProperty("Datastore"); _schemaNameProperty = descriptor.getConfiguredProperty("Schema name"); _tableNameProperty = descriptor.getConfiguredProperty("Table name"); _inputColumnsProperty = descriptor.getConfiguredProperty("Values"); _columnNamesProperty = descriptor.getConfiguredProperty("Column names"); _bufferSizeProperty = descriptor.getConfiguredProperty("Buffer size"); _truncateTableProperty = descriptor.getConfiguredProperty("Truncate table"); _errorHandlingProperty = descriptor.getConfiguredProperty("How to handle insertion errors?"); _errorFileLocationProperty = descriptor.getConfiguredProperty("Error log file location"); _additionalErrorLogValuesProperty = descriptor.getConfiguredProperty("Additional error log values"); // the Datastore property assert _datastoreProperty != null; assert _datastoreProperty.getType() == Datastore.class; final SingleDatastorePropertyWidget datastorePropertyWidget = new SingleDatastorePropertyWidget( analyzerJobBuilder, _datastoreProperty, configuration.getDatastoreCatalog(), dcModule); datastorePropertyWidget.setOnlyUpdatableDatastores(true); _overriddenPropertyWidgets.put(_datastoreProperty, datastorePropertyWidget); // The schema name (String) property final SchemaNamePropertyWidget schemaNamePropertyWidget = new SchemaNamePropertyWidget(analyzerJobBuilder, _schemaNameProperty); _overriddenPropertyWidgets.put(_schemaNameProperty, 
schemaNamePropertyWidget); // The table name (String) property final SingleTableNamePropertyWidget tableNamePropertyWidget = new SingleTableNamePropertyWidget(analyzerJobBuilder, _tableNameProperty, windowContext); _overriddenPropertyWidgets.put(_tableNameProperty, tableNamePropertyWidget); // the InputColumn<?>[] property assert _inputColumnsProperty != null; assert _inputColumnsProperty.getType() == InputColumn[].class; final MultipleMappedColumnsPropertyWidget inputColumnsPropertyWidget = new MultipleMappedPrefixedColumnsPropertyWidget( analyzerJobBuilder, _inputColumnsProperty, _columnNamesProperty, " → "); _overriddenPropertyWidgets.put(_inputColumnsProperty, inputColumnsPropertyWidget); // the String[] property assert _columnNamesProperty != null; assert _columnNamesProperty.getType() == String[].class; _overriddenPropertyWidgets.put( _columnNamesProperty, inputColumnsPropertyWidget.getMappedColumnNamesPropertyWidget()); // chain combo boxes datastorePropertyWidget.connectToSchemaNamePropertyWidget(schemaNamePropertyWidget); schemaNamePropertyWidget.connectToTableNamePropertyWidget(tableNamePropertyWidget); tableNamePropertyWidget.addComboListener( new Listener<Table>() { @Override public void onItemSelected(Table item) { // update the column combo boxes when the table is selected inputColumnsPropertyWidget.setTable(item); } }); // initialize schemaNamePropertyWidget.setDatastore(datastorePropertyWidget.getValue()); tableNamePropertyWidget.setSchema( datastorePropertyWidget.getValue(), schemaNamePropertyWidget.getSchema()); inputColumnsPropertyWidget.setTable(tableNamePropertyWidget.getTable()); }
/** * Creates (and registers) a widget that fits the specified configured property. * * @param propertyDescriptor * @return */ @Override public PropertyWidget<?> create(ConfiguredPropertyDescriptor propertyDescriptor) { // first check if there is a mapping created for this property // descriptor PropertyWidget<?> propertyWidget = _propertyWidgetCollection.getMappedPropertyWidget(propertyDescriptor); if (propertyWidget != null) { return propertyWidget; } final HiddenProperty hiddenProperty = propertyDescriptor.getAnnotation(HiddenProperty.class); if (hiddenProperty != null && hiddenProperty.hiddenForLocalAccess()) { return null; } if (propertyDescriptor.getAnnotation(Deprecated.class) != null) { return null; } if (getComponentBuilder() instanceof AnalyzerComponentBuilder) { AnalyzerComponentBuilder<?> analyzer = (AnalyzerComponentBuilder<?>) getComponentBuilder(); if (analyzer.isMultipleJobsSupported()) { if (analyzer.isMultipleJobsDeterminedBy(propertyDescriptor)) { final MultipleInputColumnsPropertyWidget result = new MultipleInputColumnsPropertyWidget(analyzer, propertyDescriptor); return result; } } } // check for fitting property widgets by type final Class<?> type = propertyDescriptor.getBaseType(); final Class<? 
extends PropertyWidget<?>> widgetClass; if (propertyDescriptor.isArray()) { if (propertyDescriptor.isInputColumn()) { widgetClass = MultipleInputColumnsPropertyWidget.class; } else if (ReflectionUtils.isString(type)) { widgetClass = MultipleStringPropertyWidget.class; } else if (type == Dictionary.class) { widgetClass = MultipleDictionariesPropertyWidget.class; } else if (type == SynonymCatalog.class) { widgetClass = MultipleSynonymCatalogsPropertyWidget.class; } else if (type == StringPattern.class) { widgetClass = MultipleStringPatternPropertyWidget.class; } else if (type == EnumerationValue.class && propertyDescriptor instanceof EnumerationProvider) { widgetClass = MultipleRemoteEnumPropertyWidget.class; } else if (type.isEnum()) { widgetClass = MultipleEnumPropertyWidget.class; } else if (type == Class.class) { widgetClass = MultipleClassesPropertyWidget.class; } else if (type == char.class) { widgetClass = MultipleCharPropertyWidget.class; } else if (ReflectionUtils.isNumber(type)) { widgetClass = MultipleNumberPropertyWidget.class; } else { // not yet implemented widgetClass = DummyPropertyWidget.class; } } else { if (propertyDescriptor.isInputColumn()) { if (_componentBuilder .getDescriptor() .getConfiguredPropertiesByType(InputColumn.class, true) .size() == 1) { // if there is only a single input column property, it // will // be displayed using radiobuttons. widgetClass = SingleInputColumnRadioButtonPropertyWidget.class; } else { // if there are multiple input column properties, they // will // be displayed using combo boxes. 
widgetClass = SingleInputColumnComboBoxPropertyWidget.class; } } else if (ReflectionUtils.isCharacter(type)) { widgetClass = SingleCharacterPropertyWidget.class; } else if (ReflectionUtils.isString(type)) { widgetClass = SingleStringPropertyWidget.class; } else if (ReflectionUtils.isBoolean(type)) { widgetClass = SingleBooleanPropertyWidget.class; } else if (ReflectionUtils.isNumber(type)) { widgetClass = SingleNumberPropertyWidget.class; } else if (ReflectionUtils.isDate(type)) { widgetClass = SingleDatePropertyWidget.class; } else if (type == Dictionary.class) { widgetClass = SingleDictionaryPropertyWidget.class; } else if (type == SynonymCatalog.class) { widgetClass = SingleSynonymCatalogPropertyWidget.class; } else if (type == StringPattern.class) { widgetClass = SingleStringPatternPropertyWidget.class; } else if (type == EnumerationValue.class && propertyDescriptor instanceof EnumerationProvider) { widgetClass = SingleRemoteEnumPropertyWidget.class; } else if (type.isEnum()) { widgetClass = SingleEnumPropertyWidget.class; } else if (ReflectionUtils.is(type, Resource.class)) { widgetClass = SingleResourcePropertyWidget.class; } else if (type == File.class) { widgetClass = SingleFilePropertyWidget.class; } else if (type == Pattern.class) { widgetClass = SinglePatternPropertyWidget.class; } else if (ReflectionUtils.is(type, Datastore.class)) { widgetClass = SingleDatastorePropertyWidget.class; } else if (type == Class.class) { widgetClass = SingleClassPropertyWidget.class; } else if (type == Map.class) { final Class<?> genericType1 = propertyDescriptor.getTypeArgument(0); final Class<?> genericType2 = propertyDescriptor.getTypeArgument(1); if (genericType1 == String.class && genericType2 == String.class) { widgetClass = MapStringToStringPropertyWidget.class; } else { // not yet implemented widgetClass = DummyPropertyWidget.class; } } else { // not yet implemented widgetClass = DummyPropertyWidget.class; } } final Injector injector = 
getInjectorForPropertyWidgets(propertyDescriptor); final PropertyWidget<?> result = injector.getInstance(widgetClass); return result; }
public void testScenario() throws Throwable { final AnalysisJob job; try (DatastoreConnection connection = datastore.openConnection(); ) { final DataContext dataContext = connection.getDataContext(); final Table table = dataContext.getTableByQualifiedLabel("PUBLIC.CUSTOMERS"); final Row row = MetaModelHelper.executeSingleRowQuery( dataContext, dataContext.query().from(table).selectCount().toQuery()); assertEquals(recordsInTable, ((Number) row.getValue(0)).intValue()); try (AnalysisJobBuilder jobBuilder = new AnalysisJobBuilder(configuration)) { jobBuilder.setDatastore(datastore); jobBuilder.addSourceColumns("CUSTOMERS.CONTACTFIRSTNAME"); jobBuilder.addSourceColumns("CUSTOMERS.CONTACTLASTNAME"); // although not semantically correct, we pretend that EVEN is // the // success-state in our cleansing street and that ODD is the // reject-state. final Category valid = org.datacleaner.test.mock.EvenOddFilter.Category.EVEN; final Category invalid = org.datacleaner.test.mock.EvenOddFilter.Category.ODD; final TransformerComponentBuilder<MockTransformer> trans1 = jobBuilder.addTransformer(MockTransformer.class); trans1.setName("trans1"); trans1.addInputColumn(jobBuilder.getSourceColumns().get(0)); final FilterComponentBuilder< EvenOddFilter, org.datacleaner.test.mock.EvenOddFilter.Category> filter1 = jobBuilder.addFilter(EvenOddFilter.class); filter1.setName("filter1"); filter1.addInputColumn(trans1.getOutputColumns().get(0)); final TransformerComponentBuilder<MockTransformer> trans2 = jobBuilder.addTransformer(MockTransformer.class); trans2.setName("trans2"); trans2.addInputColumn(jobBuilder.getSourceColumns().get(1)); trans2.setRequirement(filter1, valid); final FilterComponentBuilder< EvenOddFilter, org.datacleaner.test.mock.EvenOddFilter.Category> filter2 = jobBuilder.addFilter(EvenOddFilter.class); filter2.setName("filter2"); filter2.addInputColumn(trans2.getOutputColumns().get(0)); final AnalyzerComponentBuilder<MockAnalyzer> analyzer1 = 
jobBuilder.addAnalyzer(MockAnalyzer.class); analyzer1.setName("success"); analyzer1.addInputColumn(jobBuilder.getSourceColumns().get(0)); analyzer1.addInputColumn(jobBuilder.getSourceColumns().get(1)); analyzer1.addInputColumn(trans1.getOutputColumns().get(0)); analyzer1.addInputColumn(trans2.getOutputColumns().get(0)); analyzer1.setRequirement(filter2, valid); final FilterOutcome invalid1 = filter1.getFilterOutcome(invalid); final FilterOutcome invalid2 = filter2.getFilterOutcome(invalid); final AnalyzerComponentBuilder<MockAnalyzer> analyzer2 = jobBuilder.addAnalyzer(MockAnalyzer.class); analyzer2.setName("rejects"); analyzer2.addInputColumn(jobBuilder.getSourceColumns().get(0)); analyzer2.addInputColumn(jobBuilder.getSourceColumns().get(1)); analyzer2.setComponentRequirement(new CompoundComponentRequirement(invalid1, invalid2)); job = jobBuilder.toAnalysisJob(); } } final AnalysisRunner runner = new AnalysisRunnerImpl(configuration); final AnalysisResultFuture resultFuture = runner.run(job); resultFuture.await(); if (resultFuture.isErrornous()) { throw resultFuture.getErrors().get(0); } int recordsInResults = 0; final Map<ComponentJob, AnalyzerResult> map = resultFuture.getResultMap(); for (Entry<ComponentJob, AnalyzerResult> entry : map.entrySet()) { final ComponentJob componentJob = entry.getKey(); @SuppressWarnings("unchecked") final ListResult<InputRow> result = (ListResult<InputRow>) entry.getValue(); final List<InputRow> values = result.getValues(); final int recordsInResult = values.size(); recordsInResults += recordsInResult; switch (componentJob.getName()) { case "success": case "rejects": // expected states assertTrue( "Expected records in all buckets of the cleansing street, but did not find any in: " + componentJob, recordsInResult > 0); assertTrue( "Expected records to be distributed across buckets, but found all in: " + componentJob, recordsInResult != recordsInTable); break; default: fail("Unexpected component in result map: " + componentJob); } } 
assertEquals(recordsInTable, recordsInResults); }
/**
 * Builds a job that matches the "product" column against three dictionaries,
 * converts the "logging match" output to a number and runs a value
 * distribution over all transformer outputs, then verifies the resulting
 * counts.
 *
 * <p>The job builder is managed with try-with-resources so that it is closed
 * on every exit path, including failing assertions and errors rethrown from
 * the analysis run.
 *
 * @throws Throwable
 *             the first error reported by the analysis run, if it was not
 *             successful
 */
public void testParseAndAssignDictionaries() throws Throwable {
    // reference data: three dictionaries and one synonym catalog, no string patterns
    final Collection<Dictionary> dictionaries = new ArrayList<>();
    dictionaries.add(
            new SimpleDictionary(
                    "eobjects.org products", "MetaModel", "DataCleaner", "AnalyzerBeans"));
    dictionaries.add(
            new SimpleDictionary(
                    "apache products",
                    "commons-lang",
                    "commons-math",
                    "commons-codec",
                    "commons-logging"));
    dictionaries.add(
            new SimpleDictionary(
                    "logging products", "commons-logging", "log4j", "slf4j", "java.util.Logging"));

    final Collection<SynonymCatalog> synonymCatalogs = new ArrayList<>();
    synonymCatalogs.add(
            new SimpleSynonymCatalog(
                    "translated terms",
                    new SimpleSynonym("hello", "howdy", "hi", "yo", "hey"),
                    new SimpleSynonym("goodbye", "bye", "see you", "hey")));

    final Collection<StringPattern> stringPatterns = new ArrayList<>();

    final ReferenceDataCatalogImpl ref =
            new ReferenceDataCatalogImpl(dictionaries, synonymCatalogs, stringPatterns);

    final Datastore datastore = new CsvDatastore("my database", "src/test/resources/projects.csv");
    final DataCleanerConfigurationImpl conf = new DataCleanerConfigurationImpl();

    try (AnalysisJobBuilder job = new AnalysisJobBuilder(conf)) {
        job.setDatastore(datastore);
        job.addSourceColumns("product", "version");

        // transformer 1: match "product" against all three dictionaries
        final TransformerComponentBuilder<DictionaryMatcherTransformer> tjb1 =
                job.addTransformer(DictionaryMatcherTransformer.class);
        tjb1.setConfiguredProperty(
                "Dictionaries",
                new Dictionary[] {
                    ref.getDictionary("eobjects.org products"),
                    ref.getDictionary("apache products"),
                    ref.getDictionary("logging products")
                });
        tjb1.addInputColumn(job.getSourceColumnByName("product"));

        final List<MutableInputColumn<?>> outputColumns = tjb1.getOutputColumns();
        assertEquals(3, outputColumns.size());
        outputColumns.get(0).setName("eobjects match");
        outputColumns.get(1).setName("apache match");
        outputColumns.get(2).setName("logging match");

        // transformer 2: convert the "logging match" output to a number
        final TransformerComponentBuilder<ConvertToNumberTransformer> tjb2 =
                job.addTransformer(ConvertToNumberTransformer.class);
        tjb2.addInputColumn(outputColumns.get(2));
        tjb2.getOutputColumns().get(0).setName("logging match -> number");

        // analyzer: value distribution over every transformer output
        final AnalyzerComponentBuilder<ValueDistributionAnalyzer> ajb =
                job.addAnalyzer(ValueDistributionAnalyzer.class);
        ajb.addInputColumns(tjb1.getOutputColumns());
        ajb.addInputColumns(tjb2.getOutputColumns());

        assertTrue(job.isConfigured());

        final AnalysisJob analysisJob = job.toAnalysisJob();
        final AnalysisResultFuture resultFuture = new AnalysisRunnerImpl(conf).run(analysisJob);

        if (!resultFuture.isSuccessful()) {
            // the job builder is closed by try-with-resources while this propagates
            throw resultFuture.getErrors().get(0);
        }

        final List<AnalyzerResult> results = resultFuture.getResults();
        assertEquals(4, results.size());

        ValueDistributionAnalyzerResult res = (ValueDistributionAnalyzerResult) results.get(0);
        assertEquals("eobjects match", res.getName());
        assertEquals(8, res.getCount("true").intValue());
        assertEquals(4, res.getCount("false").intValue());

        res = (ValueDistributionAnalyzerResult) results.get(1);
        assertEquals("apache match", res.getName());
        assertEquals(2, res.getCount("true").intValue());
        assertEquals(10, res.getCount("false").intValue());

        res = (ValueDistributionAnalyzerResult) results.get(2);
        assertEquals("logging match", res.getName());
        assertEquals(3, res.getCount("true").intValue());
        assertEquals(9, res.getCount("false").intValue());

        res = (ValueDistributionAnalyzerResult) results.get(3);
        assertEquals("logging match -> number", res.getName());
        assertEquals(3, res.getCount("1").intValue());
        assertEquals(9, res.getCount("0").intValue());
    }
}