/**
 * Propagates the effect of this operator on the example set meta data.
 *
 * <ul>
 *   <li>A numerical label gets its value set relation set to {@code SUPERSET} when the label
 *       noise parameter is positive.
 *   <li>All regular numerical attributes get the same treatment when the default attribute noise
 *       parameter is positive.
 *   <li>One real-valued attribute named {@code random}, {@code random1}, {@code random2}, ... is
 *       appended per requested random attribute.
 * </ul>
 *
 * @param exampleSetMetaData the incoming meta data, modified in place
 * @return the same (modified) meta data object
 * @throws UndefinedParameterError if a required parameter is not defined
 */
@Override
protected ExampleSetMetaData modifyMetaData(ExampleSetMetaData exampleSetMetaData)
    throws UndefinedParameterError {
  AttributeMetaData labelMetaData = exampleSetMetaData.getLabelMetaData();
  if (labelMetaData != null
      && labelMetaData.isNumerical()
      && getParameterAsDouble(PARAMETER_LABEL_NOISE) > 0) {
    labelMetaData.setValueSetRelation(SetRelation.SUPERSET);
  }

  if (getParameterAsDouble(PARAMETER_DEFAULT_ATTRIBUTE_NOISE) > 0) {
    for (AttributeMetaData attribute : exampleSetMetaData.getAllAttributes()) {
      // only regular numerical attributes are touched
      if (attribute.isSpecial() || !attribute.isNumerical()) {
        continue;
      }
      attribute.setValueSetRelation(SetRelation.SUPERSET);
    }
  }

  int randomAttributeCount = getParameterAsInt(PARAMETER_RANDOM_ATTRIBUTES);
  int index = 0;
  while (index < randomAttributeCount) {
    // the first random attribute carries no numeric suffix
    String suffix = (index == 0) ? "" : Integer.toString(index);
    AttributeMetaData randomAttribute = new AttributeMetaData("random" + suffix, Ontology.REAL);
    randomAttribute.setValueRange(
        new Range(Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY), SetRelation.SUBSET);
    exampleSetMetaData.addAttribute(randomAttribute);
    index++;
  }
  return exampleSetMetaData;
}
/**
 * Computes the number of examples after sampling.
 *
 * <p>In absolute mode the configured sample size is returned; if the input is known to contain
 * fewer examples, an error with a quick fix (setting the sample size to the known number of
 * examples) is registered on the example set input port. In relative mode the input's example
 * count is multiplied by the sample ratio. For any other mode a fresh {@link MDInteger} is
 * returned.
 *
 * @param emd the meta data of the incoming example set
 * @return the number of examples after sampling
 * @throws UndefinedParameterError if a required parameter is not defined
 */
@Override
protected MDInteger getSampledSize(ExampleSetMetaData emd) throws UndefinedParameterError {
  switch (getParameterAsInt(PARAMETER_SAMPLE)) {
    case SAMPLE_ABSOLUTE:
      {
        int requestedSize = getParameterAsInt(PARAMETER_SAMPLE_SIZE);
        boolean tooFewExamples =
            emd.getNumberOfExamples().isAtLeast(requestedSize) == MetaDataInfo.NO;
        if (tooFewExamples) {
          getExampleSetInputPort()
              .addError(
                  new SimpleMetaDataError(
                      Severity.ERROR,
                      getExampleSetInputPort(),
                      Collections.singletonList(
                          new ParameterSettingQuickFix(
                              this,
                              PARAMETER_SAMPLE_SIZE,
                              emd.getNumberOfExamples().getValue().toString())),
                      "need_more_examples",
                      String.valueOf(requestedSize)));
        }
        return new MDInteger(requestedSize);
      }
    case SAMPLE_RELATIVE:
      {
        // the example count object of the given meta data is scaled in place and returned
        MDInteger scaledSize = emd.getNumberOfExamples();
        scaledSize.multiply(getParameterAsDouble(PARAMETER_SAMPLE_RATIO));
        return scaledSize;
      }
    default:
      return new MDInteger();
  }
}
@Override protected MetaData modifyMetaData(ExampleSetMetaData metaData) throws UndefinedParameterError { // performing meta data propagation for single attribute definition if (isParameterSet(PARAMETER_VALUE) && isParameterSet(PARAMETER_ATTRIBUTE_NAME)) { AttributeMetaData targetAttribute = metaData.getAttributeByName(getParameterAsString(PARAMETER_ATTRIBUTE_NAME)); setMetaData( getParameterAsString(PARAMETER_VALUE), targetAttribute, "parameter_must_be_numerical", new Object[] {PARAMETER_VALUE}); } // now doing same for all other defined values List<String[]> list = getParameterList(PARAMETER_ADDITIONAL_VALUES); for (String[] pair : list) { AttributeMetaData targetAttribute = metaData.getAttributeByName(pair[0]); setMetaData( pair[1], targetAttribute, "parameter_list_must_be_numerical", new Object[] {PARAMETER_ADDITIONAL_VALUES}); } return metaData; }
/**
 * Describes the generated example set: a single attribute named {@code observation} in the
 * label role, with the number of examples set to sequence length times number of sequences.
 *
 * @return the meta data of the generated example set
 * @throws OperatorException if a parameter cannot be read
 */
@Override
public MetaData getGeneratedMetaData() throws OperatorException {
  final int sequenceLength = getParameterAsInt(SEQ_LENGHT_KEY);
  final int sequenceCount = getParameterAsInt(SEQ_NUM_KEY);
  final int totalExamples = sequenceLength * sequenceCount;

  ExampleSetMetaData generated = new ExampleSetMetaData();
  generated.addAttribute(new AttributeMetaData("observation", Attributes.LABEL_NAME));
  generated.setNumberOfExamples(totalExamples);
  return generated;
}
/**
 * Creates the operator and registers its meta data transformation rules.
 *
 * <p>The example set output is announced with a meta data object whose attribute set is flagged
 * as a superset and whose example count is set to 0 and then increased by an unknown amount.
 * Afterwards the dummy ports are started and their pass-through rule is registered.
 *
 * @param description the operator description handed on to the super constructor
 */
public ProcessLog2ExampleSet(OperatorDescription description) {
  super(description);

  // meta data announced for the generated example set
  ExampleSetMetaData outputMetaData = new ExampleSetMetaData();
  outputMetaData.attributesAreSuperset();
  outputMetaData.setNumberOfExamples(0);
  outputMetaData.getNumberOfExamples().increaseByUnknownAmount();
  getTransformer().addRule(new GenerateNewMDRule(exampleSetOutput, outputMetaData));

  // the pass-through rule is registered after the dummy ports have been started
  dummyPorts.start();
  getTransformer().addRule(dummyPorts.makePassThroughRule());
}
/**
 * Removes every attribute from the meta data that is not part of the subset delivered by the
 * attribute selector.
 *
 * @param metaData the incoming meta data, modified in place
 * @return the given meta data object, reduced to the selected attributes
 */
@Override
protected MetaData modifyMetaData(ExampleSetMetaData metaData) {
  ExampleSetMetaData selected = attributeSelector.getMetaDataSubset(metaData, true);
  for (Iterator<AttributeMetaData> it = metaData.getAllAttributes().iterator();
      it.hasNext(); ) {
    String attributeName = it.next().getName();
    boolean isSelected = selected.getAttributeByName(attributeName) != null;
    if (!isSelected) {
      it.remove();
    }
  }
  return metaData;
}
/**
 * Describes the conversion of the configured attribute to type {@code DATE_TIME}.
 *
 * <p>If the attribute is present, a clone of its meta data is switched to {@code DATE_TIME}, its
 * mean is set to unknown and its value set relation to {@code UNKNOWN}. Depending on the
 * keep-old-attribute parameter the clone either gets the suffix {@code _AS_DATE} or the original
 * attribute is removed; in both cases the clone is added. If the attribute is not present the
 * meta data is returned untouched.
 *
 * @param metaData the incoming meta data, modified in place
 * @return the given meta data object
 * @throws UndefinedParameterError if the attribute name parameter is not defined
 */
@Override
protected MetaData modifyMetaData(ExampleSetMetaData metaData) throws UndefinedParameterError {
  String attributeName = getParameterAsString(PARAMETER_ATTRIBUTE_NAME);
  AttributeMetaData source = metaData.getAttributeByName(attributeName);
  if (source == null) {
    return metaData;
  }

  AttributeMetaData converted = source.clone();
  converted.setType(Ontology.DATE_TIME);
  converted.getMean().setUnkown();
  converted.setValueSetRelation(SetRelation.UNKNOWN);

  if (getParameterAsBoolean(PARAMETER_KEEP_OLD_ATTRIBUTE)) {
    converted.setName(converted.getName() + "_AS_DATE");
  } else {
    metaData.removeAttribute(source);
  }
  metaData.addAttribute(converted);
  return metaData;
}
/** * Creates attribute meta data that represents the attribute that will be generated for the * provided arguments. * * @return the {@link AttributeMetaData} for the provided arguments */ public static AttributeMetaData generateAttributeMetaData( ExampleSetMetaData emd, String name, ExpressionType expressionType) { AttributeMetaData newAttribute = null; AttributeMetaData existingAtt = emd.getAttributeByName(name); int ontology = expressionType.getAttributeType(); if (ontology == Ontology.BINOMINAL) { newAttribute = new AttributeMetaData(name, Ontology.BINOMINAL); HashSet<String> values = new HashSet<>(); values.add("false"); values.add("true"); newAttribute.setValueSet(values, SetRelation.EQUAL); } else { newAttribute = new AttributeMetaData(name, ontology); } // restore role if attribute existed already if (existingAtt != null) { newAttribute.setRole(existingAtt.getRole()); } return newAttribute; }
/**
 * Swaps the roles of the two configured attributes in the meta data.
 *
 * <p>The swap only takes place if both attributes are present in the meta data. If one of the
 * attribute name parameters is not defined, the meta data is returned unchanged.
 *
 * @param metaData the incoming meta data, modified in place
 * @return the given meta data object
 */
@Override
protected MetaData modifyMetaData(ExampleSetMetaData metaData) {
  try {
    AttributeMetaData first =
        metaData.getAttributeByName(getParameterAsString(PARAMETER_FIRST_ATTRIBUTE));
    AttributeMetaData second =
        metaData.getAttributeByName(getParameterAsString(PARAMETER_SECOND_ATTRIBUTE));
    if (first != null && second != null) {
      String firstRole = first.getRole();
      first.setRole(second.getRole());
      second.setRole(firstRole);
    }
  } catch (UndefinedParameterError ignored) {
    // Deliberately ignored: without both attribute names there is nothing to swap, so the
    // meta data is passed through unchanged instead of failing the meta data transformation.
  }
  return metaData;
}
/**
 * Adds the meta data of a real-valued weight attribute (name and role both {@code
 * Attributes.WEIGHT_NAME}) whose value range and set relation are supplied by {@code
 * getWeightAttributeRange()} and {@code getWeightAttributeValueRelation()}.
 *
 * @param metaData the incoming meta data, modified in place
 * @return the given meta data object including the weight attribute
 */
@Override
protected MetaData modifyMetaData(ExampleSetMetaData metaData) {
  final String weightName = Attributes.WEIGHT_NAME;
  AttributeMetaData weightMetaData = new AttributeMetaData(weightName, Ontology.REAL, weightName);
  weightMetaData.setValueRange(getWeightAttributeRange(), getWeightAttributeValueRelation());
  metaData.addAttribute(weightMetaData);
  return metaData;
}
/**
 * Describes the generated example set: a label with the values {@code ok} and {@code terminate}
 * plus the five attributes {@code Year 1} to {@code Year 5}, each with the same five possible
 * values. The number of examples is taken from the corresponding parameter.
 *
 * @return the meta data of the generated example set
 * @throws OperatorException if the number of examples cannot be read
 */
@Override
public MetaData getGeneratedMetaData() throws OperatorException {
  ExampleSetMetaData generated = new ExampleSetMetaData();
  generated.addAttribute(new AttributeMetaData("label", Attributes.LABEL_NAME, "ok", "terminate"));

  int year = 1;
  while (year <= 5) {
    AttributeMetaData yearAttribute =
        new AttributeMetaData(
            "Year " + year,
            null,
            "New Credit",
            "Nothing",
            "End Credit",
            "Collect Information",
            "Additional Credit");
    generated.addAttribute(yearAttribute);
    year++;
  }

  generated.setNumberOfExamples(getParameterAsInt(PARAMETER_NUMBER_EXAMPLES));
  return generated;
}
@Override public MetaData getGeneratedMetaData() throws OperatorException { int numberOfExamples = getParameterAsInt(PARAMETER_NUMBER_EXAMPLES); int numberOfAttributes = getParameterAsInt(PARAMETER_NUMBER_OF_ATTRIBUTES); int numberOfValues = getParameterAsInt(PARAMETER_NUMBER_OF_VALUES); ExampleSetMetaData emd = new ExampleSetMetaData(); emd.addAttribute(new AttributeMetaData("label", Attributes.LABEL_NAME, "positive", "negative")); // generating values String[] values = new String[numberOfValues]; for (int i = 0; i < numberOfValues; i++) values[i] = "value" + i; // attributes for (int i = 0; i < numberOfAttributes; i++) emd.addAttribute(new AttributeMetaData("att" + (i + 1), null, values)); emd.setNumberOfExamples(numberOfExamples); return emd; }
/**
 * Describes the generated transaction example set: an integer id attribute, a store attribute
 * with 15 possible stores, a customer attribute with {@code MAX_CUSTOMERS} possible customers,
 * product id and category, a date, and integer amount and single price attributes. The number
 * of examples is taken from the corresponding parameter.
 *
 * @return the meta data of the generated example set
 * @throws OperatorException if the number of examples cannot be read
 */
@Override
public MetaData getGeneratedMetaData() throws OperatorException {
  ExampleSetMetaData generated = new ExampleSetMetaData();

  generated.addAttribute(
      new AttributeMetaData("transaction_id", Ontology.INTEGER, Attributes.ID_NAME));

  String[] stores = {
    "Store 01", "Store 02", "Store 03", "Store 04", "Store 05",
    "Store 06", "Store 07", "Store 08", "Store 09", "Store 10",
    "Store 11", "Store 12", "Store 13", "Store 14", "Store 15"
  };
  generated.addAttribute(new AttributeMetaData("store_id", null, stores));

  // customer names are 1-based
  String[] customerNames = new String[MAX_CUSTOMERS];
  for (int c = 1; c <= MAX_CUSTOMERS; c++) {
    customerNames[c - 1] = "Customer " + c;
  }
  generated.addAttribute(new AttributeMetaData("customer_id", null, customerNames));

  generated.addAttribute(
      new AttributeMetaData("product_id", null, Ontology.INTEGER, new Range(10000, 100000)));
  generated.addAttribute(new AttributeMetaData("product_category", null, PRODUCT_CATEGORIES));
  generated.addAttribute(new AttributeMetaData("date", Ontology.DATE));
  generated.addAttribute(
      new AttributeMetaData("amount", null, Ontology.INTEGER, new Range(1, 10)));
  generated.addAttribute(
      new AttributeMetaData("single_price", null, Ontology.INTEGER, new Range(10, 100)));

  generated.setNumberOfExamples(getParameterAsInt(PARAMETER_NUMBER_EXAMPLES));
  return generated;
}
/**
 * Default meta data transformation of this operator; subclasses might override it.
 *
 * <p>Every attribute selected by the {@link AttributeSubsetSelector} is handed to {@link
 * #modifyAttributeMetaData(ExampleSetMetaData, AttributeMetaData)}. If that call yields a
 * non-null collection, the attribute is removed and the collection is added instead. A single
 * replacement attribute takes over the role of the attribute it replaces.
 *
 * @param exampleSetMetaData the incoming meta data, modified in place
 * @return the given meta data object
 * @throws UndefinedParameterError if a required parameter is not defined
 */
@Override
protected ExampleSetMetaData modifyMetaData(ExampleSetMetaData exampleSetMetaData)
    throws UndefinedParameterError {
  ExampleSetMetaData selected =
      attributeSelector.getMetaDataSubset(exampleSetMetaData, isSupportingAttributeRoles());
  checkSelectedSubsetMetaData(selected);

  for (AttributeMetaData original : selected.getAllAttributes()) {
    Collection<AttributeMetaData> replacements =
        modifyAttributeMetaData(exampleSetMetaData, original);
    if (replacements == null) {
      // null means: leave this attribute as it is
      continue;
    }
    if (replacements.size() == 1) {
      AttributeMetaData single = replacements.iterator().next();
      single.setRole(exampleSetMetaData.getAttributeByName(original.getName()).getRole());
    }
    exampleSetMetaData.removeAttribute(original);
    exampleSetMetaData.addAllAttributes(replacements);
  }
  return exampleSetMetaData;
}
@Override public MetaData getGeneratedMetaData() throws OperatorException { ExampleSetMetaData emd = new ExampleSetMetaData(); AttributeMetaData amd = new AttributeMetaData("label", Ontology.NOMINAL, Attributes.LABEL_NAME); emd.addAttribute(amd); int desirendNumberOfAttributes = getParameterAsInt(PARAMETER_NUMBER_ATTRIBUTES); double mean = getParameterAsDouble(PARAMETER_SPARSE_FRACTION); if (desirendNumberOfAttributes > 20) { emd.attributesAreSuperset(); // first ten for (int i = 1; i < 11; i++) { AttributeMetaData newAMD = new AttributeMetaData("att" + i, Ontology.REAL); newAMD.setValueRange(new Range(0, 1), SetRelation.EQUAL); newAMD.setMean(new MDReal(mean)); emd.addAttribute(newAMD); } // last ten for (int i = desirendNumberOfAttributes - 10; i <= desirendNumberOfAttributes; i++) { AttributeMetaData newAMD = new AttributeMetaData("att" + i, Ontology.REAL); newAMD.setValueRange(new Range(0, 1), SetRelation.EQUAL); newAMD.setMean(new MDReal(mean)); emd.addAttribute(newAMD); } } else { for (int i = 0; i < desirendNumberOfAttributes; i++) { AttributeMetaData newAMD = new AttributeMetaData("att" + (i + 1), Ontology.REAL); newAMD.setValueRange(new Range(0, 1), SetRelation.EQUAL); newAMD.setMean(new MDReal(mean)); emd.addAttribute(newAMD); } } return emd; }
private ExampleSetMetaData applyRulesOnMetaData( List<String> rules, MetaData metaData, FilterConditon condition) throws UndefinedParameterError { if (metaData == null || !(metaData instanceof ExampleSetMetaData) || condition == null) { return new ExampleSetMetaData(); } ExampleSetMetaData sortedMetaData = new ExampleSetMetaData(); ExampleSetMetaData originalMetaData = (ExampleSetMetaData) metaData; Collection<AttributeMetaData> allAttributes = originalMetaData.getAllAttributes(); // iterate over all rules for (String currentRule : rules) { // iterate over all original attributes and check if rule applies Iterator<AttributeMetaData> iterator = allAttributes.iterator(); while (iterator.hasNext()) { AttributeMetaData attrMD = iterator.next(); // skip special attributes if (attrMD.isSpecial()) { continue; } // if rule applies, remove attribute from unmachted list and add it to rules matched // list if (condition.match(currentRule, attrMD.getName())) { iterator.remove(); sortedMetaData.addAttribute(attrMD); } } } if (!getParameterAsString(PARAMETER_HANDLE_UNMATCHED_ATTRIBUTES) .equals(REMOVE_UNMATCHED_MODE)) { sortedMetaData.addAllAttributes(allAttributes); } return sortedMetaData; }
/**
 * Describes the generated example set: an id attribute with the values {@code Id 1} ... {@code
 * Id N}, an item attribute with the values {@code Item 1} ... {@code Item M} and a non-negative
 * integer amount attribute. N, M and the number of examples come from the operator parameters.
 *
 * @return the meta data of the generated example set
 * @throws OperatorException if a parameter cannot be read
 */
@Override
public MetaData getGeneratedMetaData() throws OperatorException {
  ExampleSetMetaData generated = new ExampleSetMetaData();

  String[] customerIds = new String[getParameterAsInt(PARAMETER_NUMBER_CUSTOMERS)];
  for (int c = 1; c <= customerIds.length; c++) {
    customerIds[c - 1] = "Id " + c;
  }
  generated.addAttribute(new AttributeMetaData("Id", Attributes.ID_NAME, customerIds));

  String[] itemNames = new String[getParameterAsInt(PARAMETER_NUMBER_ITEMS)];
  for (int item = 1; item <= itemNames.length; item++) {
    itemNames[item - 1] = "Item " + item;
  }
  generated.addAttribute(new AttributeMetaData("Item", null, itemNames));

  Range amountRange = new Range(0, Double.POSITIVE_INFINITY);
  generated.addAttribute(new AttributeMetaData("Amount", null, Ontology.INTEGER, amountRange));

  generated.setNumberOfExamples(getParameterAsInt(PARAMETER_NUMBER_TRANSACTIONS));
  return generated;
}
/**
 * Collects the names of all attributes that are neither filtered out nor of a disallowed value
 * type.
 *
 * <p>The attributes are taken from the meta data itself if it is an {@link ExampleSetMetaData},
 * or from the training set meta data if it is a {@link ModelMetaData}. The result lists the
 * sorted names of the special attributes first, followed by the sorted names of the regular
 * attributes. If no suitable meta data is available the result is empty.
 *
 * @return the collected attribute names
 */
public Vector<String> getAttributeNames() {
  Vector<String> specialNames = new Vector<>();
  Vector<String> regularNames = new Vector<>();

  // find the example set meta data to read the attributes from (instanceof is false for null)
  MetaData metaData = getMetaData();
  ExampleSetMetaData source = null;
  if (metaData instanceof ExampleSetMetaData) {
    source = (ExampleSetMetaData) metaData;
  } else if (metaData instanceof ModelMetaData) {
    source = ((ModelMetaData) metaData).getTrainingSetMetaData();
  }

  if (source != null) {
    for (AttributeMetaData attribute : source.getAllAttributes()) {
      if (isFilteredOut(attribute) || !isOfAllowedType(attribute.getValueType())) {
        continue;
      }
      Vector<String> target = attribute.isSpecial() ? specialNames : regularNames;
      target.add(attribute.getName());
    }
  }

  Collections.sort(specialNames);
  Collections.sort(regularNames);
  specialNames.addAll(regularNames);
  return specialNames;
}
/**
 * Computes the number of examples after sampling.
 *
 * <p>In absolute mode the configured sample size is returned, in relative mode the example
 * count of the input multiplied by the sample ratio. For any other mode a fresh {@link
 * MDInteger} is returned.
 *
 * @param emd the meta data of the incoming example set
 * @return the number of examples after sampling
 * @throws UndefinedParameterError if a required parameter is not defined
 */
@Override
protected MDInteger getSampledSize(ExampleSetMetaData emd) throws UndefinedParameterError {
  final int sampleMode = getParameterAsInt(PARAMETER_SAMPLE);

  if (sampleMode == SAMPLE_ABSOLUTE) {
    return new MDInteger(getParameterAsInt(PARAMETER_SAMPLE_SIZE));
  }
  if (sampleMode == SAMPLE_RELATIVE) {
    // the example count object of the given meta data is scaled in place and returned
    MDInteger scaledSize = emd.getNumberOfExamples();
    scaledSize.multiply(getParameterAsDouble(PARAMETER_SAMPLE_RATIO));
    return scaledSize;
  }
  return new MDInteger();
}
/**
 * Propagates the effect of taking absolute values on all regular numerical attributes.
 *
 * <p>The new value range is derived from the old range [lower, upper]:
 *
 * <ul>
 *   <li>entirely non-negative: the range is unchanged,
 *   <li>entirely non-positive: the range is mirrored to [|upper|, |lower|],
 *   <li>otherwise (including bounds that cannot be compared, e.g. NaN): [0, max(|lower|,
 *       |upper|)].
 * </ul>
 *
 * The value set relation is kept and the mean is set to unknown.
 *
 * @param metaData the filtered meta data, modified in place
 * @return the given meta data object
 */
@Override
public ExampleSetMetaData applyOnFilteredMetaData(ExampleSetMetaData metaData) {
  for (AttributeMetaData amd : metaData.getAllAttributes()) {
    if (!amd.isNumerical() || amd.isSpecial()) {
      continue;
    }
    Range range = amd.getValueRange();
    double lower = range.getLower();
    double upper = range.getUpper();

    double absLower;
    double absUpper;
    if (lower >= 0) {
      // all values are already non-negative
      absLower = lower;
      absUpper = upper;
    } else if (upper <= 0) {
      // all values are non-positive: the range is mirrored at zero
      absLower = Math.abs(upper);
      absUpper = Math.abs(lower);
    } else {
      // the range spans zero, so zero is the smallest possible absolute value
      absLower = 0;
      absUpper = Math.max(Math.abs(lower), Math.abs(upper));
    }

    amd.setValueRange(new Range(absLower, absUpper), amd.getValueSetRelation());
    amd.getMean().setUnkown();
  }
  return metaData;
}
/**
 * Propagates the conversion to integer on the filtered meta data.
 *
 * <p>Numerical attributes that are not yet of type integer are switched to {@code INTEGER}. For
 * every attribute of the given meta data the bounds of the value range are then either rounded
 * (if the round parameter is set) or cut off by a cast to {@code long}, and the new range is set
 * with the relation {@code EQUAL}.
 *
 * @param emd the filtered meta data, modified in place
 * @return the given meta data object
 */
@Override
public ExampleSetMetaData applyOnFilteredMetaData(ExampleSetMetaData emd) {
  final boolean round = getParameterAsBoolean(PARAMETER_ROUND);

  for (AttributeMetaData attribute : emd.getAllAttributes()) {
    boolean isNonIntegerNumerical =
        Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.NUMERICAL)
            && !Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.INTEGER);
    if (isNonIntegerNumerical) {
      attribute.setType(Ontology.INTEGER);
    }

    Range oldRange = attribute.getValueRange();
    long newLower;
    long newUpper;
    if (round) {
      newLower = Math.round(oldRange.getLower());
      newUpper = Math.round(oldRange.getUpper());
    } else {
      newLower = (long) oldRange.getLower();
      newUpper = (long) oldRange.getUpper();
    }
    attribute.setValueRange(new Range(newLower, newUpper), SetRelation.EQUAL);
  }
  return emd;
}
@Override protected MetaData modifyMetaData(ExampleSetMetaData metaData) throws UndefinedParameterError { metaData.getNumberOfExamples().reduceByUnknownAmount(); // TODO: Could instead take a look at the values of ids if nominal. return metaData; }
@Override public MetaData getGeneratedMetaData() throws OperatorException { ExampleSetMetaData emd = new ExampleSetMetaData(); emd.addAttribute( new AttributeMetaData("label", Attributes.LABEL_NAME, "response", "no response")); emd.addAttribute(new AttributeMetaData("name", Ontology.NOMINAL)); // "name", "age", "lifestyle", "zip code", "family status", "car", "sports", "earnings" emd.addAttribute(new AttributeMetaData("age", null, Ontology.INTEGER, new Range(15, 70))); emd.addAttribute(new AttributeMetaData("lifestyle", null, POSSIBLE_VALUES[2])); emd.addAttribute( new AttributeMetaData("zip code", null, Ontology.INTEGER, new Range(10000, 100000))); emd.addAttribute(new AttributeMetaData("family status", null, POSSIBLE_VALUES[4])); emd.addAttribute(new AttributeMetaData("car", null, POSSIBLE_VALUES[5])); emd.addAttribute(new AttributeMetaData("sports", null, POSSIBLE_VALUES[6])); emd.addAttribute( new AttributeMetaData("earnings", null, Ontology.INTEGER, new Range(20000, 150000))); emd.setNumberOfExamples(getParameterAsInt(PARAMETER_NUMBER_EXAMPLES)); return emd; }
/**
 * Delegates the meta data transformation to {@code transpose()} of the given meta data.
 *
 * @param metaData the incoming meta data
 * @return whatever {@code metaData.transpose()} returns
 */
@Override
protected MetaData modifyMetaData(ExampleSetMetaData metaData) {
  final MetaData transposed = metaData.transpose();
  return transposed;
}
@Override protected MetaData modifyMetaData(ExampleSetMetaData metaData) throws UndefinedParameterError { ExampleSetMetaData resultMD = metaData.clone(); resultMD.clear(); // add group by attributes if (isParameterSet(PARAMETER_GROUP_BY_ATTRIBUTES) && !getParameterAsString(PARAMETER_GROUP_BY_ATTRIBUTES).isEmpty()) { String attributeRegex = getParameterAsString(PARAMETER_GROUP_BY_ATTRIBUTES); Pattern pattern = Pattern.compile(attributeRegex); for (AttributeMetaData amd : metaData.getAllAttributes()) { if (pattern.matcher(amd.getName()).matches()) { if (amd.isNumerical() && getCompatibilityLevel().isAtMost(VERSION_5_1_6)) { // converting type to mimic // NumericalToPolynomial used below amd.setType(Ontology.NOMINAL); amd.setValueSet(Collections.<String>emptySet(), SetRelation.SUPERSET); } resultMD.addAttribute(amd); } } resultMD.getNumberOfExamples().reduceByUnknownAmount(); } if (resultMD.getAllAttributes().isEmpty() && getCompatibilityLevel().isAtMost(VERSION_5_1_6)) { AttributeMetaData allGroup = new AttributeMetaData(GENERIC_GROUP_NAME, Ontology.NOMINAL); Set<String> values = new TreeSet<String>(); values.add(GENERIC_ALL_NAME); allGroup.setValueSet(values, SetRelation.EQUAL); resultMD.addAttribute(allGroup); resultMD.setNumberOfExamples(new MDInteger(1)); } // add aggregated attributes of default aggregation: They will apply only to those attribute not // mentioned explicitly List<String[]> parameterList = this.getParameterList(PARAMETER_AGGREGATION_ATTRIBUTES); HashSet<String> explicitDefinedAttributes = new HashSet<String>(); for (String[] function : parameterList) { explicitDefinedAttributes.add(function[0]); } if (getParameterAsBoolean(PARAMETER_USE_DEFAULT_AGGREGATION)) { String defaultFunction = getParameterAsString(PARAMETER_DEFAULT_AGGREGATION_FUNCTION); ExampleSetMetaData metaDataSubset = attributeSelector.getMetaDataSubset(metaData, false); for (AttributeMetaData amd : metaDataSubset.getAllAttributes()) { if 
(!explicitDefinedAttributes.contains(amd.getName())) { AttributeMetaData newAMD = AggregationFunction.getAttributeMetaData( defaultFunction, amd, getExampleSetInputPort()); if (newAMD != null) resultMD.addAttribute(newAMD); } } } // add explicitly defined attributes of list for (String[] function : parameterList) { AttributeMetaData amd = metaData.getAttributeByName(function[0]); if (amd != null) { AttributeMetaData newMD = AggregationFunction.getAttributeMetaData(function[1], amd, getExampleSetInputPort()); if (newMD != null) resultMD.addAttribute(newMD); } else { // in this case we should register a warning, but continue anyway in cases we don't have the // correct set available getExampleSetInputPort() .addError( new SimpleMetaDataError( Severity.WARNING, getExampleSetInputPort(), "aggregation.attribute_unknown", function[0])); AttributeMetaData newAMD = AggregationFunction.getAttributeMetaData( function[1], new AttributeMetaData(function[0], Ontology.ATTRIBUTE_VALUE), getExampleSetInputPort()); if (newAMD != null) resultMD.addAttribute(newAMD); } } return resultMD; }