private void parseLeaf( LeafAggregationTreeNode node, double[] dataOfUpperLevels, List<double[]> allGroupCombinations, List<List<Aggregator>> allAggregators, DataRowFactory factory, Attribute[] newAttributes, List<AggregationFunction> aggregationFunctions) { // first copying data from groups double[] newGroupCombination = new double[dataOfUpperLevels.length]; System.arraycopy(dataOfUpperLevels, 0, newGroupCombination, 0, dataOfUpperLevels.length); allGroupCombinations.add(newGroupCombination); // DoubleArrayDataRow row = new DoubleArrayDataRow(newData); // check whether leaf exists if (node != null) { // int i = dataOfUpperLevels.length; // number of group attributes int i = 0; for (Aggregator aggregator : node.getAggregators()) { allAggregators.get(i).add(aggregator); // aggregator.set(newAttributes[i], row); i++; } } else { // fill in defaults for all aggregation functions // int i = dataOfUpperLevels.length; // number of group attributes // for (AggregationFunction function : aggregationFunctions) { // function.setDefault(newAttributes[i], row); // i++; for (List<Aggregator> current : allAggregators) { current.add(null); } } // table.addDataRow(row); }
@Override public ExampleSet apply(ExampleSet exampleSet) throws OperatorException { // creating data structures for building aggregates List<AggregationFunction> aggregationFunctions = createAggreationFunctions(exampleSet); // getting attributes that define groups and weights Attribute[] groupAttributes = getMatchingAttributes( exampleSet.getAttributes(), getParameterAsString(PARAMETER_GROUP_BY_ATTRIBUTES)); Attribute weightAttribute = exampleSet.getAttributes().getWeight(); boolean useWeights = weightAttribute != null; // running over exampleSet and aggregate data of each example AggregationTreeNode rootNode = new AggregationTreeNode(); LeafAggregationTreeNode leafNode = null; if (groupAttributes.length == 0) { // if no grouping, we will directly insert into leaf node leafNode = new LeafAggregationTreeNode(aggregationFunctions); } for (Example example : exampleSet) { if (groupAttributes.length > 0) { AggregationTreeNode currentNode = rootNode; // now traversing aggregation tree for m-1 group attributes for (int i = 0; i < groupAttributes.length - 1; i++) { Attribute currentAttribute = groupAttributes[i]; if (currentAttribute.isNominal()) { currentNode = currentNode.getOrCreateChild(example.getValueAsString(currentAttribute)); } else { currentNode = currentNode.getOrCreateChild(example.getValue(currentAttribute)); } } // now we have to get the leaf node containing the aggregators Attribute currentAttribute = groupAttributes[groupAttributes.length - 1]; if (currentAttribute.isNominal()) { leafNode = currentNode.getOrCreateLeaf( example.getValueAsString(currentAttribute), aggregationFunctions); } else { leafNode = currentNode.getOrCreateLeaf(example.getValue(currentAttribute), aggregationFunctions); } } // now count current example if (!useWeights) leafNode.count(example); else leafNode.count(example, example.getValue(weightAttribute)); } // now derive new example set from aggregated values boolean isCountingAllCombinations = getParameterAsBoolean(PARAMETER_ALL_COMBINATIONS); // building new attributes from grouping attributes and aggregation functions Attribute[] newAttributes = new Attribute[groupAttributes.length + aggregationFunctions.size()]; for (int i = 0; i < groupAttributes.length; i++) { newAttributes[i] = AttributeFactory.createAttribute(groupAttributes[i]); } int i = groupAttributes.length; for (AggregationFunction function : aggregationFunctions) { newAttributes[i] = function.getTargetAttribute(); i++; } // creating example table MemoryExampleTable table = new MemoryExampleTable(newAttributes); ; DataRowFactory factory = new DataRowFactory(DataRowFactory.TYPE_DOUBLE_ARRAY, '.'); double[] dataOfUpperLevels = new double[groupAttributes.length]; // prepare empty lists ArrayList<List<Aggregator>> allAggregators = new ArrayList<List<Aggregator>>(); for (int aggregatorIdx = 0; aggregatorIdx < aggregationFunctions.size(); ++aggregatorIdx) { allAggregators.add(new ArrayList<Aggregator>()); } ArrayList<double[]> allGroupCombinations = new ArrayList<double[]>(); if (groupAttributes.length > 0) { // going through all possible groups recursively parseTree( rootNode, groupAttributes, dataOfUpperLevels, 0, allGroupCombinations, allAggregators, factory, newAttributes, isCountingAllCombinations, aggregationFunctions); } else { // just enter values from single leaf node parseLeaf( leafNode, dataOfUpperLevels, allGroupCombinations, allAggregators, factory, newAttributes, aggregationFunctions); } // apply post-processing int currentFunctionIdx = 0; for (AggregationFunction aggregationFunction : aggregationFunctions) { aggregationFunction.postProcessing(allAggregators.get(currentFunctionIdx)); ++currentFunctionIdx; } // write data into table int currentRow = 0; for (double[] groupValues : allGroupCombinations) { double[] rowData = new double[newAttributes.length]; // copy group values into row System.arraycopy(groupValues, 0, rowData, 0, groupValues.length); DoubleArrayDataRow dataRow = new DoubleArrayDataRow(rowData); // copy aggregated values into row int currentColumn = groupValues.length; for (List<Aggregator> aggregatorsForColumn : allAggregators) { Aggregator aggregatorForCurrentCell = aggregatorsForColumn.get(currentRow); Attribute currentAttribute = newAttributes[currentColumn]; if (aggregatorForCurrentCell != null) { aggregatorForCurrentCell.set(currentAttribute, dataRow); } else { aggregationFunctions .get(currentColumn - groupAttributes.length) .setDefault(currentAttribute, dataRow); } ++currentColumn; } table.addDataRow(dataRow); ++currentRow; } // postprocessing for remaining compatibility: Old versions automatically added group "all". // Must remain this way for old operator // version if (getCompatibilityLevel().isAtMost(VERSION_5_1_6)) { if (groupAttributes.length == 0) { Attribute resultGroupAttribute = AttributeFactory.createAttribute(GENERIC_GROUP_NAME, Ontology.NOMINAL); table.addAttribute(resultGroupAttribute); table .getDataRow(0) .set( resultGroupAttribute, resultGroupAttribute.getMapping().mapString(GENERIC_ALL_NAME)); ExampleSet resultSet = table.createExampleSet(); resultSet.getAnnotations().addAll(exampleSet.getAnnotations()); for (Attribute attribute : newAttributes) { resultSet.getAttributes().remove(attribute); resultSet.getAttributes().addRegular(attribute); } return resultSet; } else { // make attributes nominal ExampleSet resultSet = table.createExampleSet(); resultSet.getAnnotations().addAll(exampleSet.getAnnotations()); try { NumericToNominal toNominalOperator = OperatorService.createOperator(NumericToPolynominal.class); toNominalOperator.setParameter( AttributeSubsetSelector.PARAMETER_FILTER_TYPE, AttributeSubsetSelector.CONDITION_REGULAR_EXPRESSION + ""); toNominalOperator.setParameter( RegexpAttributeFilter.PARAMETER_REGULAR_EXPRESSION, getParameterAsString(PARAMETER_GROUP_BY_ATTRIBUTES)); toNominalOperator.setParameter( AttributeSubsetSelector.PARAMETER_INCLUDE_SPECIAL_ATTRIBUTES, "true"); return toNominalOperator.apply(resultSet); } catch (OperatorCreationException e) { // otherwise compatibility could not be ensured return resultSet; } } } // for recent version table is correct: Deliver example set ExampleSet resultSet = table.createExampleSet(); resultSet.getAnnotations().addAll(exampleSet.getAnnotations()); return resultSet; }