/**
 * Builds the SVM example data from a RapidMiner example set. The alpha values and b start at
 * zero; labels are filled in when a label attribute is supplied.
 *
 * <p>Only values that differ from the attribute's default are stored, together with the position
 * of their attribute. A stored value is standardized when the given map holds a mean/variance
 * entry for that position; a variance of zero yields the value {@code 0}.
 *
 * @param exampleSet the examples to copy
 * @param labelAttribute the label attribute, or {@code null} if no labels should be read
 * @param meanVariances standardization statistics keyed by attribute position
 */
public SVMExamples(
    com.rapidminer.example.ExampleSet exampleSet,
    Attribute labelAttribute,
    Map<Integer, MeanVariance> meanVariances) {
  this(exampleSet.size(), 0.0d);
  this.meanVarianceMap = meanVariances;

  Iterator<com.rapidminer.example.Example> reader = exampleSet.iterator();
  Attribute idAttribute = exampleSet.getAttributes().getId();

  for (int row = 0; reader.hasNext(); row++) {
    com.rapidminer.example.Example current = reader.next();

    // Gather the non-default values of this example, keyed by attribute position.
    Map<Integer, Double> sparseValues = new LinkedHashMap<Integer, Double>();
    int position = 0;
    for (Attribute attribute : exampleSet.getAttributes()) {
      double raw = current.getValue(attribute);
      if (!com.rapidminer.example.Tools.isDefault(attribute.getDefault(), raw)) {
        sparseValues.put(position, raw);
      }
      position++;
      // dim tracks the largest number of attribute positions seen so far.
      if (position > dim) {
        dim = position;
      }
    }

    int stored = sparseValues.size();
    atts[row] = new double[stored];
    index[row] = new int[stored];

    // Copy the sparse values into the parallel index/value arrays, standardizing on the way.
    int slot = 0;
    for (Map.Entry<Integer, Double> entry : sparseValues.entrySet()) {
      Integer attributeIndex = entry.getKey();
      index[row][slot] = attributeIndex.intValue();

      double value = entry.getValue().doubleValue();
      MeanVariance stats = meanVarianceMap.get(attributeIndex);
      if (stats != null) {
        value =
            stats.getVariance() == 0.0d
                ? 0.0d
                : (value - stats.getMean()) / Math.sqrt(stats.getVariance());
      }
      atts[row][slot] = value;
      slot++;
    }

    if (labelAttribute != null) {
      double label = current.getValue(labelAttribute);
      if (!labelAttribute.isNominal()) {
        ys[row] = label;
      } else if (label == labelAttribute.getMapping().getPositiveIndex()) {
        // Nominal labels are mapped to +1 (positive class) and -1 (everything else).
        ys[row] = 1;
      } else {
        ys[row] = -1;
      }
    }

    if (idAttribute != null) {
      ids[row] = current.getValueAsString(idAttribute);
    }
  }
}
/**
 * Replaces the current graph with a new undirected sparse graph built from the example set.
 *
 * <p>When the example set has an id attribute, one vertex is added per example (named by the
 * example's id value) and {@code addEdges()} is called afterwards. Without an id attribute the
 * graph stays empty.
 *
 * @return the newly created graph
 */
public Graph<String, String> createGraph() {
  graph = new UndirectedSparseGraph<String, String>();

  Attribute id = exampleSet.getAttributes().getId();
  if (id == null) {
    // No ids means there is nothing to name the vertices with.
    return graph;
  }

  for (Example example : exampleSet) {
    String vertexName = example.getValueAsString(id);
    graph.addVertex(vertexName);
  }
  addEdges();
  return graph;
}
public LinkedList<String> getAllCategories(Attribute attribute) { LinkedList<String> allCategoryList = new LinkedList<String>(); Iterator<Example> reader = this.iterator(); while (reader.hasNext()) { Example example = reader.next(); String currentValue = example.getValueAsString(attribute); if (!inList(currentValue, allCategoryList)) allCategoryList.add(currentValue); } // return new SplittedExampleSet(exampleSet, partition); return allCategoryList; }
/**
 * Reads the value of the configured attribute from one example and stores it in
 * {@code currentValue}; the example set itself is passed through unchanged.
 *
 * <p>The example index parameter is 1-based. Negative values count from the end of the example
 * set ({@code -1} is the last example); {@code 0} is rejected. Nominal and date/time attributes
 * are stored as their string representation and flagged via {@code isNominal}; all other
 * attributes are stored as a {@link Double}.
 *
 * @throws OperatorException if the attribute does not exist, the index is zero, or the index
 *     points before the first or behind the last example
 */
@Override
public void doWork() throws OperatorException {
  ExampleSet exampleSet = exampleSetInput.getData(ExampleSet.class);

  String attributeName = getParameterAsString(PARAMETER_ATTRIBUTE_NAME);
  Attribute attribute = exampleSet.getAttributes().get(attributeName);
  if (attribute == null) {
    throw new UserError(this, 111, attributeName);
  }

  int requestedIndex = getParameterAsInt(PARAMETER_EXAMPLE_INDEX);
  if (requestedIndex == 0) {
    throw new UserError(
        this, 207, "0", PARAMETER_EXAMPLE_INDEX, "only positive or negative indices are allowed");
  }

  // Translate the 1-based / from-the-end parameter into a zero-based position.
  int exampleCount = exampleSet.size();
  int index = requestedIndex < 0 ? exampleCount + requestedIndex : requestedIndex - 1;

  if (index < 0) {
    // A negative index whose magnitude exceeds the number of examples used to slip past the
    // bounds check and reach getExample() with a negative position.
    // NOTE(review): error 110 is assumed to report a required number of examples, hence the
    // magnitude of the requested index is passed — confirm against the error catalogue.
    throw new UserError(this, 110, -(long) requestedIndex);
  }
  if (index >= exampleCount) {
    // NOTE(review): the argument is the zero-based position, as before — confirm that this is
    // what the message for error 110 expects.
    throw new UserError(this, 110, index);
  }

  Example example = exampleSet.getExample(index);
  if (attribute.isNominal()
      || Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.DATE_TIME)) {
    currentValue = example.getValueAsString(attribute);
    isNominal = true;
  } else {
    currentValue = Double.valueOf(example.getValue(attribute));
    isNominal = false;
  }

  exampleSetOutput.deliver(exampleSet);
}
@Override public ExampleSet apply(ExampleSet exampleSet) throws OperatorException { // creating data structures for building aggregates List<AggregationFunction> aggregationFunctions = createAggreationFunctions(exampleSet); // getting attributes that define groups and weights Attribute[] groupAttributes = getMatchingAttributes( exampleSet.getAttributes(), getParameterAsString(PARAMETER_GROUP_BY_ATTRIBUTES)); Attribute weightAttribute = exampleSet.getAttributes().getWeight(); boolean useWeights = weightAttribute != null; // running over exampleSet and aggregate data of each example AggregationTreeNode rootNode = new AggregationTreeNode(); LeafAggregationTreeNode leafNode = null; if (groupAttributes.length == 0) { // if no grouping, we will directly insert into leaf node leafNode = new LeafAggregationTreeNode(aggregationFunctions); } for (Example example : exampleSet) { if (groupAttributes.length > 0) { AggregationTreeNode currentNode = rootNode; // now traversing aggregation tree for m-1 group attributes for (int i = 0; i < groupAttributes.length - 1; i++) { Attribute currentAttribute = groupAttributes[i]; if (currentAttribute.isNominal()) { currentNode = currentNode.getOrCreateChild(example.getValueAsString(currentAttribute)); } else { currentNode = currentNode.getOrCreateChild(example.getValue(currentAttribute)); } } // now we have to get the leaf node containing the aggregators Attribute currentAttribute = groupAttributes[groupAttributes.length - 1]; if (currentAttribute.isNominal()) { leafNode = currentNode.getOrCreateLeaf( example.getValueAsString(currentAttribute), aggregationFunctions); } else { leafNode = currentNode.getOrCreateLeaf(example.getValue(currentAttribute), aggregationFunctions); } } // now count current example if (!useWeights) leafNode.count(example); else leafNode.count(example, example.getValue(weightAttribute)); } // now derive new example set from aggregated values boolean isCountingAllCombinations = 
getParameterAsBoolean(PARAMETER_ALL_COMBINATIONS); // building new attributes from grouping attributes and aggregation functions Attribute[] newAttributes = new Attribute[groupAttributes.length + aggregationFunctions.size()]; for (int i = 0; i < groupAttributes.length; i++) { newAttributes[i] = AttributeFactory.createAttribute(groupAttributes[i]); } int i = groupAttributes.length; for (AggregationFunction function : aggregationFunctions) { newAttributes[i] = function.getTargetAttribute(); i++; } // creating example table MemoryExampleTable table = new MemoryExampleTable(newAttributes); ; DataRowFactory factory = new DataRowFactory(DataRowFactory.TYPE_DOUBLE_ARRAY, '.'); double[] dataOfUpperLevels = new double[groupAttributes.length]; // prepare empty lists ArrayList<List<Aggregator>> allAggregators = new ArrayList<List<Aggregator>>(); for (int aggregatorIdx = 0; aggregatorIdx < aggregationFunctions.size(); ++aggregatorIdx) { allAggregators.add(new ArrayList<Aggregator>()); } ArrayList<double[]> allGroupCombinations = new ArrayList<double[]>(); if (groupAttributes.length > 0) { // going through all possible groups recursively parseTree( rootNode, groupAttributes, dataOfUpperLevels, 0, allGroupCombinations, allAggregators, factory, newAttributes, isCountingAllCombinations, aggregationFunctions); } else { // just enter values from single leaf node parseLeaf( leafNode, dataOfUpperLevels, allGroupCombinations, allAggregators, factory, newAttributes, aggregationFunctions); } // apply post-processing int currentFunctionIdx = 0; for (AggregationFunction aggregationFunction : aggregationFunctions) { aggregationFunction.postProcessing(allAggregators.get(currentFunctionIdx)); ++currentFunctionIdx; } // write data into table int currentRow = 0; for (double[] groupValues : allGroupCombinations) { double[] rowData = new double[newAttributes.length]; // copy group values into row System.arraycopy(groupValues, 0, rowData, 0, groupValues.length); DoubleArrayDataRow dataRow = new 
DoubleArrayDataRow(rowData); // copy aggregated values into row int currentColumn = groupValues.length; for (List<Aggregator> aggregatorsForColumn : allAggregators) { Aggregator aggregatorForCurrentCell = aggregatorsForColumn.get(currentRow); Attribute currentAttribute = newAttributes[currentColumn]; if (aggregatorForCurrentCell != null) { aggregatorForCurrentCell.set(currentAttribute, dataRow); } else { aggregationFunctions .get(currentColumn - groupAttributes.length) .setDefault(currentAttribute, dataRow); } ++currentColumn; } table.addDataRow(dataRow); ++currentRow; } // postprocessing for remaining compatibility: Old versions automatically added group "all". // Must remain this way for old operator // version if (getCompatibilityLevel().isAtMost(VERSION_5_1_6)) { if (groupAttributes.length == 0) { Attribute resultGroupAttribute = AttributeFactory.createAttribute(GENERIC_GROUP_NAME, Ontology.NOMINAL); table.addAttribute(resultGroupAttribute); table .getDataRow(0) .set( resultGroupAttribute, resultGroupAttribute.getMapping().mapString(GENERIC_ALL_NAME)); ExampleSet resultSet = table.createExampleSet(); resultSet.getAnnotations().addAll(exampleSet.getAnnotations()); for (Attribute attribute : newAttributes) { resultSet.getAttributes().remove(attribute); resultSet.getAttributes().addRegular(attribute); } return resultSet; } else { // make attributes nominal ExampleSet resultSet = table.createExampleSet(); resultSet.getAnnotations().addAll(exampleSet.getAnnotations()); try { NumericToNominal toNominalOperator = OperatorService.createOperator(NumericToPolynominal.class); toNominalOperator.setParameter( AttributeSubsetSelector.PARAMETER_FILTER_TYPE, AttributeSubsetSelector.CONDITION_REGULAR_EXPRESSION + ""); toNominalOperator.setParameter( RegexpAttributeFilter.PARAMETER_REGULAR_EXPRESSION, getParameterAsString(PARAMETER_GROUP_BY_ATTRIBUTES)); toNominalOperator.setParameter( AttributeSubsetSelector.PARAMETER_INCLUDE_SPECIAL_ATTRIBUTES, "true"); return 
toNominalOperator.apply(resultSet); } catch (OperatorCreationException e) { // otherwise compatibility could not be ensured return resultSet; } } } // for recent version table is correct: Deliver example set ExampleSet resultSet = table.createExampleSet(); resultSet.getAnnotations().addAll(exampleSet.getAnnotations()); return resultSet; }
@Override public ExampleSet apply(ExampleSet exampleSet) throws OperatorException { // init char decimalPointCharacter = getParameterAsString(PARAMETER_DECIMAL_POINT_CHARACTER).charAt(0); Character groupingCharacter = null; if (isParameterSet(PARAMETER_NUMBER_GROUPING_CHARACTER)) { groupingCharacter = getParameterAsString(PARAMETER_NUMBER_GROUPING_CHARACTER).charAt(0); } Set<Attribute> attributeSet = attributeSelector.getAttributeSubset(exampleSet, false); int size = attributeSet.size(); int[] valueTypes = new int[size]; int index = 0; for (Attribute attribute : attributeSet) { valueTypes[index++] = attribute.getValueType(); } // guessing int[] guessedValueTypes = new int[valueTypes.length]; int checkedCounter = 0; for (Example example : exampleSet) { index = 0; for (Attribute attribute : attributeSet) { if (!attribute.isNominal() && !attribute.isNumerical()) { continue; } double originalValue = example.getValue(attribute); if (!Double.isNaN(originalValue)) { if (guessedValueTypes[index] != Ontology.NOMINAL) { try { String valueString = example.getValueAsString(attribute); if (!Attribute.MISSING_NOMINAL_VALUE.equals(valueString)) { if (groupingCharacter != null) { valueString = valueString.replace(groupingCharacter.toString(), ""); } valueString = valueString.replace(decimalPointCharacter, '.'); double value = Double.parseDouble(valueString); if (guessedValueTypes[index] != Ontology.REAL) { if (Tools.isEqual(Math.round(value), value)) { guessedValueTypes[index] = Ontology.INTEGER; } else { guessedValueTypes[index] = Ontology.REAL; } } } } catch (NumberFormatException e) { guessedValueTypes[index] = Ontology.NOMINAL; checkedCounter++; } } } index++; } if (checkedCounter >= guessedValueTypes.length) { break; } } // the example set contains at least one example and the guessing was performed if (exampleSet.size() > 0) { valueTypes = guessedValueTypes; // new attributes List<AttributeRole> newAttributes = new LinkedList<AttributeRole>(); index = 0; for (Attribute 
attribute : attributeSet) { if (!attribute.isNominal() && !attribute.isNumerical()) { continue; } AttributeRole role = exampleSet.getAttributes().getRole(attribute); Attribute newAttribute = AttributeFactory.createAttribute(valueTypes[index]); exampleSet.getExampleTable().addAttribute(newAttribute); AttributeRole newRole = new AttributeRole(newAttribute); newRole.setSpecial(role.getSpecialName()); newAttributes.add(newRole); // copy data for (Example e : exampleSet) { double oldValue = e.getValue(attribute); if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(valueTypes[index], Ontology.NUMERICAL)) { if (!Double.isNaN(oldValue)) { String valueString = e.getValueAsString(attribute); if (Attribute.MISSING_NOMINAL_VALUE.equals(valueString)) { e.setValue(newAttribute, Double.NaN); } else { if (groupingCharacter != null) { valueString = valueString.replace(groupingCharacter.toString(), ""); } valueString = valueString.replace(decimalPointCharacter, '.'); e.setValue(newAttribute, Double.parseDouble(valueString)); } } else { e.setValue(newAttribute, Double.NaN); } } else { if (!Double.isNaN(oldValue)) { String value = e.getValueAsString(attribute); e.setValue(newAttribute, newAttribute.getMapping().mapString(value)); } else { e.setValue(newAttribute, Double.NaN); } } } // delete attribute and rename the new attribute (due to deletion and data scans: no // more memory used :-) exampleSet.getExampleTable().removeAttribute(attribute); exampleSet.getAttributes().remove(role); newAttribute.setName(attribute.getName()); index++; } for (AttributeRole role : newAttributes) { if (role.isSpecial()) { exampleSet .getAttributes() .setSpecialAttribute(role.getAttribute(), role.getSpecialName()); } else { exampleSet.getAttributes().addRegular(role.getAttribute()); } } } return exampleSet; }
public static void writeCSV( ExampleSet exampleSet, PrintWriter out, String colSeparator, boolean quoteNomValues, boolean writeAttribNames, boolean formatDate) { String columnSeparator = colSeparator; boolean quoteNominalValues = quoteNomValues; // write column names if (writeAttribNames) { Iterator<Attribute> a = exampleSet.getAttributes().allAttributes(); boolean first = true; while (a.hasNext()) { if (!first) out.print(columnSeparator); Attribute attribute = a.next(); String name = attribute.getName(); if (quoteNominalValues) { name = name.replaceAll("\"", "'"); name = "\"" + name + "\""; } out.print(name); first = false; } out.println(); } // write data for (Example example : exampleSet) { Iterator<Attribute> a = exampleSet.getAttributes().allAttributes(); boolean first = true; while (a.hasNext()) { Attribute attribute = a.next(); if (!first) out.print(columnSeparator); if (!Double.isNaN(example.getValue(attribute))) { if (attribute.isNominal()) { String stringValue = example.getValueAsString(attribute); if (quoteNominalValues) { stringValue = stringValue.replaceAll("\"", "'"); stringValue = "\"" + stringValue + "\""; } out.print(stringValue); } else { Double value = example.getValue(attribute); if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.DATE_TIME)) { if (formatDate) { Date date = new Date(value.longValue()); String s = DateFormat.getInstance().format(date); out.print(s); } else { out.print(value); } } else { out.print(value); } } } first = false; } out.println(); } }
private void addEdges() { // remove old edges if available Iterator<String> e = edgeLabelMap.keySet().iterator(); while (e.hasNext()) { graph.removeEdge(e.next()); } edgeLabelMap.clear(); boolean isDistance = measure.isDistance(); Attribute id = exampleSet.getAttributes().getId(); List<SortableEdge> sortableEdges = new LinkedList<SortableEdge>(); for (int i = 0; i < exampleSet.size(); i++) { Example example = exampleSet.getExample(i); for (int j = i + 1; j < exampleSet.size(); j++) { Example comExample = exampleSet.getExample(j); if (isDistance) sortableEdges.add( new SortableEdge( example.getValueAsString(id), comExample.getValueAsString(id), null, measure.calculateDistance(example, comExample), SortableEdge.DIRECTION_INCREASE)); else sortableEdges.add( new SortableEdge( example.getValueAsString(id), comExample.getValueAsString(id), null, measure.calculateSimilarity(example, comExample), SortableEdge.DIRECTION_DECREASE)); } } Collections.sort(sortableEdges); int numberOfEdges = distanceSlider.getValue(); int counter = 0; double minStrength = Double.POSITIVE_INFINITY; double maxStrength = Double.NEGATIVE_INFINITY; Map<String, Double> strengthMap = new HashMap<String, Double>(); for (SortableEdge sortableEdge : sortableEdges) { if (counter > numberOfEdges) break; String idString = edgeFactory.create(); graph.addEdge( idString, sortableEdge.getFirstVertex(), sortableEdge.getSecondVertex(), EdgeType.UNDIRECTED); edgeLabelMap.put(idString, Tools.formatIntegerIfPossible(sortableEdge.getEdgeValue())); double strength = sortableEdge.getEdgeValue(); minStrength = Math.min(minStrength, strength); maxStrength = Math.max(maxStrength, strength); strengthMap.put(idString, strength); counter++; } for (Entry<String, Double> entry : strengthMap.entrySet()) { edgeStrengthMap.put( entry.getKey(), (entry.getValue() - minStrength) / (maxStrength - minStrength)); } }
/**
 * Builds a hierarchical cluster model for the input example set by merging clusters bottom-up
 * and delivers the model together with the (id-extended) example set.
 *
 * <p>Every example starts as its own leaf cluster. The linkage method selected by the mode
 * parameter (single linkage unless the mode equals {@code modes[1]} for complete or
 * {@code modes[2]} for average linkage) then repeatedly picks two clusters to merge until only
 * one cluster, the root of the tree, remains.
 *
 * @throws OperatorException if the input is unsuitable or the process is stopped
 */
@Override
public void doWork() throws OperatorException {
  ExampleSet exampleSet = exampleSetInput.getData(ExampleSet.class);
  DistanceMeasure measure = measureHelper.getInitializedMeasure(exampleSet);

  // Additional checks: no missing values, and ids must exist.
  Tools.onlyNonMissingValues(exampleSet, getOperatorClassName(), this, new String[0]);
  Tools.checkAndCreateIds(exampleSet);

  Attribute idAttribute = exampleSet.getAttributes().getId();
  boolean idAttributeIsNominal = idAttribute.isNominal();

  final int exampleCount = exampleSet.size();
  DistanceMatrix matrix = new DistanceMatrix(exampleCount);
  Map<Integer, HierarchicalClusterNode> clusterMap =
      new HashMap<Integer, HierarchicalClusterNode>(exampleCount);
  int[] clusterIds = new int[exampleCount];

  // Fill the distance matrix (only pairs with column > row) and create one leaf per example.
  int nextClusterId = 0;
  for (Example rowExample : exampleSet) {
    checkForStop();
    clusterIds[nextClusterId] = nextClusterId;

    int column = 0;
    for (Example columnExample : exampleSet) {
      if (column > nextClusterId) {
        matrix.set(nextClusterId, column, measure.calculateDistance(rowExample, columnExample));
      }
      column++;
    }

    HierarchicalClusterNode leaf;
    if (idAttributeIsNominal) {
      leaf =
          new HierarchicalClusterLeafNode(nextClusterId, rowExample.getValueAsString(idAttribute));
    } else {
      leaf = new HierarchicalClusterLeafNode(nextClusterId, rowExample.getValue(idAttribute));
    }
    clusterMap.put(nextClusterId, leaf);
    nextClusterId++;
  }

  // Select the linkage method; single linkage is the default.
  AbstractLinkageMethod linkage = new SingleLinkageMethod(matrix, clusterIds);
  String mode = getParameterAsString(PARAMETER_MODE);
  if (mode.equals(modes[1])) {
    linkage = new CompleteLinkageMethod(matrix, clusterIds);
  } else if (mode.equals(modes[2])) {
    linkage = new AverageLinkageMethod(matrix, clusterIds);
  }

  // Merge two clusters per round until only the root is left.
  while (clusterMap.size() > 1) {
    Agglomeration agglomeration = linkage.getNextAgglomeration(nextClusterId, clusterMap);
    HierarchicalClusterNode merged =
        new HierarchicalClusterNode(nextClusterId, agglomeration.getDistance());
    merged.addSubNode(clusterMap.get(agglomeration.getClusterId1()));
    merged.addSubNode(clusterMap.get(agglomeration.getClusterId2()));
    clusterMap.remove(agglomeration.getClusterId1());
    clusterMap.remove(agglomeration.getClusterId2());
    clusterMap.put(nextClusterId, merged);
    nextClusterId++;
  }

  // The single remaining node is the root of the dendrogram.
  HierarchicalClusterNode root = clusterMap.values().iterator().next();
  HierarchicalClusterModel model = new DendogramHierarchicalClusterModel(root);

  // Register a visualizer working on a clone of the example set.
  ObjectVisualizerService.addObjectVisualizer(
      model, new ExampleVisualizer((ExampleSet) exampleSet.clone()));

  modelOutput.deliver(model);
  exampleSetOutput.deliver(exampleSet);
}