@Override public Model learn(ExampleSet exampleSet) throws OperatorException { DistanceMeasure measure = DistanceMeasures.createMeasure(this); measure.init(exampleSet); GeometricDataCollection<RegressionData> data = new LinearList<RegressionData>(measure); // check if weights should be used boolean useWeights = getParameterAsBoolean(PARAMETER_USE_EXAMPLE_WEIGHTS); // check if robust estimate should be performed: Then calculate weights and use it anyway if (getParameterAsBoolean(PARAMETER_USE_ROBUST_ESTIMATION)) { useWeights = true; LocalPolynomialExampleWeightingOperator weightingOperator; try { weightingOperator = OperatorService.createOperator(LocalPolynomialExampleWeightingOperator.class); exampleSet = weightingOperator.doWork((ExampleSet) exampleSet.clone(), this); } catch (OperatorCreationException e) { throw new UserError(this, 904, "LocalPolynomialExampleWeighting", e.getMessage()); } } Attributes attributes = exampleSet.getAttributes(); Attribute label = attributes.getLabel(); Attribute weightAttribute = attributes.getWeight(); for (Example example : exampleSet) { double[] values = new double[attributes.size()]; double labelValue = example.getValue(label); double weight = 1d; if (weightAttribute != null && useWeights) { weight = example.getValue(weightAttribute); } // filter out examples without influence if (weight > 0d) { // copying example values int i = 0; for (Attribute attribute : attributes) { values[i] = example.getValue(attribute); i++; } // inserting into geometric data collection data.add(values, new RegressionData(values, labelValue, weight)); } } return new LocalPolynomialRegressionModel( exampleSet, data, Neighborhoods.createNeighborhood(this), SmoothingKernels.createKernel(this), getParameterAsInt(PARAMETER_DEGREE), getParameterAsDouble(PARAMETER_RIDGE)); }
/**
 * Sets up {@code knnCollection}, reusing the model at the model input port when possible.
 *
 * <p>The delivered model is reused only if its k is at least the requested {@code k}, its points
 * are deeply equal to {@code points}, and its measure has the same class as {@code measure}.
 * Otherwise a new collection is created and {@code newCollection} is set to {@code true}. If the
 * collection that is finally used has a larger k than requested, a shrunken clone is used.
 *
 * @param n passed to the {@link KNNCollection} constructor
 * @param k requested number of neighbours
 * @param points the data points
 * @param weight passed to the {@link KNNCollection} constructor
 * @param measure the distance measure of the current run
 * @throws OperatorException if the model cannot be read from the input port
 */
public void readModel(int n, int k, double[][] points, int[] weight, DistanceMeasure measure)
    throws OperatorException {
  // Without a connected model port there is nothing to reuse.
  if (!modelInput.isConnected()) {
    knnCollection = new KNNCollection(n, k, points, weight);
    newCollection = true;
    return;
  }

  KNNCollectionModel input = modelInput.getData(KNNCollectionModel.class);
  knnCollection = input.get();
  newCollection = false;

  boolean kTooSmall = k > knnCollection.getK();
  // Short-circuit order matters: the point and measure checks run only if k is large enough.
  boolean reusable =
      !kTooSmall
          && Arrays.deepEquals(knnCollection.getPoints(), points)
          && measure.getClass().toString().equals(input.measure.getClass().toString());

  if (reusable) {
    this.logNote(" Model at input port used for speeding up the operator.");
  } else {
    this.logNote(
        kTooSmall
            ? "Model at input port can not be used (k too small)."
            : "Model at input port can not be used (Model andExampleSet not matching).");
    knnCollection = new KNNCollection(n, k, points, weight);
    newCollection = true;
  }

  // The collection holds more neighbours than requested: work on a shrunken copy.
  if (k < knnCollection.getK()) {
    knnCollection = KNNCollection.clone(knnCollection);
    knnCollection.shrink(knnCollection.getK() - k);
  }
}
// checking for example set and valid attributes @Override public void init(ExampleSet exampleSet) throws OperatorException { super.init(exampleSet); Tools.onlyNominalAttributes(exampleSet, "nominal similarities"); this.useAttribute = new boolean[exampleSet.getAttributes().size()]; int i = 0; for (Attribute attribute : exampleSet.getAttributes()) { if (attribute.isNominal()) { useAttribute[i] = true; } i++; } }
/**
 * Creates the visualization component for a similarity measure.
 *
 * <p>A row of radio buttons at the top switches the centre component between a table view, a
 * graph view, a histogram of sampled pairwise similarities and a k-distance view. The table view
 * is shown initially.
 *
 * <p>The listeners replace the component at index 1. This relies on the toggle panel being added
 * first (index 0) and the active view second (index 1). After swapping, the container is
 * revalidated so the new view is laid out, and then repainted.
 *
 * @param sim provides the distance measure to visualize
 * @param exampleSet the examples whose pairwise similarities are shown
 */
public SimilarityVisualization(SimilarityMeasureObject sim, ExampleSet exampleSet) {
  super();
  setLayout(new BorderLayout());

  DistanceMeasure measure = sim.getDistanceMeasure();
  ButtonGroup group = new ButtonGroup();
  JPanel togglePanel = new JPanel(new FlowLayout(FlowLayout.LEFT));

  // similarity table
  final JComponent tableView = new SimilarityTable(measure, exampleSet);
  final JRadioButton tableButton = new JRadioButton("Table View", true);
  tableButton.addActionListener(
      new ActionListener() {
        public void actionPerformed(ActionEvent e) {
          if (tableButton.isSelected()) {
            remove(1);
            add(tableView, BorderLayout.CENTER);
            // A changed component hierarchy must be validated before it is displayed.
            revalidate();
            repaint();
          }
        }
      });
  group.add(tableButton);
  togglePanel.add(tableButton);

  // graph view
  final JComponent graphView =
      new GraphViewer<String, String>(new SimilarityGraphCreator(measure, exampleSet));
  final JRadioButton graphButton = new JRadioButton("Graph View", false);
  graphButton.addActionListener(
      new ActionListener() {
        public void actionPerformed(ActionEvent e) {
          if (graphButton.isSelected()) {
            remove(1);
            add(graphView, BorderLayout.CENTER);
            revalidate();
            repaint();
          }
        }
      });
  group.add(graphButton);
  togglePanel.add(graphButton);

  // histogram view: sample pairs with a probability chosen so that, per example, roughly at
  // most 500 comparisons are drawn
  DataTable dataTable = new SimpleDataTable("Histogram", new String[] {"Histogram"});
  double sampleRatio = Math.min(1.0d, 500.0d / exampleSet.size());
  Random random = new Random();
  int i = 0;
  for (Example example : exampleSet) {
    int j = 0;
    for (Example compExample : exampleSet) {
      // never compare an example with itself
      if (i != j && random.nextDouble() < sampleRatio) {
        double simValue = measure.calculateSimilarity(example, compExample);
        dataTable.add(new SimpleDataTableRow(new double[] {simValue}));
      }
      j++;
    }
    i++;
  }
  final PlotterConfigurationModel settings =
      new PlotterConfigurationModel(PlotterConfigurationModel.HISTOGRAM_PLOT, dataTable);
  settings.enablePlotColumn(0);
  settings.setParameterAsInt(HistogramChart.PARAMETER_NUMBER_OF_BINS, 100);
  final JRadioButton histogramButton = new JRadioButton("Histogram View", false);
  histogramButton.addActionListener(
      new ActionListener() {
        public void actionPerformed(ActionEvent e) {
          if (histogramButton.isSelected()) {
            remove(1);
            // The plotter component is fetched when the view is selected, not up front.
            add(settings.getPlotter().getPlotter(), BorderLayout.CENTER);
            revalidate();
            repaint();
          }
        }
      });
  group.add(histogramButton);
  togglePanel.add(histogramButton);

  // K distance view
  final SimilarityKDistanceVisualization kDistancePlotter =
      new SimilarityKDistanceVisualization(measure, exampleSet);
  final JRadioButton kdistanceButton = new JRadioButton("k-Distance View", false);
  kdistanceButton.addActionListener(
      new ActionListener() {
        public void actionPerformed(ActionEvent e) {
          if (kdistanceButton.isSelected()) {
            remove(1);
            add(kDistancePlotter, BorderLayout.CENTER);
            revalidate();
            repaint();
          }
        }
      });
  group.add(kdistanceButton);
  togglePanel.add(kdistanceButton);

  // Order matters: toggle panel becomes component 0, the active view component 1.
  add(togglePanel, BorderLayout.NORTH);
  add(tableView, BorderLayout.CENTER);
}
public double[] evaluate() { // the result will contain MDEF/ SIgmaMDEF the higher this ratio is the // more outling the result is.Lower than or equal 3 is not considered an // outlier double[] result = new double[n]; DistancePair[][] criticalDistances = new DistancePair[n][2 * n]; int secondDimension = 2 * n; // preprocessing for (int i = 0; i < n; i++) { int firstIndex = i << 1; int secondIndex = firstIndex + 1; int current = secondIndex + 1; // cardinality -2 means that there actually a point criticalDistances[i][firstIndex] = new DistancePair(0, -2, i); // cardinality -1 means that there is no point just alpha critical // distance criticalDistances[i][secondIndex] = new DistancePair(0, -1, -1); for (int j = i + 1; j < n; j++) { // draw back this assumes that the distance measure is symmetric double currentDistance = measure.calculateDistance(points[i], points[j]); double alphaCurrentDistance = currentDistance / alpha; criticalDistances[i][current++] = new DistancePair(currentDistance, -2, j); criticalDistances[i][current++] = new DistancePair(alphaCurrentDistance, -1, -1); criticalDistances[j][firstIndex] = new DistancePair(currentDistance, -2, i); criticalDistances[j][secondIndex] = new DistancePair(alphaCurrentDistance, -1, -1); } Arrays.sort(criticalDistances[i]); int cardinality = 0; for (int j = 0; j < secondDimension; j++) { if (criticalDistances[i][j].cardinality == -2) { cardinality += weight[criticalDistances[i][j].index]; } criticalDistances[i][j].cardinality = cardinality; } } // computation of MDEF for (int i = 0; i < n; i++) { result[i] = 0; for (int j = 0; j < secondDimension; j++) { if (criticalDistances[i][j].cardinality < nmin) continue; if (j != secondDimension - 1 && criticalDistances[i][j].distance == criticalDistances[i][j + 1].distance) continue; // alpha r distance double alphaR = criticalDistances[i][j].distance * alpha; int nPR = criticalDistances[i][j].cardinality; int nPRAlpha = find(0, secondDimension, alphaR, criticalDistances[i]); 
double summationNPRALpha = 0.0; // this is the loop I should try to vanish for (int k = 0; k <= j; k++) { int index = criticalDistances[i][k].index; if (index == -1) continue; int currentNRPAlpa = find(0, secondDimension, alphaR, criticalDistances[index]); summationNPRALpha += currentNRPAlpa; } double nHatPRAlpha = summationNPRALpha * 1.0 / nPR; double squaredNPRAlpha = 0.0; for (int k = 0; k <= j; k++) { int index = criticalDistances[i][k].index; if (index == -1) continue; int currentNRPAlpa = find(0, secondDimension, alphaR, criticalDistances[index]); double delta = currentNRPAlpa - nHatPRAlpha; squaredNPRAlpha += delta * delta; } double sigmaPRAlpha = Math.sqrt(squaredNPRAlpha / nPR); double MDEF = 1.0 - nPRAlpha / nHatPRAlpha; double sigmaMDEF = sigmaPRAlpha / nHatPRAlpha; double currentRes; if (sigmaMDEF == 0) currentRes = 0; else currentRes = MDEF / sigmaMDEF; if (currentRes > result[i]) result[i] = currentRes; } } return result; }
/**
 * Initializes the measure after checking that the example set has only numerical attributes.
 *
 * @param exampleSet the example set the measure is initialized for
 * @throws OperatorException if the super class initialization or the attribute check fails
 */
@Override
public void init(ExampleSet exampleSet) throws OperatorException {
  super.init(exampleSet);

  // The second argument names the task in the message produced by a failed check.
  final String task = "value based similarities";
  Tools.onlyNumericalAttributes(exampleSet, task);
}
private void addEdges() { // remove old edges if available Iterator<String> e = edgeLabelMap.keySet().iterator(); while (e.hasNext()) { graph.removeEdge(e.next()); } edgeLabelMap.clear(); boolean isDistance = measure.isDistance(); Attribute id = exampleSet.getAttributes().getId(); List<SortableEdge> sortableEdges = new LinkedList<SortableEdge>(); for (int i = 0; i < exampleSet.size(); i++) { Example example = exampleSet.getExample(i); for (int j = i + 1; j < exampleSet.size(); j++) { Example comExample = exampleSet.getExample(j); if (isDistance) sortableEdges.add( new SortableEdge( example.getValueAsString(id), comExample.getValueAsString(id), null, measure.calculateDistance(example, comExample), SortableEdge.DIRECTION_INCREASE)); else sortableEdges.add( new SortableEdge( example.getValueAsString(id), comExample.getValueAsString(id), null, measure.calculateSimilarity(example, comExample), SortableEdge.DIRECTION_DECREASE)); } } Collections.sort(sortableEdges); int numberOfEdges = distanceSlider.getValue(); int counter = 0; double minStrength = Double.POSITIVE_INFINITY; double maxStrength = Double.NEGATIVE_INFINITY; Map<String, Double> strengthMap = new HashMap<String, Double>(); for (SortableEdge sortableEdge : sortableEdges) { if (counter > numberOfEdges) break; String idString = edgeFactory.create(); graph.addEdge( idString, sortableEdge.getFirstVertex(), sortableEdge.getSecondVertex(), EdgeType.UNDIRECTED); edgeLabelMap.put(idString, Tools.formatIntegerIfPossible(sortableEdge.getEdgeValue())); double strength = sortableEdge.getEdgeValue(); minStrength = Math.min(minStrength, strength); maxStrength = Math.max(maxStrength, strength); strengthMap.put(idString, strength); counter++; } for (Entry<String, Double> entry : strengthMap.entrySet()) { edgeStrengthMap.put( entry.getKey(), (entry.getValue() - minStrength) / (maxStrength - minStrength)); } }
/**
 * Calculates the max score for each point.
 *
 * <p>For every point, each grid is walked from its root down: first {@code alpha} levels for the
 * counting cell only, then level by level up to {@code levels} for both the counting and the
 * sampling cell. On each of the latter levels the best counting grid and the best sampling grid
 * are chosen by distance and passed to {@code calculateScore}.
 *
 * <p>NOTE(review): the centers are handed to {@code cellFinder} on every descent; presumably it
 * updates them in place. The order of all helper calls is kept exactly for that reason.
 */
private void calculateAllScores() {
  final int gridCount = grids.length;
  TreeNode[] countingCell = new TreeNode[gridCount];
  TreeNode[] samplingCell = new TreeNode[gridCount];
  double[][] countingCenter = new double[gridCount][dimensions];
  double[][] samplingCenter = new double[gridCount][dimensions];

  for (int p = 0; p < points.length; ++p) {
    // restart every grid at its root with fresh centers
    for (int g = 0; g < gridCount; ++g) {
      countingCell[g] = root[g];
      samplingCell[g] = root[g];
      countingCenter[g] = createPoint(dimensions, Rp / 2);
      samplingCenter[g] = createPoint(dimensions, Rp / 2);
    }
    double countingRadius = Rp;
    double samplingRadius = Rp;

    // move the counting cells alpha levels ahead of the sampling cells
    for (int level = 0; level < alpha; ++level) {
      countingRadius /= 2;
      for (int g = 0; g < gridCount; ++g) {
        int child = cellFinder(points[p], countingCenter[g], grids[g], countingRadius / 2);
        countingCell[g] = countingCell[g].getChild(child);
      }
    }

    for (int level = alpha; level <= levels; ++level) {
      // grid whose counting center is closest to the (moved) point
      int countingGrid = -1;
      double smallest = Double.MAX_VALUE;
      for (int g = 0; g < gridCount; ++g) {
        double candidate =
            measure.calculateDistance(move(points[p], grids[g], true), countingCenter[g]);
        if (candidate < smallest) {
          smallest = candidate;
          countingGrid = g;
        }
      }

      // grid whose sampling center is closest to the chosen counting center
      int samplingGrid = -1;
      smallest = Double.MAX_VALUE;
      for (int g = 0; g < gridCount; ++g) {
        double candidate =
            measure.calculateDistance(
                move(samplingCenter[g], grids[g], false),
                move(countingCenter[countingGrid], grids[countingGrid], false));
        if (candidate < smallest) {
          smallest = candidate;
          samplingGrid = g;
        }
      }

      countingRadius /= 2;
      samplingRadius /= 2;
      calculateScore(
          countingCell[countingGrid],
          samplingCell[samplingGrid],
          p,
          level,
          samplingCenter[samplingGrid]);

      // descend one level in every grid unless the deepest level is reached
      if (level < levels) {
        for (int g = 0; g < gridCount; ++g) {
          int child = cellFinder(points[p], countingCenter[g], grids[g], countingRadius / 2);
          countingCell[g] = countingCell[g].getChild(child);
          child = cellFinder(points[p], samplingCenter[g], grids[g], samplingRadius / 2);
          samplingCell[g] = samplingCell[g].getChild(child);
        }
      }
    }
  }
}
/**
 * Builds an agglomerative hierarchical cluster model for the example set at the input port.
 *
 * <p>The pairwise distances of all examples are written into the upper triangle of a distance
 * matrix and one leaf node is created per example. Then the two clusters proposed by the
 * linkage method are merged repeatedly until a single root node remains. The resulting model
 * and the example set are delivered at the output ports.
 *
 * @throws OperatorException if reading the input, a check on the example set or the clustering
 *     fails
 */
@Override
public void doWork() throws OperatorException {
  ExampleSet exampleSet = exampleSetInput.getData(ExampleSet.class);
  DistanceMeasure measure = measureHelper.getInitializedMeasure(exampleSet);

  // additional checks
  Tools.onlyNonMissingValues(exampleSet, getOperatorClassName(), this, new String[0]);
  Tools.checkAndCreateIds(exampleSet);

  Attribute idAttribute = exampleSet.getAttributes().getId();
  boolean idAttributeIsNominal = idAttribute.isNominal();

  final int exampleCount = exampleSet.size();
  DistanceMatrix matrix = new DistanceMatrix(exampleCount);
  Map<Integer, HierarchicalClusterNode> clusterMap =
      new HashMap<Integer, HierarchicalClusterNode>(exampleCount);
  int[] clusterIds = new int[exampleCount];

  // filling the distance matrix (only entries with column > row) and creating the leaves
  int nextClusterId = 0;
  for (Example rowExample : exampleSet) {
    checkForStop();
    clusterIds[nextClusterId] = nextClusterId;

    int column = 0;
    for (Example columnExample : exampleSet) {
      if (column > nextClusterId) {
        matrix.set(nextClusterId, column, measure.calculateDistance(rowExample, columnExample));
      }
      column++;
    }

    // the leaf carries the id either as string or as number, depending on the id attribute
    HierarchicalClusterLeafNode leaf =
        idAttributeIsNominal
            ? new HierarchicalClusterLeafNode(
                nextClusterId, rowExample.getValueAsString(idAttribute))
            : new HierarchicalClusterLeafNode(nextClusterId, rowExample.getValue(idAttribute));
    clusterMap.put(nextClusterId, leaf);
    nextClusterId++;
  }

  // creating linkage method: single linkage unless one of the other modes is selected
  AbstractLinkageMethod linkage = new SingleLinkageMethod(matrix, clusterIds);
  String mode = getParameterAsString(PARAMETER_MODE);
  if (mode.equals(modes[1])) {
    linkage = new CompleteLinkageMethod(matrix, clusterIds);
  } else if (mode.equals(modes[2])) {
    linkage = new AverageLinkageMethod(matrix, clusterIds);
  }

  // now building agglomerative tree bottom up
  while (clusterMap.size() > 1) {
    Agglomeration agglomeration = linkage.getNextAgglomeration(nextClusterId, clusterMap);
    HierarchicalClusterNode merged =
        new HierarchicalClusterNode(nextClusterId, agglomeration.getDistance());

    HierarchicalClusterNode first = clusterMap.get(agglomeration.getClusterId1());
    HierarchicalClusterNode second = clusterMap.get(agglomeration.getClusterId2());
    merged.addSubNode(first);
    merged.addSubNode(second);

    // the merged node replaces its two children in the map
    clusterMap.remove(agglomeration.getClusterId1());
    clusterMap.remove(agglomeration.getClusterId2());
    clusterMap.put(nextClusterId, merged);
    nextClusterId++;
  }

  // creating model from the single remaining node
  HierarchicalClusterNode rootNode = clusterMap.values().iterator().next();
  HierarchicalClusterModel model = new DendogramHierarchicalClusterModel(rootNode);

  // registering visualizer
  ObjectVisualizerService.addObjectVisualizer(
      model, new ExampleVisualizer((ExampleSet) exampleSet.clone()));

  modelOutput.deliver(model);
  exampleSetOutput.deliver(exampleSet);
}
@Override public ExampleSet apply(ExampleSet exampleSet) throws OperatorException { // creating kernel and settings from Parameters int k = Math.min(100, exampleSet.getAttributes().size() * 2); int size = exampleSet.size(); switch (getParameterAsInt(PARAMETER_SAMPLE)) { case SAMPLE_ABSOLUTE: size = getParameterAsInt(PARAMETER_SAMPLE_SIZE); break; case SAMPLE_RELATIVE: size = (int) Math.round(exampleSet.size() * getParameterAsDouble(PARAMETER_SAMPLE_RATIO)); break; } DistanceMeasure distanceMeasure = new EuclideanDistance(); distanceMeasure.init(exampleSet); // finding farthest and nearest example to mean Vector double[] meanVector = getMeanVector(exampleSet); Candidate min = new Candidate(meanVector, Double.POSITIVE_INFINITY, 0); Candidate max = new Candidate(meanVector, Double.NEGATIVE_INFINITY, 0); int i = 0; for (Example example : exampleSet) { double[] exampleValues = getExampleValues(example); Candidate current = new Candidate( exampleValues, Math.abs(distanceMeasure.calculateDistance(meanVector, exampleValues)), i); if (current.compareTo(min) < 0) { min = current; } if (current.compareTo(max) > 0) { max = current; } i++; } ArrayList<Candidate> recentlySelected = new ArrayList<Candidate>(10); int[] partition = new int[exampleSet.size()]; int numberOfSelectedExamples = 2; recentlySelected.add(min); recentlySelected.add(max); partition[min.getExampleIndex()] = 1; partition[max.getExampleIndex()] = 1; double[] minimalDistances = new double[exampleSet.size()]; Arrays.fill(minimalDistances, Double.POSITIVE_INFINITY); // running now through examples, checking for smallest distance to one of the candidates while (numberOfSelectedExamples < size) { TreeSet<Candidate> candidates = new TreeSet<Candidate>(); i = 0; // check distance only for candidates recently selected. 
for (Example example : exampleSet) { // if example not has been selected allready if (partition[i] == 0) { double[] exampleValues = getExampleValues(example); for (Candidate candidate : recentlySelected) { minimalDistances[i] = Math.min( minimalDistances[i], Math.abs( distanceMeasure.calculateDistance(exampleValues, candidate.getValues()))); } Candidate newCandidate = new Candidate(exampleValues, minimalDistances[i], i); candidates.add(newCandidate); if (candidates.size() > k) { Iterator<Candidate> iterator = candidates.iterator(); iterator.next(); iterator.remove(); } } i++; } // clearing recently selected since now new ones will be selected recentlySelected.clear(); // now running in descending order through candidates and adding to selected // IM: descendingIterator() is not available in Java versions less than 6 !!! // IM: Bad workaround for now by adding all candidates into a list and using a listIterator() // and hasPrevious... /* Iterator<Candidate> descendingIterator = candidates.descendingIterator(); while (descendingIterator.hasNext() && numberOfSelectedExamples < desiredNumber) { Candidate candidate = descendingIterator.next(); */ List<Candidate> reverseCandidateList = new LinkedList<Candidate>(); Iterator<Candidate> it = candidates.iterator(); while (it.hasNext()) { reverseCandidateList.add(it.next()); } ListIterator<Candidate> lit = reverseCandidateList.listIterator(reverseCandidateList.size() - 1); while (lit.hasPrevious()) { Candidate candidate = lit.previous(); // IM: end of workaround boolean existSmallerDistance = false; Iterator<Candidate> addedIterator = recentlySelected.iterator(); // test if a distance to recently selected is smaller than previously calculated minimal // distance // if one exists: This is not selected while (addedIterator.hasNext()) { double distance = Math.abs( distanceMeasure.calculateDistance( addedIterator.next().getValues(), candidate.getValues())); existSmallerDistance = existSmallerDistance || distance < 
candidate.getDistance(); } if (!existSmallerDistance) { recentlySelected.add(candidate); partition[candidate.getExampleIndex()] = 1; numberOfSelectedExamples++; } else break; } } // building new exampleSet containing only Examples with indices in selectedExamples SplittedExampleSet sample = new SplittedExampleSet(exampleSet, new Partition(partition, 2)); sample.selectSingleSubset(1); return sample; }