@Override
  public Model learn(ExampleSet exampleSet) throws OperatorException {
    // Gathers every example with a positive weight into a collection that is searched with the
    // configured distance measure, then hands that collection to a local polynomial regression
    // model together with the neighborhood, kernel, degree and ridge settings.
    DistanceMeasure distance = DistanceMeasures.createMeasure(this);
    distance.init(exampleSet);
    GeometricDataCollection<RegressionData> samples = new LinearList<RegressionData>(distance);

    // Weights are used when requested explicitly ...
    boolean weighted = getParameterAsBoolean(PARAMETER_USE_EXAMPLE_WEIGHTS);
    // ... and always when robust estimation is switched on, because that step works on a clone
    // of the example set and returns the set whose weights are read below.
    if (getParameterAsBoolean(PARAMETER_USE_ROBUST_ESTIMATION)) {
      weighted = true;
      try {
        LocalPolynomialExampleWeightingOperator weighting =
            OperatorService.createOperator(LocalPolynomialExampleWeightingOperator.class);
        exampleSet = weighting.doWork((ExampleSet) exampleSet.clone(), this);
      } catch (OperatorCreationException e) {
        throw new UserError(this, 904, "LocalPolynomialExampleWeighting", e.getMessage());
      }
    }

    Attributes attributes = exampleSet.getAttributes();
    Attribute label = attributes.getLabel();
    Attribute weightAttribute = attributes.getWeight();
    boolean readWeights = weightAttribute != null && weighted;

    for (Example example : exampleSet) {
      double target = example.getValue(label);
      double weight = readWeights ? example.getValue(weightAttribute) : 1d;

      // examples without influence (weight not strictly positive) are left out
      if (!(weight > 0d)) {
        continue;
      }

      // copy the attribute values of this example
      double[] values = new double[attributes.size()];
      int position = 0;
      for (Attribute attribute : attributes) {
        values[position++] = example.getValue(attribute);
      }

      // the value vector serves both as the search key and as part of the stored payload
      samples.add(values, new RegressionData(values, target, weight));
    }

    return new LocalPolynomialRegressionModel(
        exampleSet,
        samples,
        Neighborhoods.createNeighborhood(this),
        SmoothingKernels.createKernel(this),
        getParameterAsInt(PARAMETER_DEGREE),
        getParameterAsDouble(PARAMETER_RIDGE));
  }
  public void readModel(int n, int k, double[][] points, int[] weight, DistanceMeasure measure)
      throws OperatorException {
    // Sets up knnCollection: a collection delivered at the model input port is reused when it
    // fits the current data, otherwise a fresh one is created. newCollection records which of
    // the two happened.
    if (!modelInput.isConnected()) {
      knnCollection = new KNNCollection(n, k, points, weight);
      newCollection = true;
      return;
    }

    KNNCollectionModel input = modelInput.getData(KNNCollectionModel.class);
    knnCollection = input.get();
    newCollection = false;

    // The delivered collection is usable only if it holds at least k neighbors, was built on
    // the same points and with the same class of distance measure.
    boolean kTooSmall = k > knnCollection.getK();
    boolean reusable =
        !kTooSmall
            && Arrays.deepEquals(knnCollection.getPoints(), points)
            && measure.getClass().toString().equals(input.measure.getClass().toString());

    if (reusable) {
      this.logNote(" Model at input port used for speeding up the operator.");
    } else {
      String reason =
          kTooSmall
              ? "Model at input port can not be used (k too small)."
              : "Model at input port can not be used (Model andExampleSet not matching).";
      this.logNote(reason);
      knnCollection = new KNNCollection(n, k, points, weight);
      newCollection = true;
    }

    // A reused collection may hold more neighbors than requested: work on a shrunken copy so
    // that the delivered collection itself is not modified.
    if (k < knnCollection.getK()) {
      knnCollection = KNNCollection.clone(knnCollection);
      knnCollection.shrink(knnCollection.getK() - k);
    }
  }
コード例 #3
0
 /**
  * Initializes the measure for the given example set: runs the superclass initialization,
  * lets {@code Tools.onlyNominalAttributes} check the example set, and records for every
  * attribute position whether that attribute is nominal.
  *
  * @param exampleSet the example set this measure will be applied to
  * @throws OperatorException if the superclass initialization or the attribute check fails
  */
 @Override
 public void init(ExampleSet exampleSet) throws OperatorException {
   super.init(exampleSet);
   Tools.onlyNominalAttributes(exampleSet, "nominal similarities");

   // one flag per attribute, in iteration order of the attribute set
   boolean[] nominalFlags = new boolean[exampleSet.getAttributes().size()];
   int position = 0;
   for (Attribute attribute : exampleSet.getAttributes()) {
     nominalFlags[position++] = attribute.isNominal();
   }
   this.useAttribute = nominalFlags;
 }
コード例 #4
0
  /**
   * Builds a panel with a row of radio buttons at the top and one of four views of the given
   * similarity measure in the center: a table, a graph, a histogram of sampled pairwise
   * similarities, and a k-distance plot. The table view is shown initially.
   *
   * <p>The toggle panel is added first and the active view second, so the active view is always
   * the child at index 1; every listener replaces exactly that child.
   *
   * @param sim provides the distance measure that all views are based on
   * @param exampleSet the examples whose pairwise similarities are visualized
   */
  public SimilarityVisualization(SimilarityMeasureObject sim, ExampleSet exampleSet) {
    super();
    setLayout(new BorderLayout());

    DistanceMeasure measure = sim.getDistanceMeasure();
    ButtonGroup group = new ButtonGroup();
    JPanel togglePanel = new JPanel(new FlowLayout(FlowLayout.LEFT));

    // similarity table
    final JComponent tableView = new SimilarityTable(measure, exampleSet);
    final JRadioButton tableButton = new JRadioButton("Table View", true);
    tableButton.addActionListener(
        new ActionListener() {
          public void actionPerformed(ActionEvent e) {
            if (tableButton.isSelected()) {
              remove(1);
              add(tableView, BorderLayout.CENTER);
              // adding a child to a displayed container only invalidates the layout; it has to
              // be validated again before the new child gets its bounds
              revalidate();
              repaint();
            }
          }
        });
    group.add(tableButton);
    togglePanel.add(tableButton);

    // graph view
    final JComponent graphView =
        new GraphViewer<String, String>(new SimilarityGraphCreator(measure, exampleSet));
    final JRadioButton graphButton = new JRadioButton("Graph View", false);
    graphButton.addActionListener(
        new ActionListener() {
          public void actionPerformed(ActionEvent e) {
            if (graphButton.isSelected()) {
              remove(1);
              add(graphView, BorderLayout.CENTER);
              revalidate();
              repaint();
            }
          }
        });
    group.add(graphButton);
    togglePanel.add(graphButton);

    // histogram view
    DataTable dataTable = new SimpleDataTable("Histogram", new String[] {"Histogram"});
    // Each ordered pair of distinct examples is sampled with this probability, which keeps the
    // expected number of histogram rows linear in the number of examples.
    double sampleRatio = Math.min(1.0d, 500.0d / exampleSet.size());

    Random random = new Random();
    int i = 0;
    for (Example example : exampleSet) {
      int j = 0;
      for (Example compExample : exampleSet) {
        if (i != j && random.nextDouble() < sampleRatio) {
          double simValue = measure.calculateSimilarity(example, compExample);
          dataTable.add(new SimpleDataTableRow(new double[] {simValue}));
        }
        j++;
      }
      i++;
    }

    final PlotterConfigurationModel settings =
        new PlotterConfigurationModel(PlotterConfigurationModel.HISTOGRAM_PLOT, dataTable);
    settings.enablePlotColumn(0);
    settings.setParameterAsInt(HistogramChart.PARAMETER_NUMBER_OF_BINS, 100);

    final JRadioButton histogramButton = new JRadioButton("Histogram View", false);
    histogramButton.addActionListener(
        new ActionListener() {
          public void actionPerformed(ActionEvent e) {
            if (histogramButton.isSelected()) {
              remove(1);
              // the plotter component is fetched only when the view is actually requested
              add(settings.getPlotter().getPlotter(), BorderLayout.CENTER);
              revalidate();
              repaint();
            }
          }
        });
    group.add(histogramButton);
    togglePanel.add(histogramButton);

    // K distance view
    final SimilarityKDistanceVisualization kDistancePlotter =
        new SimilarityKDistanceVisualization(measure, exampleSet);
    final JRadioButton kdistanceButton = new JRadioButton("k-Distance View", false);
    kdistanceButton.addActionListener(
        new ActionListener() {
          public void actionPerformed(ActionEvent e) {
            if (kdistanceButton.isSelected()) {
              remove(1);
              add(kDistancePlotter, BorderLayout.CENTER);
              revalidate();
              repaint();
            }
          }
        });
    group.add(kdistanceButton);
    togglePanel.add(kdistanceButton);

    // index 0: toggle panel, index 1: currently shown view
    add(togglePanel, BorderLayout.NORTH);
    add(tableView, BorderLayout.CENTER);
  }
コード例 #5
0
  public double[] evaluate() {
    // For every point the returned array holds the largest MDEF / sigma-MDEF ratio found over
    // all examined radii (never below 0). The higher the ratio, the more outlying the point;
    // a ratio of 3 or less is not considered an outlier.
    double[] scores = new double[n];
    int rowLength = 2 * n;
    DistancePair[][] criticalDistances = new DistancePair[n][rowLength];

    // Preprocessing: row i receives, for every point j, the distance d(i, j) (cardinality
    // marker -2, index j) and the alpha-critical distance d(i, j) / alpha (marker -1, index -1).
    for (int i = 0; i < n; i++) {
      DistancePair[] row = criticalDistances[i];
      int pointSlot = 2 * i;
      int alphaSlot = pointSlot + 1;
      int nextFree = alphaSlot + 1;

      // the point itself, at distance 0
      row[pointSlot] = new DistancePair(0, -2, i);
      row[alphaSlot] = new DistancePair(0, -1, -1);

      for (int j = i + 1; j < n; j++) {
        // each distance is computed once and mirrored into row j, which assumes that the
        // distance measure is symmetric
        double distance = measure.calculateDistance(points[i], points[j]);
        double alphaDistance = distance / alpha;

        row[nextFree++] = new DistancePair(distance, -2, j);
        row[nextFree++] = new DistancePair(alphaDistance, -1, -1);

        criticalDistances[j][pointSlot] = new DistancePair(distance, -2, i);
        criticalDistances[j][alphaSlot] = new DistancePair(alphaDistance, -1, -1);
      }

      Arrays.sort(row);

      // replace the markers by the accumulated weight of all real points up to this position
      int accumulated = 0;
      for (DistancePair pair : row) {
        if (pair.cardinality == -2) {
          accumulated += weight[pair.index];
        }
        pair.cardinality = accumulated;
      }
    }

    // Computation of MDEF
    for (int i = 0; i < n; i++) {
      DistancePair[] row = criticalDistances[i];
      scores[i] = 0;

      for (int j = 0; j < rowLength; j++) {
        DistancePair pivot = row[j];
        // neighborhoods with fewer than nmin points are skipped
        if (pivot.cardinality < nmin) {
          continue;
        }
        // of several entries with the same distance only the last one is evaluated
        boolean hasSuccessor = j != rowLength - 1;
        if (hasSuccessor && pivot.distance == row[j + 1].distance) {
          continue;
        }

        // alpha r distance
        double alphaR = pivot.distance * alpha;
        int nPR = pivot.cardinality;
        int nPRAlpha = find(0, rowLength, alphaR, row);

        // first pass over the neighbors: mean of their alpha-r counts
        double countSum = 0.0;
        for (int k = 0; k <= j; k++) {
          int neighbor = row[k].index;
          if (neighbor != -1) {
            countSum += find(0, rowLength, alphaR, criticalDistances[neighbor]);
          }
        }
        double nHatPRAlpha = countSum / nPR;

        // second pass: squared deviations from that mean
        double squaredDeviations = 0.0;
        for (int k = 0; k <= j; k++) {
          int neighbor = row[k].index;
          if (neighbor != -1) {
            double delta = find(0, rowLength, alphaR, criticalDistances[neighbor]) - nHatPRAlpha;
            squaredDeviations += delta * delta;
          }
        }
        double sigmaPRAlpha = Math.sqrt(squaredDeviations / nPR);

        double mdef = 1.0 - nPRAlpha / nHatPRAlpha;
        double sigmaMdef = sigmaPRAlpha / nHatPRAlpha;
        double ratio = (sigmaMdef == 0) ? 0 : mdef / sigmaMdef;
        if (ratio > scores[i]) {
          scores[i] = ratio;
        }
      }
    }

    return scores;
  }
コード例 #6
0
 /**
  * Initializes the measure for the given example set: runs the superclass initialization and
  * then lets {@code Tools.onlyNumericalAttributes} check the example set.
  *
  * @param exampleSet the example set this measure will be applied to
  * @throws OperatorException if the superclass initialization or the attribute check fails
  */
 @Override
 public void init(ExampleSet exampleSet) throws OperatorException {
   super.init(exampleSet);
   final String measureFamily = "value based similarities";
   Tools.onlyNumericalAttributes(exampleSet, measureFamily);
 }
  /**
   * Rebuilds the edges of the graph from scratch.
   *
   * <p>All edges registered in {@code edgeLabelMap} are removed, every unordered pair of
   * examples is turned into a {@code SortableEdge} carrying its distance or similarity, the
   * edges are sorted, and at most as many edges as the slider value requests are added to the
   * graph. For every added edge a label is stored in {@code edgeLabelMap} and a strength,
   * min-max normalized over the added edges, in {@code edgeStrengthMap}.
   */
  private void addEdges() {
    // remove old edges if available
    for (String oldEdge : edgeLabelMap.keySet()) {
      graph.removeEdge(oldEdge);
    }
    edgeLabelMap.clear();

    boolean isDistance = measure.isDistance();
    Attribute id = exampleSet.getAttributes().getId();
    int exampleCount = exampleSet.size();
    List<SortableEdge> sortableEdges = new LinkedList<SortableEdge>();
    for (int i = 0; i < exampleCount; i++) {
      Example example = exampleSet.getExample(i);
      String firstVertex = example.getValueAsString(id);
      for (int j = i + 1; j < exampleCount; j++) {
        Example comExample = exampleSet.getExample(j);
        String secondVertex = comExample.getValueAsString(id);
        // the sort direction given to each edge depends on whether the value is a distance or
        // a similarity
        if (isDistance) {
          sortableEdges.add(
              new SortableEdge(
                  firstVertex,
                  secondVertex,
                  null,
                  measure.calculateDistance(example, comExample),
                  SortableEdge.DIRECTION_INCREASE));
        } else {
          sortableEdges.add(
              new SortableEdge(
                  firstVertex,
                  secondVertex,
                  null,
                  measure.calculateSimilarity(example, comExample),
                  SortableEdge.DIRECTION_DECREASE));
        }
      }
    }

    Collections.sort(sortableEdges);

    int numberOfEdges = distanceSlider.getValue();
    int counter = 0;
    double minStrength = Double.POSITIVE_INFINITY;
    double maxStrength = Double.NEGATIVE_INFINITY;
    Map<String, Double> strengthMap = new HashMap<String, Double>();
    for (SortableEdge sortableEdge : sortableEdges) {
      // stop once exactly numberOfEdges edges were added ('>' would add one edge too many)
      if (counter >= numberOfEdges) {
        break;
      }

      String idString = edgeFactory.create();
      graph.addEdge(
          idString,
          sortableEdge.getFirstVertex(),
          sortableEdge.getSecondVertex(),
          EdgeType.UNDIRECTED);
      edgeLabelMap.put(idString, Tools.formatIntegerIfPossible(sortableEdge.getEdgeValue()));

      double strength = sortableEdge.getEdgeValue();

      minStrength = Math.min(minStrength, strength);
      maxStrength = Math.max(maxStrength, strength);

      strengthMap.put(idString, strength);

      counter++;
    }

    // Min-max normalization. With a single edge, or when all shown edges share one value, the
    // range is 0 and the division would store NaN; such edges get strength 0 instead.
    double range = maxStrength - minStrength;
    for (Entry<String, Double> entry : strengthMap.entrySet()) {
      double normalized = range > 0d ? (entry.getValue() - minStrength) / range : 0d;
      edgeStrengthMap.put(entry.getKey(), normalized);
    }
  }
コード例 #8
0
  /*
   * Walks every point down the cell trees of all grids and calls calculateScore once per level
   * from alpha up to and including levels, each time with the best-fitting counting and
   * sampling cells (the original note describes the outcome as the max score for each point).
   *
   * NOTE(review): the helpers cellFinder, move and calculateScore are not visible here.
   * The center arrays are never reassigned between the per-point reset and their use in the
   * distance comparisons, so cellFinder presumably updates the passed center in place - confirm
   * before reordering any statement in this method.
   */
  private void calculateAllScores() {
    // current counting / sampling node per grid
    TreeNode[] counting = new TreeNode[grids.length];
    TreeNode[] sampling = new TreeNode[grids.length];

    // center of the current counting / sampling cell per grid
    double[][] countingCenter = new double[grids.length][dimensions];
    double[][] samplingCenter = new double[grids.length][dimensions];

    for (int p = 0; p < points.length; ++p) {
      // restart at the root of every grid; the centers are replaced by fresh points created
      // with Rp / 2
      for (int g = 0; g < grids.length; ++g) {
        counting[g] = root[g];
        sampling[g] = root[g];

        countingCenter[g] = createPoint(dimensions, Rp / 2);
        samplingCenter[g] = createPoint(dimensions, Rp / 2);
      }

      double countingRadius = Rp;
      double samplingRadius = Rp;

      // descend alpha levels with the counting nodes only, halving the counting radius on each
      // level; the sampling nodes stay at the roots, so counting runs alpha levels ahead
      for (int level = 0; level < alpha; ++level) {
        countingRadius /= 2;
        for (int g = 0; g < grids.length; ++g) {
          int index = cellFinder(points[p], countingCenter[g], grids[g], countingRadius / 2);
          counting[g] = counting[g].getChild(index);
        }
      }
      for (int level = alpha; level <= levels; ++level) {
        // grid whose counting center is closest to the point after move(..., true)
        double dist = Double.MAX_VALUE;
        int cellIndex = -1;
        for (int g = 0; g < grids.length; ++g) {
          double newDistance =
              measure.calculateDistance(move(points[p], grids[g], true), countingCenter[g]);
          if (newDistance < dist) {
            dist = newDistance;
            cellIndex = g;
          }
        }
        // grid whose sampling center is closest to the chosen counting center, both passed
        // through move(..., false); cellIndex is still -1 here if no distance was below
        // Double.MAX_VALUE, which would make the array accesses below fail
        dist = Double.MAX_VALUE;
        int cellIndex2 = -1;
        for (int g = 0; g < grids.length; ++g) {
          double newDistance =
              measure.calculateDistance(
                  move(samplingCenter[g], grids[g], false),
                  move(countingCenter[cellIndex], grids[cellIndex], false));
          if (newDistance < dist) {
            dist = newDistance;
            cellIndex2 = g;
          }
        }
        // both radii are halved before scoring and before the next descent
        countingRadius /= 2;
        samplingRadius /= 2;
        calculateScore(
            counting[cellIndex], sampling[cellIndex2], p, level, samplingCenter[cellIndex2]);

        // move counting and sampling nodes of every grid one level down, except after the
        // last level
        if (level < levels)
          for (int g = 0; g < grids.length; ++g) {
            int nextChild = cellFinder(points[p], countingCenter[g], grids[g], countingRadius / 2);
            counting[g] = counting[g].getChild(nextChild);
            nextChild = cellFinder(points[p], samplingCenter[g], grids[g], samplingRadius / 2);
            sampling[g] = sampling[g].getChild(nextChild);
          }
      }
    }
  }
コード例 #9
0
  /**
   * Builds an agglomerative cluster tree over the input example set and delivers it as a
   * dendrogram model, together with the (unchanged) example set.
   *
   * <p>Every example starts as its own leaf cluster; the linkage method selected by
   * {@code PARAMETER_MODE} then repeatedly names two clusters to merge until a single root
   * remains.
   *
   * @throws OperatorException if reading the input, the checks, or a stop request fails
   */
  @Override
  public void doWork() throws OperatorException {
    ExampleSet exampleSet = exampleSetInput.getData(ExampleSet.class);
    DistanceMeasure measure = measureHelper.getInitializedMeasure(exampleSet);

    // additional checks
    Tools.onlyNonMissingValues(exampleSet, getOperatorClassName(), this, new String[0]);
    Tools.checkAndCreateIds(exampleSet);

    Attribute idAttribute = exampleSet.getAttributes().getId();
    boolean nominalIds = idAttribute.isNominal();
    int exampleCount = exampleSet.size();

    DistanceMatrix matrix = new DistanceMatrix(exampleCount);
    Map<Integer, HierarchicalClusterNode> clusterMap =
        new HashMap<Integer, HierarchicalClusterNode>(exampleCount);
    int[] clusterIds = new int[exampleCount];

    // Fill the part of the distance matrix above the diagonal and create one leaf per example.
    int nextClusterId = 0;
    for (Example rowExample : exampleSet) {
      checkForStop();
      clusterIds[nextClusterId] = nextClusterId;

      int column = 0;
      for (Example columnExample : exampleSet) {
        if (column > nextClusterId) {
          matrix.set(nextClusterId, column, measure.calculateDistance(rowExample, columnExample));
        }
        column++;
      }

      // leaves carry the example id either as string or as number
      HierarchicalClusterNode leaf;
      if (nominalIds) {
        leaf =
            new HierarchicalClusterLeafNode(
                nextClusterId, rowExample.getValueAsString(idAttribute));
      } else {
        leaf = new HierarchicalClusterLeafNode(nextClusterId, rowExample.getValue(idAttribute));
      }
      clusterMap.put(nextClusterId, leaf);
      nextClusterId++;
    }

    // Single linkage unless the mode parameter names complete or average linkage.
    AbstractLinkageMethod linkage = new SingleLinkageMethod(matrix, clusterIds);
    String mode = getParameterAsString(PARAMETER_MODE);
    if (mode.equals(modes[1])) {
      linkage = new CompleteLinkageMethod(matrix, clusterIds);
    } else if (mode.equals(modes[2])) {
      linkage = new AverageLinkageMethod(matrix, clusterIds);
    }

    // Build the tree bottom up: each step replaces two clusters by a new parent node.
    while (clusterMap.size() > 1) {
      Agglomeration merge = linkage.getNextAgglomeration(nextClusterId, clusterMap);
      HierarchicalClusterNode parent =
          new HierarchicalClusterNode(nextClusterId, merge.getDistance());
      parent.addSubNode(clusterMap.get(merge.getClusterId1()));
      parent.addSubNode(clusterMap.get(merge.getClusterId2()));
      clusterMap.remove(merge.getClusterId1());
      clusterMap.remove(merge.getClusterId2());
      clusterMap.put(nextClusterId, parent);
      nextClusterId++;
    }

    // The only remaining entry is the root of the tree.
    HierarchicalClusterNode rootNode = clusterMap.values().iterator().next();
    HierarchicalClusterModel model = new DendogramHierarchicalClusterModel(rootNode);

    // registering visualizer
    ObjectVisualizerService.addObjectVisualizer(
        model, new ExampleVisualizer((ExampleSet) exampleSet.clone()));

    modelOutput.deliver(model);
    exampleSetOutput.deliver(exampleSet);
  }
コード例 #10
0
  /**
   * Draws a sample in which the selected examples are spread out as far from each other as
   * possible, using Euclidean distance.
   *
   * <p>The example closest to and the example farthest from the mean vector are selected
   * first. Afterwards each round keeps, for every unselected example, its minimal distance to
   * all examples selected so far, collects the {@code k} examples with the largest such
   * distance, and selects them from the farthest downwards. A round ends as soon as a
   * candidate lies closer to an example selected in the same round than its recorded minimal
   * distance, or when the requested sample size is reached.
   *
   * <p>NOTE(review): this relies on {@code Candidate.compareTo} ordering candidates by their
   * distance; that class is not visible here.
   *
   * @param exampleSet the example set to sample from
   * @return a view on {@code exampleSet} that contains only the selected examples
   * @throws OperatorException if a parameter cannot be read or the distance measure cannot be
   *     initialized
   */
  @Override
  public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
    // number of candidates considered per round
    int k = Math.min(100, exampleSet.getAttributes().size() * 2);
    // requested sample size; defaults to the complete example set
    int size = exampleSet.size();
    switch (getParameterAsInt(PARAMETER_SAMPLE)) {
      case SAMPLE_ABSOLUTE:
        size = getParameterAsInt(PARAMETER_SAMPLE_SIZE);
        break;
      case SAMPLE_RELATIVE:
        size = (int) Math.round(exampleSet.size() * getParameterAsDouble(PARAMETER_SAMPLE_RATIO));
        break;
    }

    DistanceMeasure distanceMeasure = new EuclideanDistance();
    distanceMeasure.init(exampleSet);

    // finding farthest and nearest example to mean Vector
    double[] meanVector = getMeanVector(exampleSet);
    Candidate min = new Candidate(meanVector, Double.POSITIVE_INFINITY, 0);
    Candidate max = new Candidate(meanVector, Double.NEGATIVE_INFINITY, 0);
    int i = 0;
    for (Example example : exampleSet) {
      double[] exampleValues = getExampleValues(example);
      Candidate current =
          new Candidate(
              exampleValues,
              Math.abs(distanceMeasure.calculateDistance(meanVector, exampleValues)),
              i);
      if (current.compareTo(min) < 0) {
        min = current;
      }
      if (current.compareTo(max) > 0) {
        max = current;
      }
      i++;
    }

    List<Candidate> recentlySelected = new ArrayList<Candidate>(10);
    int[] partition = new int[exampleSet.size()];
    recentlySelected.add(min);
    recentlySelected.add(max);
    partition[min.getExampleIndex()] = 1;
    partition[max.getExampleIndex()] = 1;
    // nearest and farthest example can be one and the same (e.g. a single example); it must
    // then be counted once only, otherwise the sample ends up one example short
    int numberOfSelectedExamples = min.getExampleIndex() == max.getExampleIndex() ? 1 : 2;
    double[] minimalDistances = new double[exampleSet.size()];
    Arrays.fill(minimalDistances, Double.POSITIVE_INFINITY);

    // running now through examples, checking for smallest distance to one of the candidates
    while (numberOfSelectedExamples < size) {
      TreeSet<Candidate> candidates = new TreeSet<Candidate>();

      i = 0;
      // distances only need to be updated against the examples selected in the last round
      for (Example example : exampleSet) {
        // only examples that have not been selected already
        if (partition[i] == 0) {
          double[] exampleValues = getExampleValues(example);
          for (Candidate candidate : recentlySelected) {
            minimalDistances[i] =
                Math.min(
                    minimalDistances[i],
                    Math.abs(
                        distanceMeasure.calculateDistance(exampleValues, candidate.getValues())));
          }
          candidates.add(new Candidate(exampleValues, minimalDistances[i], i));
          // keep only the k largest candidates: drop the smallest one when over capacity
          if (candidates.size() > k) {
            Iterator<Candidate> iterator = candidates.iterator();
            iterator.next();
            iterator.remove();
          }
        }
        i++;
      }
      // clearing recently selected since now new ones will be selected
      recentlySelected.clear();

      // Nothing left to choose from (everything is selected already, or k is 0): stop instead
      // of spinning forever on a sample size that cannot be reached.
      if (candidates.isEmpty()) {
        break;
      }

      // Walk the candidates from the largest distance downwards. The copy is in ascending
      // order, so the walk starts at the LAST index; the farthest candidate must not be
      // skipped. The walk also stops once the requested sample size is reached.
      List<Candidate> ascendingCandidates = new ArrayList<Candidate>(candidates);
      for (int position = ascendingCandidates.size() - 1;
          position >= 0 && numberOfSelectedExamples < size;
          position--) {
        Candidate candidate = ascendingCandidates.get(position);

        // A candidate is rejected if an example selected in this round lies closer to it than
        // its recorded minimal distance; all smaller candidates are then left for later rounds.
        boolean existSmallerDistance = false;
        for (Candidate selected : recentlySelected) {
          double distance =
              Math.abs(
                  distanceMeasure.calculateDistance(selected.getValues(), candidate.getValues()));
          if (distance < candidate.getDistance()) {
            existSmallerDistance = true;
            break;
          }
        }
        if (existSmallerDistance) {
          break;
        }
        recentlySelected.add(candidate);
        partition[candidate.getExampleIndex()] = 1;
        numberOfSelectedExamples++;
      }
    }

    // building new exampleSet containing only the examples marked with 1 in the partition
    SplittedExampleSet sample = new SplittedExampleSet(exampleSet, new Partition(partition, 2));
    sample.selectSingleSubset(1);
    return sample;
  }