Example 1
  /**
   * Returns class probabilities. When minimum expected cost approach is chosen, returns probability
   * one for class with the minimum expected misclassification cost. Otherwise it returns the
   * probability distribution returned by the base classifier.
   *
   * @param instance the instance to be classified
   * @return the computed distribution for the given instance
   * @throws Exception if instance could not be classified successfully
   */
  public double[] distributionForInstance(Instance instance) throws Exception {

    // Plain cost-insensitive mode: pass the base classifier's distribution through.
    if (!m_MinimizeExpectedCost) {
      return m_Classifier.distributionForInstance(instance);
    }

    // Minimum-expected-cost mode: weight the predicted distribution by the
    // misclassification cost matrix and pick the cheapest class.
    double[] pred = m_Classifier.distributionForInstance(instance);
    double[] costs = m_CostMatrix.expectedCosts(pred, instance);

    // Collapse the distribution onto the single cheapest class. Ties are
    // resolved by Utils.minIndex (first minimum wins).
    // NOTE(review): a hard 0/1 distribution loses the base classifier's
    // confidence information; a cost-weighted soft distribution might be
    // preferable here.
    int classIndex = Utils.minIndex(costs);
    for (int i = 0; i < pred.length; i++) {
      pred[i] = (i == classIndex) ? 1.0 : 0.0;
    }
    return pred;
  }
Example 2
 /**
  * Assigns a training instance (by index) to its nearest cluster.
  *
  * <p>For each cluster, the candidate merge of the instance into that cluster
  * is scored by the merged probability mass times the Jensen-Shannon
  * divergence of the pair; the cluster with the smallest score wins.
  *
  * @param instIdx index of the instance to be clustered
  * @param input object describing the statistics of the training dataset
  * @param T the current partition
  * @return index of the cluster with the minimum distance to the instance
  */
 private int clusterInstance(int instIdx, Input input, Partition T) {
   double[] dist = new double[m_numCluster];
   for (int c = 0; c < m_numCluster; c++) {
     // Combined mass of the instance and cluster c, and their relative weights.
     double mass = input.Px[instIdx] + T.Pt[c];
     double wInstance = input.Px[instIdx] / mass;
     double wCluster = T.Pt[c] / mass;
     dist[c] = mass * JS(instIdx, input, T, c, wInstance, wCluster);
   }
   return Utils.minIndex(dist);
 }
  /**
   * Selects the pool row whose predicted class distribution is closest to
   * uniform, restricted to instances that are misclassified by the estimator
   * and whose value for {@code desiredAttr} has not been bought (is missing).
   *
   * <p>Closeness to uniform is measured by the relative entropy
   * KL(P||U) = sum_i p_i log(p_i) + log(k); since log(k) is constant, only
   * sum_i p_i log(p_i) is computed (via {@code MyXLogX}) and minimized.
   * Ties for the minimum are broken uniformly at random.
   *
   * @param pool the pool of candidate instances
   * @param myEstimator the classifier used to predict class distributions
   * @param desiredAttr index of the attribute whose value would be bought
   * @return index of the selected row, or -1 if no instance qualifies
   */
  public int SelectRow_KLDivergenceMisclassified(
      Instances pool, Classifier myEstimator, int desiredAttr) {

    int numInstances = pool.numInstances();
    double[] KLDivs = new double[numInstances];
    boolean[] isValidInstance = new boolean[numInstances];

    for (int i = 0; i < numInstances; i++) {
      Instance inst = pool.instance(i);
      // Default to "excluded": MAX_VALUE keeps invalid rows away from the minimum.
      KLDivs[i] = Double.MAX_VALUE;
      isValidInstance[i] = false;
      try {
        boolean misclassified = inst.classValue() != myEstimator.classifyInstance(inst);
        if (inst.isMissing(desiredAttr) && misclassified) {
          double[] probs = myEstimator.distributionForInstance(inst);
          double kl = 0;
          for (int j = 0; j < probs.length; j++) {
            kl += MyXLogX(probs[j]);
          }
          KLDivs[i] = kl;
          isValidInstance[i] = true;
        }
      } catch (Exception e) {
        // Classification failed for this instance: leave it excluded. The old
        // code kept stale 'misclassified'/'probs' values from a previous
        // iteration, which could NPE or select the wrong row.
        e.printStackTrace();
      }
    }

    double leastDivergence = KLDivs[Utils.minIndex(KLDivs)];
    int numLeastDivs = 0;
    for (int i = 0; i < numInstances; i++) {
      if (isValidInstance[i] && KLDivs[i] == leastDivergence) {
        numLeastDivs++;
      }
    }
    // No qualifying instance: previously r.nextInt(0) threw
    // IllegalArgumentException here instead of signalling "none found".
    if (numLeastDivs == 0) {
      return -1;
    }

    // Uniformly pick one row among the tied minima.
    int randomInstance = r.nextInt(numLeastDivs);
    int index = 0;
    for (int i = 0; i < numInstances; i++) {
      if (isValidInstance[i] && KLDivs[i] == leastDivergence) {
        if (index == randomInstance) {
          return i;
        }
        index++;
      }
    }
    return -1;
  }
Example 4
 /**
  * Clusters a given instance; this is the method defined in the Clusterer
  * interface. Treats the instance as carrying the uniform prior mass and
  * returns the cluster minimizing the mass-weighted JS divergence.
  */
 @Override
 public int clusterInstance(Instance instance) throws Exception {
   // NOTE(review): assumes input.sumVals is a floating-point field; if it
   // were an int this division would truncate to 0 — confirm.
   double prior = 1 / input.sumVals;
   double[] dist = new double[m_numCluster];
   for (int c = 0; c < m_numCluster; c++) {
     // Combined mass of the new instance and cluster c, and their weights.
     double mass = bestT.Pt[c] + prior;
     double wInstance = prior / mass;
     double wCluster = bestT.Pt[c] / mass;
     dist[c] = mass * JS(instance, c, wInstance, wCluster);
   }
   return Utils.minIndex(dist);
 }
  /**
   * Selects the pool row with the lowest predicted probability for its true
   * class (the most erroneous instance), restricted to instances whose value
   * for {@code desiredAttr} has not been bought (is missing). Ties for the
   * minimum are broken uniformly at random.
   *
   * @param pool the pool of candidate instances
   * @param myEstimator the classifier used to predict class probabilities
   * @param desiredAttr index of the attribute whose value would be bought
   * @return index of the selected row, or -1 if no instance qualifies
   */
  int SelectRow_ErrorMargin(Instances pool, Classifier myEstimator, int desiredAttr) {

    int numInstances = pool.numInstances();
    double[] classProb = new double[numInstances];
    boolean[] isValidInstance = new boolean[numInstances];

    for (int i = 0; i < numInstances; i++) {
      Instance inst = pool.instance(i);
      // Default to "excluded": +infinity keeps the row away from the minimum.
      classProb[i] = Double.POSITIVE_INFINITY;
      isValidInstance[i] = false;
      if (inst.isMissing(desiredAttr)) {
        try {
          double[] probs = myEstimator.distributionForInstance(inst);
          classProb[i] = probs[(int) inst.classValue()];
          isValidInstance[i] = true;
        } catch (Exception e) {
          // Estimation failed: leave the row excluded. The old code left the
          // slot at its default 0.0, which could masquerade as the minimum
          // and leave the tie-breaking loop with zero candidates.
          e.printStackTrace();
        }
      }
    }

    double leastCorrect = classProb[Utils.minIndex(classProb)];
    int numLeastCorrect = 0;
    for (int i = 0; i < numInstances; i++) {
      if (isValidInstance[i] && classProb[i] == leastCorrect) {
        numLeastCorrect++;
      }
    }
    // No qualifying instance: previously r.nextInt(0) threw
    // IllegalArgumentException here instead of signalling "none found".
    if (numLeastCorrect == 0) {
      return -1;
    }

    // Uniformly pick one row among the tied minima.
    int randomInstance = r.nextInt(numLeastCorrect);
    int index = 0;
    for (int i = 0; i < numInstances; i++) {
      if (isValidInstance[i] && classProb[i] == leastCorrect) {
        if (index == randomInstance) {
          return i;
        }
        index++;
      }
    }
    return -1;
  }