/** * Returns class probabilities. When minimum expected cost approach is chosen, returns probability * one for class with the minimum expected misclassification cost. Otherwise it returns the * probability distribution returned by the base classifier. * * @param instance the instance to be classified * @return the computed distribution for the given instance * @throws Exception if instance could not be classified successfully */ public double[] distributionForInstance(Instance instance) throws Exception { if (!m_MinimizeExpectedCost) { return m_Classifier.distributionForInstance(instance); } double[] pred = m_Classifier.distributionForInstance(instance); double[] costs = m_CostMatrix.expectedCosts(pred, instance); /* for (int i = 0; i < pred.length; i++) { System.out.print(pred[i] + " "); } System.out.println(); for (int i = 0; i < costs.length; i++) { System.out.print(costs[i] + " "); } System.out.println("\n"); */ // This is probably not ideal int classIndex = Utils.minIndex(costs); for (int i = 0; i < pred.length; i++) { if (i == classIndex) { pred[i] = 1.0; } else { pred[i] = 0.0; } } return pred; }
/**
 * Assigns a training instance to its nearest cluster.
 *
 * @param instIdx index of the instance to be clustered
 * @param input object describing the statistics of the training dataset
 * @param T the current partition
 * @return index of the cluster with the minimum distance to the instance
 */
private int clusterInstance(int instIdx, Input input, Partition T) {
  double[] dist = new double[m_numCluster];
  for (int cluster = 0; cluster < m_numCluster; cluster++) {
    // Weight of the merged (instance + cluster) mass, then the two mixture weights.
    double merged = input.Px[instIdx] + T.Pt[cluster];
    double wInstance = input.Px[instIdx] / merged;
    double wCluster = T.Pt[cluster] / merged;
    dist[cluster] = merged * JS(instIdx, input, T, cluster, wInstance, wCluster);
  }
  return Utils.minIndex(dist);
}
public int SelectRow_KLDivergenceMisclassified( Instances pool, Classifier myEstimator, int desiredAttr) { // for each instance with unbought desiredAttr and label = desiredLabel // measure KL-divergence (relative entropy between two prob distributions): // KL(P||Q) = sum_i p_i log (p_i/q_i) // withr respect to Q = Uniform, we have // KL(P||U) = sum_i p_i log(p_i) // choose (row) that is minimum (i.e. closest to uniform) int numInstances = pool.numInstances(); double[] KLDivs = new double[numInstances]; boolean[] isValidInstance = new boolean[numInstances]; boolean misclassified = false; double[] probs = null; Instance inst; for (int i = 0; i < numInstances; i++) { inst = pool.instance(i); try { if (inst.classValue() != myEstimator.classifyInstance(inst)) misclassified = true; else misclassified = false; } catch (Exception e1) { // TODO Auto-generated catch block e1.printStackTrace(); } if (inst.isMissing(desiredAttr) && misclassified) { try { probs = myEstimator.distributionForInstance(inst); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } for (int j = 0; j < probs.length; j++) KLDivs[i] += MyXLogX(probs[j]); isValidInstance[i] = true; } else { KLDivs[i] = Double.MAX_VALUE; isValidInstance[i] = false; } } double leastDivergence = KLDivs[Utils.minIndex(KLDivs)]; int numLeastDivs = 0; for (int i = 0; i < numInstances; i++) if (isValidInstance[i] && KLDivs[i] == leastDivergence) numLeastDivs++; int randomInstance = r.nextInt(numLeastDivs); int index = 0; for (int i = 0; i < numInstances; i++) { if (isValidInstance[i] && KLDivs[i] == leastDivergence) { if (index == randomInstance) return i; else index++; } } return -1; }
/**
 * Cluster a given instance. This is the method defined in the Clusterer interface; it does
 * nothing but return the cluster assigned to the instance.
 */
@Override
public int clusterInstance(Instance instance) throws Exception {
  // NOTE(review): assumes input.sumVals is a floating-point type; if it were integral,
  // 1 / input.sumVals would truncate to zero — confirm against the Input declaration.
  double prior = 1 / input.sumVals;
  double[] dist = new double[m_numCluster];
  for (int cluster = 0; cluster < m_numCluster; cluster++) {
    // Mass of the merged (prior + cluster) distribution and its two mixture weights.
    double merged = bestT.Pt[cluster] + prior;
    double wPrior = prior / merged;
    double wCluster = bestT.Pt[cluster] / merged;
    dist[cluster] = merged * JS(instance, cluster, wPrior, wCluster);
  }
  return Utils.minIndex(dist);
}
int SelectRow_ErrorMargin(Instances pool, Classifier myEstimator, int desiredAttr) { // for each instance with unbought desiredAttr and label = desiredLabel // measure Prob(i,L(i)) the class probability of the true label, choose the one minimizing it. // i.e. the most erroneous instance int numInstances = pool.numInstances(); double[] classProb = new double[numInstances]; boolean[] isValidInstance = new boolean[numInstances]; double[] probs = null; Instance inst; for (int i = 0; i < numInstances; i++) { inst = pool.instance(i); if (inst.isMissing(desiredAttr)) { try { probs = myEstimator.distributionForInstance(inst); classProb[i] = probs[(int) inst.classValue()]; isValidInstance[i] = true; } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } else { classProb[i] = Double.POSITIVE_INFINITY; isValidInstance[i] = false; } } double leastCorrect = classProb[Utils.minIndex(classProb)]; int numLeastCorrect = 0; for (int i = 0; i < numInstances; i++) { if (isValidInstance[i] && classProb[i] == leastCorrect) numLeastCorrect++; } int randomInstance = r.nextInt(numLeastCorrect); int index = 0; for (int i = 0; i < numInstances; i++) { if (isValidInstance[i] && classProb[i] == leastCorrect) { if (index == randomInstance) return i; else index++; } } return -1; }