/** * Gets the current settings of WrapperSubsetEval. * * @return an array of strings suitable for passing to setOptions() */ public String[] getOptions() { String[] evaluatorOptions = new String[0]; if ((m_ASEval != null) && (m_ASEval instanceof OptionHandler)) { evaluatorOptions = ((OptionHandler) m_ASEval).getOptions(); } String[] options = new String[8 + evaluatorOptions.length]; int current = 0; options[current++] = "-S"; options[current++] = "" + getStepSize(); options[current++] = "-R"; options[current++] = "" + getStartPoint(); if (getAttributeEvaluator() != null) { options[current++] = "-A"; options[current++] = getAttributeEvaluator().getClass().getName(); } if (evaluatorOptions.length > 0) { options[current++] = "--"; System.arraycopy(evaluatorOptions, 0, options, current, evaluatorOptions.length); current += evaluatorOptions.length; } while (current < options.length) { options[current++] = ""; } return options; }
/** * Calculates the class membership probabilities for the given test instance. * * @param instance the instance to be classified * @return predicted class probability distribution * @throws Exception if distribution can't be computed */ public double[] distributionForInstance(Instance instance) throws Exception { DecisionTableHashKey thekey; double[] tempDist; double[] normDist; m_disTransform.input(instance); m_disTransform.batchFinished(); instance = m_disTransform.output(); m_delTransform.input(instance); m_delTransform.batchFinished(); instance = m_delTransform.output(); thekey = new DecisionTableHashKey(instance, instance.numAttributes(), false); // if this one is not in the table if ((tempDist = (double[]) m_entries.get(thekey)) == null) { if (m_useIBk) { tempDist = m_ibk.distributionForInstance(instance); } else { if (!m_classIsNominal) { tempDist = new double[1]; tempDist[0] = m_majority; } else { tempDist = m_classPriors.clone(); /*tempDist = new double [m_theInstances.classAttribute().numValues()]; tempDist[(int)m_majority] = 1.0; */ } } } else { if (!m_classIsNominal) { normDist = new double[1]; normDist[0] = (tempDist[0] / tempDist[1]); tempDist = normDist; } else { // normalise distribution normDist = new double[tempDist.length]; System.arraycopy(tempDist, 0, normDist, 0, tempDist.length); Utils.normalize(normDist); tempDist = normDist; } } return tempDist; }
/** * Gets the current settings of the Classifier. * * @return an array of strings suitable for passing to setOptions */ public String[] getOptions() { String[] superOptions = super.getOptions(); String[] options = new String[superOptions.length + 4]; int current = 0; options[current++] = "-E"; options[current++] = "" + getDesiredSize(); options[current++] = "-R"; options[current++] = "" + getArtificialSize(); System.arraycopy(superOptions, 0, options, current, superOptions.length); current += superOptions.length; while (current < options.length) { options[current++] = ""; } return options; }
/** * Gets the current settings of the classifier. * * @return an array of strings suitable for passing to setOptions */ public String[] getOptions() { String[] superOptions = super.getOptions(); String[] options = new String[superOptions.length + 6]; int current = 0; options[current++] = "-U"; options[current++] = "" + getWeightingKernel(); if ((getKNN() == 0) && m_UseAllK) { options[current++] = "-K"; options[current++] = "-1"; } else { options[current++] = "-K"; options[current++] = "" + getKNN(); } options[current++] = "-A"; options[current++] = m_NNSearch.getClass().getName() + " " + Utils.joinOptions(m_NNSearch.getOptions()); System.arraycopy(superOptions, 0, options, current, superOptions.length); return options; }
/** * Returns the best cut of a graph w.r.t. the degree of dissimilarity between points of different * partitions and the degree of similarity between points of the same partition. * * @param W the weight matrix of the graph * @return an array of two elements, each of these contains the points of a partition */ protected static int[][] bestCut(DoubleMatrix2D W) { int n = W.columns(); // Builds the diagonal matrices D and D^(-1/2) (represented as their diagonals) DoubleMatrix1D d = DoubleFactory1D.dense.make(n); DoubleMatrix1D d_minus_1_2 = DoubleFactory1D.dense.make(n); for (int i = 0; i < n; i++) { double d_i = W.viewRow(i).zSum(); d.set(i, d_i); d_minus_1_2.set(i, 1 / Math.sqrt(d_i)); } DoubleMatrix2D D = DoubleFactory2D.sparse.diagonal(d); // System.out.println("DoubleMatrix2D :\n"+D.toString()); DoubleMatrix2D X = D.copy(); // System.out.println("DoubleMatrix2D copy :\n"+X.toString()); // X = D^(-1/2) * (D - W) * D^(-1/2) X.assign(W, Functions.minus); // System.out.println("DoubleMatrix2D X: (D-W) :\n"+X.toString()); for (int i = 0; i < n; i++) for (int j = 0; j < n; j++) X.set(i, j, X.get(i, j) * d_minus_1_2.get(i) * d_minus_1_2.get(j)); // Computes the eigenvalues and the eigenvectors of X EigenvalueDecomposition e = new EigenvalueDecomposition(X); DoubleMatrix1D lambda = e.getRealEigenvalues(); // Selects the eigenvector z_2 associated with the second smallest eigenvalue // Creates a map that contains the pairs <index, eigenvalue> AbstractIntDoubleMap map = new OpenIntDoubleHashMap(n); for (int i = 0; i < n; i++) map.put(i, Math.abs(lambda.get(i))); IntArrayList list = new IntArrayList(); // Sorts the map on the value map.keysSortedByValue(list); // Gets the index of the second smallest element int i_2 = list.get(1); // y_2 = D^(-1/2) * z_2 DoubleMatrix1D y_2 = e.getV().viewColumn(i_2).copy(); y_2.assign(d_minus_1_2, Functions.mult); // Creates a map that contains the pairs <i, y_2[i]> map.clear(); for (int i = 0; i < n; i++) map.put(i, y_2.get(i)); // Sorts the map on the value map.keysSortedByValue(list); // Search the element in the map previuosly ordered that minimizes the cut // of the partition double best_cut = Double.POSITIVE_INFINITY; int[][] partition = new int[2][]; // The array v contains all the elements of the graph ordered by their // projection on vector y_2 int[] v = list.elements(); // For each admissible splitting point i for (int i = 1; i < n; i++) { // The array a contains all the elements that have a projection on vector // y_2 less or equal to the one of i-th element // The array b contains the remaining elements int[] a = new int[i]; int[] b = new int[n - i]; System.arraycopy(v, 0, a, 0, i); System.arraycopy(v, i, b, 0, n - i); double cut = Ncut(W, a, b, v); if (cut < best_cut) { best_cut = cut; partition[0] = a; partition[1] = b; } } // System.out.println("Partition:"); // UtilsJS.printMatrix(partition); return partition; }
/** * Merges two sets of points represented as integer vectors. The sets are not overlapped. * * @param a the first set of points * @param b the second set of points * @return the union of the two sets */ protected static int[] merge(int[] a, int[] b) { int[] v = new int[a.length + b.length]; System.arraycopy(a, 0, v, 0, a.length); System.arraycopy(b, 0, v, a.length, b.length); return v; }
/** * evaluates an individual attribute by measuring the gain ratio of the class given the attribute. * * @param attribute the index of the attribute to be evaluated * @return the gain ratio * @throws Exception if the attribute could not be evaluated */ public double evaluateAttribute(int attribute) throws Exception { int i, j, ii, jj; int ni, nj; double sum = 0.0; ni = m_trainInstances.attribute(attribute).numValues() + 1; nj = m_numClasses + 1; double[] sumi, sumj; Instance inst; double temp = 0.0; sumi = new double[ni]; sumj = new double[nj]; double[][] counts = new double[ni][nj]; sumi = new double[ni]; sumj = new double[nj]; for (i = 0; i < ni; i++) { sumi[i] = 0.0; for (j = 0; j < nj; j++) { sumj[j] = 0.0; counts[i][j] = 0.0; } } // Fill the contingency table for (i = 0; i < m_numInstances; i++) { inst = m_trainInstances.instance(i); if (inst.isMissing(attribute)) { ii = ni - 1; } else { ii = (int) inst.value(attribute); } if (inst.isMissing(m_classIndex)) { jj = nj - 1; } else { jj = (int) inst.value(m_classIndex); } counts[ii][jj]++; } // get the row totals for (i = 0; i < ni; i++) { sumi[i] = 0.0; for (j = 0; j < nj; j++) { sumi[i] += counts[i][j]; sum += counts[i][j]; } } // get the column totals for (j = 0; j < nj; j++) { sumj[j] = 0.0; for (i = 0; i < ni; i++) { sumj[j] += counts[i][j]; } } // distribute missing counts if (m_missing_merge && (sumi[ni - 1] < m_numInstances) && (sumj[nj - 1] < m_numInstances)) { double[] i_copy = new double[sumi.length]; double[] j_copy = new double[sumj.length]; double[][] counts_copy = new double[sumi.length][sumj.length]; for (i = 0; i < ni; i++) { System.arraycopy(counts[i], 0, counts_copy[i], 0, sumj.length); } System.arraycopy(sumi, 0, i_copy, 0, sumi.length); System.arraycopy(sumj, 0, j_copy, 0, sumj.length); double total_missing = (sumi[ni - 1] + sumj[nj - 1] - counts[ni - 1][nj - 1]); // do the missing i's if (sumi[ni - 1] > 0.0) { for (j = 0; j < nj - 1; j++) { if (counts[ni - 1][j] > 0.0) { for (i = 0; i < ni - 1; i++) { temp = ((i_copy[i] / (sum - i_copy[ni - 1])) * counts[ni - 1][j]); counts[i][j] += temp; sumi[i] += temp; } counts[ni - 1][j] = 0.0; } } } sumi[ni - 1] = 0.0; // do the missing j's if (sumj[nj - 1] > 0.0) { for (i = 0; i < ni - 1; i++) { if (counts[i][nj - 1] > 0.0) { for (j = 0; j < nj - 1; j++) { temp = ((j_copy[j] / (sum - j_copy[nj - 1])) * counts[i][nj - 1]); counts[i][j] += temp; sumj[j] += temp; } counts[i][nj - 1] = 0.0; } } } sumj[nj - 1] = 0.0; // do the both missing if (counts[ni - 1][nj - 1] > 0.0 && total_missing != sum) { for (i = 0; i < ni - 1; i++) { for (j = 0; j < nj - 1; j++) { temp = (counts_copy[i][j] / (sum - total_missing)) * counts_copy[ni - 1][nj - 1]; counts[i][j] += temp; sumi[i] += temp; sumj[j] += temp; } } counts[ni - 1][nj - 1] = 0.0; } } return ContingencyTables.gainRatio(counts); }
/** * Calculates the accuracy on a test fold for internal cross validation of feature sets * * @param fold set of instances to be "left out" and classified * @param fs currently selected feature set * @return the accuracy for the fold * @throws Exception if something goes wrong */ double evaluateFoldCV(Instances fold, int[] fs) throws Exception { int i; int ruleCount = 0; int numFold = fold.numInstances(); int numCl = m_theInstances.classAttribute().numValues(); double[][] class_distribs = new double[numFold][numCl]; double[] instA = new double[fs.length]; double[] normDist; DecisionTableHashKey thekey; double acc = 0.0; int classI = m_theInstances.classIndex(); Instance inst; if (m_classIsNominal) { normDist = new double[numCl]; } else { normDist = new double[2]; } // first *remove* instances for (i = 0; i < numFold; i++) { inst = fold.instance(i); for (int j = 0; j < fs.length; j++) { if (fs[j] == classI) { instA[j] = Double.MAX_VALUE; // missing for the class } else if (inst.isMissing(fs[j])) { instA[j] = Double.MAX_VALUE; } else { instA[j] = inst.value(fs[j]); } } thekey = new DecisionTableHashKey(instA); if ((class_distribs[i] = (double[]) m_entries.get(thekey)) == null) { throw new Error("This should never happen!"); } else { if (m_classIsNominal) { class_distribs[i][(int) inst.classValue()] -= inst.weight(); } else { class_distribs[i][0] -= (inst.classValue() * inst.weight()); class_distribs[i][1] -= inst.weight(); } ruleCount++; } m_classPriorCounts[(int) inst.classValue()] -= inst.weight(); } double[] classPriors = m_classPriorCounts.clone(); Utils.normalize(classPriors); // now classify instances for (i = 0; i < numFold; i++) { inst = fold.instance(i); System.arraycopy(class_distribs[i], 0, normDist, 0, normDist.length); if (m_classIsNominal) { boolean ok = false; for (int j = 0; j < normDist.length; j++) { if (Utils.gr(normDist[j], 1.0)) { ok = true; break; } } if (!ok) { // majority class normDist = classPriors.clone(); } // if (ok) { Utils.normalize(normDist); if (m_evaluationMeasure == EVAL_AUC) { m_evaluation.evaluateModelOnceAndRecordPrediction(normDist, inst); } else { m_evaluation.evaluateModelOnce(normDist, inst); } /* } else { normDist[(int)m_majority] = 1.0; if (m_evaluationMeasure == EVAL_AUC) { m_evaluation.evaluateModelOnceAndRecordPrediction(normDist, inst); } else { m_evaluation.evaluateModelOnce(normDist, inst); } } */ } else { if (Utils.eq(normDist[1], 0.0)) { double[] temp = new double[1]; temp[0] = m_majority; m_evaluation.evaluateModelOnce(temp, inst); } else { double[] temp = new double[1]; temp[0] = normDist[0] / normDist[1]; m_evaluation.evaluateModelOnce(temp, inst); } } } // now re-insert instances for (i = 0; i < numFold; i++) { inst = fold.instance(i); m_classPriorCounts[(int) inst.classValue()] += inst.weight(); if (m_classIsNominal) { class_distribs[i][(int) inst.classValue()] += inst.weight(); } else { class_distribs[i][0] += (inst.classValue() * inst.weight()); class_distribs[i][1] += inst.weight(); } } return acc; }
/** * Classifies an instance for internal leave one out cross validation of feature sets * * @param instance instance to be "left out" and classified * @param instA feature values of the selected features for the instance * @return the classification of the instance * @throws Exception if something goes wrong */ double evaluateInstanceLeaveOneOut(Instance instance, double[] instA) throws Exception { DecisionTableHashKey thekey; double[] tempDist; double[] normDist; thekey = new DecisionTableHashKey(instA); if (m_classIsNominal) { // if this one is not in the table if ((tempDist = (double[]) m_entries.get(thekey)) == null) { throw new Error("This should never happen!"); } else { normDist = new double[tempDist.length]; System.arraycopy(tempDist, 0, normDist, 0, tempDist.length); normDist[(int) instance.classValue()] -= instance.weight(); // update the table // first check to see if the class counts are all zero now boolean ok = false; for (int i = 0; i < normDist.length; i++) { if (Utils.gr(normDist[i], 1.0)) { ok = true; break; } } // downdate the class prior counts m_classPriorCounts[(int) instance.classValue()] -= instance.weight(); double[] classPriors = m_classPriorCounts.clone(); Utils.normalize(classPriors); if (!ok) { // majority class normDist = classPriors; } m_classPriorCounts[(int) instance.classValue()] += instance.weight(); // if (ok) { Utils.normalize(normDist); if (m_evaluationMeasure == EVAL_AUC) { m_evaluation.evaluateModelOnceAndRecordPrediction(normDist, instance); } else { m_evaluation.evaluateModelOnce(normDist, instance); } return Utils.maxIndex(normDist); /*} else { normDist = new double [normDist.length]; normDist[(int)m_majority] = 1.0; if (m_evaluationMeasure == EVAL_AUC) { m_evaluation.evaluateModelOnceAndRecordPrediction(normDist, instance); } else { m_evaluation.evaluateModelOnce(normDist, instance); } return m_majority; } */ } // return Utils.maxIndex(tempDist); } else { // see if this one is already in the table if ((tempDist = (double[]) m_entries.get(thekey)) != null) { normDist = new double[tempDist.length]; System.arraycopy(tempDist, 0, normDist, 0, tempDist.length); normDist[0] -= (instance.classValue() * instance.weight()); normDist[1] -= instance.weight(); if (Utils.eq(normDist[1], 0.0)) { double[] temp = new double[1]; temp[0] = m_majority; m_evaluation.evaluateModelOnce(temp, instance); return m_majority; } else { double[] temp = new double[1]; temp[0] = normDist[0] / normDist[1]; m_evaluation.evaluateModelOnce(temp, instance); return temp[0]; } } else { throw new Error("This should never happen!"); } } // shouldn't get here // return 0.0; }
/** * Generates the classifier. * * @param data set of instances serving as training data * @throws Exception if the classifier has not been generated successfully */ public void buildClassifier(Instances data) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(data); // remove instances with missing class m_theInstances = new Instances(data); m_theInstances.deleteWithMissingClass(); m_rr = new Random(1); if (m_theInstances.classAttribute().isNominal()) { // Set up class priors m_classPriorCounts = new double[data.classAttribute().numValues()]; Arrays.fill(m_classPriorCounts, 1.0); for (int i = 0; i < data.numInstances(); i++) { Instance curr = data.instance(i); m_classPriorCounts[(int) curr.classValue()] += curr.weight(); } m_classPriors = m_classPriorCounts.clone(); Utils.normalize(m_classPriors); } setUpEvaluator(); if (m_theInstances.classAttribute().isNumeric()) { m_disTransform = new weka.filters.unsupervised.attribute.Discretize(); m_classIsNominal = false; // use binned discretisation if the class is numeric ((weka.filters.unsupervised.attribute.Discretize) m_disTransform).setBins(10); ((weka.filters.unsupervised.attribute.Discretize) m_disTransform).setInvertSelection(true); // Discretize all attributes EXCEPT the class String rangeList = ""; rangeList += (m_theInstances.classIndex() + 1); // System.out.println("The class col: "+m_theInstances.classIndex()); ((weka.filters.unsupervised.attribute.Discretize) m_disTransform) .setAttributeIndices(rangeList); } else { m_disTransform = new weka.filters.supervised.attribute.Discretize(); ((weka.filters.supervised.attribute.Discretize) m_disTransform).setUseBetterEncoding(true); m_classIsNominal = true; } m_disTransform.setInputFormat(m_theInstances); m_theInstances = Filter.useFilter(m_theInstances, m_disTransform); m_numAttributes = m_theInstances.numAttributes(); m_numInstances = m_theInstances.numInstances(); m_majority = m_theInstances.meanOrMode(m_theInstances.classAttribute()); // Perform the search int[] selected = m_search.search(m_evaluator, m_theInstances); m_decisionFeatures = new int[selected.length + 1]; System.arraycopy(selected, 0, m_decisionFeatures, 0, selected.length); m_decisionFeatures[m_decisionFeatures.length - 1] = m_theInstances.classIndex(); // reduce instances to selected features m_delTransform = new Remove(); m_delTransform.setInvertSelection(true); // set features to keep m_delTransform.setAttributeIndicesArray(m_decisionFeatures); m_delTransform.setInputFormat(m_theInstances); m_dtInstances = Filter.useFilter(m_theInstances, m_delTransform); // reset the number of attributes m_numAttributes = m_dtInstances.numAttributes(); // create hash table m_entries = new Hashtable((int) (m_dtInstances.numInstances() * 1.5)); // insert instances into the hash table for (int i = 0; i < m_numInstances; i++) { Instance inst = m_dtInstances.instance(i); insertIntoTable(inst, null); } // Replace the global table majority with nearest neighbour? if (m_useIBk) { m_ibk = new IBk(); m_ibk.buildClassifier(m_theInstances); } // Save memory if (m_saveMemory) { m_theInstances = new Instances(m_theInstances, 0); m_dtInstances = new Instances(m_dtInstances, 0); } m_evaluation = null; }