예제 #1
0
  /**
   * Reports the current configuration as a command-line style option array.
   *
   * <p>The array always holds the step size ({@code -S}) and the start point ({@code -R}). The
   * attribute evaluator class name ({@code -A}) is added when an evaluator is set, and the
   * evaluator's own options follow a {@code --} separator when it provides any. Slots that end up
   * unused are filled with empty strings.
   *
   * @return an array of strings suitable for passing to setOptions()
   */
  public String[] getOptions() {
    // instanceof is false for null, so no separate null check is needed
    final String[] nested =
        (m_ASEval instanceof OptionHandler)
            ? ((OptionHandler) m_ASEval).getOptions()
            : new String[0];

    // 8 fixed slots: up to three flag/value pairs, the "--" separator and padding
    final String[] result = new String[8 + nested.length];
    int pos = 0;

    result[pos] = "-S";
    result[pos + 1] = "" + getStepSize();
    pos += 2;

    result[pos] = "-R";
    result[pos + 1] = "" + getStartPoint();
    pos += 2;

    if (getAttributeEvaluator() != null) {
      result[pos] = "-A";
      result[pos + 1] = getAttributeEvaluator().getClass().getName();
      pos += 2;
    }

    if (nested.length > 0) {
      result[pos] = "--";
      pos++;
      System.arraycopy(nested, 0, result, pos, nested.length);
      pos += nested.length;
    }

    // Pad whatever is left with empty strings
    for (int k = pos; k < result.length; k++) {
      result[k] = "";
    }

    return result;
  }
예제 #2
0
  /**
   * Computes the class distribution predicted for a single test instance.
   *
   * <p>The instance is first passed through {@code m_disTransform} and then through {@code
   * m_delTransform}; the filtered instance is used as the lookup key into the table. When the key
   * is absent the fallback is, in order: the IBk classifier (if enabled), the class priors
   * (nominal class) or the majority value (numeric class).
   *
   * @param instance the instance to be classified
   * @return predicted class probability distribution (a single-element array holding the
   *     predicted value when the class is not nominal)
   * @throws Exception if distribution can't be computed
   */
  public double[] distributionForInstance(Instance instance) throws Exception {

    // Apply the first filter
    m_disTransform.input(instance);
    m_disTransform.batchFinished();
    final Instance afterFirstFilter = m_disTransform.output();

    // Apply the second filter to the result of the first
    m_delTransform.input(afterFirstFilter);
    m_delTransform.batchFinished();
    final Instance filtered = m_delTransform.output();

    final DecisionTableHashKey key =
        new DecisionTableHashKey(filtered, filtered.numAttributes(), false);
    final double[] entry = (double[]) m_entries.get(key);

    // No matching table entry: fall back to IBk, the priors or the majority value
    if (entry == null) {
      if (m_useIBk) {
        return m_ibk.distributionForInstance(filtered);
      }
      if (m_classIsNominal) {
        return m_classPriors.clone();
      }
      return new double[] {m_majority};
    }

    if (m_classIsNominal) {
      // Normalise a copy so the stored entry stays untouched
      final double[] normalised = entry.clone();
      Utils.normalize(normalised);
      return normalised;
    }

    // Numeric class: the prediction is entry[0] divided by entry[1]
    return new double[] {entry[0] / entry[1]};
  }
예제 #3
0
  /**
   * Reports the current configuration as a command-line style option array.
   *
   * <p>The result starts with this classifier's own options ({@code -E} desired size, {@code -R}
   * artificial size) followed by every option reported by the superclass.
   *
   * @return an array of strings suitable for passing to setOptions
   */
  public String[] getOptions() {

    final String[] inherited = super.getOptions();
    final String[] own = {"-E", "" + getDesiredSize(), "-R", "" + getArtificialSize()};

    // Own options first, inherited ones appended; the array is filled exactly
    final String[] result = new String[inherited.length + own.length];
    System.arraycopy(own, 0, result, 0, own.length);
    System.arraycopy(inherited, 0, result, own.length, inherited.length);
    return result;
  }
예제 #4
0
파일: LWL.java 프로젝트: alishakiba/jDenetX
  /**
   * Reports the current configuration as a command-line style option array.
   *
   * <p>The result holds the weighting kernel ({@code -U}), the neighbour count ({@code -K},
   * reported as {@code -1} when the count is 0 and all neighbours are in use) and the
   * nearest-neighbour search specification ({@code -A}: class name followed by its joined
   * options), then every option reported by the superclass.
   *
   * @return an array of strings suitable for passing to setOptions
   */
  public String[] getOptions() {

    final String[] inherited = super.getOptions();

    final String kernel = "" + getWeightingKernel();
    final String neighbours = ((getKNN() == 0) && m_UseAllK) ? "-1" : ("" + getKNN());
    final String searchSpec =
        m_NNSearch.getClass().getName() + " " + Utils.joinOptions(m_NNSearch.getOptions());

    final String[] own = {"-U", kernel, "-K", neighbours, "-A", searchSpec};

    // Own options first, inherited ones appended
    final String[] result = new String[inherited.length + own.length];
    System.arraycopy(own, 0, result, 0, own.length);
    System.arraycopy(inherited, 0, result, own.length, inherited.length);

    return result;
  }
  /**
   * Returns the best cut of a graph w.r.t. the degree of dissimilarity between points of different
   * partitions and the degree of similarity between points of the same partition.
   *
   * <p>The method builds X = D^(-1/2) * (D - W) * D^(-1/2), where D is the diagonal matrix of the
   * row sums of W, takes the eigenvector belonging to the eigenvalue with the second smallest
   * absolute value, orders the points by their component in that (rescaled) vector and tries
   * every split of that ordering, keeping the one with the smallest {@code Ncut} value.
   *
   * <p>Notes on edge cases: a row of W that sums to zero makes 1 / sqrt(d_i) infinite; a graph
   * with fewer than two points has no second eigenvalue, so the lookup of the second index fails;
   * and if no split yields a cut value below positive infinity (for instance when every value is
   * NaN) both entries of the returned array stay {@code null}.
   *
   * @param W the weight matrix of the graph
   * @return an array of two elements, each of these contains the points of a partition
   */
  protected static int[][] bestCut(DoubleMatrix2D W) {
    int n = W.columns();
    // Builds the diagonal matrices D and D^(-1/2) (represented as their diagonals)
    DoubleMatrix1D d = DoubleFactory1D.dense.make(n);
    DoubleMatrix1D d_minus_1_2 = DoubleFactory1D.dense.make(n);
    for (int i = 0; i < n; i++) {
      double d_i = W.viewRow(i).zSum();
      d.set(i, d_i);
      d_minus_1_2.set(i, 1 / Math.sqrt(d_i));
    }
    DoubleMatrix2D D = DoubleFactory2D.sparse.diagonal(d);

    DoubleMatrix2D X = D.copy();

    // X = D^(-1/2) * (D - W) * D^(-1/2)
    X.assign(W, Functions.minus);
    for (int i = 0; i < n; i++) {
      for (int j = 0; j < n; j++) {
        X.set(i, j, X.get(i, j) * d_minus_1_2.get(i) * d_minus_1_2.get(j));
      }
    }

    // Computes the eigenvalues and the eigenvectors of X
    EigenvalueDecomposition e = new EigenvalueDecomposition(X);
    DoubleMatrix1D lambda = e.getRealEigenvalues();

    // Selects the eigenvector z_2 associated with the second smallest eigenvalue
    // Creates a map that contains the pairs <index, |eigenvalue|>
    AbstractIntDoubleMap map = new OpenIntDoubleHashMap(n);
    for (int i = 0; i < n; i++) {
      map.put(i, Math.abs(lambda.get(i)));
    }
    IntArrayList list = new IntArrayList();
    // Sorts the map on the value
    map.keysSortedByValue(list);
    // Gets the index of the second smallest element
    int i_2 = list.get(1);

    // y_2 = D^(-1/2) * z_2
    DoubleMatrix1D y_2 = e.getV().viewColumn(i_2).copy();
    y_2.assign(d_minus_1_2, Functions.mult);

    // Creates a map that contains the pairs <i, y_2[i]>
    map.clear();
    for (int i = 0; i < n; i++) {
      map.put(i, y_2.get(i));
    }
    // Sorts the map on the value
    map.keysSortedByValue(list);

    // The array v contains all the elements of the graph ordered by their
    // projection on vector y_2. IntArrayList.elements() hands out the backing
    // array, which may be longer than the number of stored keys, so exactly n
    // indices are copied; otherwise stale slots would be passed to Ncut as if
    // they were additional points.
    int[] v = new int[n];
    System.arraycopy(list.elements(), 0, v, 0, n);

    // Searches the split of the ordered points that minimizes the cut of the
    // partition
    double best_cut = Double.POSITIVE_INFINITY;
    int[][] partition = new int[2][];

    // For each admissible splitting point i
    for (int i = 1; i < n; i++) {
      // The array a contains the first i points of the ordering,
      // the array b contains the remaining n - i points
      int[] a = new int[i];
      int[] b = new int[n - i];
      System.arraycopy(v, 0, a, 0, i);
      System.arraycopy(v, i, b, 0, n - i);
      double cut = Ncut(W, a, b, v);
      if (cut < best_cut) {
        best_cut = cut;
        partition[0] = a;
        partition[1] = b;
      }
    }

    return partition;
  }
 /**
  * Concatenates two disjoint sets of points, each given as an integer array.
  *
  * <p>The result lists every element of {@code a} in its original order, followed by every
  * element of {@code b} in its original order. Neither input array is modified.
  *
  * @param a the first set of points
  * @param b the second set of points
  * @return the union of the two sets
  */
 protected static int[] merge(int[] a, int[] b) {
   // Start from a zero-padded copy of a that already has room for b
   final int[] union = java.util.Arrays.copyOf(a, a.length + b.length);
   System.arraycopy(b, 0, union, a.length, b.length);
   return union;
 }
  /**
   * Evaluates a single attribute by computing the gain ratio of the class given that attribute.
   *
   * <p>A contingency table of attribute value versus class value is built from the training
   * instances, with one extra row and one extra column that collect the instances whose attribute
   * value or class value is missing. When {@code m_missing_merge} is set (and not every instance
   * is missing), the missing counts are spread over the other cells in proportion to the observed
   * frequencies before the gain ratio is computed.
   *
   * @param attribute the index of the attribute to be evaluated
   * @return the gain ratio
   * @throws Exception if the attribute could not be evaluated
   */
  public double evaluateAttribute(int attribute) throws Exception {
    // The last row / column is reserved for missing values
    final int ni = m_trainInstances.attribute(attribute).numValues() + 1;
    final int nj = m_numClasses + 1;
    final int missRow = ni - 1;
    final int missCol = nj - 1;

    // Java zero-initialises new arrays, so no explicit clearing is required
    final double[][] counts = new double[ni][nj];
    final double[] sumi = new double[ni];
    final double[] sumj = new double[nj];
    double sum = 0.0;

    // Fill the contingency table
    for (int n = 0; n < m_numInstances; n++) {
      final Instance inst = m_trainInstances.instance(n);
      final int row = inst.isMissing(attribute) ? missRow : (int) inst.value(attribute);
      final int col = inst.isMissing(m_classIndex) ? missCol : (int) inst.value(m_classIndex);
      counts[row][col]++;
    }

    // Row totals and the grand total
    for (int row = 0; row < ni; row++) {
      for (int col = 0; col < nj; col++) {
        sumi[row] += counts[row][col];
        sum += counts[row][col];
      }
    }

    // Column totals
    for (int col = 0; col < nj; col++) {
      for (int row = 0; row < ni; row++) {
        sumj[col] += counts[row][col];
      }
    }

    // Distribute missing counts
    final boolean distribute =
        m_missing_merge && (sumi[missRow] < m_numInstances) && (sumj[missCol] < m_numInstances);

    if (distribute) {
      // Snapshots of the table and its marginals taken before anything is moved
      final double[] rowSnapshot = sumi.clone();
      final double[] colSnapshot = sumj.clone();
      final double[][] countSnapshot = new double[ni][];
      for (int row = 0; row < ni; row++) {
        countSnapshot[row] = counts[row].clone();
      }

      final double totalMissing = (sumi[missRow] + sumj[missCol] - counts[missRow][missCol]);

      // Instances with a missing attribute value but a known class
      if (sumi[missRow] > 0.0) {
        for (int col = 0; col < missCol; col++) {
          if (counts[missRow][col] > 0.0) {
            for (int row = 0; row < missRow; row++) {
              final double share =
                  ((rowSnapshot[row] / (sum - rowSnapshot[missRow])) * counts[missRow][col]);
              counts[row][col] += share;
              sumi[row] += share;
            }
            counts[missRow][col] = 0.0;
          }
        }
      }
      sumi[missRow] = 0.0;

      // Instances with a missing class but a known attribute value
      if (sumj[missCol] > 0.0) {
        for (int row = 0; row < missRow; row++) {
          if (counts[row][missCol] > 0.0) {
            for (int col = 0; col < missCol; col++) {
              final double share =
                  ((colSnapshot[col] / (sum - colSnapshot[missCol])) * counts[row][missCol]);
              counts[row][col] += share;
              sumj[col] += share;
            }
            counts[row][missCol] = 0.0;
          }
        }
      }
      sumj[missCol] = 0.0;

      // Instances where both the attribute value and the class are missing
      if (counts[missRow][missCol] > 0.0 && totalMissing != sum) {
        for (int row = 0; row < missRow; row++) {
          for (int col = 0; col < missCol; col++) {
            final double share =
                (countSnapshot[row][col] / (sum - totalMissing)) * countSnapshot[missRow][missCol];
            counts[row][col] += share;
            sumi[row] += share;
            sumj[col] += share;
          }
        }
        counts[missRow][missCol] = 0.0;
      }
    }

    return ContingencyTables.gainRatio(counts);
  }
예제 #8
0
  /**
   * Evaluates a test fold for internal cross validation of feature sets.
   *
   * <p>The method works in three phases: (1) the fold's instances are subtracted from their table
   * entries (and, for a nominal class, from the class prior counts), (2) every instance of the
   * fold is predicted from the reduced table and handed to {@code m_evaluation}, and (3) the
   * instances are added back so the table and the prior counts return to their previous state.
   *
   * <p>The class prior counts are only touched when the class is nominal: indexing them with a
   * numeric class value is meaningless, and they are only set up for nominal classes when the
   * classifier is built.
   *
   * <p>If an exception is thrown part-way through, the table entries and prior counts are not
   * restored.
   *
   * @param fold set of instances to be "left out" and classified
   * @param fs currently selected feature set
   * @return always 0.0 in this implementation; the results of the fold are accumulated in {@code
   *     m_evaluation}
   * @throws Exception if something goes wrong
   */
  double evaluateFoldCV(Instances fold, int[] fs) throws Exception {

    int i;
    int numFold = fold.numInstances();
    int numCl = m_theInstances.classAttribute().numValues();
    double[][] class_distribs = new double[numFold][numCl];
    double[] instA = new double[fs.length];
    double[] normDist;
    DecisionTableHashKey thekey;
    double acc = 0.0;
    int classI = m_theInstances.classIndex();
    Instance inst;

    // Numeric class entries are used as a pair: [0] / [1] is the prediction
    if (m_classIsNominal) {
      normDist = new double[numCl];
    } else {
      normDist = new double[2];
    }

    // first *remove* instances
    for (i = 0; i < numFold; i++) {
      inst = fold.instance(i);
      for (int j = 0; j < fs.length; j++) {
        if (fs[j] == classI) {
          instA[j] = Double.MAX_VALUE; // missing for the class
        } else if (inst.isMissing(fs[j])) {
          instA[j] = Double.MAX_VALUE;
        } else {
          instA[j] = inst.value(fs[j]);
        }
      }
      thekey = new DecisionTableHashKey(instA);
      // class_distribs[i] aliases the table entry, so the updates below modify the table itself
      if ((class_distribs[i] = (double[]) m_entries.get(thekey)) == null) {
        throw new Error("This should never happen!");
      }
      if (m_classIsNominal) {
        class_distribs[i][(int) inst.classValue()] -= inst.weight();
        // downdate the class prior counts as well
        m_classPriorCounts[(int) inst.classValue()] -= inst.weight();
      } else {
        class_distribs[i][0] -= (inst.classValue() * inst.weight());
        class_distribs[i][1] -= inst.weight();
      }
    }

    // Priors of the remaining data; only meaningful (and only needed) for a nominal class
    double[] classPriors = null;
    if (m_classIsNominal) {
      classPriors = m_classPriorCounts.clone();
      Utils.normalize(classPriors);
    }

    // now classify instances
    for (i = 0; i < numFold; i++) {
      inst = fold.instance(i);
      System.arraycopy(class_distribs[i], 0, normDist, 0, normDist.length);
      if (m_classIsNominal) {
        // the entry is only used if at least one class count is greater than 1.0
        boolean ok = false;
        for (int j = 0; j < normDist.length; j++) {
          if (Utils.gr(normDist[j], 1.0)) {
            ok = true;
            break;
          }
        }

        if (!ok) { // fall back to the class priors
          normDist = classPriors.clone();
        }

        Utils.normalize(normDist);
        if (m_evaluationMeasure == EVAL_AUC) {
          m_evaluation.evaluateModelOnceAndRecordPrediction(normDist, inst);
        } else {
          m_evaluation.evaluateModelOnce(normDist, inst);
        }
      } else {
        double[] temp = new double[1];
        if (Utils.eq(normDist[1], 0.0)) {
          // nothing left in the entry: predict the majority value
          temp[0] = m_majority;
        } else {
          temp[0] = normDist[0] / normDist[1];
        }
        m_evaluation.evaluateModelOnce(temp, inst);
      }
    }

    // now re-insert instances
    for (i = 0; i < numFold; i++) {
      inst = fold.instance(i);

      if (m_classIsNominal) {
        m_classPriorCounts[(int) inst.classValue()] += inst.weight();
        class_distribs[i][(int) inst.classValue()] += inst.weight();
      } else {
        class_distribs[i][0] += (inst.classValue() * inst.weight());
        class_distribs[i][1] += inst.weight();
      }
    }
    return acc;
  }
예제 #9
0
  /**
   * Predicts one instance for internal leave-one-out cross validation of feature sets.
   *
   * <p>The instance's own contribution is subtracted from a copy of its table entry (the table
   * itself is not modified) and the prediction is made from what remains. For a nominal class
   * the class prior counts are temporarily reduced by the instance and used as the prediction
   * when no class count in the reduced entry exceeds 1.0. The prediction is also passed to {@code
   * m_evaluation}.
   *
   * @param instance instance to be "left out" and classified
   * @param instA feature values of the selected features for the instance
   * @return the classification of the instance
   * @throws Exception if something goes wrong
   */
  double evaluateInstanceLeaveOneOut(Instance instance, double[] instA) throws Exception {

    final DecisionTableHashKey key = new DecisionTableHashKey(instA);
    final double[] entry = (double[]) m_entries.get(key);

    // The instance was inserted into the table earlier, so its entry has to exist
    if (entry == null) {
      throw new Error("This should never happen!");
    }

    // Work on a copy so the stored entry keeps the instance's contribution
    final double[] reduced = entry.clone();

    if (!m_classIsNominal) {
      reduced[0] -= (instance.classValue() * instance.weight());
      reduced[1] -= instance.weight();

      final double[] prediction = new double[1];
      if (Utils.eq(reduced[1], 0.0)) {
        // Nothing left in the entry: use the majority value
        prediction[0] = m_majority;
        m_evaluation.evaluateModelOnce(prediction, instance);
        return m_majority;
      }
      prediction[0] = reduced[0] / reduced[1];
      m_evaluation.evaluateModelOnce(prediction, instance);
      return prediction[0];
    }

    reduced[(int) instance.classValue()] -= instance.weight();

    // The reduced entry is only trusted if some class count is still greater than 1.0
    boolean usable = false;
    for (int c = 0; c < reduced.length && !usable; c++) {
      usable = Utils.gr(reduced[c], 1.0);
    }

    // Downdate the class prior counts, take a normalised snapshot, then restore them
    m_classPriorCounts[(int) instance.classValue()] -= instance.weight();
    final double[] priors = m_classPriorCounts.clone();
    Utils.normalize(priors);
    m_classPriorCounts[(int) instance.classValue()] += instance.weight();

    final double[] distribution = usable ? reduced : priors;
    Utils.normalize(distribution);

    if (m_evaluationMeasure == EVAL_AUC) {
      m_evaluation.evaluateModelOnceAndRecordPrediction(distribution, instance);
    } else {
      m_evaluation.evaluateModelOnce(distribution, instance);
    }
    return Utils.maxIndex(distribution);
  }
예제 #10
0
  /**
   * Generates the classifier.
   *
   * <p>Steps: copy the data and drop instances without a class value; for a nominal class, set
   * up the class prior counts (each class starts at 1.0) and the normalised priors; discretise
   * the attributes; search for the feature subset; reduce the data to the selected features plus
   * the class; and insert every reduced instance into the hash table. Optionally an IBk
   * classifier is built on the discretised data as the fallback for unmatched instances, and
   * the training data is reduced to its header to save memory.
   *
   * @param data set of instances serving as training data
   * @throws Exception if the classifier has not been generated successfully
   */
  public void buildClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    m_theInstances = new Instances(data);
    m_theInstances.deleteWithMissingClass();

    m_rr = new Random(1);

    if (m_theInstances.classAttribute().isNominal()) { // Set up class priors
      m_classPriorCounts = new double[m_theInstances.classAttribute().numValues()];
      Arrays.fill(m_classPriorCounts, 1.0);
      // Count over m_theInstances rather than data: instances without a class value have
      // already been removed there, so they can never be credited to a class by the
      // cast of their class value to an array index.
      for (int i = 0; i < m_theInstances.numInstances(); i++) {
        Instance curr = m_theInstances.instance(i);
        m_classPriorCounts[(int) curr.classValue()] += curr.weight();
      }
      m_classPriors = m_classPriorCounts.clone();
      Utils.normalize(m_classPriors);
    }

    setUpEvaluator();

    if (m_theInstances.classAttribute().isNumeric()) {
      m_disTransform = new weka.filters.unsupervised.attribute.Discretize();
      m_classIsNominal = false;

      // use binned discretisation if the class is numeric
      ((weka.filters.unsupervised.attribute.Discretize) m_disTransform).setBins(10);
      ((weka.filters.unsupervised.attribute.Discretize) m_disTransform).setInvertSelection(true);

      // Discretize all attributes EXCEPT the class: the (1-based) class index is given as the
      // attribute range and the selection is inverted
      String rangeList = "";
      rangeList += (m_theInstances.classIndex() + 1);

      ((weka.filters.unsupervised.attribute.Discretize) m_disTransform)
          .setAttributeIndices(rangeList);
    } else {
      m_disTransform = new weka.filters.supervised.attribute.Discretize();
      ((weka.filters.supervised.attribute.Discretize) m_disTransform).setUseBetterEncoding(true);
      m_classIsNominal = true;
    }

    m_disTransform.setInputFormat(m_theInstances);
    m_theInstances = Filter.useFilter(m_theInstances, m_disTransform);

    m_numAttributes = m_theInstances.numAttributes();
    m_numInstances = m_theInstances.numInstances();
    m_majority = m_theInstances.meanOrMode(m_theInstances.classAttribute());

    // Perform the search
    int[] selected = m_search.search(m_evaluator, m_theInstances);

    // The decision features are the selected attributes followed by the class index
    m_decisionFeatures = new int[selected.length + 1];
    System.arraycopy(selected, 0, m_decisionFeatures, 0, selected.length);
    m_decisionFeatures[m_decisionFeatures.length - 1] = m_theInstances.classIndex();

    // reduce instances to selected features (inverted Remove keeps the listed attributes)
    m_delTransform = new Remove();
    m_delTransform.setInvertSelection(true);

    // set features to keep
    m_delTransform.setAttributeIndicesArray(m_decisionFeatures);
    m_delTransform.setInputFormat(m_theInstances);
    m_dtInstances = Filter.useFilter(m_theInstances, m_delTransform);

    // reset the number of attributes
    m_numAttributes = m_dtInstances.numAttributes();

    // create hash table
    m_entries = new Hashtable((int) (m_dtInstances.numInstances() * 1.5));

    // insert instances into the hash table
    for (int i = 0; i < m_numInstances; i++) {
      Instance inst = m_dtInstances.instance(i);
      insertIntoTable(inst, null);
    }

    // Replace the global table majority with nearest neighbour?
    if (m_useIBk) {
      m_ibk = new IBk();
      m_ibk.buildClassifier(m_theInstances);
    }

    // Save memory: keep only the headers of the training data
    if (m_saveMemory) {
      m_theInstances = new Instances(m_theInstances, 0);
      m_dtInstances = new Instances(m_dtInstances, 0);
    }
    m_evaluation = null;
  }