Example #1
0
 public static Instances removeUseless(Instances inInstances) throws Exception {
   // filter out useless
   RemoveUseless removeUselessFilter = new RemoveUseless();
   removeUselessFilter.setMaximumVariancePercentageAllowed(99.0);
   removeUselessFilter.setInputFormat(inInstances);
   return Filter.useFilter(inInstances, removeUselessFilter);
 }
Example #2
0
  /**
   * Computes the distribution for a given instance
   *
   * @param instance the instance for which distribution is computed
   * @return the distribution
   * @throws Exception if the distribution can't be computed successfully
   */
  @Override
  public double[] distributionForInstance(Instance instance) throws Exception {

    m_ReplaceMissingValues.input(instance);
    instance = m_ReplaceMissingValues.output();
    m_AttFilter.input(instance);
    instance = m_AttFilter.output();
    m_NominalToBinary.input(instance);
    instance = m_NominalToBinary.output();

    // Extract the predictor columns into an array
    double[] instDat = new double[m_NumPredictors + 1];
    int j = 1;
    instDat[0] = 1;
    for (int k = 0; k <= m_NumPredictors; k++) {
      if (k != m_ClassIndex) {
        instDat[j++] = instance.value(k);
      }
    }

    double[] distribution = evaluateProbability(instDat);
    return distribution;
  }
Example #3
0
  /**
   * Builds the classifier
   *
   * @param train the training data to be used for generating the boosted classifier.
   * @throws Exception if the classifier could not be built successfully
   */
  @Override
  public void buildClassifier(Instances train) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(train);

    // remove instances with missing class
    train = new Instances(train);
    train.deleteWithMissingClass();

    // Replace missing values
    m_ReplaceMissingValues = new ReplaceMissingValues();
    m_ReplaceMissingValues.setInputFormat(train);
    train = Filter.useFilter(train, m_ReplaceMissingValues);

    // Remove useless attributes
    m_AttFilter = new RemoveUseless();
    m_AttFilter.setInputFormat(train);
    train = Filter.useFilter(train, m_AttFilter);

    // Transform attributes
    m_NominalToBinary = new NominalToBinary();
    m_NominalToBinary.setInputFormat(train);
    train = Filter.useFilter(train, m_NominalToBinary);

    // Save the structure for printing the model
    m_structure = new Instances(train, 0);

    // Extract data
    m_ClassIndex = train.classIndex();
    m_NumClasses = train.numClasses();

    int nK = m_NumClasses - 1; // Only K-1 class labels needed
    int nR = m_NumPredictors = train.numAttributes() - 1;
    int nC = train.numInstances();

    m_Data = new double[nC][nR + 1]; // Data values
    int[] Y = new int[nC]; // Class labels
    double[] xMean = new double[nR + 1]; // Attribute means
    double[] xSD = new double[nR + 1]; // Attribute stddev's
    double[] sY = new double[nK + 1]; // Number of classes
    double[] weights = new double[nC]; // Weights of instances
    double totWeights = 0; // Total weights of the instances
    m_Par = new double[nR + 1][nK]; // Optimized parameter values

    if (m_Debug) {
      System.out.println("Extracting data...");
    }

    for (int i = 0; i < nC; i++) {
      // initialize X[][]
      Instance current = train.instance(i);
      Y[i] = (int) current.classValue(); // Class value starts from 0
      weights[i] = current.weight(); // Dealing with weights
      totWeights += weights[i];

      m_Data[i][0] = 1;
      int j = 1;
      for (int k = 0; k <= nR; k++) {
        if (k != m_ClassIndex) {
          double x = current.value(k);
          m_Data[i][j] = x;
          xMean[j] += weights[i] * x;
          xSD[j] += weights[i] * x * x;
          j++;
        }
      }

      // Class count
      sY[Y[i]]++;
    }

    if ((totWeights <= 1) && (nC > 1)) {
      throw new Exception("Sum of weights of instances less than 1, please reweight!");
    }

    xMean[0] = 0;
    xSD[0] = 1;
    for (int j = 1; j <= nR; j++) {
      xMean[j] = xMean[j] / totWeights;
      if (totWeights > 1) {
        xSD[j] = Math.sqrt(Math.abs(xSD[j] - totWeights * xMean[j] * xMean[j]) / (totWeights - 1));
      } else {
        xSD[j] = 0;
      }
    }

    if (m_Debug) {
      // Output stats about input data
      System.out.println("Descriptives...");
      for (int m = 0; m <= nK; m++) {
        System.out.println(sY[m] + " cases have class " + m);
      }
      System.out.println("\n Variable     Avg       SD    ");
      for (int j = 1; j <= nR; j++) {
        System.out.println(
            Utils.doubleToString(j, 8, 4)
                + Utils.doubleToString(xMean[j], 10, 4)
                + Utils.doubleToString(xSD[j], 10, 4));
      }
    }

    // Normalise input data
    for (int i = 0; i < nC; i++) {
      for (int j = 0; j <= nR; j++) {
        if (xSD[j] != 0) {
          m_Data[i][j] = (m_Data[i][j] - xMean[j]) / xSD[j];
        }
      }
    }

    if (m_Debug) {
      System.out.println("\nIteration History...");
    }

    double x[] = new double[(nR + 1) * nK];
    double[][] b = new double[2][x.length]; // Boundary constraints, N/A here

    // Initialize
    for (int p = 0; p < nK; p++) {
      int offset = p * (nR + 1);
      x[offset] = Math.log(sY[p] + 1.0) - Math.log(sY[nK] + 1.0); // Null model
      b[0][offset] = Double.NaN;
      b[1][offset] = Double.NaN;
      for (int q = 1; q <= nR; q++) {
        x[offset + q] = 0.0;
        b[0][offset + q] = Double.NaN;
        b[1][offset + q] = Double.NaN;
      }
    }

    OptObject oO = new OptObject();
    oO.setWeights(weights);
    oO.setClassLabels(Y);

    Optimization opt = null;
    if (m_useConjugateGradientDescent) {
      opt = new OptEngCG(oO);
    } else {
      opt = new OptEng(oO);
    }
    opt.setDebug(m_Debug);

    if (m_MaxIts == -1) { // Search until convergence
      x = opt.findArgmin(x, b);
      while (x == null) {
        x = opt.getVarbValues();
        if (m_Debug) {
          System.out.println("First set of iterations finished, not enough!");
        }
        x = opt.findArgmin(x, b);
      }
      if (m_Debug) {
        System.out.println(" -------------<Converged>--------------");
      }
    } else {
      opt.setMaxIteration(m_MaxIts);
      x = opt.findArgmin(x, b);
      if (x == null) {
        x = opt.getVarbValues();
      }
    }

    m_LL = -opt.getMinFunction(); // Log-likelihood

    // Don't need data matrix anymore
    m_Data = null;

    // Convert coefficients back to non-normalized attribute units
    for (int i = 0; i < nK; i++) {
      m_Par[0][i] = x[i * (nR + 1)];
      for (int j = 1; j <= nR; j++) {
        m_Par[j][i] = x[i * (nR + 1) + j];
        if (xSD[j] != 0) {
          m_Par[j][i] /= xSD[j];
          m_Par[0][i] -= m_Par[j][i] * xMean[j];
        }
      }
    }
  }