예제 #1
0
  /**
   * Compute and store statistics required for generating artificial data.
   *
   * @param data training instances
   * @exception Exception if statistics could not be calculated successfully
   */
  protected void computeStats(Instances data) throws Exception {
    int numAttributes = data.numAttributes();
    m_AttributeStats = new Vector(numAttributes); // use to map attributes to their stats

    for (int j = 0; j < numAttributes; j++) {
      if (data.attribute(j).isNominal()) {
        // Compute the probability of occurence of each distinct value
        int[] nomCounts = (data.attributeStats(j)).nominalCounts;
        double[] counts = new double[nomCounts.length];
        if (counts.length < 2)
          throw new Exception("Nominal attribute has less than two distinct values!");
        // Perform Laplace smoothing
        for (int i = 0; i < counts.length; i++) counts[i] = nomCounts[i] + 1;
        Utils.normalize(counts);
        double[] stats = new double[counts.length - 1];
        stats[0] = counts[0];
        // Calculate cumulative probabilities
        for (int i = 1; i < stats.length; i++) stats[i] = stats[i - 1] + counts[i];
        m_AttributeStats.add(j, stats);
      } else if (data.attribute(j).isNumeric()) {
        // Get mean and standard deviation from the training data
        double[] stats = new double[2];
        stats[0] = data.meanOrMode(j);
        stats[1] = Math.sqrt(data.variance(j));
        m_AttributeStats.add(j, stats);
      } else System.err.println("Decorate can only handle numeric and nominal values.");
    }
  }
예제 #2
0
  /**
   * Build Decorate classifier
   *
   * @param data the training data to be used for generating the classifier
   * @exception Exception if the classifier could not be built successfully
   */
  public void buildClassifier(Instances data) throws Exception {
    if (m_Classifier == null) {
      throw new Exception("A base classifier has not been specified!");
    }
    if (data.checkForStringAttributes()) {
      throw new UnsupportedAttributeTypeException("Cannot handle string attributes!");
    }
    if (data.classAttribute().isNumeric()) {
      throw new UnsupportedClassTypeException("Decorate can't handle a numeric class!");
    }
    if (m_NumIterations < m_DesiredSize)
      throw new Exception("Max number of iterations must be >= desired ensemble size!");

    // initialize random number generator
    if (m_Seed == -1) m_Random = new Random();
    else m_Random = new Random(m_Seed);

    int i = 1; // current committee size
    int numTrials = 1; // number of Decorate iterations
    Instances divData = new Instances(data); // local copy of data - diversity data
    divData.deleteWithMissingClass();
    Instances artData = null; // artificial data

    // compute number of artficial instances to add at each iteration
    int artSize = (int) (Math.abs(m_ArtSize) * divData.numInstances());
    if (artSize == 0) artSize = 1; // atleast add one random example
    computeStats(data); // Compute training data stats for creating artificial examples

    // initialize new committee
    m_Committee = new Vector();
    Classifier newClassifier = m_Classifier;
    newClassifier.buildClassifier(divData);
    m_Committee.add(newClassifier);
    double eComm = computeError(divData); // compute ensemble error
    if (m_Debug)
      System.out.println(
          "Initialize:\tClassifier " + i + " added to ensemble. Ensemble error = " + eComm);

    // repeat till desired committee size is reached OR the max number of iterations is exceeded
    while (i < m_DesiredSize && numTrials < m_NumIterations) {
      // Generate artificial training examples
      artData = generateArtificialData(artSize, data);

      // Label artificial examples
      labelData(artData);
      addInstances(divData, artData); // Add new artificial data

      // Build new classifier
      Classifier tmp[] = Classifier.makeCopies(m_Classifier, 1);
      newClassifier = tmp[0];
      newClassifier.buildClassifier(divData);
      // Remove all the artificial data
      removeInstances(divData, artSize);

      // Test if the new classifier should be added to the ensemble
      m_Committee.add(newClassifier); // add new classifier to current committee
      double currError = computeError(divData);
      if (currError <= eComm) { // adding the new member did not increase the error
        i++;
        eComm = currError;
        if (m_Debug)
          System.out.println(
              "Iteration: "
                  + (1 + numTrials)
                  + "\tClassifier "
                  + i
                  + " added to ensemble. Ensemble error = "
                  + eComm);
      } else { // reject the current classifier because it increased the ensemble error
        m_Committee.removeElementAt(m_Committee.size() - 1); // pop the last member
      }
      numTrials++;
    }
  }