Beispiel #1
0
 /**
  * Rebuilds the static state of this class from the files currently configured in
  * {@code DataHolder}.
  *
  * <p>Static variables are not refreshed automatically when {@code DataHolder} is updated, so
  * this must be called after such an update. It performs three steps, in order:
  *
  * <ol>
  *   <li>replaces {@code samples} with a new {@code SampleCollection} read from the training
  *       samples file and the attributes file named by {@code DataHolder};
  *   <li>replaces {@code FeatureSuperSet} with that collection's feature list;
  *   <li>discretizes the new collection with the {@code EQUAL_BINNING} algorithm.
  * </ol>
  */
 public static void reInitializeStaticVariables() {
   SampleCollection reloaded =
       new SampleCollection(
           DataHolder.getTrainingSamplesFileName(), DataHolder.getAttributesFileName());

   // Publish the new collection first, then derive the feature list from it.
   samples = reloaded;
   FeatureSuperSet = reloaded.getfeatureList();

   // Discretization happens last, after the feature list has been captured.
   reloaded.discretizeSamples(Constants.DiscretizerAlgorithms.EQUAL_BINNING);
 }
Beispiel #2
0
  /**
   * Computes {@code fitnessScore} as the weighted average of two decision-tree accuracies.
   *
   * <p>The classifier returned by {@link #getDecisionTree()} is evaluated twice: once as
   * delivered (the training-set accuracy) and once after its testing samples are replaced with
   * the collection read from the testing samples file named by {@code DataHolder} (the test-set
   * accuracy). The test samples are discretized relative to the classifier's training samples
   * rather than independently.
   *
   * <p>The result is {@code (trainingWeight * trainingAccuracy + testingWeight * testAccuracy) /
   * (trainingWeight + testingWeight)}. No check is made that the weights sum to a non-zero
   * value; with ordinary floating-point rules a zero sum does not throw but yields a
   * non-finite score.
   *
   * <p>Note: earlier variants that used only the training accuracy, or the larger of the two
   * accuracies, were abandoned by the original author (a remark about slow runtime accompanied
   * them).
   *
   * @param trainingWeight weight applied to the training-set accuracy
   * @param testingWeight weight applied to the test-set accuracy
   * @throws OptimalScoreException declared by the signature; the condition that raises it is not
   *     visible in this method (presumably raised by a callee; confirm)
   */
  private void calculateFitnessScore(double trainingWeight, double testingWeight)
      throws OptimalScoreException {
    DecisionTreeClassifier classifier = getDecisionTree();

    // Step 1: accuracy of the classifier exactly as getDecisionTree() configured it.
    classifier.TestAndFindAccuracy();
    double trainingAccuracy = classifier.getAccuracy();

    // Step 2: swap in the test samples, binned consistently with the training samples,
    // and evaluate again.
    SampleCollection testSamples =
        new SampleCollection(
            DataHolder.getTestingSamplesFileName(), DataHolder.getAttributesFileName());
    testSamples.discretizeSamplesBasedOnOtherSampleCollection(classifier.getTrainingSamples());
    classifier.setTestingSamples(testSamples);
    classifier.TestAndFindAccuracy();
    double testAccuracy = classifier.getAccuracy();

    // Step 3: combine both accuracies into a single weighted average.
    double weightedSum = trainingWeight * trainingAccuracy + testingWeight * testAccuracy;
    double totalWeight = trainingWeight + testingWeight;
    fitnessScore = weightedSum / totalWeight;
  }
Beispiel #3
0
  /**
   * Builds a new decision tree classifier restricted to the features selected by the chromosome.
   *
   * <p>Position {@code i} of {@code chromosome} selects {@code FeatureSuperSet.get(i)} when the
   * character at that position is {@code '1'}; every other character leaves the feature out.
   * The classifier is constructed from {@code samples} and the selected features, after which a
   * new sample collection is read from the training-samples and features files reported by
   * {@code samples}, discretized with {@code EQUAL_BINNING}, and installed as both the testing
   * and the training samples of the classifier.
   *
   * @return a newly created classifier; a new instance is built on every call
   */
  public DecisionTreeClassifier getDecisionTree() {
    ArrayList<String> selectedFeatures = new ArrayList<String>();
    for (int gene = 0; gene < chromosome.length(); gene++) {
      if (chromosome.charAt(gene) != '1') {
        continue; // feature switched off in this chromosome
      }
      selectedFeatures.add(FeatureSuperSet.get(gene));
    }

    DecisionTreeClassifier classifier = new DecisionTreeClassifier(samples, selectedFeatures);

    // Re-read the training data from disk and bin it before handing it to the classifier.
    SampleCollection reloadedTraining =
        new SampleCollection(
            samples.getSamplesFilename(Filenames.TRAINING_SAMPLES_FILE),
            samples.getSamplesFilename(Filenames.FEATURES_FILE));
    reloadedTraining.discretizeSamples(Constants.DiscretizerAlgorithms.EQUAL_BINNING);

    // The same collection serves as testing and training set; testing is set first.
    classifier.setTestingSamples(reloadedTraining);
    classifier.setTrainingSamples(reloadedTraining);
    return classifier;
  }