/**
 * Builds a rule model for a numerical attribute by repeatedly splitting on it.
 *
 * <p>Every pass asks {@code splitter} for the best split point of {@code attribute} on a clone
 * of the examples that are still left. Subset 0 of the resulting split produces one rule with a
 * {@code LessEqualsSplitCondition} on that split point; the rule predicts the mode of the label
 * inside the subset and stores the per-label-value counts of the subset as its frequencies. The
 * working set is then replaced by the result of {@code rule.removeCovered(...)}.
 *
 * <p>The loop stops when no examples are left, when the previous pass did not change the size
 * of the working set, or when the splitter reports {@code NaN} as split point. If examples
 * remain afterwards, a condition-free default rule predicting their mode label is appended.
 *
 * @param trainingSet the examples to learn the rules from
 * @param attribute the numerical attribute used for every split condition
 * @return the model holding the learned rules, in the order they were created
 */
private RuleModel createNumericalRuleModel(ExampleSet trainingSet, Attribute attribute) {
    RuleModel ruleModel = new RuleModel(trainingSet);

    // Size of the working set before the most recent removal; -1 means "no pass done yet".
    int previousSize = -1;
    while (trainingSet.size() > 0 && trainingSet.size() != previousSize) {
      ExampleSet workingCopy = (ExampleSet) trainingSet.clone();
      Split split = splitter.getBestSplit(workingCopy, attribute, null);
      double threshold = split.getSplitPoint();
      if (Double.isNaN(threshold)) {
        // No usable split point: stop and let the default rule handle the rest.
        break;
      }

      SplittedExampleSet partition =
          SplittedExampleSet.splitByAttribute(workingCopy, attribute, threshold);
      Attribute labelAttribute = partition.getAttributes().getLabel();
      partition.selectSingleSubset(0);
      SplitCondition lessEquals = new LessEqualsSplitCondition(attribute, threshold);

      // The statistics must be refreshed after the subset selection changed.
      partition.recalculateAttributeStatistics(labelAttribute);
      int modeIndex = (int) partition.getStatistics(labelAttribute, Statistics.MODE);
      String predictedLabel = labelAttribute.getMapping().mapIndex(modeIndex);
      Rule splitRule = new Rule(predictedLabel, lessEquals);

      // One count per label value, in the iteration order of the label mapping.
      int[] labelCounts = new int[labelAttribute.getMapping().size()];
      int slot = 0;
      for (String labelValue : labelAttribute.getMapping().getValues()) {
        labelCounts[slot] =
            (int) partition.getStatistics(labelAttribute, Statistics.COUNT, labelValue);
        slot++;
      }
      splitRule.setFrequencies(labelCounts);
      ruleModel.addRule(splitRule);

      // Remember the size first so the loop condition can detect a pass without progress.
      previousSize = trainingSet.size();
      trainingSet = splitRule.removeCovered(trainingSet);
    }

    // Everything is covered already: no default rule needed.
    if (trainingSet.size() <= 0) {
      return ruleModel;
    }

    // Default rule for the examples that no split rule has taken care of.
    Attribute remainingLabel = trainingSet.getAttributes().getLabel();
    trainingSet.recalculateAttributeStatistics(remainingLabel);
    int defaultIndex = (int) trainingSet.getStatistics(remainingLabel, Statistics.MODE);
    String defaultPrediction = remainingLabel.getMapping().mapIndex(defaultIndex);
    Rule fallbackRule = new Rule(defaultPrediction);

    int[] remainingCounts = new int[remainingLabel.getMapping().size()];
    int position = 0;
    for (String labelValue : remainingLabel.getMapping().getValues()) {
      remainingCounts[position] =
          (int) (trainingSet.getStatistics(remainingLabel, Statistics.COUNT, labelValue));
      position++;
    }
    fallbackRule.setFrequencies(remainingCounts);
    ruleModel.addRule(fallbackRule);

    return ruleModel;
  }
Ejemplo n.º 2
0
  /**
   * Runs a cross-validation whose folds are given by the batch attribute of the input.
   *
   * <p>The input is split by the special attribute registered under
   * {@code Attributes.BATCH_NAME}. For each resulting subset, {@code learn} is called with all
   * other subsets selected, then {@code evaluate} is called with only that subset selected, and
   * finally {@code inApplyLoop()} is invoked. The member {@code iteration} holds the index of
   * the subset currently being processed.
   *
   * @param inputSet the example set to validate on; must carry a batch attribute
   * @throws OperatorException a {@code UserError} with code 113 if the batch attribute is
   *     missing; otherwise whatever the called steps propagate
   */
  @Override
  public void estimatePerformance(ExampleSet inputSet) throws OperatorException {
    // The batch attribute decides which fold every example belongs to.
    Attribute foldAttribute = inputSet.getAttributes().getSpecial(Attributes.BATCH_NAME);
    if (foldAttribute == null) {
      throw new UserError(this, 113, Attributes.BATCH_NAME);
    }
    SplittedExampleSet folds = SplittedExampleSet.splitByAttribute(inputSet, foldAttribute);

    // Cross-validation: every subset serves exactly once as the evaluation fold.
    iteration = 0;
    while (iteration < folds.getNumberOfSubsets()) {
      // Train on everything except the current fold ...
      folds.selectAllSubsetsBut(iteration);
      learn(folds);

      // ... and evaluate on the fold that was held out.
      folds.selectSingleSubset(iteration);
      evaluate(folds);

      inApplyLoop();
      iteration++;
    }
  }
  /**
   * Builds a rule model for a nominal attribute with one rule per subset of the split.
   *
   * <p>The example set is split by {@code attribute}. For the subset with index {@code i} a rule
   * is created whose {@code NominalSplitCondition} tests for the attribute value mapped to
   * index {@code i}; the rule predicts the mode of the label inside that subset and stores the
   * per-label-value counts of the subset as its frequencies.
   *
   * <p>NOTE(review): this presumes that subset {@code i} of the split holds exactly the examples
   * with the attribute value at mapping index {@code i} -- confirm against
   * {@code SplittedExampleSet.splitByAttribute}.
   *
   * @param exampleSet the examples to learn the rules from
   * @param attribute the nominal attribute used for every split condition
   * @return the model holding one rule per subset, in subset order
   */
  private RuleModel createNominalRuleModel(ExampleSet exampleSet, Attribute attribute) {
    RuleModel ruleModel = new RuleModel(exampleSet);
    SplittedExampleSet partition = SplittedExampleSet.splitByAttribute(exampleSet, attribute);
    Attribute labelAttribute = partition.getAttributes().getLabel();

    int subset = 0;
    while (subset < partition.getNumberOfSubsets()) {
      // Restrict the view to one subset and refresh the label statistics for it.
      partition.selectSingleSubset(subset);
      partition.recalculateAttributeStatistics(labelAttribute);

      String attributeValue = attribute.getMapping().mapIndex(subset);
      SplitCondition valueCondition = new NominalSplitCondition(attribute, attributeValue);

      int modeIndex = (int) partition.getStatistics(labelAttribute, Statistics.MODE);
      String predictedLabel = labelAttribute.getMapping().mapIndex(modeIndex);
      Rule valueRule = new Rule(predictedLabel, valueCondition);

      // One count per label value, in the iteration order of the label mapping.
      int[] labelCounts = new int[labelAttribute.getMapping().size()];
      int slot = 0;
      for (String labelValue : labelAttribute.getMapping().getValues()) {
        labelCounts[slot] =
            (int) partition.getStatistics(labelAttribute, Statistics.COUNT, labelValue);
        slot++;
      }
      valueRule.setFrequencies(labelCounts);
      ruleModel.addRule(valueRule);

      subset++;
    }
    return ruleModel;
  }