Esempio n. 1
0
  /**
   * Restores this categorizer from a stored data context.
   *
   * <p>After delegating to the superclass, reads the categorizer-specific node, takes the priors
   * node from it and, if a class label was stored, sets {@code classIndex} to the position of the
   * first data header whose label equals that stored label. {@code classIndex} is left untouched
   * when no label is stored or no header matches. The categorizer is marked valid at the end.
   *
   * @param categorizerDataContext context holding the stored categorizer state
   * @throws Exception if {@code super.load} throws
   * @see categorizer.core.Categorizer#load(common.DataContext)
   */
  public void load(DataContext categorizerDataContext) throws Exception {
    super.load(categorizerDataContext);

    DataContext specificNode = categorizerDataContext.getNode(categorizerSpecificTag);
    priors = specificNode.getNode(priorsTag);

    Vector storedLabels = specificNode.getElements2(classIndexTag);
    boolean labelStored = storedLabels != null && !storedLabels.isEmpty();

    // Scan the headers front to back; only the first stored label is consulted.
    int headerPos = 0;
    while (labelStored && headerPos < dataSet.getDataHeaders().length) {
      if (dataSet.getDataHeaders()[headerPos].getLabel().equals((String) storedLabels.get(0))) {
        classIndex = headerPos;
        break;
      }
      headerPos++;
    }

    valid = true;
  }
Esempio n. 2
0
  /**
   * Builds the naive bayes classifier by calculating prior and conditional probabilities.
   *
   * <p>The class attribute index is taken from the data set; when the data set reports {@code -1},
   * the last nominal attribute is used instead. For every class value a prior is stored in
   * {@code priors} as {@code (count + 1) / (itemCount + classCount)}, where items whose class
   * value is {@code "?"} are not counted. Afterwards a {@code DistributionFinder} is run over the
   * data set and the categorizer is validated against {@code testSet}.
   *
   * @return the result of {@code super.validate(testSet)}
   * @throws ClassNotNominalException if no nominal attribute can be used as the class attribute
   * @throws Exception if distribution finding or validation fails
   * @see categorizer.aiCategorizer.core.AICategorizer#buildCategorizer()
   */
  @Override
  public ConfusionMatrix buildCategorizer() throws Exception {

    classIndex = dataSet.getClassIndex();

    if (classIndex == -1) {
      // No class attribute declared: fall back to the last nominal attribute.
      for (int i = dataSet.getDataHeaders().length - 1; i >= 0; i--) {
        if (dataSet.getDataHeaders()[i].isNominal()) {
          classIndex = i;
          break;
        }
      }
    }

    // Validate before touching the header: if no nominal attribute exists classIndex is still
    // negative, and indexing with it would fail with ArrayIndexOutOfBoundsException instead of
    // the intended ClassNotNominalException.
    if (classIndex < 0 || !dataSet.getDataHeaders()[classIndex].isNominal()) {
      throw new ClassNotNominalException();
    }

    classes = dataSet.getDataHeaders()[classIndex].getAvailableValue();

    // Seed every class with a zero count.
    for (int i = 0; i < classes.length; i++) {
      priors.add(new NodePair(classes[i], String.valueOf(0)));
      System.out.println(classes[i]);
    }

    System.out.println("#######################");
    int itemCount = dataSet.getDataItems().length;

    // Count the occurrences of each class value, skipping items with a missing ("?") class.
    for (int i = 0; i < itemCount; i++) {
      String classValue = dataSet.getDataItems()[i].getDataFields()[classIndex].getStringValue();
      if (classValue.equals("?")) {
        continue;
      }
      Vector tempVector = priors.getElements2(classValue);
      // size() > 0 (not >= 0): an empty vector must not reach get(0).
      if (tempVector != null && tempVector.size() > 0) {
        int count = Integer.parseInt((String) tempVector.get(0)) + 1;
        priors.remove(classValue);
        priors.add(new NodePair(classValue, String.valueOf(count)));
      }
    }

    // Turn the raw counts into add-one smoothed relative frequencies.
    // NOTE(review): the denominator uses all items, including those with class "?", so the
    // priors sum to less than 1 when labels are missing — confirm whether that is intended.
    for (int i = 0; i < classes.length; i++) {
      int count = 0;
      Vector tempVector = priors.getElements2(classes[i]);
      if (tempVector != null && tempVector.size() > 0) {
        count = Integer.parseInt((String) tempVector.get(0)) + 1;
      }
      priors.remove(classes[i]);
      priors.add(
          new NodePair(
              classes[i], String.valueOf(((double) count) / (itemCount + classes.length))));
    }

    DistributionFinder distFinder = new DistributionFinder();
    distFinder.findDistributions(dataSet, classes);

    valid = true;
    return super.validate(testSet);
  }