/**
 * Loads this categorizer from the given data context.
 *
 * <p>After delegating to the superclass, the categorizer-specific node is read to obtain
 * the priors node. If that node holds at least one element under the class-index tag,
 * the first data header whose label equals the first such element becomes the class index.
 * The categorizer is marked valid at the end, whether or not a matching header was found.
 *
 * @param categorizerDataContext the context to load from
 * @throws Exception if the superclass load or any lookup on the context fails
 * @see categorizer.core.Categorizer#load(common.DataContext)
 */
public void load(DataContext categorizerDataContext) throws Exception {
    super.load(categorizerDataContext);

    DataContext specificNode = categorizerDataContext.getNode(categorizerSpecificTag);
    priors = specificNode.getNode(priorsTag);

    Vector storedLabels = specificNode.getElements2(classIndexTag);
    boolean hasStoredLabel = storedLabels != null && storedLabels.size() > 0;
    if (hasStoredLabel) {
        // Scan the headers front to back and stop at the first label match.
        int headerIdx = 0;
        while (headerIdx < dataSet.getDataHeaders().length) {
            if (dataSet.getDataHeaders()[headerIdx].getLabel().equals((String) storedLabels.get(0))) {
                classIndex = headerIdx;
                break;
            }
            headerIdx++;
        }
    }

    valid = true;
}
/** * Builds the naive bayes classifier by calculating prior and conditional probabilities. * * @see categorizer.aiCategorizer.core.AICategorizer#buildCategorizer() */ @Override public ConfusionMatrix buildCategorizer() throws Exception { classIndex = dataSet.getClassIndex(); if (classIndex == -1) { for (int i = dataSet.getDataHeaders().length - 1; i >= 0; i--) { if (dataSet.getDataHeaders()[i].isNominal()) { classIndex = i; break; } } } classes = dataSet.getDataHeaders()[classIndex].getAvailableValue(); if (!dataSet.getDataHeaders()[classIndex].isNominal()) throw new ClassNotNominalException(); for (int i = 0; i < classes.length; i++) { priors.add(new NodePair(classes[i], String.valueOf(0))); System.out.println(classes[i]); } System.out.println("#######################"); for (int i = 0; i < dataSet.getDataItems().length; i++) { if (!dataSet.getDataItems()[i].getDataFields()[classIndex].getStringValue().equals("?")) { Vector tempVector = priors.getElements2( dataSet.getDataItems()[i].getDataFields()[classIndex].getStringValue()); if (tempVector != null && tempVector.size() >= 0) { int count = Integer.parseInt((String) tempVector.get(0)); count++; priors.remove(dataSet.getDataItems()[i].getDataFields()[classIndex].getStringValue()); priors.add( new NodePair( dataSet.getDataItems()[i].getDataFields()[classIndex].getStringValue(), String.valueOf(count))); } } // // System.out.println(dataSet.getDataItems()[i].getDataFields()[dataSet.getDataHeaders().length - 1].getStringValue()); } for (int i = 0; i < classes.length; i++) { int count = 0; Vector tempVector = priors.getElements2(classes[i]); if (tempVector != null && tempVector.size() >= 0) count = Integer.parseInt((String) tempVector.get(0)) + 1; priors.remove(classes[i]); priors.add( new NodePair( classes[i], String.valueOf(((double) count) / (dataSet.getDataItems().length + classes.length)))); } DistributionFinder distFinder = new DistributionFinder(); distFinder.findDistributions(dataSet, classes); // return null; 
valid = true; return super.validate(testSet); }