/** * Classify an instance using NaiveBayes according to the trained data. The alphabet of the * featureVector of the instance must match the alphabe of the pipe used to train the classifier. * * @param instance to be classified. Data field must be a FeatureVector * @return Classification containing the labeling of the instance */ public Classification classify(Instance instance) { // Note that the current size of the label alphabet can be larger // than it was at the time of training. We are careful here // to correctly handle those labels here. For example, // we assume the log prior probability of those classes is // minus infinity. int numClasses = getLabelAlphabet().size(); double[] scores = new double[numClasses]; FeatureVector fv = (FeatureVector) instance.getData(); // Make sure the feature vector's feature dictionary matches // what we are expecting from our data pipe (and thus our notion // of feature probabilities. assert (instancePipe == null || fv.getAlphabet() == instancePipe.getDataAlphabet()); int fvisize = fv.numLocations(); prior.addLogProbabilities(scores); // Set the scores according to the feature weights and per-class probabilities for (int fvi = 0; fvi < fvisize; fvi++) { int fi = fv.indexAtLocation(fvi); for (int ci = 0; ci < numClasses; ci++) { // guard against dataAlphabet or target alphabet growing; can happen if classifying // a never before seen feature. Ignore these. if (ci >= p.length || fi >= p[ci].size()) continue; scores[ci] += fv.valueAtLocation(fvi) * p[ci].logProbability(fi); } } // Get the scores in the range near zero, where exp() is more accurate double maxScore = Double.NEGATIVE_INFINITY; for (int ci = 0; ci < numClasses; ci++) if (scores[ci] > maxScore) maxScore = scores[ci]; for (int ci = 0; ci < numClasses; ci++) scores[ci] -= maxScore; // Exponentiate and normalize double sum = 0; for (int ci = 0; ci < numClasses; ci++) sum += (scores[ci] = Math.exp(scores[ci])); for (int ci = 0; ci < numClasses; ci++) scores[ci] /= sum; // Create and return a Classification object return new Classification(instance, this, new LabelVector(getLabelAlphabet(), scores)); }