/** Calculate the log probability of each class (sublabel) for the given datum (feature bundle). */
public <F, L> double[] getLogProbabilities(
    EncodedDatum datum,
    double[] weights,
    Encoding<F, L> encoding,
    IndexLinearizer indexLinearizer) {
  // Compute unnormalized log probabilities: for each sublabel, a dot product of the
  // datum's active features with that sublabel's weights.
  int numSubLabels = encoding.getNumSubLabels();
  double[] logProbabilities = DoubleArrays.constantArray(0.0, numSubLabels);
  for (int i = 0; i < datum.getNumActiveFeatures(); i++) {
    int featureIndex = datum.getFeatureIndex(i);
    double featureCount = datum.getFeatureCount(i);
    for (int j = 0; j < numSubLabels; j++) {
      int index = indexLinearizer.getLinearIndex(featureIndex, j);
      double weight = weights[index];
      logProbabilities[j] += weight * featureCount;
    }
  }
  // Normalize in log space: subtract log(sum_j exp(logProbabilities[j])).
  double logNormalizer = SloppyMath.logAdd(logProbabilities);
  for (int i = 0; i < numSubLabels; i++) {
    logProbabilities[i] -= logNormalizer;
  }
  return logProbabilities;
}
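/*
 * For reference, the normalizer above is a log-sum-exp. Below is a minimal sketch of what
 * SloppyMath.logAdd is assumed to compute, using the standard max-shift trick for numerical
 * stability. The helper name logSumExp is ours, not the library's.
 */
static double logSumExp(double[] logValues) {
  double max = Double.NEGATIVE_INFINITY;
  for (double v : logValues) max = Math.max(max, v);   // shift by the max so exp() cannot overflow
  if (max == Double.NEGATIVE_INFINITY) return max;     // every term had zero probability
  double sum = 0.0;
  for (double v : logValues) sum += Math.exp(v - max); // each shifted term is now in (0, 1]
  return max + Math.log(sum);                          // equals log(sum_i exp(logValues[i]))
}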
/**
 * The most important part of the classifier learning process! This method determines, for the
 * given weight vector x, the negative log conditional likelihood of the data, as well as the
 * derivatives of that likelihood with respect to each weight parameter.
 */
public Pair<Double, double[]> calculate() {
  double objective = 0.0;
  System.out.println("In Calculate...");
  double[] derivatives = DoubleArrays.constantArray(0.0, dimension());
  int numSubLabels = encoding.getNumSubLabels();
  int numData = data.length;
  for (int l = 0; l < numData; ++l) {
    EncodedDatum datum = data[l];
    double[] logProbabilities = getLogProbabilities(datum, x, encoding, indexLinearizer);
    int C = datum.getLabelIndex();
    double[] labelWeights = datum.getWeights();
    int numSubstatesC = labelWeights.length;
    int substate0 = encoding.getLabelSubindexBegin(C);
    for (int c = 0; c < numSubstatesC; c++) { // For each substate of label C
      objective -= labelWeights[c] * logProbabilities[substate0 + c];
    }
    // Convert to probabilities:
    double[] probabilities = new double[numSubLabels];
    double sum = 0.0;
    for (int c = 0; c < numSubLabels; ++c) { // For each substate
      probabilities[c] = Math.exp(logProbabilities[c]);
      sum += probabilities[c];
    }
    if (Math.abs(sum - 1.0) > 1e-3) {
      System.err.println("Probabilities do not sum to 1: " + sum);
    }
    // Compute derivatives: expected feature counts under the model, minus the empirical
    // feature counts for the observed label's substates.
    for (int i = 0; i < datum.getNumActiveFeatures(); ++i) {
      int featureIndex = datum.getFeatureIndex(i);
      double featureCount = datum.getFeatureCount(i);
      for (int c = 0; c < numSubLabels; ++c) { // For each substate
        int index = indexLinearizer.getLinearIndex(featureIndex, c);
        derivatives[index] += featureCount * probabilities[c]; // expected count
      }
      for (int c = 0; c < numSubstatesC; c++) { // For each substate of label C
        int index = indexLinearizer.getLinearIndex(featureIndex, substate0 + c);
        derivatives[index] -= labelWeights[c] * featureCount; // empirical count
      }
    }
  }
  // Incorporate the penalty term (L2 regularization, i.e. a Gaussian prior with variance
  // sigma^2) into the objective and derivatives.
  double sigma2 = sigma * sigma;
  double penalty = 0.0;
  for (int index = 0; index < x.length; ++index) {
    penalty += x[index] * x[index];
  }
  objective += penalty / (2 * sigma2);
  for (int index = 0; index < x.length; ++index) { // 'x' and 'derivatives' have same layout
    derivatives[index] += x[index] / sigma2;
  }
  return new Pair<Double, double[]>(objective, derivatives);
}
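/*
 * Because calculate() pairs an objective with hand-derived gradients, a central-difference
 * check is a cheap way to catch sign or indexing bugs. This sketch is ours, not part of the
 * original class: it assumes calculate() re-evaluates against the current field x and that
 * Pair exposes getFirst()/getSecond(); the name checkGradient and the choice of eps
 * (e.g. 1e-5) are hypothetical.
 */
void checkGradient(double eps) {
  double[] analytic = calculate().getSecond(); // gradient at the current weights
  java.util.Random rng = new java.util.Random(0);
  for (int trial = 0; trial < 5; trial++) {
    int index = rng.nextInt(x.length); // spot-check a few random coordinates
    double saved = x[index];
    x[index] = saved + eps;
    double up = calculate().getFirst();
    x[index] = saved - eps;
    double down = calculate().getFirst();
    x[index] = saved; // restore the weight
    double numeric = (up - down) / (2 * eps); // central difference approximates the derivative
    System.out.println("dim " + index + ": analytic=" + analytic[index] + ", numeric=" + numeric);
  }
}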