/** * Correlation when one variable (X) is binary: r = (bar(x1) - bar(x0)) * sqrt(p(1-p)) / sx , * where bar(x1) = mean of X when Y is 1 bar(x0) = mean of X when Y is 0 sx = standard deviation * of X p = proportion of values where Y=1 */ public double pointBiserialCorrelation() { // here, Y = {incorrect = 0,correct = 1}, X = confidence double x0bar = getAverageIncorrectConfidence(); double x1bar = getAverageCorrectConfidence(); double p = (double) this.numCorrect / size(); double sx = getConfidenceStandardDeviation(); return (x1bar - x0bar) * Math.sqrt(p * (1 - p)) / sx; }
/**
 * Standard deviation of the confidence scores around {@code getConfidenceMean()}.
 *
 * <p>The sum of squared deviations is divided by {@code size()} (not {@code size() - 1}) before
 * the square root is taken.
 *
 * @return the standard deviation of all confidence values
 */
public double getConfidenceStandardDeviation() {
  final double center = getConfidenceMean();
  double squaredTotal = 0.0;
  int index = 0;
  while (index < size()) {
    double deviation = ((EntityConfidence) confidences.get(index)).confidence() - center;
    squaredTotal += deviation * deviation;
    index++;
  }
  return Math.sqrt(squaredTotal / (double) size());
}
public int numCorrectAtCoverage(double cov) { assert (cov <= 1 && cov > 0); // num accuracies to sum for this value of cov int numPoints = (int) (Math.round((double) size() * cov)); int numCorrect = 0; for (int i = 0; i < numPoints; i++) { if (((EntityConfidence) confidences.get(size() - i - 1)).correct()) numCorrect++; } return numCorrect; }
/** * Calculate pearson's R for the corellation between confidence and correct, where 1 = correct and * -1 = incorrect */ public double correlation() { double xSum = 0; double xSumOfSquares = 0; double ySum = 0; double ySumOfSquares = 0; double xySum = 0; // product of x and y for (int i = 0; i < size(); i++) { double value = ((EntityConfidence) confidences.get(i)).correct() ? 1.0 : -1.0; xSum += value; xSumOfSquares += (value * value); double conf = ((EntityConfidence) confidences.get(i)).confidence(); ySum += conf; ySumOfSquares += (conf * conf); xySum += value * conf; } double xVariance = xSumOfSquares - (xSum * xSum / size()); double yVariance = ySumOfSquares - (ySum * ySum / size()); double crossVariance = xySum - (xSum * ySum / size()); return crossVariance / Math.sqrt(xVariance * yVariance); }
/**
 * Accuracy over the entries selected by the given coverage: the value of {@code
 * numCorrectAtCoverage(cov)} divided by {@code round(size() * cov)}.
 *
 * @param cov fraction of the entries to consider; asserted to lie in (0, 1]
 * @return the proportion of considered entries that are correct
 */
public double accuracyAtCoverage(double cov) {
  assert (cov > 0 && cov <= 1);
  // Same rounded entry count that numCorrectAtCoverage derives from cov.
  final int denominator = (int) Math.round((double) size() * cov);
  final int numerator = numCorrectAtCoverage(cov);
  return (double) numerator / denominator;
}