/** * Correlation when one variable (X) is binary: r = (bar(x1) - bar(x0)) * sqrt(p(1-p)) / sx , * where bar(x1) = mean of X when Y is 1 bar(x0) = mean of X when Y is 0 sx = standard deviation * of X p = proportion of values where Y=1 */ public double pointBiserialCorrelation() { // here, Y = {incorrect = 0,correct = 1}, X = confidence double x0bar = getAverageIncorrectConfidence(); double x1bar = getAverageCorrectConfidence(); double p = (double) this.numCorrect / size(); double sx = getConfidenceStandardDeviation(); return (x1bar - x0bar) * Math.sqrt(p * (1 - p)) / sx; }
/**
 * Standard deviation of the confidence scores around getConfidenceMean().
 *
 * <p>This is the population form: the summed squared deviations are divided by size(), not by
 * size() - 1, before taking the square root.
 *
 * @return the square root of the mean squared deviation of the confidence values
 */
public double getConfidenceStandardDeviation() {
  double center = getConfidenceMean();
  double squaredDeviationTotal = 0.0;
  for (int index = 0; index < size(); index++) {
    EntityConfidence entry = (EntityConfidence) confidences.get(index);
    double deviation = entry.confidence() - center;
    squaredDeviationTotal += deviation * deviation;
  }
  double meanSquaredDeviation = squaredDeviationTotal / (double) size();
  return Math.sqrt(meanSquaredDeviation);
}
/** * Calculate pearson's R for the corellation between confidence and correct, where 1 = correct and * -1 = incorrect */ public double correlation() { double xSum = 0; double xSumOfSquares = 0; double ySum = 0; double ySumOfSquares = 0; double xySum = 0; // product of x and y for (int i = 0; i < size(); i++) { double value = ((EntityConfidence) confidences.get(i)).correct() ? 1.0 : -1.0; xSum += value; xSumOfSquares += (value * value); double conf = ((EntityConfidence) confidences.get(i)).confidence(); ySum += conf; ySumOfSquares += (conf * conf); xySum += value * conf; } double xVariance = xSumOfSquares - (xSum * xSum / size()); double yVariance = ySumOfSquares - (ySum * ySum / size()); double crossVariance = xySum - (xSum * ySum / size()); return crossVariance / Math.sqrt(xVariance * yVariance); }