/**
  * Point-biserial correlation, i.e. the correlation when one variable (Y) is binary:
  * r = (bar(x1) - bar(x0)) * sqrt(p(1-p)) / sx, where bar(x1) = mean of X when Y is 1,
  * bar(x0) = mean of X when Y is 0, sx = standard deviation of X, and p = proportion of values
  * where Y = 1.
  *
  * <p>Here Y is correctness (incorrect = 0, correct = 1) and X is the confidence. No guard is
  * applied for an empty collection or a zero standard deviation; the floating-point division is
  * performed as-is.
  *
  * @return the point-biserial correlation between confidence and correctness
  */
 public double pointBiserialCorrelation() {
   double meanWhenIncorrect = getAverageIncorrectConfidence();
   double meanWhenCorrect = getAverageCorrectConfidence();
   // p: share of entries counted as correct
   double fractionCorrect = (double) this.numCorrect / size();
   double binarySpread = Math.sqrt(fractionCorrect * (1 - fractionCorrect));
   double meanGap = meanWhenCorrect - meanWhenIncorrect;
   return meanGap * binarySpread / getConfidenceStandardDeviation();
 }
 /**
  * Standard deviation of the confidence scores: the square root of the summed squared deviations
  * from {@code getConfidenceMean()}, divided by {@code size()}.
  *
  * @return the standard deviation of the confidence scores
  */
 public double getConfidenceStandardDeviation() {
   final double center = getConfidenceMean();
   final int count = size();
   double squaredDeviationTotal = 0.0;
   int idx = 0;
   while (idx < count) {
     double deviation = ((EntityConfidence) confidences.get(idx)).confidence() - center;
     squaredDeviationTotal += deviation * deviation;
     idx++;
   }
   // divides by the element count itself, not by count - 1
   return Math.sqrt(squaredDeviationTotal / (double) count);
 }
 /**
  * Counts the correct entries among the last {@code round(size() * cov)} elements of
  * {@code confidences}, walking backward from the final element.
  *
  * <p>NOTE(review): presumably the list is ordered so that its tail holds the most confident
  * entries; confirm against the code that populates it.
  *
  * @param cov coverage fraction; asserted to satisfy {@code 0 < cov <= 1}
  * @return the number of inspected entries for which {@code correct()} is true
  */
 public int numCorrectAtCoverage(double cov) {
   assert cov > 0 && cov <= 1;
   // how many entries, counted from the end of the list, fall inside this coverage
   int window = (int) Math.round((double) size() * cov);
   int hits = 0;
   for (int offset = 1; offset <= window; offset++) {
     EntityConfidence entry = (EntityConfidence) confidences.get(size() - offset);
     if (entry.correct()) {
       hits++;
     }
   }
   return hits;
 }
 /**
  * Calculates Pearson's r for the correlation between confidence and correctness, where
  * correctness is encoded as 1 = correct and -1 = incorrect.
  *
  * <p>No guard is applied when either variable has zero spread (or the collection is empty); the
  * floating-point division is performed as-is.
  *
  * @return Pearson's r between the correctness label and the confidence score
  */
 public double correlation() {
   final int n = size();
   double labelSum = 0;
   double labelSquares = 0;
   double scoreSum = 0;
   double scoreSquares = 0;
   double products = 0; // running sum of label * score
   for (int idx = 0; idx < n; idx++) {
     EntityConfidence entry = (EntityConfidence) confidences.get(idx);
     double label = entry.correct() ? 1.0 : -1.0;
     double score = entry.confidence();
     labelSum += label;
     labelSquares += label * label;
     scoreSum += score;
     scoreSquares += score * score;
     products += label * score;
   }
   // Centered sums of squares / cross-products; the common 1/n factor cancels in the ratio below.
   double labelSpread = labelSquares - (labelSum * labelSum / n);
   double scoreSpread = scoreSquares - (scoreSum * scoreSum / n);
   double coSpread = products - (labelSum * scoreSum / n);
   return coSpread / Math.sqrt(labelSpread * scoreSpread);
 }
 /**
  * Accuracy at the given coverage: {@code numCorrectAtCoverage(cov)} divided by
  * {@code round(size() * cov)}.
  *
  * <p>The denominator is not guarded, so a rounded count of zero is divided by as-is.
  *
  * @param cov coverage fraction; asserted to satisfy {@code 0 < cov <= 1}
  * @return the fraction of covered entries that are correct
  */
 public double accuracyAtCoverage(double cov) {
   assert cov > 0 && cov <= 1;
   int denominator = (int) Math.round(size() * cov);
   double numerator = numCorrectAtCoverage(cov);
   return numerator / denominator;
 }