/**
  * Runs a paired, two-sided randomization (permutation) test on the difference of means.
  *
  * <p>Scores are paired by the keys of {@code baseline}. Each of the {@code nPermutation} rounds
  * draws a bit vector via {@code randomBitVector} and swaps the two scores of every pair whose
  * bit is not {@code '0'}. A round counts when the absolute difference between the permuted means
  * is at least as large as the observed absolute difference.
  *
  * @param target scores of the system under test, keyed like {@code baseline}; keys that do not
  *     occur in {@code baseline} are ignored
  * @param baseline scores of the reference system; its key set defines the pairs
  * @return the fraction of rounds whose permuted difference is at least the observed difference
  * @throws IllegalArgumentException if {@code target} has no score for a key of {@code baseline}
  */
 public double test(HashMap<String, Double> target, HashMap<String, Double> baseline) {
   // Every array holds exactly one slot per pair. Sizing the target arrays by target.size()
   // would leave zero-filled slots whenever target has extra keys and so distort the means.
   final int n = baseline.size();
   double[] b = new double[n]; // baseline
   double[] t = new double[n]; // target
   int c = 0;
   for (String key : baseline.keySet()) {
     b[c] = baseline.get(key).doubleValue();
     Double targetScore = target.get(key);
     if (targetScore == null) {
       // Fail with a readable message instead of an anonymous unboxing NullPointerException.
       throw new IllegalArgumentException("target has no score for key: " + key);
     }
     t[c] = targetScore.doubleValue();
     c++;
   }
   double trueDiff = Math.abs(BasicStats.mean(b) - BasicStats.mean(t));
   int atLeastAsExtreme = 0; // rounds whose permuted difference reaches the observed one
   double[] pb = new double[n]; // permutation of baseline
   double[] pt = new double[n]; // permutation of target
   for (int i = 0; i < nPermutation; i++) {
     char[] bits = randomBitVector(n).toCharArray();
     for (int j = 0; j < n; j++) {
       if (bits[j] == '0') {
         // keep the pair as observed
         pb[j] = b[j];
         pt[j] = t[j];
       } else {
         // swap the two scores of this pair
         pb[j] = t[j];
         pt[j] = b[j];
       }
     }
     double pDiff = Math.abs(BasicStats.mean(pb) - BasicStats.mean(pt));
     if (pDiff >= trueDiff) {
       atLeastAsExtreme++;
     }
   }
   return (double) atLeastAsExtreme / nPermutation;
 }
// Example #2
// 0
 /**
  * Builds an explanation whose value is {@code lambda(stats)} and whose details are the document
  * frequency and the number of documents read from {@code stats}.
  *
  * @param stats the statistics the lambda value is computed from
  * @return the explanation, described by this class's simple name
  */
 @Override
 public final Explanation explain(BasicStats stats) {
   final float lambdaValue = lambda(stats);
   final String description = getClass().getSimpleName() + ", computed from: ";
   final Explanation docFreqDetail = Explanation.match(stats.getDocFreq(), "docFreq");
   final Explanation numberOfDocumentsDetail =
       Explanation.match(stats.getNumberOfDocuments(), "numberOfDocuments");
   return Explanation.match(lambdaValue, description, docFreqDetail, numberOfDocumentsDetail);
 }
 /**
  * Explains how the normalized term frequency was obtained.
  *
  * <p>The generic explanation lists the raw term frequency, the average field length taken from
  * {@code stats}, and the field length of the document, which are the inputs of the default
  * normalization methods. Subclasses that rely on other statistics must override this method.
  *
  * @param stats the statistics providing the average field length
  * @param tf the raw term frequency
  * @param len the field length of the document
  * @return an explanation valued with {@code tfn(stats, tf, len)}
  */
 public Explanation explain(BasicStats stats, float tf, float len) {
   final String description = getClass().getSimpleName() + ", computed from: ";
   final float normalizedTf = tfn(stats, tf, len);
   final Explanation explanation = new Explanation(normalizedTf, description);

   // Details are appended in a fixed order: tf, avgFieldLength, len.
   final Explanation tfDetail = new Explanation(tf, "tf");
   explanation.addDetail(tfDetail);
   final Explanation avgLengthDetail = new Explanation(stats.getAvgFieldLength(), "avgFieldLength");
   explanation.addDetail(avgLengthDetail);
   final Explanation lengthDetail = new Explanation(len, "len");
   explanation.addDetail(lengthDetail);
   return explanation;
 }
// Example #4
// 0
 /**
  * Computes {@code (docFreq + 1) / (numberOfDocuments + 1)} in float arithmetic.
  *
  * @param stats the statistics providing the document frequency and the number of documents
  * @return the ratio of the two incremented counts
  */
 @Override
 public final float lambda(BasicStats stats) {
   final float numerator = stats.getDocFreq() + 1F;
   final float denominator = stats.getNumberOfDocuments() + 1F;
   return numerator / denominator;
 }