/** * Run the randomization test * * @param baseline * @param target * @return */ public double test(HashMap<String, Double> target, HashMap<String, Double> baseline) { double[] b = new double[baseline.keySet().size()]; // baseline double[] t = new double[target.keySet().size()]; // target int c = 0; for (String key : baseline.keySet()) { b[c] = baseline.get(key).doubleValue(); t[c] = target.get(key).doubleValue(); c++; } double trueDiff = Math.abs(BasicStats.mean(b) - BasicStats.mean(t)); double pvalue = 0.0; double[] pb = new double[baseline.keySet().size()]; // permutation of baseline double[] pt = new double[target.keySet().size()]; // permutation of target for (int i = 0; i < nPermutation; i++) { char[] bits = randomBitVector(b.length).toCharArray(); for (int j = 0; j < b.length; j++) { if (bits[j] == '0') { pb[j] = b[j]; pt[j] = t[j]; } else { pb[j] = t[j]; pt[j] = b[j]; } } double pDiff = Math.abs(BasicStats.mean(pb) - BasicStats.mean(pt)); if (pDiff >= trueDiff) pvalue += 1.0; } return pvalue / nPermutation; }
/**
 * Builds the explanation for the value returned by {@link #lambda(BasicStats)}.
 *
 * <p>The description is this class's simple name followed by {@code ", computed from: "}; the
 * two nested details report the document frequency and the number of documents read from
 * {@code stats}.
 */
@Override
public final Explanation explain(BasicStats stats) {
    final float value = lambda(stats);
    final String description = getClass().getSimpleName() + ", computed from: ";
    final Explanation docFreqDetail = Explanation.match(stats.getDocFreq(), "docFreq");
    final Explanation numDocsDetail =
        Explanation.match(stats.getNumberOfDocuments(), "numberOfDocuments");
    return Explanation.match(value, description, docFreqDetail, numDocsDetail);
}
/**
 * Explains how the normalized term frequency was obtained.
 *
 * <p>This generic explanation suits normalizations that derive the normalized term frequency
 * from the document's field length and the average field length. It reports the value of
 * {@code tfn(stats, tf, len)} together with {@code tf}, the average field length and
 * {@code len} as details. Subclasses relying on other statistics must override this method.
 */
public Explanation explain(BasicStats stats, float tf, float len) {
    final Explanation explanation = new Explanation();

    final String description = getClass().getSimpleName() + ", computed from: ";
    explanation.setDescription(description);
    explanation.setValue(tfn(stats, tf, len));

    // Details are appended in a fixed order: tf, avgFieldLength, len.
    final Explanation tfDetail = new Explanation(tf, "tf");
    explanation.addDetail(tfDetail);
    final Explanation avgLengthDetail =
        new Explanation(stats.getAvgFieldLength(), "avgFieldLength");
    explanation.addDetail(avgLengthDetail);
    final Explanation lengthDetail = new Explanation(len, "len");
    explanation.addDetail(lengthDetail);

    return explanation;
}
/**
 * Computes lambda as {@code (docFreq + 1) / (numberOfDocuments + 1)} in float arithmetic,
 * using the document frequency and document count taken from {@code stats}.
 */
@Override
public final float lambda(BasicStats stats) {
    final float numerator = stats.getDocFreq() + 1F;
    final float denominator = stats.getNumberOfDocuments() + 1F;
    return numerator / denominator;
}