/** * Get the Meteor sufficient statistics for a test give a list of references * * @param test * @param references * @return */ public MeteorStats getMeteorStats(String test, ArrayList<String> references) { // Normalize test if (normalize) test = Normalizer.normalizeLine(test, langID, keepPunctuation); if (lowerCase) test = test.toLowerCase(); MeteorStats stats = new MeteorStats(); stats.score = -1; // Score each reference for (String reference : references) { // Normalize reference if (normalize) reference = Normalizer.normalizeLine(reference, langID, keepPunctuation); if (lowerCase) reference = reference.toLowerCase(); Alignment alignment = aligner.align(test, reference); MeteorStats curStats = getMeteorStats(alignment); if (curStats.score > stats.score) stats = curStats; } return stats; }
/** * Get the Meteor score given sufficient statistics * * @param stats */ public void computeMetrics(MeteorStats stats) { stats.testWeightedMatches = 0; stats.referenceWeightedMatches = 0; stats.testWeightedLength = (delta * (stats.testLength - stats.testFunctionWords)) + ((1.0 - delta) * (stats.testFunctionWords)); stats.referenceWeightedLength = (delta * (stats.referenceLength - stats.referenceFunctionWords)) + ((1.0 - delta) * (stats.referenceFunctionWords)); // Apply module weights and delta to test and reference matches // (Content) for (int i = 0; i < moduleWeights.size(); i++) stats.testWeightedMatches += stats.testStageMatchesContent.get(i) * moduleWeights.get(i) * delta; for (int i = 0; i < moduleWeights.size(); i++) stats.referenceWeightedMatches += stats.referenceStageMatchesContent.get(i) * moduleWeights.get(i) * delta; // Apply module weights and delta to test and reference matches // (Function) for (int i = 0; i < moduleWeights.size(); i++) stats.testWeightedMatches += stats.testStageMatchesFunction.get(i) * moduleWeights.get(i) * (1.0 - delta); for (int i = 0; i < moduleWeights.size(); i++) stats.referenceWeightedMatches += stats.referenceStageMatchesFunction.get(i) * moduleWeights.get(i) * (1.0 - delta); // Precision = test matches / test length stats.precision = stats.testWeightedMatches / stats.testWeightedLength; // Recall = ref matches / ref length stats.recall = stats.referenceWeightedMatches / stats.referenceWeightedLength; // F1 = 2pr / (p + r) [not part of final score] stats.f1 = (2 * stats.precision * stats.recall) / (stats.precision + stats.recall); // Fmean = 1 / alpha-weighted average of p and r stats.fMean = 1.0 / (((1.0 - alpha) / stats.precision) + (alpha / stats.recall)); // Fragmentation double frag; // Case if test = ref if (stats.testTotalMatches == stats.testLength && stats.referenceTotalMatches == stats.referenceLength && stats.chunks == 1) frag = 0; else frag = ((double) stats.chunks) / (((double) (stats.testWordMatches + stats.referenceWordMatches)) / 2); // Fragmentation penalty stats.fragPenalty = gamma * Math.pow(frag, beta); // Score double score = stats.fMean * (1.0 - stats.fragPenalty); // Catch division by zero if (Double.isNaN(score)) stats.score = 0; else // score >= 0.0 stats.score = Math.max(score, 0.0); }
/** * Get the Meteor sufficient statistics for an alignment * * @param alignment * @return */ public MeteorStats getMeteorStats(Alignment alignment) { MeteorStats stats = new MeteorStats(); // Copy alignment stats // Sum word lengths if evaluating by character if (charBased) { stats.testLength = 0; for (String word : alignment.words1) stats.testLength += word.length(); stats.referenceLength = 0; for (String word : alignment.words2) stats.referenceLength += word.length(); stats.testFunctionWords = 0; for (int i : alignment.line1FunctionWords) stats.testFunctionWords += alignment.words1.get(i).length(); stats.referenceFunctionWords = 0; for (int i : alignment.line2FunctionWords) stats.referenceFunctionWords += alignment.words2.get(i).length(); // Module and total matches with summed word lengths int[] testStageMatchesContent = new int[alignment.moduleContentMatches1.size()]; int[] referenceStageMatchesContent = new int[alignment.moduleContentMatches1.size()]; int[] testStageMatchesFunction = new int[alignment.moduleContentMatches1.size()]; int[] referenceStageMatchesFunction = new int[alignment.moduleContentMatches1.size()]; // Sum these here to avoid pushing character-level operations to the // aligner for (Match m : alignment.matches) { if (m != null) { for (int i = 0; i < m.matchLength; i++) if (alignment.line1FunctionWords.contains(m.matchStart + i)) testStageMatchesFunction[m.module] += alignment.words1.get(m.matchStart + i).length(); else testStageMatchesContent[m.module] += alignment.words1.get(m.matchStart + i).length(); for (int i = 0; i < m.length; i++) if (alignment.line2FunctionWords.contains(m.start + i)) referenceStageMatchesFunction[m.module] += alignment.words2.get(m.start + i).length(); else referenceStageMatchesContent[m.module] += alignment.words2.get(m.start + i).length(); } } for (int i = 0; i < alignment.moduleContentMatches1.size(); i++) { stats.testStageMatchesContent.add(testStageMatchesContent[i]); stats.referenceStageMatchesContent.add(referenceStageMatchesContent[i]); stats.testStageMatchesFunction.add(testStageMatchesFunction[i]); stats.referenceStageMatchesFunction.add(referenceStageMatchesFunction[i]); } } // Otherwise use word counts else { stats.testLength = alignment.words1.size(); stats.referenceLength = alignment.words2.size(); stats.testFunctionWords = alignment.line1FunctionWords.size(); stats.referenceFunctionWords = alignment.line2FunctionWords.size(); stats.testStageMatchesContent = new ArrayList<Integer>(alignment.moduleContentMatches1); stats.referenceStageMatchesContent = new ArrayList<Integer>(alignment.moduleContentMatches2); stats.testStageMatchesFunction = new ArrayList<Integer>(alignment.moduleFunctionMatches1); stats.referenceStageMatchesFunction = new ArrayList<Integer>(alignment.moduleFunctionMatches2); } // Same for word and character level stats.chunks = alignment.numChunks; // Total matches // Important: sum from stage matches instead of taking total from // alignment as alignment totals are WEIGHTED and totals here are // UNWEIGHTED for (int i = 0; i < stats.testStageMatchesContent.size(); i++) { stats.testTotalMatches += stats.testStageMatchesContent.get(i); stats.testTotalMatches += stats.testStageMatchesFunction.get(i); stats.referenceTotalMatches += stats.referenceStageMatchesContent.get(i); stats.referenceTotalMatches += stats.referenceStageMatchesFunction.get(i); // Total for fragmentation/reporting stats.testWordMatches += alignment.moduleContentMatches1.get(i); stats.testWordMatches += alignment.moduleFunctionMatches1.get(i); stats.referenceWordMatches += alignment.moduleContentMatches2.get(i); stats.referenceWordMatches += alignment.moduleFunctionMatches2.get(i); } // Meteor score is required to pick best reference computeMetrics(stats); // Keep underlying alignment stats.alignment = alignment; return stats; }