/** * @param maxGrammar * @param maxLexicon * @param validationStateSetTrees * @return */ public static double calculateLogLikelihood( Grammar maxGrammar, Lexicon maxLexicon, StateSetTreeList validationStateSetTrees) { ArrayParser parser = new ArrayParser(maxGrammar, maxLexicon); int unparsable = 0; double maxLikelihood = 0; for (Tree<StateSet> stateSetTree : validationStateSetTrees) { parser.doInsideScores(stateSetTree, false, false, null); // Only inside scores are needed here double ll = stateSetTree.getLabel().getIScore(0); ll = Math.log(ll) + (100 * stateSetTree.getLabel().getIScale()); if (Double.isInfinite(ll) || Double.isNaN(ll)) { unparsable++; // printBadLLReason(stateSetTree, lexicon); } else maxLikelihood += ll; // there are for some reason some sentences that are unparsable } // if (unparsable>0) System.out.print("Number of unparsable trees: "+unparsable+"."); return maxLikelihood; }
/** * @param previousGrammar * @param previousLexicon * @param grammar * @param lexicon * @param trainStateSetTrees * @return */ public static double doOneEStep( Grammar previousGrammar, Lexicon previousLexicon, Grammar grammar, Lexicon lexicon, StateSetTreeList trainStateSetTrees, boolean updateOnlyLexicon, int unkThreshold) { boolean secondHalf = false; ArrayParser parser = new ArrayParser(previousGrammar, previousLexicon); double trainingLikelihood = 0; int n = 0; int nTrees = trainStateSetTrees.size(); for (Tree<StateSet> stateSetTree : trainStateSetTrees) { secondHalf = (n++ > nTrees / 2.0); boolean noSmoothing = true, debugOutput = false; parser.doInsideOutsideScores(stateSetTree, noSmoothing, debugOutput); // E Step double ll = stateSetTree.getLabel().getIScore(0); ll = Math.log(ll) + (100 * stateSetTree.getLabel().getIScale()); // System.out.println(stateSetTree); if ((Double.isInfinite(ll) || Double.isNaN(ll))) { if (VERBOSE) { System.out.println("Training sentence " + n + " is given " + ll + " log likelihood!"); System.out.println( "Root iScore " + stateSetTree.getLabel().getIScore(0) + " scale " + stateSetTree.getLabel().getIScale()); } } else { lexicon.trainTree(stateSetTree, -1, previousLexicon, secondHalf, noSmoothing, unkThreshold); if (!updateOnlyLexicon) grammar.tallyStateSetTree(stateSetTree, previousGrammar); // E Step trainingLikelihood += ll; // there are for some reason some sentences that are unparsable } } lexicon.tieRareWordStats(unkThreshold); // SSIE ((SophisticatedLexicon) lexicon).overwriteWithMaxent(); return trainingLikelihood; }
/**
 * Recomputes the inside and outside scores of every tree in the list by calling the parser's
 * {@code doInsideOutsideScores} method on it, with both boolean flags set to {@code false}.
 *
 * @param trees A list of binarized, annotated StateSet Trees.
 * @param parser The parser to score the trees.
 */
public static void updateStateSetTrees(List<Tree<StateSet>> trees, ArrayParser parser) {
  // Flag names follow the arguments passed to doInsideOutsideScores in doOneEStep.
  final boolean noSmoothing = false;
  final boolean debugOutput = false;

  for (Tree<StateSet> stateSetTree : trees) {
    parser.doInsideOutsideScores(stateSetTree, noSmoothing, debugOutput);
  }
}