Пример #1
0
  /**
   * @param previousGrammar
   * @param previousLexicon
   * @param grammar
   * @param lexicon
   * @param trainStateSetTrees
   * @return
   */
  public static double doOneEStep(
      Grammar previousGrammar,
      Lexicon previousLexicon,
      Grammar grammar,
      Lexicon lexicon,
      StateSetTreeList trainStateSetTrees,
      boolean updateOnlyLexicon,
      int unkThreshold) {
    boolean secondHalf = false;
    ArrayParser parser = new ArrayParser(previousGrammar, previousLexicon);
    double trainingLikelihood = 0;
    int n = 0;
    int nTrees = trainStateSetTrees.size();
    for (Tree<StateSet> stateSetTree : trainStateSetTrees) {
      secondHalf = (n++ > nTrees / 2.0);
      boolean noSmoothing = true, debugOutput = false;
      parser.doInsideOutsideScores(stateSetTree, noSmoothing, debugOutput); // E Step
      double ll = stateSetTree.getLabel().getIScore(0);
      ll =
          Math.log(ll)
              + (100 * stateSetTree.getLabel().getIScale()); // System.out.println(stateSetTree);
      if ((Double.isInfinite(ll) || Double.isNaN(ll))) {
        if (VERBOSE) {
          System.out.println("Training sentence " + n + " is given " + ll + " log likelihood!");
          System.out.println(
              "Root iScore "
                  + stateSetTree.getLabel().getIScore(0)
                  + " scale "
                  + stateSetTree.getLabel().getIScale());
        }
      } else {
        lexicon.trainTree(stateSetTree, -1, previousLexicon, secondHalf, noSmoothing, unkThreshold);
        if (!updateOnlyLexicon) grammar.tallyStateSetTree(stateSetTree, previousGrammar); // E Step
        trainingLikelihood += ll; // there are for some reason some sentences that are unparsable
      }
    }
    lexicon.tieRareWordStats(unkThreshold);

    // SSIE
    ((SophisticatedLexicon) lexicon).overwriteWithMaxent();

    return trainingLikelihood;
  }
Пример #2
0
 /**
  * This updates the inside-outside probabilities for the list of trees using the parser's
  * doInsideScores and doOutsideScores methods.
  *
  * @param trees A list of binarized, annotated StateSet Trees.
  * @param parser The parser to score the trees.
  */
 public static void updateStateSetTrees(List<Tree<StateSet>> trees, ArrayParser parser) {
   for (Tree<StateSet> tree : trees) {
     parser.doInsideOutsideScores(tree, false, false);
   }
 }