예제 #1
0
  public boolean isCorrect(String words, Exp sem, Parser parser) {
    List<ParseResult> parses = parser.bestParses();
    if (parses.size() > 0) {
      noAnswer = false;
    } else {
      noAnswer = true;
    }
    if (parses.size() == 1) {
      ParseResult p = parses.get(0);
      Exp e = p.getExp();
      e = e.copy();
      e.simplify();
      List l = p.getLexEntries();
      parsed++;
      if (e.equals(sem)) {
        if (verbose) {
          System.out.println("CORRECT");
          printLex(l);
        }
        int lits = sem.allLitsCount();
        correctParses++;

        return true;
      } else {
        // one parse, it was wrong... oh well...
        if (verbose) {
          System.out.println("WRONG");
          System.out.println(parses.size() + " parses: " + parses);
          printLex(l);
        }
        wrongParses++;

        boolean hasCorrect = parser.hasParseFor(sem);
        if (verbose) {
          System.out.println("Had correct parse: " + hasCorrect);
          System.out.print("Feats: ");
          Exp eb = parser.bestSem();
          Chart c = parser.getChart();
          HashVector h = c.computeExpFeatVals(eb);
          h.divideBy(c.computeNorm(eb));
          h.dropSmallEntries();
          System.out.println(h);
        }
      }
    } else {
      noParses++;
      if (parses.size() > 1) {
        // There are more than one equally high scoring
        // logical forms. If this is the case, we abstain
        // from returning a result.
        if (verbose) {
          System.out.println("too many parses");
          System.out.println(parses.size() + " parses: " + parses);
        }
        Exp e = parses.get(0).getExp();
        ParseResult p = parses.get(0);
        boolean hasCorrect = parser.hasParseFor(sem);
        if (verbose) System.out.println("Had correct parse: " + hasCorrect);
      } else {
        // no parses, potentially reparse with word skipping
        if (verbose) System.out.println("no parses");
        if (emptyTest) {
          List<LexEntry> emps = new LinkedList<LexEntry>();
          for (int j = 0; j < Globals.tokens.size(); j++) {
            List l = Globals.tokens.subList(j, j + 1);
            LexEntry le = new LexEntry(l, Cat.EMP);
            emps.add(le);
          }

          parser.setTempLexicon(new Lexicon(emps));
          String mes = null;
          if (verbose) mes = "EMPTY";
          parser.parseTimed(words, null, mes);
          parser.setTempLexicon(null);
          parses = parser.bestParses();
          if (parses.size() == 1) {
            ParseResult p = parses.get(0);
            List l = p.getLexEntries();
            Exp e = p.getExp();
            e = e.copy();
            e.simplify();
            int noEmpty = p.noEmpty();
            if (e.equals(sem)) {
              if (verbose) {
                System.out.println("CORRECT");
                printLex(l);
              }
              emptyCorrect++;

            } else {
              // one parse, but wrong
              if (verbose) {
                System.out.println("WRONG: " + e);
                printLex(l);
                boolean hasCorrect = parser.hasParseFor(sem);
                System.out.println("Had correct parse: " + hasCorrect);
              }
            }
          } else {
            // too many parses or no parses
            emptyNoParses++;
            if (verbose) {
              System.out.println("WRONG:" + parses);
              boolean hasCorrect = parser.hasParseFor(sem);
              System.out.println("Had correct parse: " + hasCorrect);
            }
          }
        }
      }
    }
    return false;
  }
예제 #2
0
  /**
   * Return a parsed chart of the input, pruning each cell to pruneN entries.
   *
   * <p>Uses the CKY algorithm.
   */
  public Chart parse(String input, Exp pruningSem, boolean computeInside) {

    List tokens = tokenize(input);
    Globals.tokens = tokens;
    Globals.lastWordIndex = tokens.size() - 1;

    // create a chart and add the input words
    Chart chart = new Chart(tokens);

    if (pruningSem != null) {
      chart.setPruningSem(pruningSem);
    }

    // initialize cells list with the lexical rule
    lexicon.addCells(chart);
    if (tempLex != null) {
      tempLex.addCells(chart);
    }

    int size = tokens.size();
    List temp = new LinkedList();
    // now do the CKY parsing:
    for (int len = 1; len < size; len++) {
      for (int begin = 0; begin < size - len; begin++) {
        for (int split = 0; split < len; split++) {
          temp.clear();
          Iterator leftIter = chart.getCellsIterator(begin, begin + split);
          while (leftIter.hasNext()) {
            Cell left = (Cell) leftIter.next();
            Iterator rightIter = chart.getCellsIterator(begin + split + 1, begin + len);
            while (rightIter.hasNext()) {
              Cell right = (Cell) rightIter.next();
              Iterator rules = binaryRules.iterator();
              while (rules.hasNext()) {
                ((BinaryParseRule) rules.next()).newCellsFrom(left, right, temp);
              }
            }
          }
          chart.addAll(temp);
        }
      }
    }

    //		System.out.println("fffffffffffffffff");
    //		for(ParseResult pr : chart.getParseResults()){
    //			System.out.println(pr + "#" + pr.getScore());
    //		}
    //		System.out.println("fffffffffffffffff");

    chart.pruneParseResults(pruneN);
    // store the parse results
    allParses = chart.getParseResults();

    allParses = removeRepeats(allParses);

    // find best parses for test and for generating
    // new lexical items.

    bestParses = findBestParses(allParses);

    //		System.out.println("&&&&&&&&&&&&&&&&&&&&&");
    //		System.out.println(input + " " + pruningSem);
    //		for (int len = size-1; len>=0; len--){
    //			for (int begin = 0; begin<size-len; begin++){
    //				Iterator i = chart.getCellsIterator(begin,begin+len);
    //				while (i.hasNext()){
    //					Cell c = (Cell)i.next();
    //					System.out.println("len = " + len + " begin = " + begin + "\n" + c.lex);
    //				}
    //			}
    //		}
    //		System.out.println("&&&&&&&&&&&&&&&&&&&&&");

    return chart;
  }
예제 #3
0
  public void stocGradTrain(Parser parser, boolean testEachRound) {

    int numUpdates = 0;

    List<LexEntry> fixedEntries = new LinkedList<LexEntry>();
    fixedEntries.addAll(parser.returnLex().getLexicon());

    // add all sentential lexical entries.
    for (int l = 0; l < trainData.size(); l++) {
      parser.addLexEntries(trainData.getDataSet(l).makeSentEntries());
    }
    parser.setGlobals();

    DataSet data = null;
    // for each pass over the data
    for (int j = 0; j < EPOCHS; j++) {
      System.out.println("Training, iteration " + j);
      int total = 0, correct = 0, wrong = 0, looCorrect = 0, looWrong = 0;
      for (int l = 0; l < trainData.size(); l++) {

        // the variables to hold the current training example
        String words = null;
        Exp sem = null;

        data = trainData.getDataSet(l);
        if (verbose) System.out.println("---------------------");
        String filename = trainData.getFilename(l);
        if (verbose) System.out.println("DataSet: " + filename);
        if (verbose) System.out.println("---------------------");

        // loop through the training examples
        // try to create lexical entries for each training example
        for (int i = 0; i < data.size(); i++) {
          // print running stats
          if (verbose) {
            if (total != 0) {
              double r = (double) correct / total;
              double p = (double) correct / (correct + wrong);
              System.out.print(i + ": =========== r:" + r + " p:" + p);
              System.out.println(" (epoch:" + j + " file:" + l + " " + filename + ")");
            } else System.out.println(i + ": ===========");
          }

          // get the training example
          words = data.sent(i);
          sem = data.sem(i);
          if (verbose) {
            System.out.println(words);
            System.out.println(sem);
          }

          List<String> tokens = Parser.tokenize(words);

          if (tokens.size() > maxSentLen) continue;
          total++;

          String mes = null;
          boolean hasCorrect = false;

          // first, get all possible lexical entries from
          // a manipulation of the best parse.
          List<LexEntry> lex = makeLexEntriesChart(words, sem, parser);

          if (verbose) {
            System.out.println("Adding:");
            for (LexEntry le : lex) {
              System.out.println(le + " : " + LexiconFeatSet.initialWeight(le));
            }
          }

          parser.addLexEntries(lex);

          if (verbose) System.out.println("Lex Size: " + parser.returnLex().size());

          // first parse to see if we are currently correct
          if (verbose) mes = "First";
          parser.parseTimed(words, null, mes);

          Chart firstChart = parser.getChart();
          Exp best = parser.bestSem();

          // this just collates and outputs the training
          // accuracy.
          if (sem.equals(best)) {
            // System.out.println(parser.bestParses().get(0));
            if (verbose) {
              System.out.println("CORRECT:" + best);
              lex = parser.getMaxLexEntriesFor(sem);
              System.out.println("Using:");
              printLex(lex);
              if (lex.size() == 0) {
                System.out.println("ERROR: empty lex");
              }
            }
            correct++;
          } else {
            if (verbose) {
              System.out.println("WRONG: " + best);
              lex = parser.getMaxLexEntriesFor(best);
              System.out.println("Using:");
              printLex(lex);
              if (best != null && lex.size() == 0) {
                System.out.println("ERROR: empty lex");
              }
            }
            wrong++;
          }

          // compute first half of parameter update:
          // subtract the expectation of parameters
          // under the distribution that is conditioned
          // on the sentence alone.
          double norm = firstChart.computeNorm();
          HashVector update = new HashVector();
          HashVector firstfeats = null, secondfeats = null;
          if (norm != 0.0) {
            firstfeats = firstChart.computeExpFeatVals();
            firstfeats.divideBy(norm);
            firstfeats.dropSmallEntries();
            firstfeats.addTimesInto(-1.0, update);
          } else continue;
          firstChart = null;

          if (verbose) mes = "Second";
          parser.parseTimed(words, sem, mes);
          hasCorrect = parser.hasParseFor(sem);

          // compute second half of parameter update:
          // add the expectation of parameters
          // under the distribution that is conditioned
          // on the sentence and correct logical form.
          if (!hasCorrect) continue;
          Chart secondChart = parser.getChart();
          double secnorm = secondChart.computeNorm(sem);
          if (norm != 0.0) {
            secondfeats = secondChart.computeExpFeatVals(sem);
            secondfeats.divideBy(secnorm);
            secondfeats.dropSmallEntries();
            secondfeats.addTimesInto(1.0, update);
            lex = parser.getMaxLexEntriesFor(sem);
            data.setBestLex(i, lex);
            if (verbose) {
              System.out.println("Best LexEntries:");
              printLex(lex);
              if (lex.size() == 0) {
                System.out.println("ERROR: empty lex");
              }
            }
          } else continue;

          // now do the update
          double scale = alpha_0 / (1.0 + c * numUpdates);
          if (verbose) System.out.println("Scale: " + scale);
          update.multiplyBy(scale);
          update.dropSmallEntries();

          numUpdates++;
          if (verbose) {
            System.out.println("Update:");
            System.out.println(update);
          }
          if (!update.isBad()) {
            if (!update.valuesInRange(-100, 100)) {
              System.out.println("WARNING: large update");
              System.out.println("first feats: " + firstfeats);
              System.out.println("second feats: " + secondfeats);
            }
            parser.updateParams(update);
          } else {
            System.out.println(
                "ERROR: Bad Update: " + update + " -- norm: " + norm + " -- feats: ");
            parser.getParams().printValues(update);
            System.out.println();
          }
        } // end for each training example
      } // end for each data set

      double r = (double) correct / total;

      // we can prune the lexical items that were not used
      // in a max scoring parse.
      if (pruneLex) {
        Lexicon cur = new Lexicon();
        cur.addLexEntries(fixedEntries);
        cur.addLexEntries(data.getBestLex());
        parser.setLexicon(cur);
      }

      if (testEachRound) {
        System.out.println("Testing");
        test(parser, false);
      }
    } // end epochs loop
  }