Пример #1
0
  /**
   * Adds to the agenda one item for each unary rule that may be applied on top of the parse held
   * by {@code newItem}.
   *
   * @param model builds the agenda item for every newly created unary node
   * @param newItem the item whose parse becomes the child of the new unary nodes
   * @param agenda the queue that receives the new items
   */
  private void updateAgendaUnary(
      final Model model, final AgendaItem newItem, final PriorityQueue<AgendaItem> agenda) {
    final SyntaxTreeNode child = newItem.getParse();
    final List<UnaryRule> candidates = unaryRules.get(child.getCategory());
    if (candidates.isEmpty()) {
      return;
    }

    // The child counts as a punctuation node when it was built by an LP or RP rule.
    final boolean childIsPunctuationNode =
        child.getRuleType() == RuleType.LP || child.getRuleType() == RuleType.RP;

    for (final UnaryRule rule : candidates) {
      // On top of a punctuation node only type-raising rules may apply, i.e. something like
      // (NP (N ,)) is rejected. Type-raising stays allowed to keep the Eisner Normal Form
      // constraints simple (a punctuation rule would otherwise hide the fact that a node is the
      // output of type-raising).
      // TODO should probably refactor the constraint into NormalForm.
      if (childIsPunctuationNode && !rule.isTypeRaising()) {
        continue;
      }

      final SyntaxTreeNodeUnary unaryNode;
      if (!usingDependencies) {
        // No dependency tracking: the node carries no dependency information at all.
        unaryNode = new SyntaxTreeNodeUnary(rule.getResult(), child, null, rule, null);
      } else {
        // The transformation fills this list while producing the new dependency structure.
        final List<UnlabelledDependency> resolved = new ArrayList<>();
        unaryNode =
            new SyntaxTreeNodeUnary(
                rule.getResult(),
                child,
                rule.getDependencyStructureTransformation()
                    .apply(child.getDependencyStructure(), resolved),
                rule,
                resolved);
      }

      agenda.add(model.unary(newItem, unaryNode, rule));
    }
  }
Пример #2
0
  /**
   * Searches for up to {@code nbest} complete parses of the input by repeatedly moving the head of
   * a priority-queue agenda into a sparse chart.
   *
   * <p>The search ends when the chart holds {@code maxChartSize} entries, when the agenda is
   * exhausted, or, once at least one parse has been found, when {@code nbest} parses have been
   * collected or the cost of the next agenda item is no longer greater than {@code nbestBeam}
   * times the score of the first parse found.
   *
   * @param input the sentence to parse
   * @return the parses in the order they were found, or {@code null} if no parse was found
   */
  @Override
  protected List<Scored<SyntaxTreeNode>> parse(final InputToParser input) {
    final ChartCellFactory perSentenceCells = cellFactory.forNewSentence();
    final List<InputWord> words = input.getInputWords();
    final Model model = modelFactory.make(input);
    final int numWords = words.size();

    final PriorityQueue<AgendaItem> agenda = new PriorityQueue<>(1000);
    model.buildAgenda(agenda, words);

    // chart[start][length - 1] is the cell for the span of that start and length.
    final ChartCell[][] chart = new ChartCell[numWords][numWords];
    final List<Scored<SyntaxTreeNode>> parses = new ArrayList<>(nbest);
    int entriesInChart = 0;

    // The A* chart is very sparse compared to CKY charts, so the non-empty cells are indexed by
    // the position where their span starts and by the position where it ends.
    final List<List<ChartCell>> cellsByStart = new ArrayList<>(numWords + 1);
    final List<List<ChartCell>> cellsByEnd = new ArrayList<>(numWords + 1);
    for (int position = 0; position <= numWords; position++) {
      cellsByStart.add(new ArrayList<>());
      cellsByEnd.add(new ArrayList<>());
    }

    // Dummy cell in which the complete parses are stored.
    final ChartCell completeParses = perSentenceCells.make();

    while (entriesInChart < maxChartSize) {
      if (!parses.isEmpty()) {
        // A parse already exists: go on only if more are wanted and the next agenda item is
        // still inside the n-best beam.
        final boolean keepSearching =
            parses.size() < nbest
                && !agenda.isEmpty()
                && agenda.peek().getCost() > nbestBeam * parses.get(0).getScore();
        if (!keepSearching) {
          break;
        }
      }

      final AgendaItem item = agenda.poll();
      if (item == null) {
        // Nothing left to explore.
        break;
      }

      final int start = item.getStartOfSpan();
      final int length = item.getSpanLength();
      final int end = start + length;

      // Find the cell for this span, creating and indexing it on first use.
      ChartCell cell = chart[start][length - 1];
      if (cell == null) {
        cell = perSentenceCells.make();
        chart[start][length - 1] = cell;
        cellsByStart.get(start).add(cell);
        cellsByEnd.get(end).add(cell);
      }

      if (!cell.add(item)) {
        // The cell rejected the item, so the agenda does not need updating.
        continue;
      }
      entriesInChart++;

      // Does the new entry span the whole sentence with a usable score?
      if (length == numWords && item.getInsideScore() > Double.NEGATIVE_INFINITY) {
        final boolean rootIsAcceptable =
            possibleRootCategories.isEmpty()
                || possibleRootCategories.contains(item.getParse().getCategory());
        // For N-best parsing the dummy cell checks that the parse is unique, e.g. that its
        // dependencies are unique, ignoring the category.
        if (rootIsAcceptable && completeParses.add("", item)) {
          parses.add(new Scored<>(item.getParse(), item.getInsideScore()));
        }
      }

      // Unary rules applicable to the new entry.
      updateAgendaUnary(model, item, agenda);

      // Binary rules with the new entry as left argument: partners start where it ends.
      for (final ChartCell neighbourOnRight : cellsByStart.get(end)) {
        for (final AgendaItem rightItem : neighbourOnRight.getEntries()) {
          updateAgenda(agenda, item, rightItem, model);
        }
      }

      // Binary rules with the new entry as right argument: partners end where it starts.
      for (final ChartCell neighbourOnLeft : cellsByEnd.get(start)) {
        for (final AgendaItem leftItem : neighbourOnLeft.getEntries()) {
          updateAgenda(agenda, leftItem, item, model);
        }
      }
    }

    // An empty result means parse failure, which is reported as null.
    return parses.isEmpty() ? null : parses;
  }
Пример #3
0
  /**
   * Adds to the agenda one item for every rule production that combines the parses of {@code left}
   * and {@code right} and passes the normal-form check.
   *
   * @param agenda the queue that receives the new items
   * @param left the item providing the left child
   * @param right the item providing the right child
   * @param model builds the agenda item for every newly created binary node
   */
  private void updateAgenda(
      final PriorityQueue<AgendaItem> agenda,
      final AgendaItem left,
      final AgendaItem right,
      final Model model) {

    final SyntaxTreeNode leftChild = left.getParse();
    final SyntaxTreeNode rightChild = right.getParse();

    // Skip the rule lookup entirely for category pairs that are not marked as seen.
    if (!seenRules.isSeen(leftChild.getCategory(), rightChild.getCategory())) {
      return;
    }

    for (final RuleProduction production :
        getRules(leftChild.getCategory(), rightChild.getCategory())) {
      // Only productions accepted by the normal-form constraints are used.
      final boolean allowedByNormalForm =
          NormalForm.isOk(
              leftChild.getRuleClass(),
              rightChild.getRuleClass(),
              production.getRuleType(),
              leftChild.getCategory(),
              rightChild.getCategory(),
              production.getCategory(),
              left.getStartOfSpan() == 0);
      if (!allowedByNormalForm) {
        continue;
      }

      final List<UnlabelledDependency> resolvedDependencies;
      final DependencyStructure combinedDependencies;
      final boolean headIsLeft;
      if (usingDependencies) {
        // Track dependencies: the combinator fills resolvedDependencies while building the
        // dependency structure of the new node.
        resolvedDependencies = new ArrayList<>();
        combinedDependencies =
            production
                .getCombinator()
                .apply(
                    leftChild.getDependencyStructure(),
                    rightChild.getDependencyStructure(),
                    resolvedDependencies);
        // The head is on the left when the combined structure has the left child's head.
        headIsLeft =
            combinedDependencies.getArbitraryHead()
                == leftChild.getDependencyStructure().getArbitraryHead();
      } else {
        // Without dependency modeling a lot of work is saved: the head side comes straight
        // from the production.
        resolvedDependencies = null;
        combinedDependencies = null;
        headIsLeft = production.isHeadIsLeft();
      }

      final SyntaxTreeNodeBinary combined =
          new SyntaxTreeNodeBinary(
              production.getCategory(),
              leftChild,
              rightChild,
              production.getRuleType(),
              headIsLeft,
              combinedDependencies,
              resolvedDependencies);

      agenda.add(model.combineNodes(left, right, combined));
    }
  }