예제 #1
0
  private static List<Pair<TregexPattern, TsurgeonPattern>> loadOps() {
    List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<>();

    String line = null;
    try {
      BufferedReader br = new BufferedReader(new StringReader(editStr));
      List<TsurgeonPattern> tsp = new ArrayList<>();
      while ((line = br.readLine()) != null) {
        if (DEBUG) System.err.print("Pattern is " + line);
        TregexPattern matchPattern = TregexPattern.compile(line);
        if (DEBUG) System.err.println(" [" + matchPattern + ']');
        tsp.clear();
        while (continuing(line = br.readLine())) {
          TsurgeonPattern p = Tsurgeon.parseOperation(line);
          if (DEBUG) System.err.println("Operation is " + line + " [" + p + ']');
          tsp.add(p);
        }
        if (!tsp.isEmpty()) {
          TsurgeonPattern tp = Tsurgeon.collectOperations(tsp);
          ops.add(new Pair<>(matchPattern, tp));
        }
      } // while not at end of file
    } catch (IOException ioe) {
      ioe.printStackTrace();
    }

    return ops;
  }
 public static TregexPattern getPattern(String tregex) {
   if (instance == null) {
     instance = new TregexPatternFactory();
   }
   Map<String, TregexPattern> myMap = instance.getMap();
   TregexPattern pattern = myMap.get(tregex);
   if (pattern == null) {
     try {
       pattern = TregexPattern.compile(tregex);
       myMap.put(tregex, pattern);
     } catch (Exception e) {
       e.printStackTrace();
     }
   }
   return pattern;
 }
예제 #3
0
  public static boolean filter(PhraseInfo phrase) {
    if (phrase == null || phrase.getTree() == null) return false;
    Tree phraseTree = phrase.getTree();

    // __ < ( NP < (PRP !< it|them) ) | < ( NP < ( NP < (PRP !< it|them) ) )
    String invalidPronounPattern =
        " ( NP < (PRP=prp !< " + Utils.wordsConjuction(Rules.VALID_PRONOUNS) + ") ) ";
    String filterPattern =
        "__ < " + invalidPronounPattern + " | < ( NP <" + invalidPronounPattern + ")";

    TregexPattern tregexPattern = TregexPattern.compile(filterPattern);
    TregexMatcher matcher = tregexPattern.matcher(phraseTree);

    if (matcher.matches()) {
      Proof proof = new Proof(ProofType.FAIL_PERSONAL_PRONOUN);
      Tree evdTree = matcher.getNode("prp");
      proof.setEvidenceTree(evdTree);
      phrase.addProof(proof);
      return false;
    }

    return true;
  }
예제 #4
0
public class ShiftReduceParserQuery implements ParserQuery {
  Debinarizer debinarizer = new Debinarizer(false);

  List<? extends HasWord> originalSentence;
  private State initialState, finalState;
  Tree debinarized;

  boolean success;
  boolean unparsable;

  final ShiftReduceParser parser;

  List<ParserConstraint> constraints = null;

  public ShiftReduceParserQuery(ShiftReduceParser parser) {
    this.parser = parser;
  }

  @Override
  public boolean parse(List<? extends HasWord> sentence) {
    this.originalSentence = sentence;
    initialState = ShiftReduceParser.initialStateFromTaggedSentence(sentence);
    return parseInternal();
  }

  public boolean parse(Tree tree) {
    this.originalSentence = tree.yieldHasWord();
    initialState = ShiftReduceParser.initialStateFromGoldTagTree(tree);
    return parseInternal();
  }

  // TODO: we are assuming that sentence final punctuation always has
  // either . or PU as the tag.
  private static TregexPattern rearrangeFinalPunctuationTregex =
      TregexPattern.compile(
          "__ !> __ <- (__=top <- (__ <<- (/[.]|PU/=punc < /[.!?。!?]/ ?> (__=single <: =punc))))");

  private static TsurgeonPattern rearrangeFinalPunctuationTsurgeon =
      Tsurgeon.parseOperation("[move punc >-1 top] [if exists single prune single]");

  private boolean parseInternal() {
    final int maxBeamSize = Math.max(parser.op.testOptions().beamSize, 1);

    success = true;
    unparsable = false;
    PriorityQueue<State> beam =
        new PriorityQueue<>(maxBeamSize + 1, ScoredComparator.ASCENDING_COMPARATOR);
    beam.add(initialState);
    // TODO: don't construct as many PriorityQueues
    while (beam.size() > 0) {
      // System.err.println("================================================");
      // System.err.println("Current beam:");
      // System.err.println(beam);
      PriorityQueue<State> oldBeam = beam;
      beam = new PriorityQueue<>(maxBeamSize + 1, ScoredComparator.ASCENDING_COMPARATOR);
      State bestState = null;
      for (State state : oldBeam) {
        Collection<ScoredObject<Integer>> predictedTransitions =
            parser.model.findHighestScoringTransitions(state, true, maxBeamSize, constraints);
        // System.err.println("Examining state: " + state);
        for (ScoredObject<Integer> predictedTransition : predictedTransitions) {
          Transition transition = parser.model.transitionIndex.get(predictedTransition.object());
          State newState = transition.apply(state, predictedTransition.score());
          // System.err.println("  Transition: " + transition + " (" + predictedTransition.score() +
          // ")");
          if (bestState == null || bestState.score() < newState.score()) {
            bestState = newState;
          }
          beam.add(newState);
          if (beam.size() > maxBeamSize) {
            beam.poll();
          }
        }
      }
      if (beam.size() == 0) {
        // Oops, time for some fallback plan
        // This can happen with the set of constraints given by the original paper
        // For example, one particular French model had a situation where it would reach
        //   @Ssub @Ssub .
        // without a left(Ssub) transition, so finishing the parse was impossible.
        // This will probably result in a bad parse, but at least it
        // will result in some sort of parse.
        for (State state : oldBeam) {
          Transition transition = parser.model.findEmergencyTransition(state, constraints);
          if (transition != null) {
            State newState = transition.apply(state);
            if (bestState == null || bestState.score() < newState.score()) {
              bestState = newState;
            }
            beam.add(newState);
          }
        }
      }

      // bestState == null only happens when we have failed to make progress, so quit
      // If the bestState is finished, we are done
      if (bestState == null || bestState.isFinished()) {
        break;
      }
    }
    List<State> bestParses;
    if (beam.size() == 0) {
      success = false;
      unparsable = true;
      debinarized = null;
      finalState = null;
      bestParses = Collections.emptyList();
    } else {
      // TODO: filter out beam elements that aren't finished
      bestParses = Generics.newArrayList(beam);
      Collections.sort(bestParses, beam.comparator());
      Collections.reverse(bestParses);
      finalState = bestParses.get(0);
      debinarized = debinarizer.transformTree(finalState.stack.peek());
      debinarized =
          Tsurgeon.processPattern(
              rearrangeFinalPunctuationTregex, rearrangeFinalPunctuationTsurgeon, debinarized);
    }
    return success;
  }

  /** TODO: if we add anything interesting to report, we should report it here */
  @Override
  public boolean parseAndReport(List<? extends HasWord> sentence, PrintWriter pwErr) {
    boolean success = parse(sentence);
    // System.err.println(getBestTransitionSequence());
    // System.err.println(getBestBinarizedParse());
    return success;
  }

  public Tree getBestBinarizedParse() {
    return finalState.stack.peek();
  }

  public List<Transition> getBestTransitionSequence() {
    return finalState.transitions.asList();
  }

  @Override
  public double getPCFGScore() {
    return finalState.score;
  }

  @Override
  public Tree getBestParse() {
    return debinarized;
  }

  /** TODO: can we get away with not calling this PCFG? */
  @Override
  public Tree getBestPCFGParse() {
    return debinarized;
  }

  @Override
  public Tree getBestDependencyParse(boolean debinarize) {
    return null;
  }

  @Override
  public Tree getBestFactoredParse() {
    return null;
  }

  /** TODO: if this is a beam, return all equal parses */
  @Override
  public List<ScoredObject<Tree>> getBestPCFGParses() {
    ScoredObject<Tree> parse = new ScoredObject<>(debinarized, finalState.score);
    return Collections.singletonList(parse);
  }

  @Override
  public boolean hasFactoredParse() {
    return false;
  }

  /** TODO: return more if this used a beam */
  @Override
  public List<ScoredObject<Tree>> getKBestPCFGParses(int kbestPCFG) {
    ScoredObject<Tree> parse = new ScoredObject<>(debinarized, finalState.score);
    return Collections.singletonList(parse);
  }

  @Override
  public List<ScoredObject<Tree>> getKGoodFactoredParses(int kbest) {
    throw new UnsupportedOperationException();
  }

  @Override
  public KBestViterbiParser getPCFGParser() {
    // TODO: find some way to treat this as a KBestViterbiParser?
    return null;
  }

  @Override
  public KBestViterbiParser getDependencyParser() {
    return null;
  }

  @Override
  public KBestViterbiParser getFactoredParser() {
    return null;
  }

  @Override
  public void setConstraints(List<ParserConstraint> constraints) {
    this.constraints = constraints;
  }

  @Override
  public boolean saidMemMessage() {
    return false;
  }

  @Override
  public boolean parseSucceeded() {
    return success;
  }

  /** TODO: skip sentences which are too long */
  @Override
  public boolean parseSkipped() {
    return false;
  }

  @Override
  public boolean parseFallback() {
    return false;
  }

  /** TODO: add memory handling? */
  @Override
  public boolean parseNoMemory() {
    return false;
  }

  @Override
  public boolean parseUnparsable() {
    return unparsable;
  }

  @Override
  public List<? extends HasWord> originalSentence() {
    return originalSentence;
  }

  /** TODO: clearly this should be a default method in ParserQuery once Java 8 comes out */
  @Override
  public void restoreOriginalWords(Tree tree) {
    if (originalSentence == null || tree == null) {
      return;
    }
    List<Tree> leaves = tree.getLeaves();
    if (leaves.size() != originalSentence.size()) {
      throw new IllegalStateException(
          "originalWords and sentence of different sizes: "
              + originalSentence.size()
              + " vs. "
              + leaves.size()
              + "\n Orig: "
              + Sentence.listToString(originalSentence)
              + "\n Pars: "
              + Sentence.listToString(leaves));
    }
    // TODO: get rid of this cast
    Iterator<? extends Label> wordsIterator =
        (Iterator<? extends Label>) originalSentence.iterator();
    for (Tree leaf : leaves) {
      leaf.setLabel(wordsIterator.next());
    }
  }
}