Exemple #1
  /**
   * Returns the sentence from its tree representation.
   *
   * @param t the tree representation of the sentence
   * @return the sentence
   */
  public static String tree2Words(Tree t) {
    StringBuilder buffer = new StringBuilder();

    List<Tree> leaves = t.getLeaves();
    for (Tree leaf : leaves) {
      String word = ((CoreLabel) leaf.label()).get(CoreAnnotations.ValueAnnotation.class);

      // TODO maybe double check preceding whitespaces, because transformations could have
      // resulted in the situation that the trailing
      // whitespaces of out last tokens is not the same as the preceding whitespaces of out
      // current token BUT: This has also to be done in getTokenListFromTree(...)

      // now add the trailing whitespaces
      String trailingWhitespaces =
          ((CoreLabel) leaf.label()).get(CoreAnnotations.AfterAnnotation.class);
      // if no whitespace-info is available, insert a whitespace this may happen for nodes
      // inserted by TSurgeon operations
      if (trailingWhitespaces == null) {
        trailingWhitespaces = " ";


    return buffer.toString();
 /** Terse representation of a (sub-)tree: NP[the white dog] -vs- (NP (DT the) (JJ white) (NN dog)) */
 public static String abbrevTree(Tree tree) {
   ArrayList<String> toks = new ArrayList();
   for (Tree L : tree.getLeaves()) {
   return tree.label().toString() + "[" + StringUtils.join(toks, " ") + "]";
  /**
   * Debug helper: strips periods from the sentence, parses it through StanfordCoreNlpClient,
   * iterates over the children of the POS tree's first child and finally prints every leaf
   * together with the label of its parent.
   *
   * <p>NOTE(review): this snippet is truncated. The call that should receive the string on the
   * first body line, the body of the first loop and all closing braces are missing; restore
   * them from the original file before compiling.
   *
   * @param sentence the raw sentence to parse
   */
  private static void printSentenceParseTree(String sentence) {
        "ParsedSentence being analyzed: \"" + sentence + "\"\n---------------------------");

    // Remove every literal '.' before handing the sentence to the parser.
    sentence = sentence.replaceAll("\\.", "");

    final ParsedSentence parsedSentence = StanfordCoreNlpClient.parseSentence(sentence, false);
    final Tree tree = parsedSentence.getPosTree();
    // Children of the first child of the POS tree.
    final List<Tree> trees = tree.getChild(0).getChildrenAsList();
    for (final Tree part : trees) {


    final List<Tree> leaves = tree.getLeaves();
    for (final Tree leaf : leaves) {
      // Emits "(<label of the leaf's parent> - <leaf>), " for each leaf.
      System.out.printf("(%s - %s), ", leaf.parent(tree).label(), leaf);

  /**
   * Creates a string which represents the part of the sentence this <code>tree</code> stands
   * for.
   *
   * @param tree A (partial) syntax tree
   * @return The original sentence part
   */
  public static String printTree(Tree tree) {
    final StringBuilder sb = new StringBuilder();

    for (final Tree t : tree.getLeaves()) {
      sb.append(t.toString()).append(" ");
    return sb.toString().trim();
  private static String toString(Tree tree, boolean plainPrint) {
    if (!plainPrint) return tree.toString();

    StringBuilder sb = new StringBuilder();
    List<Tree> leaves = tree.getLeaves();
    for (Tree leaf : leaves) sb.append(((CoreLabel) leaf.label()).value()).append(' ');

    return sb.toString();
 public Tense calculateTense(String clause) {
   final Tree posTree = getPosTree(clause);
   final Tree word = posTree.getLeaves().get(0);
   final String pos = word.parent(posTree).label().value().toLowerCase();
   if (pos.equals("md")) {
     return Tense.FUTURE;
   if (pos.equals("vbd") || pos.equals("vbn")) {
     return Tense.PAST;
   return Tense.PRESENT;
  private LabeledSentence generateSupersenseTaggingInput(Tree sentence) {
    LabeledSentence res = new LabeledSentence();
    List<Tree> leaves = sentence.getLeaves();

    for (int i = 0; i < leaves.size(); i++) {
      String word = leaves.get(i).label().toString();
      Tree preterm = leaves.get(i).parent(sentence);
      String pos = preterm.label().toString();
      String stem = AnalysisUtilities.getInstance().getLemma(word, pos);
      res.addToken(word, stem, pos, "0");

    return res;
  public static ArrayList<ArrayList<TaggedWord>> getPhrases(Tree parse, int phraseSizeLimit) {
    ArrayList<ArrayList<TaggedWord>> newList = new ArrayList<ArrayList<TaggedWord>>();
    List<Tree> leaves = parse.getLeaves();

    if (leaves.size() <= phraseSizeLimit) {
      // ArrayList<TaggedWord> phraseElements = PreprocessPhrase(parse.taggedYield());
      ArrayList<TaggedWord> phraseElements = Preprocess(parse.taggedYield());
      if (phraseElements.size() > 0) newList.add(phraseElements);
    } else {
      Tree[] childrenNodes = parse.children();
      for (int i = 0; i < childrenNodes.length; i++) {
        Tree currentParse = childrenNodes[i];
        newList.addAll(getPhrases(currentParse, phraseSizeLimit));
    return newList;
Exemple #9
  /**
   * Returns a list of Token annotations from a Tree object.
   *
   * @param aJCas a JCas.
   * @param t a tree.
   * @return the tokens.
   */
  public static List<Token> getTokenListFromTree(JCas aJCas, Tree t) {
    List<Token> tokenList = new ArrayList<Token>();
    int index = 0;
    for (Tree leaf : t.getLeaves()) {

      String word = ((CoreLabel) leaf.label()).get(CoreAnnotations.ValueAnnotation.class);

      tokenList.add(new Token(aJCas, index, index + word.length()));

      // get trailing whitespaces to calculate next index
      String whiteSpaces = ((CoreLabel) leaf.label()).get(CoreAnnotations.AfterAnnotation.class);
      if (whiteSpaces == null) {
        whiteSpaces = " ";

      index += word.length() + whiteSpaces.length();
    return tokenList;
Exemple #10
 void initRandomWordVectors(List<Tree> trainingTrees) {
   if (op.numHid == 0) {
     throw new RuntimeException("Cannot create random word vectors for an unknown numHid");
   Set<String> words = Generics.newHashSet();
   for (Tree tree : trainingTrees) {
     List<Tree> leaves = tree.getLeaves();
     for (Tree leaf : leaves) {
       String word = leaf.label().value();
       if (op.lowercaseWordVectors) {
         word = word.toLowerCase();
   this.wordVectors = Generics.newTreeMap();
   for (String word : words) {
     SimpleMatrix vector = randomWordVector();
     wordVectors.put(word, vector);
Exemple #11
  /**
   * Returns the head noun for a URI.
   *
   * <p>The URI is split into tokens with {@code lexicalize}. A single token is returned as-is.
   * Several tokens are joined with spaces, and the value of the last leaf of the head subtree
   * chosen by {@code headFinder} is returned.
   *
   * <p>NOTE(review): the closing braces of the else-branch and of the method are missing in
   * this snippet. An empty token array would make {@code tokens[0]} throw - confirm that
   * {@code lexicalize} never returns one.
   *
   * @param uri the URI to analyse
   * @return the head noun
   */
  private String getHeadNoun(String uri) {
    String[] tokens = lexicalize(uri);

    // if we have multiple tokens, get the head noun
    String head;
    if (tokens.length > 1) {
      head = Joiner.on(" ").join(tokens);

      Annotation document = new Annotation(head);

      // NOTE(review): nothing visible here annotates `document` before its sentences are
      // read - presumably a pipeline call was lost from this snippet; confirm.
      CoreMap sentence = document.get(SentencesAnnotation.class).get(0);
      Tree tree = sentence.get(TreeAnnotation.class);

      Tree headTree = headFinder.determineHead(tree);
      // we assume that the last occurring NN is the head noun
      List<Tree> leaves = headTree.getLeaves();
      head = leaves.get(leaves.size() - 1).label().value();
    } else {
      head = tokens[0];
    return head;
 /**
  * Checks that the leaves of {@code tree} line up one-to-one with {@code originalSentence} and
  * then iterates over the leaves alongside an iterator over the original words.
  *
  * <p>TODO: clearly this should be a default method in ParserQuery once Java 8 comes out
  *
  * <p>NOTE(review): this snippet is truncated. The body of the null-check (presumably an early
  * return), the body of the final loop and all closing braces are missing; restore them from
  * the original file.
  *
  * @param tree the parsed tree whose leaves are processed; may be null
  * @throws IllegalStateException if the number of leaves differs from the size of
  *     {@code originalSentence}
  */
 public void restoreOriginalWords(Tree tree) {
   // Nothing can be done without both the original words and a tree.
   if (originalSentence == null || tree == null) {
   List<Tree> leaves = tree.getLeaves();
   if (leaves.size() != originalSentence.size()) {
     // The message lists both sizes and both word sequences.
     throw new IllegalStateException(
         "originalWords and sentence of different sizes: "
             + originalSentence.size()
             + " vs. "
             + leaves.size()
             + "\n Orig: "
             + Sentence.listToString(originalSentence)
             + "\n Pars: "
             + Sentence.listToString(leaves));
   // TODO: get rid of this cast
   Iterator<? extends Label> wordsIterator =
       (Iterator<? extends Label>) originalSentence.iterator();
   for (Tree leaf : leaves) {
 public Tree getSyntacticHeadTree() {
   Tree tree = sentence.get(TreeAnnotation.class);
   return tree.getLeaves().get(syntacticHeadTokenPosition);
  /**
   * Searches for an indexed word in a sentence tree.
   *
   * @param wordToFind The word to look for; its begin index, end index and value are compared.
   * @param treeToSearch The tree to search in.
   * @param expectedPOS The expected POS tag for the result. If this is NULL, the method tries to
   *     find a phrase.
   * @param canGoUp If TRUE the method will walk up the tree to find a phrase.
   * @param skip Set to "1" if you want to find the phrase for "in front of". Set to "0" otherwise.
   * @return The largest matching tree, or NULL if nothing matches.
   */
  public static Tree match(
      IndexedWord wordToFind, Tree treeToSearch, String expectedPOS, boolean canGoUp, int skip) {
    int end = wordToFind.get(EndIndexAnnotation.class);
    int begin = wordToFind.get(BeginIndexAnnotation.class);

    // first, find whatever is at the word's index
    for (Tree tree : treeToSearch) {
      CoreLabel lbl = ((CoreLabel) tree.label());

      if (lbl != null
          && lbl.get(EndIndexAnnotation.class) != null
          && lbl.get(EndIndexAnnotation.class) == end) {
        if (lbl.get(BeginIndexAnnotation.class) == begin) {
          // we found the first subtree at the word's index
          // now, check if the word here is our searchword
          if (tree.getLeaves().get(0).label().value().equals(wordToFind.value())) {
            // we have found the label.
            Tree candidate = tree;

            if (expectedPOS != null) {
              // if we know our desired POS, just keep walking up the tree to find the first
              // instance of the expected pos
              while (!expectedPOS.equals(candidate.value())) {
                // if we don't have the right POS, just try our parent
                candidate = candidate.parent(treeToSearch);

                if (candidate == null) {
                  return null;
              candidate = skip(candidate, treeToSearch, expectedPOS, skip);
            } else {
              // else walk up the tree again to find the corresponding phrase
              while (!candidate.isPhrasal()) {
                candidate =
                    candidate.parent(treeToSearch); // edu.stanford.nlp.trees.Tree.parent(Tree root)

                if (candidate == null) {
                  return null;

            if (canGoUp) {
              // now keep walking as long as the phrase does not change. this should yield the
              // largest representative phrase for this word.
              String phrase = candidate.value();
              while (phrase.equals(candidate.parent(treeToSearch).value())) {
                candidate = candidate.parent(treeToSearch);

                if (candidate == null) {
                  return null;
            return candidate;
    return null;
  /**
   * Produces supersense labels for the leaves of {@code sentence}.
   *
   * <p>A sentence with at most one leaf yields an empty list. Otherwise the sentence is
   * converted with {@code generateSupersenseTaggingInput}, and a socket connection to the port
   * configured under "supersenseServerPort" (default "5557") is tried first. If the result is
   * still empty afterwards, the local tagger {@code sst} is used. Finally the result is padded
   * with "0" until it has as many entries as the sentence has leaves.
   *
   * <p>NOTE(review): this snippet is truncated. The statements that send {@code inputStr},
   * that fill {@code result} from the server reply and from the local predictions, the
   * construction of {@code sst} and most closing braces are missing; restore them from the
   * original file.
   *
   * @param sentence the parse tree of the sentence
   * @return the list of labels, empty when the sentence has at most one leaf
   */
  public List<String> annotateSentenceWithSupersenses(Tree sentence) {
    List<String> result = new ArrayList<String>();

    int numleaves = sentence.getLeaves().size();
    if (numleaves <= 1) {
      return result;
    LabeledSentence labeled = generateSupersenseTaggingInput(sentence);

    // see if a NER socket server is available
    // NOTE(review): new Integer(String) is deprecated; Integer.parseInt is the usual choice.
    int port = new Integer(ARKref.getProperties().getProperty("supersenseServerPort", "5557"));
    String host = "";
    Socket client;
    PrintWriter pw;
    BufferedReader br;
    String line;
    try {
      // NOTE(review): neither the socket nor the streams are closed in the visible code.
      client = new Socket(host, port);

      pw = new PrintWriter(client.getOutputStream());
      br = new BufferedReader(new InputStreamReader(client.getInputStream()));
      // One line per token: token, stem and POS separated by tabs.
      String inputStr = "";
      for (int i = 0; i < labeled.length(); i++) {
        String token = labeled.getTokens().get(i);
        String stem = labeled.getStems().get(i);
        String pos = labeled.getPOS().get(i);
        inputStr += token + "\t" + stem + "\t" + pos + "\n";
      pw.flush(); // flush to complete the transmission

      // Each reply line is split on tabs.
      while ((line = br.readLine()) != null) {
        String[] parts = line.split("\\t");

    } catch (Exception ex) {
      // Any failure is treated as "server not available"; it is only reported in debug mode.
      if (ARKref.Opts.debug) System.err.println("Could not connect to SST server.");
      // ex.printStackTrace();

    // if socket server not available, then use a local NER object
    if (result.size() == 0) {
      try {
        // The local tagger is created only when it has not been set yet.
        if (sst == null) {
          sst =
                      .getProperty("supersenseModelFile", "config/supersenseModel.ser.gz"));
        sst.findBestLabelSequenceViterbi(labeled, sst.getWeights());
        for (String pred : labeled.getPredictions()) {
      } catch (Exception e) {

    // add a bunch of blanks if necessary
    while (result.size() < numleaves) result.add("0");

    if (ARKref.Opts.debug) System.err.println("annotateSentenceSST: " + result);
    return result;
  private PropertyList addLexicoSyntacticFeatures(
      PropertyList pl,
      Document doc,
      Pair<Integer, Integer> candidate,
      int arg2Line,
      int arg2HeadPos,
      int connStart,
      int connEnd) {

    int arg1Line = candidate.first();
    Tree root = doc.getTree(arg1Line);
    int arg1HeadPos = candidate.second();

    boolean attributive = false;
    String head = root.getLeaves().get(arg1HeadPos).value();
    for (String verb : attributiveVerb) {
      if (head.matches(verb)) {
        attributive = true;

    pl = PropertyList.add("U=" + attributive, 1.0, pl);
    SimpleDepGraph depGraph = doc.getDepGraph(arg1Line);

    boolean hasClausalComp = false;
    List<SimpleDependency> govDependencies = depGraph.getGovDependencies(arg1HeadPos);
    for (SimpleDependency dep : govDependencies) {
      if (dep.reln().equals("ccomp")) {
        hasClausalComp = true;

    pl = PropertyList.add("V=" + hasClausalComp, 1.0, pl);
    pl = PropertyList.add("W=" + attributive + "&" + hasClausalComp, 1.0, pl);

    boolean isClausalComp = false;
    List<SimpleDependency> depDependencies = depGraph.getDepDependencies(arg1HeadPos);
    SimpleDependency clausalComp = null;
    for (SimpleDependency dep : depDependencies) {
      if (dep.reln().equals("ccomp")) {
        isClausalComp = true;
        clausalComp = dep;

    pl = PropertyList.add("X=" + isClausalComp, 1.0, pl);
    if (isClausalComp) {
      int gov = clausalComp.gov();
      String govWord = root.getLeaves().get(gov).value();
      boolean isGovAttributive = false;
      for (String verb : attributiveVerb) {
        if (govWord.matches(verb)) {
          isGovAttributive = true;
      pl = PropertyList.add("Y=" + isClausalComp + "&" + isGovAttributive, 1.0, pl);

    return pl;
  /**
   * Adds path-based constituent features (keys H, I, J, K and L) for an argument-1 candidate
   * to the property list.
   *
   * <p>When the candidate lies in the connective's sentence, the tree path between the
   * connective head leaf and the candidate head leaf is examined. Otherwise one path is taken
   * in the argument-2 tree (connective head to the Collins head of the tree's first child) and
   * another in the argument-1 tree (Collins head of the first child to the candidate head),
   * with a loop over the sentence distance in between.
   *
   * <p>NOTE(review): this snippet is truncated. The statements that fill {@code path} and
   * {@code pathWithoutPOS}, the bodies of the loops that build {@code fullPath} and
   * {@code collapsedPath}, and most closing braces are missing; restore them from the
   * original file.
   *
   * @param pl the property list to extend
   * @param doc the document that provides sentences and parse trees
   * @param candidate pair of (sentence line, head position) of the argument-1 candidate
   * @param arg2Line line of the sentence that contains the connective
   * @param arg2HeadPos not used in the visible code
   * @param connStart start of the connective, passed to the sentence and the head analyzer
   * @param connEnd end of the connective, passed to the sentence and the head analyzer
   * @return the extended property list
   */
  private PropertyList addConstituentFeatures(
      PropertyList pl,
      Document doc,
      Pair<Integer, Integer> candidate,
      int arg2Line,
      int arg2HeadPos,
      int connStart,
      int connEnd) {
    Sentence arg2Sentence = doc.getSentence(arg2Line);
    String conn = arg2Sentence.toString(connStart, connEnd);
    int connHeadPos = connAnalyzer.getHeadWord(arg2Sentence.getParseTree(), connStart, connEnd);

    int arg1Line = candidate.first();
    Tree arg1Tree = doc.getTree(arg1Line);
    int arg1HeadPos = candidate.second();

    List<String> path = new ArrayList<String>();
    List<String> pathWithoutPOS = new ArrayList<String>();

    if (arg1Line == arg2Line) {
      // Same sentence: path from the connective head leaf to the candidate head leaf.
      Tree root = arg1Tree;
      List<Tree> leaves = root.getLeaves();
      List<Tree> treePath = root.pathNodeToNode(leaves.get(connHeadPos), leaves.get(arg1HeadPos));
      if (treePath != null) {
        for (Tree t : treePath) {
          // Leaves are skipped; preterminals are distinguished from other inner nodes.
          if (!t.isLeaf()) {
            if (!t.isPreTerminal()) {
    } else {
      // Different sentences: first the path inside the connective's sentence, from the
      // connective head leaf to the main head leaf.
      Tree arg2Root = arg2Sentence.getParseTree();
      Tree mainHead = headAnalyzer.getCollinsHead(arg2Root.getChild(0));
      List<Tree> leaves = arg2Root.getLeaves();
      int mainHeadPos = treeAnalyzer.getLeafPosition(arg2Root, mainHead);
      // -1 is treated as "no leaf position available".
      if (mainHeadPos != -1) {
        List<Tree> treePath =
            arg2Root.pathNodeToNode(leaves.get(connHeadPos), leaves.get(mainHeadPos));
        if (treePath != null) {
          for (Tree t : treePath) {
            if (!t.isLeaf()) {
              if (!t.isPreTerminal()) {
      // One iteration per sentence between the two arguments (body not visible here).
      for (int i = 0; i < Math.abs(arg1Line - arg2Line); i++) {
      // Then the path inside the candidate's sentence, from the main head leaf to the
      // candidate head leaf.
      Tree arg1Root = arg1Tree;
      mainHead = headAnalyzer.getCollinsHead(arg1Root.getChild(0));
      leaves = arg1Root.getLeaves();
      mainHeadPos = treeAnalyzer.getLeafPosition(arg1Root, mainHead);
      if (mainHeadPos != -1) {
        List<Tree> treePath =
            arg1Root.pathNodeToNode(leaves.get(mainHeadPos), leaves.get(arg1HeadPos));
        if (treePath != null) {
          for (Tree t : treePath) {
            if (!t.isLeaf()) {
              if (!t.isPreTerminal()) {
    // H-full path
    // L-C&H
    StringBuilder fullPath = new StringBuilder();
    for (String node : path) {
    pl = PropertyList.add("H=" + fullPath.toString(), 1.0, pl);
    pl = PropertyList.add("L=CONN-" + conn + "&" + "H-" + fullPath.toString(), 1.0, pl);

    // I-length of path
    pl = PropertyList.add("I=" + path.size(), 1.0, pl);

    // J-collapsed path without part of speech
    // K-collapsed path without repetitions
    fullPath = new StringBuilder();
    StringBuilder collapsedPath = new StringBuilder();
    // prev holds the previously visited node so consecutive repeats can be detected.
    String prev = "";
    for (String node : pathWithoutPOS) {
      if (!node.equals(prev)) {
      prev = node;
    pl = PropertyList.add("J=" + fullPath.toString(), 1.0, pl);
    pl = PropertyList.add("K=" + collapsedPath.toString(), 1.0, pl);

    return pl;