Esempio n. 1
0
  /**
   * Rebuilds the surface string of a sentence from the leaves of its tree.
   *
   * <p>Every leaf label is cast to {@code CoreLabel}. The label's {@code ValueAnnotation} is
   * written out, followed by its {@code AfterAnnotation}; when the latter is missing a single
   * space is written instead.
   *
   * @param t the tree representation of the sentence
   * @return the leaf values, each followed by its trailing whitespace
   */
  public static String tree2Words(Tree t) {
    StringBuilder sentence = new StringBuilder();

    for (Tree leaf : t.getLeaves()) {
      CoreLabel label = (CoreLabel) leaf.label();
      String token = label.get(CoreAnnotations.ValueAnnotation.class);

      // TODO maybe double check preceding whitespaces: transformations could leave the trailing
      // whitespace of the previous token different from the preceding whitespace of the current
      // one. The same would have to be done in getTokenListFromTree(...).

      // No whitespace info is available e.g. for nodes inserted by TSurgeon operations; fall
      // back to a single blank in that case.
      String after = label.get(CoreAnnotations.AfterAnnotation.class);
      String separator = (after == null) ? " " : after;

      sentence.append(token);
      sentence.append(separator);
    }

    return sentence.toString();
  }
 /**
  * Builds a terse representation of a (sub-)tree: {@code NP[the white dog]} -vs- {@code (NP (DT
  * the) (JJ white) (NN dog))}.
  *
  * @param tree the (sub-)tree to abbreviate
  * @return the label of {@code tree} followed by its leaf labels, space-separated, in square
  *     brackets
  */
 public static String abbrevTree(Tree tree) {
   // Parameterized instead of a raw ArrayList so the compiler checks the element type.
   ArrayList<String> toks = new ArrayList<String>();
   for (Tree leaf : tree.getLeaves()) {
     toks.add(leaf.label().toString());
   }
   return tree.label().toString() + "[" + StringUtils.join(toks, " ") + "]";
 }
Esempio n. 3
0
  /**
   * Debug helper that parses a sentence and dumps the result to standard output: the children of
   * the tree's first child, the indented tree, each leaf with its parent's label, and the
   * dependencies.
   *
   * @param sentence the sentence to analyze; every '.' character is removed before parsing
   */
  private static void printSentenceParseTree(String sentence) {
    System.out.println(
        "ParsedSentence being analyzed: \"" + sentence + "\"\n---------------------------");

    final String withoutPeriods = sentence.replace(".", "");
    final ParsedSentence parsed = StanfordCoreNlpClient.parseSentence(withoutPeriods, false);
    final Tree root = parsed.getPosTree();

    // Each child of the first child: its label immediately followed by the subtree itself.
    for (final Tree constituent : root.getChild(0).getChildrenAsList()) {
      System.out.print(constituent.label());
      System.out.println(constituent);
    }

    root.indentedListPrint();
    System.out.println();

    // One "(parentLabel - leaf), " entry per leaf, all on a single line.
    for (final Tree leaf : root.getLeaves()) {
      System.out.printf("(%s - %s), ", leaf.parent(root).label(), leaf);
    }
    System.out.println("\n");

    System.out.println(parsed.getDependencies());
  }
  /**
   * Renders the part of the sentence covered by {@code tree} by joining the string form of its
   * leaves with single spaces.
   *
   * @param tree A (partial) syntax tree
   * @return The leaves' text separated by blanks, trimmed at both ends
   */
  public static String printTree(Tree tree) {
    final StringBuilder words = new StringBuilder();

    for (final Tree leaf : tree.getLeaves()) {
      words.append(leaf.toString());
      words.append(" ");
    }

    // The loop leaves a trailing blank behind; trim() removes it.
    final String joined = words.toString();
    return joined.trim();
  }
  /**
   * Converts a tree to a string.
   *
   * @param tree the tree to render
   * @param plainPrint if {@code false}, the tree's own {@code toString()} is returned; if {@code
   *     true}, only the leaf values are returned, each followed by a single space
   * @return the rendered tree
   */
  private static String toString(Tree tree, boolean plainPrint) {
    if (plainPrint) {
      StringBuilder plain = new StringBuilder();
      for (Tree leaf : tree.getLeaves()) {
        CoreLabel label = (CoreLabel) leaf.label();
        plain.append(label.value());
        plain.append(' ');
      }
      return plain.toString();
    }

    return tree.toString();
  }
 /**
  * Derives a tense from the tag of the first word of a clause.
  *
  * <p>The tag is the lower-cased label value of the first leaf's parent in the tree returned by
  * {@code getPosTree}: "md" yields FUTURE, "vbd" or "vbn" yields PAST, anything else PRESENT.
  *
  * @param clause the clause to inspect
  * @return the tense derived from the first word's tag
  */
 public Tense calculateTense(String clause) {
   final Tree tagged = getPosTree(clause);
   final Tree firstLeaf = tagged.getLeaves().get(0);
   final String tag = firstLeaf.parent(tagged).label().value().toLowerCase();

   final boolean modal = "md".equals(tag);
   final boolean pastForm = "vbd".equals(tag) || "vbn".equals(tag);

   if (modal) {
     return Tense.FUTURE;
   } else if (pastForm) {
     return Tense.PAST;
   } else {
     return Tense.PRESENT;
   }
 }
  /**
   * Converts a sentence tree into a {@code LabeledSentence}.
   *
   * <p>One token is added per leaf, consisting of the leaf's label text, the lemma obtained from
   * {@code AnalysisUtilities}, the label text of the leaf's parent node, and the label "0".
   *
   * @param sentence the sentence tree
   * @return the labeled sentence with one token per leaf
   */
  private LabeledSentence generateSupersenseTaggingInput(Tree sentence) {
    LabeledSentence labeled = new LabeledSentence();

    for (Tree leaf : sentence.getLeaves()) {
      String token = leaf.label().toString();
      String tag = leaf.parent(sentence).label().toString();
      String lemma = AnalysisUtilities.getInstance().getLemma(token, tag);
      labeled.addToken(token, lemma, tag, "0");
    }

    return labeled;
  }
  /**
   * Splits a parse tree into phrases of at most {@code phraseSizeLimit} leaves.
   *
   * <p>A (sub-)tree with no more leaves than the limit contributes its preprocessed tagged yield
   * as one phrase, unless that yield is empty after preprocessing. A larger tree is split by
   * recursing into each of its children.
   *
   * @param parse the (sub-)tree to split
   * @param phraseSizeLimit the maximum number of leaves a phrase may cover
   * @return the collected phrases, in tree order
   */
  public static ArrayList<ArrayList<TaggedWord>> getPhrases(Tree parse, int phraseSizeLimit) {
    ArrayList<ArrayList<TaggedWord>> phrases = new ArrayList<ArrayList<TaggedWord>>();
    int leafCount = parse.getLeaves().size();

    if (leafCount > phraseSizeLimit) {
      // Too large to be a single phrase: collect the phrases of every child instead.
      for (Tree child : parse.children()) {
        phrases.addAll(getPhrases(child, phraseSizeLimit));
      }
      return phrases;
    }

    ArrayList<TaggedWord> phrase = Preprocess(parse.taggedYield());
    if (!phrase.isEmpty()) {
      phrases.add(phrase);
    }
    return phrases;
  }
Esempio n. 9
0
  /**
   * Creates one Token annotation per leaf of a Tree-object.
   *
   * <p>Offsets start at 0. Each token spans the length of the leaf's {@code ValueAnnotation}; the
   * next token starts after the leaf's {@code AfterAnnotation}, or after one character when that
   * annotation is missing.
   *
   * @param aJCas a JCas.
   * @param t a tree.
   * @return the tokens, in leaf order.
   */
  public static List<Token> getTokenListFromTree(JCas aJCas, Tree t) {
    List<Token> tokens = new ArrayList<Token>();
    int begin = 0;

    for (Tree leaf : t.getLeaves()) {
      CoreLabel label = (CoreLabel) leaf.label();

      String text = label.get(CoreAnnotations.ValueAnnotation.class);
      int end = begin + text.length();
      tokens.add(new Token(aJCas, begin, end));

      // The trailing whitespace determines where the next token begins; without that
      // information a single blank (one character) is assumed.
      String after = label.get(CoreAnnotations.AfterAnnotation.class);
      int gap = (after == null) ? 1 : after.length();

      begin = end + gap;
    }

    return tokens;
  }
Esempio n. 10
0
 /**
  * Replaces {@code wordVectors} with a fresh map holding one random vector per distinct word.
  *
  * <p>The vocabulary consists of {@code UNKNOWN_WORD} plus the label value of every leaf in the
  * training trees, lower-cased when {@code op.lowercaseWordVectors} is set.
  *
  * @param trainingTrees the trees whose leaves supply the vocabulary
  * @throws RuntimeException if {@code op.numHid} is 0
  */
 void initRandomWordVectors(List<Tree> trainingTrees) {
   if (op.numHid == 0) {
     throw new RuntimeException("Cannot create random word vectors for an unknown numHid");
   }

   // Phase 1: gather the vocabulary.
   Set<String> vocabulary = Generics.newHashSet();
   vocabulary.add(UNKNOWN_WORD);
   for (Tree tree : trainingTrees) {
     for (Tree leaf : tree.getLeaves()) {
       String token = leaf.label().value();
       vocabulary.add(op.lowercaseWordVectors ? token.toLowerCase() : token);
     }
   }

   // Phase 2: draw one vector per vocabulary entry, in the set's iteration order.
   this.wordVectors = Generics.newTreeMap();
   for (String entry : vocabulary) {
     wordVectors.put(entry, randomWordVector());
   }
 }
Esempio n. 11
0
  /**
   * Determines the head noun of the lexicalized form of a URI.
   *
   * <p>A single token is returned as is. Multiple tokens are joined with blanks and run through
   * the pipeline; the head of the first sentence's tree is determined with {@code headFinder} and
   * the value of its last leaf is returned.
   *
   * @param uri the URI to lexicalize
   * @return the head noun
   */
  private String getHeadNoun(String uri) {
    String[] tokens = lexicalize(uri);

    // Nothing to analyze when there is just one token.
    if (tokens.length <= 1) {
      return tokens[0];
    }

    String phrase = Joiner.on(" ").join(tokens);
    Annotation document = new Annotation(phrase);
    pipeline.annotate(document);

    CoreMap firstSentence = document.get(SentencesAnnotation.class).get(0);
    Tree parse = firstSentence.get(TreeAnnotation.class);

    // we assume that the last occurring NN is the head noun
    List<Tree> headLeaves = headFinder.determineHead(parse).getLeaves();
    Tree lastLeaf = headLeaves.get(headLeaves.size() - 1);
    return lastLeaf.label().value();
  }
 /**
  * Replaces the label of every leaf of {@code tree} with the corresponding entry of {@code
  * originalSentence}, in order. Does nothing when either of them is {@code null}.
  *
  * <p>TODO: clearly this should be a default method in ParserQuery once Java 8 comes out
  *
  * @param tree the tree whose leaf labels are replaced
  * @throws IllegalStateException if the tree has a different number of leaves than {@code
  *     originalSentence} has entries
  */
 @Override
 public void restoreOriginalWords(Tree tree) {
   if (originalSentence == null) {
     return;
   }
   if (tree == null) {
     return;
   }

   List<Tree> leaves = tree.getLeaves();
   boolean sameLength = leaves.size() == originalSentence.size();
   if (!sameLength) {
     String message =
         "originalWords and sentence of different sizes: "
             + originalSentence.size()
             + " vs. "
             + leaves.size()
             + "\n Orig: "
             + Sentence.listToString(originalSentence)
             + "\n Pars: "
             + Sentence.listToString(leaves);
     throw new IllegalStateException(message);
   }

   // TODO: get rid of this cast
   Iterator<? extends Label> replacements =
       (Iterator<? extends Label>) originalSentence.iterator();
   for (Tree leaf : leaves) {
     Label original = replacements.next();
     leaf.setLabel(original);
   }
 }
Esempio n. 13
0
 /**
  * Looks up the leaf at {@code syntacticHeadTokenPosition} in the sentence's tree annotation.
  *
  * @return the leaf of the sentence tree at the syntactic head token position
  */
 public Tree getSyntacticHeadTree() {
   return sentence.get(TreeAnnotation.class).getLeaves().get(syntacticHeadTokenPosition);
 }
Esempio n. 14
0
  /**
   * Searches a sentence tree for the subtree that corresponds to an indexed word.
   *
   * <p>The tree is traversed for the first node whose label carries the same begin and end index
   * as {@code wordToFind} and whose first leaf has the same value as the word. Starting from that
   * node the method walks towards the root until a node with the expected POS tag (or, when no
   * tag is given, a phrasal node) is reached.
   *
   * @param wordToFind the word to locate; its begin and end index annotations must be set
   * @param treeToSearch the sentence tree to search; also used as the root when resolving parents
   * @param expectedPOS The expected POS tag for the result. If this is NULL, the method tries to
   *     find a phrase.
   * @param canGoUp If TRUE the method keeps walking up while the parent has the same value as the
   *     node found so far, stopping at the root at the latest.
   * @param skip Set to "1" if you want to find the phrase for "in front of". Set to "0" otherwise.
   * @return The largest matching tree, or {@code null} if the word or a suitable ancestor could
   *     not be found.
   */
  public static Tree match(
      IndexedWord wordToFind, Tree treeToSearch, String expectedPOS, boolean canGoUp, int skip) {
    int end = wordToFind.get(EndIndexAnnotation.class);
    int begin = wordToFind.get(BeginIndexAnnotation.class);

    // first, find whatever is at the word's index
    for (Tree tree : treeToSearch) {
      CoreLabel lbl = ((CoreLabel) tree.label());
      if (lbl == null) {
        continue;
      }

      // Nodes lacking either index cannot match; checking both for null also avoids an
      // unboxing NullPointerException on labels that only carry an end index.
      Integer lblEnd = lbl.get(EndIndexAnnotation.class);
      Integer lblBegin = lbl.get(BeginIndexAnnotation.class);
      if (lblEnd == null || lblBegin == null || lblEnd != end || lblBegin != begin) {
        continue;
      }

      // we found the first subtree at the word's index
      // now, check if the word here is our searchword
      if (!tree.getLeaves().get(0).label().value().equals(wordToFind.value())) {
        continue;
      }

      // we have found the label.
      Tree candidate = tree;

      if (expectedPOS != null) {
        // if we know our desired POS, just keep walking up the tree to find the first
        // instance of the expected pos
        while (!expectedPOS.equals(candidate.value())) {
          // if we don't have the right POS, just try our parent
          candidate = candidate.parent(treeToSearch);

          if (candidate == null) {
            return null;
          }
        }
        candidate = skip(candidate, treeToSearch, expectedPOS, skip);
      } else {
        // else walk up the tree again to find the corresponding phrase
        while (!candidate.isPhrasal()) {
          candidate =
              candidate.parent(treeToSearch); // edu.stanford.nlp.trees.Tree.parent(Tree root)

          if (candidate == null) {
            return null;
          }
        }
      }

      if (canGoUp) {
        // now keep walking as long as the phrase does not change. this should yield the
        // largest representative phrase for this word. The parent is checked for null BEFORE
        // it is dereferenced, so the walk ends cleanly at the root instead of throwing.
        String phrase = candidate.value();
        Tree parent = candidate.parent(treeToSearch);
        while (parent != null && phrase.equals(parent.value())) {
          candidate = parent;
          parent = candidate.parent(treeToSearch);
        }
      }
      return candidate;
    }
    return null;
  }
  /**
   * Computes one supersense label per leaf of a sentence tree.
   *
   * <p>The method first tries a tagging server on 127.0.0.1 at the port given by the {@code
   * supersenseServerPort} property (default 5557). It sends one tab-separated {@code token, stem,
   * pos} line per token and takes the third tab-separated field of every response line as a
   * label. If that yields no labels, a locally loaded {@code DiscriminativeTagger} model is used
   * instead. The result is padded with "0" up to the number of leaves.
   *
   * @param sentence the sentence tree
   * @return the labels; empty when the sentence has at most one leaf
   */
  public List<String> annotateSentenceWithSupersenses(Tree sentence) {
    List<String> result = new ArrayList<String>();

    int numleaves = sentence.getLeaves().size();
    if (numleaves <= 1) {
      return result;
    }
    LabeledSentence labeled = generateSupersenseTaggingInput(sentence);

    // see if a NER socket server is available
    int port =
        Integer.parseInt(ARKref.getProperties().getProperty("supersenseServerPort", "5557"));
    String host = "127.0.0.1";
    Socket client = null;
    try {
      client = new Socket(host, port);

      PrintWriter pw = new PrintWriter(client.getOutputStream());
      BufferedReader br = new BufferedReader(new InputStreamReader(client.getInputStream()));

      StringBuilder inputStr = new StringBuilder();
      for (int i = 0; i < labeled.length(); i++) {
        String token = labeled.getTokens().get(i);
        String stem = labeled.getStems().get(i);
        String pos = labeled.getPOS().get(i);
        inputStr.append(token).append("\t").append(stem).append("\t").append(pos).append("\n");
      }
      pw.println(inputStr.toString());
      pw.flush(); // flush to complete the transmission

      String line;
      while ((line = br.readLine()) != null) {
        // A line with fewer than three fields throws here and ends reading; labels collected
        // so far are kept.
        String[] parts = line.split("\\t");
        result.add(parts[2]);
      }
    } catch (Exception ex) {
      // Best effort: any failure while talking to the server falls through to the local model
      // (or to padding, if some labels were already read).
      if (ARKref.Opts.debug) System.err.println("Could not connect to SST server.");
    } finally {
      // Closing the socket also closes its streams; doing it here releases the connection even
      // when reading or parsing the response failed.
      if (client != null) {
        try {
          client.close();
        } catch (Exception ignored) {
          // nothing sensible can be done if closing fails
        }
      }
    }

    // if socket server not available, then use a local NER object
    if (result.size() == 0) {
      try {
        if (sst == null) {
          DiscriminativeTagger.loadProperties(ARKref.getPropertiesPath());
          sst =
              DiscriminativeTagger.loadModel(
                  ARKref.getProperties()
                      .getProperty("supersenseModelFile", "config/supersenseModel.ser.gz"));
        }
        sst.findBestLabelSequenceViterbi(labeled, sst.getWeights());
        for (String pred : labeled.getPredictions()) {
          result.add(pred);
        }
      } catch (Exception e) {
        e.printStackTrace();
      }
    }

    // add a bunch of blanks if necessary
    while (result.size() < numleaves) result.add("0");

    if (ARKref.Opts.debug) System.err.println("annotateSentenceSST: " + result);
    return result;
  }
  /**
   * Adds the lexico-syntactic features U, V, W, X and (conditionally) Y for an Arg1 candidate.
   *
   * <ul>
   *   <li>U: whether the candidate's head word matches one of the {@code attributiveVerb}
   *       patterns
   *   <li>V: whether one of the head's governor dependencies has relation "ccomp"
   *   <li>W: U and V combined
   *   <li>X: whether one of the head's dependent dependencies has relation "ccomp"
   *   <li>Y: only when X holds; X combined with whether the governor word of that dependency
   *       matches one of the {@code attributiveVerb} patterns
   * </ul>
   *
   * @param pl the property list to extend
   * @param doc the document providing trees and dependency graphs
   * @param candidate pair of (line of the candidate sentence, head position within it)
   * @param arg2Line not used by this method
   * @param arg2HeadPos not used by this method
   * @param connStart not used by this method
   * @param connEnd not used by this method
   * @return the extended property list
   */
  private PropertyList addLexicoSyntacticFeatures(
      PropertyList pl,
      Document doc,
      Pair<Integer, Integer> candidate,
      int arg2Line,
      int arg2HeadPos,
      int connStart,
      int connEnd) {

    final int arg1Line = candidate.first();
    final Tree root = doc.getTree(arg1Line);
    final int arg1HeadPos = candidate.second();

    // U: is the head word an attributive verb?
    final String headWord = root.getLeaves().get(arg1HeadPos).value();
    boolean attributive = false;
    for (String verbPattern : attributiveVerb) {
      attributive = headWord.matches(verbPattern);
      if (attributive) {
        break;
      }
    }
    pl = PropertyList.add("U=" + attributive, 1.0, pl);

    final SimpleDepGraph depGraph = doc.getDepGraph(arg1Line);

    // V / W: does the head take part in a "ccomp" relation among its governor dependencies?
    boolean hasClausalComp = false;
    for (SimpleDependency dep : depGraph.getGovDependencies(arg1HeadPos)) {
      hasClausalComp = dep.reln().equals("ccomp");
      if (hasClausalComp) {
        break;
      }
    }
    pl = PropertyList.add("V=" + hasClausalComp, 1.0, pl);
    pl = PropertyList.add("W=" + attributive + "&" + hasClausalComp, 1.0, pl);

    // X: first "ccomp" relation among the head's dependent dependencies, if any.
    SimpleDependency clausalComp = null;
    for (SimpleDependency dep : depGraph.getDepDependencies(arg1HeadPos)) {
      if (dep.reln().equals("ccomp")) {
        clausalComp = dep;
        break;
      }
    }
    final boolean isClausalComp = clausalComp != null;
    pl = PropertyList.add("X=" + isClausalComp, 1.0, pl);

    if (!isClausalComp) {
      return pl;
    }

    // Y: is the governor of that relation an attributive verb?
    final String govWord = root.getLeaves().get(clausalComp.gov()).value();
    boolean isGovAttributive = false;
    for (String verbPattern : attributiveVerb) {
      isGovAttributive = govWord.matches(verbPattern);
      if (isGovAttributive) {
        break;
      }
    }
    return PropertyList.add("Y=" + isClausalComp + "&" + isGovAttributive, 1.0, pl);
  }
  /**
   * Adds the constituent-path features H, L, I, J and K for an Arg1 candidate.
   *
   * <p>When the candidate is in the same sentence as the connective, the path is the tree path
   * from the connective's head word to the candidate's head word. Otherwise it consists of the
   * path from the connective head to the main head of the connective's sentence, one "SENT" entry
   * per line of distance, and the path from the main head of the candidate's sentence to the
   * candidate head. A partial path is skipped when the main head's leaf position is -1 or no tree
   * path is found.
   *
   * <ul>
   *   <li>H: the full path (non-leaf node values, each followed by ':')
   *   <li>L: the connective text combined with the full path
   *   <li>I: the number of entries in the full path
   *   <li>J: the path without pre-terminal (POS) nodes
   *   <li>K: the path without pre-terminal nodes and without immediate repetitions
   * </ul>
   *
   * @param pl the property list to extend
   * @param doc the document providing sentences and trees
   * @param candidate pair of (line of the candidate sentence, head position within it)
   * @param arg2Line line of the sentence containing the connective
   * @param arg2HeadPos not used by this method
   * @param connStart start position of the connective in its sentence
   * @param connEnd end position of the connective in its sentence
   * @return the extended property list
   */
  private PropertyList addConstituentFeatures(
      PropertyList pl,
      Document doc,
      Pair<Integer, Integer> candidate,
      int arg2Line,
      int arg2HeadPos,
      int connStart,
      int connEnd) {
    Sentence arg2Sentence = doc.getSentence(arg2Line);
    String conn = arg2Sentence.toString(connStart, connEnd);
    int connHeadPos = connAnalyzer.getHeadWord(arg2Sentence.getParseTree(), connStart, connEnd);

    int arg1Line = candidate.first();
    Tree arg1Tree = doc.getTree(arg1Line);
    int arg1HeadPos = candidate.second();

    List<String> path = new ArrayList<String>();
    List<String> pathWithoutPOS = new ArrayList<String>();

    if (arg1Line == arg2Line) {
      // Same sentence: connective head -> candidate head.
      Tree root = arg1Tree;
      List<Tree> leaves = root.getLeaves();
      collectPathLabels(
          root.pathNodeToNode(leaves.get(connHeadPos), leaves.get(arg1HeadPos)),
          path,
          pathWithoutPOS);
    } else {
      // Different sentences: connective head -> main head of the connective's sentence ...
      Tree arg2Root = arg2Sentence.getParseTree();
      Tree mainHead = headAnalyzer.getCollinsHead(arg2Root.getChild(0));
      List<Tree> leaves = arg2Root.getLeaves();
      int mainHeadPos = treeAnalyzer.getLeafPosition(arg2Root, mainHead);
      if (mainHeadPos != -1) {
        collectPathLabels(
            arg2Root.pathNodeToNode(leaves.get(connHeadPos), leaves.get(mainHeadPos)),
            path,
            pathWithoutPOS);
      }

      // ... one marker per sentence boundary crossed ...
      for (int i = 0; i < Math.abs(arg1Line - arg2Line); i++) {
        path.add("SENT");
        pathWithoutPOS.add("SENT");
      }

      // ... main head of the candidate's sentence -> candidate head.
      Tree arg1Root = arg1Tree;
      mainHead = headAnalyzer.getCollinsHead(arg1Root.getChild(0));
      leaves = arg1Root.getLeaves();
      mainHeadPos = treeAnalyzer.getLeafPosition(arg1Root, mainHead);
      if (mainHeadPos != -1) {
        collectPathLabels(
            arg1Root.pathNodeToNode(leaves.get(mainHeadPos), leaves.get(arg1HeadPos)),
            path,
            pathWithoutPOS);
      }
    }

    // H-full path
    // L-C&H
    StringBuilder fullPath = new StringBuilder();
    for (String node : path) {
      fullPath.append(node).append(":");
    }
    pl = PropertyList.add("H=" + fullPath.toString(), 1.0, pl);
    pl = PropertyList.add("L=CONN-" + conn + "&" + "H-" + fullPath.toString(), 1.0, pl);

    // I-length of path
    pl = PropertyList.add("I=" + path.size(), 1.0, pl);

    // J-collapsed path without part of speech
    // K-collapsed path without repetitions
    fullPath = new StringBuilder();
    StringBuilder collapsedPath = new StringBuilder();
    String prev = "";
    for (String node : pathWithoutPOS) {
      fullPath.append(node).append(":");
      if (!node.equals(prev)) {
        collapsedPath.append(node).append(":");
      }
      prev = node;
    }
    pl = PropertyList.add("J=" + fullPath.toString(), 1.0, pl);
    pl = PropertyList.add("K=" + collapsedPath.toString(), 1.0, pl);

    return pl;
  }

  /**
   * Appends the value of every non-leaf node on {@code treePath} to {@code path}, and the value
   * of every node that is neither a leaf nor a pre-terminal to {@code pathWithoutPOS}. A {@code
   * null} tree path is ignored.
   */
  private void collectPathLabels(
      List<Tree> treePath, List<String> path, List<String> pathWithoutPOS) {
    if (treePath == null) {
      return;
    }
    for (Tree t : treePath) {
      if (t.isLeaf()) {
        continue;
      }
      path.add(t.value());
      if (!t.isPreTerminal()) {
        pathWithoutPOS.add(t.value());
      }
    }
  }