/**
  * Builds a terse, single-line rendering of a (sub-)tree: the root label followed by the leaf
  * labels in square brackets, e.g. {@code NP[the white dog]} rather than
  * {@code (NP (DT the) (JJ white) (NN dog))}.
  *
  * @param tree the (sub-)tree to abbreviate
  * @return the root label's string form, then the leaf labels joined by single spaces and
  *     enclosed in {@code [ ]}
  */
 public static String abbrevTree(Tree tree) {
   // Parameterized constructor: the raw "new ArrayList()" caused an unchecked conversion warning.
   ArrayList<String> toks = new ArrayList<String>();
   for (Tree leaf : tree.getLeaves()) {
     toks.add(leaf.label().toString());
   }
   return tree.label().toString() + "[" + StringUtils.join(toks, " ") + "]";
 }
예제 #2
0
 /**
  * Expand in a non-overlapping manner the influence of each of our different phrases.
  *
  * <p>Starting from the subtrees obtained by expanding {@code leaves[root1]} (matcher
  * {@code NPS}) and {@code leaves[root2]} (matcher {@code StanfordParser.VerbPhrase}), the pair is
  * repeatedly re-expanded from each subtree's parent for as long as both expansions are non-null
  * and {@code seperate(...)} accepts their boundaries. The boundaries of the last accepted pair
  * (or of the initial pair when the loop never accepts one) are stored in {@code r1arr} and
  * {@code r2arr}. If either initial expansion is {@code null}, neither field is written.
  */
 void expandInfluence() {
   Tree firstBase = TreeOps.expandUntil(t, leaves[root1], new TreeOps.regexpMatcher(NPS));
   Tree secondBase =
       TreeOps.expandUntil(t, leaves[root2], new TreeOps.regexpMatcher(StanfordParser.VerbPhrase));
   if (firstBase == null || secondBase == null) {
     return;
   }

   Tree firstCandidate = firstBase;
   Tree secondCandidate = secondBase;
   while (true) {
     if (firstCandidate == null || secondCandidate == null) {
       break;
     }
     boolean stillSeparate =
         seperate(
             TreeOps.getSubTreeBoundaries(t, firstCandidate),
             TreeOps.getSubTreeBoundaries(t, secondCandidate));
     if (!stillSeparate) {
       break;
     }

     // The candidates passed the check: keep them and try one level further up.
     firstBase = firstCandidate;
     secondBase = secondCandidate;
     firstCandidate = TreeOps.expandUntil(t, firstBase.parent(t), new TreeOps.regexpMatcher(NPS));
     secondCandidate =
         TreeOps.expandUntil(
             t, secondBase.parent(t), new TreeOps.regexpMatcher(StanfordParser.VerbPhrase));
   }

   r1arr = TreeOps.getSubTreeBoundaries(t, firstBase);
   r2arr = TreeOps.getSubTreeBoundaries(t, secondBase);
 }
    /**
     * Rebuilds {@code tree} through the tree factory {@code tf} with simplified node labels.
     *
     * <p>Leaves are re-created from their existing label. Every other node receives a new
     * {@code CategoryWordTag} built from the old label, whose category is the old label value
     * passed through {@code basicCategory} and then {@code stripGF} of the treebank language
     * pack; when the old label implements {@code HasTag}, its tag is normalized the same way.
     * Scores are copied onto the new nodes.
     *
     * @param tree the tree to transform
     * @return the newly built tree
     */
    public Tree transformTree(Tree tree) {
      Label oldLabel = tree.label();

      // Leaves carry no category to normalize: re-create them and keep the score.
      if (tree.isLeaf()) {
        Tree leafCopy = tf.newLeaf(oldLabel);
        leafCopy.setScore(tree.score());
        return leafCopy;
      }

      String category = oldLabel.value();
      category = treebankLanguagePack().basicCategory(category);
      category = treebankLanguagePack().stripGF(category);

      // Transform the children first, preserving their order.
      List<Tree> newKids = new ArrayList<Tree>(tree.numChildren());
      for (Tree kid : tree.children()) {
        newKids.add(transformTree(kid));
      }

      CategoryWordTag simplified = new CategoryWordTag(oldLabel);
      simplified.setCategory(category);
      if (oldLabel instanceof HasTag) {
        String normalizedTag = ((HasTag) oldLabel).tag();
        normalizedTag = treebankLanguagePack().basicCategory(normalizedTag);
        normalizedTag = treebankLanguagePack().stripGF(normalizedTag);
        simplified.setTag(normalizedTag);
      }

      Tree rebuilt = tf.newTreeNode(simplified, newKids);
      rebuilt.setScore(tree.score());
      return rebuilt;
    }
예제 #4
0
  /**
   * Returns the sentence from its tree representation.
   *
   * <p>Each leaf label is cast to {@code CoreLabel}. Its {@code ValueAnnotation} is appended,
   * followed by its {@code AfterAnnotation}, or by a single space when that annotation is
   * {@code null}.
   *
   * @param t the tree representation of the sentence
   * @return the sentence
   */
  public static String tree2Words(Tree t) {
    StringBuilder sentence = new StringBuilder();

    for (Tree leaf : t.getLeaves()) {
      CoreLabel token = (CoreLabel) leaf.label();
      String text = token.get(CoreAnnotations.ValueAnnotation.class);

      // TODO maybe double check preceding whitespace: after transformations the trailing
      // whitespace of the last token may differ from the preceding whitespace of the current
      // token. BUT: this would also have to be done in getTokenListFromTree(...)

      // Missing whitespace info falls back to one space; per the original author this may happen
      // for nodes inserted by TSurgeon operations.
      String after = token.get(CoreAnnotations.AfterAnnotation.class);
      sentence.append(text).append(after != null ? after : " ");
    }

    return sentence.toString();
  }
 /**
  * Asserts that {@code result} matches {@code expected}: when {@code expected} is {@code null}
  * the two references are compared directly, otherwise their {@code toString()} forms are
  * compared.
  */
 private void verifyTree(Tree expected, Tree result) {
   if (expected != null) {
     assertEquals(expected.toString(), result.toString());
   } else {
     assertEquals(expected, result);
   }
 }
  /**
   * Produces a tidied, human-readable string from the yield of a tree.
   *
   * <p>The yield of a deep copy of {@code inputTree} is converted to a string, its first
   * character is upper-cased (only when the string is longer than one character), and a fixed
   * sequence of regex replacements normalizes spacing around punctuation, brackets and quotes.
   *
   * @param inputTree the tree whose yield is cleaned up
   * @return the cleaned-up, trimmed yield
   */
  public static String getCleanedUpYield(Tree inputTree) {
    Tree workingCopy = inputTree.deepCopy();

    if (DEBUG) {
      System.err.println(workingCopy.toString());
    }

    String text = workingCopy.yield().toString();
    if (text.length() > 1) {
      text = text.substring(0, 1).toUpperCase() + text.substring(1);
    }

    // Example input tree:
    // (ROOT (S (NP (NNP Jaguar) (NNS shares)) (VP (VBD skyrocketed) (NP (NN yesterday)) (PP (IN
    // after) (NP (NP (NNP Mr.) (NNP Ridley) (POS 's)) (NN announcement)))) (. .)))

    // The replacements are order-dependent; keep them in this sequence.
    return text
        // drop one whitespace character in front of . , ! ? - ; :
        .replaceAll("\\s([\\.,!\\?\\-;:])", "$1")
        // drop one whitespace character after a dollar sign
        .replaceAll("(\\$)\\s", "$1")
        .replaceAll("can not", "cannot")
        // Penn Treebank round brackets back to parentheses
        .replaceAll("\\s*-LRB-\\s*", " (")
        .replaceAll("\\s*-RRB-\\s*", ") ")
        // . , ? ! are followed by exactly one space and preceded by none
        .replaceAll("\\s*([\\.,?!])\\s*", "$1 ")
        // no whitespace before closing quotes or after opening quotes
        .replaceAll("\\s+''", "''")
        .replaceAll("``\\s+", "``")
        // remove -LCB- / -RCB- tokens (brackets, e.g., [sic])
        .replaceAll("\\-[LR]CB\\-", "")
        // remove extra spaces
        .replaceAll("\\s\\s+", " ")
        .trim();
  }
예제 #7
0
  /**
   * Stores a parse tree, and optionally the dependency graphs derived from it, on a sentence.
   *
   * @param verbose when true, the tree (and the collapsed dependencies, if built) are printed to
   *     standard error
   * @param buildGraphs when true, collapsed, uncollapsed and CC-processed dependency graphs are
   *     generated from the tree and set on the sentence
   * @param sentence the sentence that receives the annotations
   * @param tree the parse tree; its labels are converted to CoreLabels and its spans indexed in
   *     place
   */
  public static void fillInParseAnnotations(
      boolean verbose, boolean buildGraphs, CoreMap sentence, Tree tree) {
    // Make sure all tree nodes are CoreLabels.
    // TODO: why isn't this always true? something fishy is going on
    ParserAnnotatorUtils.convertToCoreLabels(tree);

    // Index nodes, i.e., add start and end token positions to all nodes. Per the original author
    // this is needed by other annotators downstream, e.g., the NFLAnnotator.
    tree.indexSpans(0);

    sentence.set(TreeAnnotation.class, tree);
    if (verbose) {
      System.err.println("Tree is:");
      tree.pennPrint(System.err);
    }

    if (buildGraphs) {
      // All three graphs are generated before any of them is printed or stored.
      SemanticGraph collapsed = generateCollapsedDependencies(tree);
      SemanticGraph basic = generateUncollapsedDependencies(tree);
      SemanticGraph ccProcessed = generateCCProcessedDependencies(tree);

      if (verbose) {
        System.err.println("SDs:");
        System.err.println(collapsed.toString("plain"));
      }

      sentence.set(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class, collapsed);
      sentence.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, basic);
      sentence.set(
          SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class,
          ccProcessed);
    }

    setMissingTags(sentence, tree);
  }
예제 #8
0
 /**
  * Collects the {@code value()} of every node returned by {@code tree.subTreeList()}, in the
  * order that list yields them.
  *
  * @param tree the tree whose node values are collected
  * @return a new list with one entry per subtree
  */
 private static List<String> myMakeObjects(Tree tree) {
   final List<String> nodeValues = new LinkedList<>();
   for (final Tree subtree : tree.subTreeList()) {
     final String value = subtree.value();
     nodeValues.add(value);
   }
   return nodeValues;
 }
예제 #9
0
  /**
   * Command-line entry point: reads trees from the UTF-8 encoded file named by the single
   * argument, runs each one through an {@code ATBCorrector}, and prints the transformed tree to
   * standard output, one per line. The number of trees processed is reported on standard error.
   *
   * <p>With any other number of arguments a usage message is printed and the JVM exits with
   * status -1. I/O failures are reported via their stack trace.
   *
   * @param args exactly one element: the path of the tree file to process
   */
  public static void main(String[] args) {
    if (args.length != 1) {
      System.err.println("Usage: java " + ATBCorrector.class.getName() + " filename\n");
      System.exit(-1);
    }

    TreeTransformer tt = new ATBCorrector();

    File f = new File(args[0]);

    // try-with-resources releases the file handle even when reading or transforming a tree
    // throws; previously the reader was closed only on the success path.
    try (BufferedReader br =
        new BufferedReader(new InputStreamReader(new FileInputStream(f), "UTF-8"))) {
      TreeReaderFactory trf = new ArabicTreeReaderFactory.ArabicRawTreeReaderFactory();
      TreeReader tr = trf.newTreeReader(br);

      int nTrees = 0;
      for (Tree t; (t = tr.readTree()) != null; nTrees++) {
        Tree fixedT = tt.transformTree(t);
        System.out.println(fixedT.toString());
      }

      // Kept from the original success path; closing the underlying BufferedReader a second
      // time afterwards is a no-op.
      tr.close();

      System.err.printf("Wrote %d trees%n", nTrees);

    } catch (UnsupportedEncodingException e) {
      e.printStackTrace();

    } catch (IOException e) {
      e.printStackTrace();
    }
  }
  /**
   * Parses a sentence and returns a string representation of the parse tree.
   *
   * <p>Tokenization and parsing run while holding the monitor of the shared {@code parser}. In
   * the resulting tree string, every occurrence of a space followed by a bracketed run of
   * non-whitespace characters ({@code " [...]"}) is removed.
   *
   * @param sentence a sentence
   * @return the best parse rendered with {@code Tree.toString()}, minus the bracketed suffixes
   * @throws RuntimeException if {@code tlp} or {@code parser} has not been initialized
   */
  @SuppressWarnings("unchecked")
  public static String parse(String sentence) {
    if (tlp == null || parser == null) {
      throw new RuntimeException("Parser has not been initialized");
    }

    // parse the sentence to produce stanford Tree
    log.debug("Parsing sentence");
    final Tree bestParse;
    synchronized (parser) {
      Tokenizer tokenizer = tlp.getTokenizerFactory().getTokenizer(new StringReader(sentence));
      List<Word> tokens = tokenizer.tokenize();
      log.debug("Tokenization: " + tokens);
      parser.parse(new Sentence(tokens));
      bestParse = parser.getBestParse();
    }

    String rendered = bestParse.toString();
    return rendered.replaceAll(" \\[[\\S]+\\]", "");
  }
예제 #11
0
  /**
   * Climbs from {@code candidate} towards the top of the tree, moving up to {@code skip} times to
   * the next ancestor whose {@code value()} equals {@code expectedPOS}.
   *
   * @param candidate the node to start from
   * @param parent the tree handed to {@code Tree.parent(...)} to resolve ancestors
   * @param expectedPOS the node value an ancestor must have to count as a match
   * @param skip how many matching ancestors to climb to; zero or negative returns
   *     {@code candidate} unchanged
   * @return the highest matching ancestor reached, or {@code candidate} itself when no matching
   *     ancestor was found before running out of ancestors
   */
  private static Tree skip(Tree candidate, Tree parent, String expectedPOS, int skip) {
    Tree lastMatch = candidate;
    Tree cursor = candidate;

    for (int remaining = skip; remaining > 0; remaining--) {
      // Walk up until the next ancestor with the expected value; non-matching ancestors in
      // between are passed over.
      do {
        cursor = cursor.parent(parent);
        if (cursor == null) {
          // we are already on top
          return lastMatch;
        }
      } while (!expectedPOS.equals(cursor.value()));

      lastMatch = cursor;
    }
    return lastMatch;
  }
예제 #12
0
  /**
   * Creates a {@code TreebankNode} for every non-leaf child of {@code tree} and returns them as
   * an {@code FSArray}.
   *
   * <p>Leaves are skipped (per the original author they are just the words; POS tags are
   * pre-terminals in a Stanford tree). Each new node gets {@code parent} as its parent and is
   * then populated by {@code addTreebankNodeToIndexes}, which the original author describes as
   * mutually recursive with this method.
   *
   * @param parent the node set as parent of every created child node
   * @param jCas the CAS used to create the nodes and the array
   * @param tokenAnns passed through to {@code addTreebankNodeToIndexes}
   * @param tree the tree whose children are converted
   * @return an array holding the created child nodes in child order
   */
  private FSArray addTreebankNodeChildrenToIndexes(
      TreebankNode parent, JCas jCas, List<CoreLabel> tokenAnns, Tree tree) {
    List<TreebankNode> converted = new ArrayList<TreebankNode>();
    for (Tree subtree : tree.children()) {
      if (subtree.isLeaf()) {
        continue;
      }
      TreebankNode childNode = new TreebankNode(jCas);
      childNode.setParent(parent);
      this.addTreebankNodeToIndexes(childNode, jCas, subtree, tokenAnns);
      converted.add(childNode);
    }

    // Copy the collected nodes into an FSArray of matching size.
    FSArray result = new FSArray(jCas, converted.size());
    int slot = 0;
    for (TreebankNode childNode : converted) {
      result.set(slot++, childNode);
    }
    return result;
  }
예제 #13
0
  /**
   * Extracts the words of a sentence that the parser tags as {@code NN} or {@code NNS}.
   *
   * <p>The input is split on single spaces, parsed with {@code lp}, and the labels of the tagged
   * yield are inspected. Each label's string form is split on {@code '-'}; the first piece is
   * taken as the tag and the second as the index into the split input.
   * NOTE(review): this relies on the labels printing as {@code TAG-index} — confirm against the
   * parser version in use.
   *
   * <p>The grammatical structure and typed dependencies that were previously computed here were
   * never read, so that work has been removed.
   *
   * @param string the sentence, with tokens separated by single spaces
   * @return the matching words, in sentence order
   */
  public LinkedList<String> getKeyWrodsFromSentence(String string) {
    LinkedList<String> list = new LinkedList<String>();

    String[] sent = string.split(" ");
    List<HasWord> sentence = new ArrayList<HasWord>();
    for (String word : sent) {
      sentence.add(new Word(word));
    }

    Tree parse = lp.parse(sentence);

    List<CoreLabel> labelsList = parse.taggedLabeledYield();
    for (Label l : labelsList) {
      String[] current = l.toString().split("-");
      String type = current[0];
      if (type.equals("NN") || type.equals("NNS")) {
        // current[1] is the token position; map it back to the original word.
        list.add(sent[Integer.parseInt(current[1])]);
      }
    }
    return list;
  }
예제 #14
0
 /**
  * Build the set of dependencies for evaluation.
  *
  * <p>For every subtree with at least two children, the word of the node's label is the head.
  * The first child whose word equals the head is treated as the head child; every other child
  * yields an {@code UnnamedDependency(head, arg)} unless {@code punctFilter} accepts the child's
  * word. Node and child labels are cast to {@code HasWord}.
  *
  * @param tree the tree to extract dependencies from
  * @return the set of extracted dependencies
  */
 @Override
 protected Set<?> makeObjects(Tree tree) {
   Set<Dependency<Label, Label, Object>> deps = new HashSet<Dependency<Label, Label, Object>>();
   for (Tree node : tree.subTreeList()) {
     if (DEBUG) EncodingPrintWriter.err.println("Considering " + node.label());

     // Every child with a different head is an argument, as are ones with the same head after
     // the first one found; nodes with fewer than two children contribute nothing.
     if (node.isLeaf()) {
       continue;
     }
     Tree[] kids = node.children();
     if (kids.length < 2) {
       continue;
     }

     String head = ((HasWord) node.label()).word();
     boolean seenHead = false;
     for (Tree kid : kids) {
       String arg = ((HasWord) kid.label()).word();
       if (DEBUG) EncodingPrintWriter.err.println("Considering " + head + " --> " + arg);

       boolean isFirstHeadChild = head.equals(arg) && !seenHead;
       if (isFirstHeadChild) {
         seenHead = true;
         if (DEBUG) EncodingPrintWriter.err.println("  ... is head");
       } else if (punctFilter.accept(arg)) {
         if (DEBUG) EncodingPrintWriter.err.println("  ... is punct dep");
       } else {
         deps.add(new UnnamedDependency(head, arg));
         if (DEBUG) EncodingPrintWriter.err.println("  ... added");
       }
     }
   }
   if (DEBUG) {
     EncodingPrintWriter.err.println("Deps: " + deps);
   }
   return deps;
 }
 /**
  * Rebuilds {@code tree} through the tree factory {@code tf}, reducing node categories to their
  * basic category.
  *
  * <p>Leaves are re-created from their existing label. Every other node receives a new
  * {@code CategoryWordTag} built from the old label, with its category set to
  * {@code basicCategory} of the old label value; when the old label implements {@code HasTag},
  * its tag is reduced the same way. Scores are copied onto the new nodes.
  *
  * @param tree the tree to transform
  * @return the newly built tree
  */
 public Tree transformTree(Tree tree) {
   final Label sourceLabel = tree.label();

   if (!tree.isLeaf()) {
     final String basicCat = treebankLanguagePack().basicCategory(sourceLabel.value());

     // cdm 2007: for just subcategory stripping, a transformed child is never expected to be
     // null, so every result is added unconditionally.
     final int childCount = tree.numChildren();
     final List<Tree> transformedKids = new ArrayList<Tree>(childCount);
     int position = 0;
     while (position < childCount) {
       transformedKids.add(transformTree(tree.getChild(position)));
       position++;
     }

     final CategoryWordTag strippedLabel = new CategoryWordTag(sourceLabel);
     strippedLabel.setCategory(basicCat);
     if (sourceLabel instanceof HasTag) {
       final String rawTag = ((HasTag) sourceLabel).tag();
       strippedLabel.setTag(treebankLanguagePack().basicCategory(rawTag));
     }

     final Tree internal = tf.newTreeNode(strippedLabel, transformedKids);
     internal.setScore(tree.score());
     return internal;
   }

   final Tree terminal = tf.newLeaf(sourceLabel);
   terminal.setScore(tree.score());
   return terminal;
 }
예제 #16
0
  /**
   * Renders the part of the sentence covered by {@code tree}: the string forms of its leaves,
   * separated by single spaces, with leading and trailing whitespace trimmed.
   *
   * @param tree A (partial) syntax tree
   * @return The original sentence part
   */
  public static String printTree(Tree tree) {
    final StringBuilder words = new StringBuilder();
    for (final Tree leaf : tree.getLeaves()) {
      words.append(leaf.toString());
      words.append(' ');
    }
    // trim() removes the separator left behind after the last leaf.
    final String joined = words.toString();
    return joined.trim();
  }
예제 #17
0
 /**
  * Reads a tree from its textual form, converts its labels to CoreLabels, and percolates head
  * annotations using a {@code BinaryHeadFinder} that wraps the head finder of a default
  * {@code Options} instance.
  *
  * @param treeText the tree in the format accepted by {@code Tree.valueOf}
  * @return the annotated tree
  */
 Tree convertTree(String treeText) {
   HeadFinder headFinder = new BinaryHeadFinder(new Options().tlpParams.headFinder());
   Tree parsed = Tree.valueOf(treeText);
   Trees.convertToCoreLabels(parsed);
   parsed.percolateHeadAnnotations(headFinder);
   return parsed;
 }
예제 #18
0
  /**
   * Renders a tree either in its default form or as plain text.
   *
   * @param tree the tree to render
   * @param plainPrint when false, {@code tree.toString()} is returned; when true, the values of
   *     the leaf labels (cast to {@code CoreLabel}) are emitted, each followed by one space
   * @return the rendered tree; in plain mode the result ends with a trailing space if the tree
   *     has any leaves
   */
  private static String toString(Tree tree, boolean plainPrint) {
    if (plainPrint) {
      StringBuilder words = new StringBuilder();
      for (Tree leaf : tree.getLeaves()) {
        CoreLabel token = (CoreLabel) leaf.label();
        words.append(token.value()).append(' ');
      }
      return words.toString();
    }
    return tree.toString();
  }
예제 #19
0
 /**
  * Applies {@code prune} to each tree in order, passing each one its starting offset:
  * {@code start} plus the combined yield size of the trees before it.
  *
  * @param treeList the trees to prune, in order
  * @param start the offset passed for the first tree
  * @return a new list with the result of {@code prune} for each input tree
  */
 private List<Tree> helper(List<Tree> treeList, int start) {
   List<Tree> pruned = new ArrayList<Tree>(treeList.size());
   int offset = start;
   for (Tree current : treeList) {
     // Measure the yield before pruning, as the original did.
     int width = current.yield().size();
     pruned.add(prune(current, offset));
     offset += width;
   }
   return pruned;
 }
예제 #20
0
  /**
   * Checks whether replacing the word at {@code index} with {@code newWord} leaves the parse of
   * the sentence unchanged.
   *
   * <p>The sentence is parsed once as it stands and once after {@code newWord} has been set as
   * the new value of the word. In the first parse's string form, every occurrence of the
   * original word directly followed by {@code ')'} is replaced by {@code newWord}; the result is
   * then compared with the second parse's string form. Both strings are printed to standard
   * output.
   *
   * <p>On a match the new value is kept (with its first letter upper-cased when
   * {@code index == 0}) and {@code true} is returned; otherwise the new value is reset to
   * {@code null} and {@code false} is returned.
   *
   * @param words the words of the sentence; the entry at {@code index} is modified
   * @param index position of the word to replace
   * @param newWord the candidate replacement
   * @return whether the two parse strings are equal after the substitution
   */
  private boolean LexicalAnalyzer(ArrayList<Word> words, int index, String newWord) {
    // Parse the sentence in its current form.
    String[] sent = toSentence(words);
    List<CoreLabel> rawWords = Sentence.toCoreLabelList(sent);
    Tree parse = lp.apply(rawWords);
    String oldTree = parse.toString();

    // Substitute the candidate and parse again.
    words.get(index).setNewValue(newWord);
    sent = toSentence(words);
    rawWords = Sentence.toCoreLabelList(sent);
    parse = lp.apply(rawWords);
    String newTree = parse.toString();

    // Literal replacement: replaceAll() would interpret the original word as a regular
    // expression and the new word as a replacement template, so words containing characters
    // such as '.', '(', '$' or '\' could mis-match or throw.
    oldTree = oldTree.replace(words.get(index).getOrigValue() + ")", newWord + ")");
    System.out.println(oldTree + "\n" + newTree);

    if (oldTree.equals(newTree)) {
      if (index == 0) {
        // Sentence-initial word: capitalize, but never index into a missing or empty value.
        String str = words.get(index).getNewValue();
        if (str != null && !str.isEmpty()) {
          String cap = str.substring(0, 1).toUpperCase() + str.substring(1);
          words.get(index).setNewValue(cap);
        }
      }
      return true;
    } else {
      words.get(index).setNewValue(null);
      return false;
    }
  }
  /**
   * Tokenizes a sentence with a PTB tokenizer (empty option string), parses the tokens with the
   * given parser, and returns the tagged yield of the resulting tree.
   *
   * @param sentence the raw sentence text
   * @param lp the parser applied to the tokens
   * @return the tagged words of the parse
   */
  public static ArrayList<TaggedWord> StanfordParse(String sentence, LexicalizedParser lp) {
    TokenizerFactory<CoreLabel> factory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    List<CoreLabel> tokens = factory.getTokenizer(new StringReader(sentence)).tokenize();
    Tree parsed = lp.apply(tokens);
    return parsed.taggedYield();
  }
  /**
   * Manual smoke test: runs a fixed sentence through a new {@code CoreNlpParser} and
   * Penn-prints every returned tree, followed by blank output. Nothing is asserted, and this
   * method itself does not remove periods despite its name.
   */
  private static void testParseAndRemovePeriods() {
    final String testSentence =
        "Now is the time for all good men to come to the aid of their country.";

    final List<Tree> parsedTrees = new CoreNlpParser().getTextAnnotatedTree(testSentence);
    for (final Tree parsedTree : parsedTrees) {
      parsedTree.pennPrint();
    }

    System.out.println("\n");
  }
예제 #23
0
 /**
  * Assigns consecutive indices to the leaves of {@code t}, left to right, starting at
  * {@code startIndex}. Leaf labels are cast to {@code CoreLabel}.
  *
  * @param t the (sub-)tree whose leaves are re-indexed
  * @param startIndex the index given to the first leaf encountered
  * @return the next unused index, i.e. {@code startIndex} plus the number of leaves visited
  */
 private static int reIndexLeaves(Tree t, int startIndex) {
   if (!t.isLeaf()) {
     int next = startIndex;
     for (Tree kid : t.children()) {
       next = reIndexLeaves(kid, next);
     }
     return next;
   }

   CoreLabel leafLabel = (CoreLabel) t.label();
   leafLabel.setIndex(startIndex);
   return startIndex + 1;
 }
예제 #24
0
 /**
  * Returns the category of a tree, which is the string form of its root label except for two
  * special cases: an {@code S} with a single {@code VP} child whose first child is {@code VBG}
  * is reported as {@code "NP"}, and one whose first child is {@code VBN} as {@code "VP"}.
  *
  * @param t the tree to categorize
  * @return the (possibly remapped) category
  */
 private static String getTreeCategory(Tree t) {
   String rootLabel = t.label().toString();
   if (!rootLabel.equals("S") || t.numChildren() != 1) {
     return rootLabel;
   }

   Tree onlyChild = t.getChild(0);
   if (!onlyChild.label().toString().equals("VP")) {
     return rootLabel;
   }

   // S -> VP with a leading participle: remap according to the participle type.
   String leadingTag = onlyChild.getChild(0).label().toString();
   if (leadingTag.equals("VBG")) {
     return "NP";
   }
   if (leadingTag.equals("VBN")) {
     return "VP";
   }
   return rootLabel;
 }
  /**
   * Sets the label value of every non-leaf node to its index. Counting starts at the given node
   * and proceeds in pre-order: a node is labelled before its children, and children are visited
   * left to right. Leaves are left untouched and consume no index.
   *
   * @param tree the (sub-)tree to label
   * @param index the index assigned to {@code tree} itself if it is not a leaf
   * @return the next unused index
   */
  static int setIndexLabels(Tree tree, int index) {
    if (tree.isLeaf()) {
      return index;
    }

    tree.label().setValue(Integer.toString(index));
    int next = index + 1;
    for (Tree kid : tree.children()) {
      next = setIndexLabels(kid, next);
    }
    return next;
  }
예제 #26
0
  /**
   * Converts the tree labels to CoreLabels. We need this because we store additional info in the
   * CoreLabel, like token span.
   *
   * <p>Any node whose label is not already a {@code CoreLabel} gets a fresh one carrying only
   * the old label's value; the conversion then recurses into all children.
   *
   * @param tree the tree whose labels are converted in place
   */
  public static void convertToCoreLabels(Tree tree) {
    Label current = tree.label();
    boolean alreadyCoreLabel = current instanceof CoreLabel;
    if (!alreadyCoreLabel) {
      CoreLabel replacement = new CoreLabel();
      replacement.setValue(current.value());
      tree.setLabel(replacement);
    }

    Tree[] kids = tree.children();
    for (int i = 0; i < kids.length; i++) {
      convertToCoreLabels(kids[i]);
    }
  }
예제 #27
0
 /**
  * Derives a tense from the tag above the first word of a clause. The label value of the first
  * leaf's parent in the tree returned by {@code getPosTree} is lower-cased and mapped as
  * follows: {@code md} gives FUTURE, {@code vbd} or {@code vbn} gives PAST, anything else
  * gives PRESENT.
  *
  * @param clause the clause to analyze
  * @return the tense inferred from the first word's tag
  */
 public Tense calculateTense(String clause) {
   final Tree posTree = getPosTree(clause);
   final Tree firstWord = posTree.getLeaves().get(0);
   final String tag = firstWord.parent(posTree).label().value().toLowerCase();

   final Tense tense;
   if ("md".equals(tag)) {
     tense = Tense.FUTURE;
   } else if ("vbd".equals(tag) || "vbn".equals(tag)) {
     tense = Tense.PAST;
   } else {
     tense = Tense.PRESENT;
   }
   return tense;
 }
예제 #28
0
  /**
   * Build the set of dependencies for evaluation, as returned by
   * {@code tree.dependencies(punctRejectFilter)}.
   *
   * <p>When a head finder is configured, heads are percolated through the tree first. A
   * {@code null} tree produces a warning on standard error and an empty set.
   *
   * @param tree the tree to extract dependencies from, may be {@code null}
   * @return the extracted dependencies, or an empty set for a {@code null} tree
   */
  @Override
  protected Set<?> makeObjects(Tree tree) {
    if (tree != null) {
      if (headFinder != null) {
        tree.percolateHeads(headFinder);
      }
      Set<Dependency<Label, Label, Object>> filtered = tree.dependencies(punctRejectFilter);
      return filtered;
    }

    System.err.println("Warning: null tree");
    return Generics.newHashSet();
  }
예제 #29
0
 /**
  * Renders the local rule at the root of a tree as {@code "LABEL -> CHILD1 CHILD2 ..."}, using
  * the labels of the node and of its direct children.
  *
  * @param tree the tree whose top-level expansion is rendered
  * @return the rule string, or the empty string when {@code tree} is a leaf
  */
 protected static String localize(Tree tree) {
   if (tree.isLeaf()) {
     return "";
   }
   StringBuilder rule = new StringBuilder();
   rule.append(tree.label()).append(" ->");
   for (Tree kid : tree.children()) {
     rule.append(' ').append(kid.label());
   }
   return rule.toString();
 }
 /**
  * Recursively collects dependencies below {@code t}. Leaves and pre-terminals contribute
  * nothing. For any other node the head daughter is determined with {@code hf}; each child is
  * processed recursively first, and then, unless it is the head daughter itself, a dependency
  * from the head daughter to that child is created by {@code typer} and added to {@code c}.
  *
  * @param t the current node
  * @param root the root of the whole tree, passed through to {@code typer}
  * @param hf the head finder used to pick the head daughter
  * @param c the collection receiving the dependencies
  * @param typer the factory turning a (head, dependent, root) triple into a dependency object
  */
 private static <E> void dependencyObjectifyHelper(
     Tree t, Tree root, HeadFinder hf, Collection<E> c, DependencyTyper<E> typer) {
   if (!t.isLeaf() && !t.isPreTerminal()) {
     Tree head = hf.determineHead(t);
     Tree[] kids = t.children();
     for (int i = 0; i < kids.length; i++) {
       Tree kid = kids[i];
       // Descend first so dependencies from deeper levels are added before this level's.
       dependencyObjectifyHelper(kid, root, hf, c, typer);
       if (kid != head) {
         c.add(typer.makeDependency(head, kid, root));
       }
     }
   }
 }