/**
 * Terse representation of a (sub-)tree: {@code NP[the white dog]} -vs-
 * {@code (NP (DT the) (JJ white) (NN dog))}.
 *
 * @param tree the (sub-)tree to abbreviate
 * @return the string form of the root label, followed by the string forms of the leaf labels
 *     joined by single spaces and wrapped in square brackets
 */
public static String abbrevTree(Tree tree) {
  ArrayList<String> toks = new ArrayList<>();
  for (Tree leaf : tree.getLeaves()) {
    toks.add(leaf.label().toString());
  }
  return tree.label().toString() + "[" + StringUtils.join(toks, " ") + "]";
}
/** Expand in a non-overlapping manner the influence of each of our different phrases. */ void expandInfluence() { // Tree base1 = TreeOps.expandUntil(t, leaves[root1], new // TreeOps.regexpMatcher(RelnDep.NP_OR_S)); Tree base1 = TreeOps.expandUntil(t, leaves[root1], new TreeOps.regexpMatcher(NPS)); Tree base2 = TreeOps.expandUntil(t, leaves[root2], new TreeOps.regexpMatcher(StanfordParser.VerbPhrase)); if (base1 == null || base2 == null) return; Tree r1exp = base1; Tree r2exp = base2; while (r1exp != null && r2exp != null && seperate( TreeOps.getSubTreeBoundaries(t, r1exp), TreeOps.getSubTreeBoundaries(t, r2exp))) { base1 = r1exp; base2 = r2exp; // r1exp = TreeOps.expandUntil(t, base1.parent(t), new // TreeOps.regexpMatcher(RelnDep.NP_OR_S)); r1exp = TreeOps.expandUntil(t, base1.parent(t), new TreeOps.regexpMatcher(NPS)); r2exp = TreeOps.expandUntil( t, base2.parent(t), new TreeOps.regexpMatcher(StanfordParser.VerbPhrase)); } r1arr = TreeOps.getSubTreeBoundaries(t, base1); r2arr = TreeOps.getSubTreeBoundaries(t, base2); }
/**
 * Recursively rebuilds {@code tree} with simplified labels.
 *
 * <p>Leaves are recreated from their existing label. Every other node gets a new
 * {@code CategoryWordTag} label whose category (and tag, when the label implements
 * {@code HasTag}) is passed through the language pack's {@code basicCategory} and then
 * {@code stripGF}. Scores are copied onto the new nodes.
 *
 * @param tree the tree to transform
 * @return the newly built tree
 */
public Tree transformTree(Tree tree) {
  final Label original = tree.label();

  if (tree.isLeaf()) {
    Tree leafCopy = tf.newLeaf(original);
    leafCopy.setScore(tree.score());
    return leafCopy;
  }

  String category = treebankLanguagePack().basicCategory(original.value());
  category = treebankLanguagePack().stripGF(category);

  List<Tree> rebuiltChildren = new ArrayList<Tree>(tree.numChildren());
  for (Tree child : tree.children()) {
    rebuiltChildren.add(transformTree(child));
  }

  CategoryWordTag relabeled = new CategoryWordTag(original);
  relabeled.setCategory(category);
  if (original instanceof HasTag) {
    String simplifiedTag = treebankLanguagePack().basicCategory(((HasTag) original).tag());
    simplifiedTag = treebankLanguagePack().stripGF(simplifiedTag);
    relabeled.setTag(simplifiedTag);
  }

  Tree rebuilt = tf.newTreeNode(relabeled, rebuiltChildren);
  rebuilt.setScore(tree.score());
  return rebuilt;
}
/** * Returns the sentence from its tree representation. * * @param t the tree representation of the sentence * @return the sentence */ public static String tree2Words(Tree t) { StringBuilder buffer = new StringBuilder(); List<Tree> leaves = t.getLeaves(); for (Tree leaf : leaves) { String word = ((CoreLabel) leaf.label()).get(CoreAnnotations.ValueAnnotation.class); // TODO maybe double check preceding whitespaces, because transformations could have // resulted in the situation that the trailing // whitespaces of out last tokens is not the same as the preceding whitespaces of out // current token BUT: This has also to be done in getTokenListFromTree(...) // now add the trailing whitespaces String trailingWhitespaces = ((CoreLabel) leaf.label()).get(CoreAnnotations.AfterAnnotation.class); // if no whitespace-info is available, insert a whitespace this may happen for nodes // inserted by TSurgeon operations if (trailingWhitespaces == null) { trailingWhitespaces = " "; } buffer.append(word).append(trailingWhitespaces); } return buffer.toString(); }
/**
 * Asserts that {@code result} matches {@code expected}.
 *
 * <p>When both trees are non-null their string forms are compared. When either is null the
 * references themselves are compared, so a null on only one side fails as an assertion rather
 * than as a {@code NullPointerException}.
 *
 * @param expected the expected tree, may be null
 * @param result the tree produced by the code under test, may be null
 */
private void verifyTree(Tree expected, Tree result) {
  if (expected == null || result == null) {
    assertEquals(expected, result);
    return;
  }
  assertEquals(expected.toString(), result.toString());
}
public static String getCleanedUpYield(Tree inputTree) { Tree copyTree = inputTree.deepCopy(); if (DEBUG) System.err.println(copyTree.toString()); String res = copyTree.yield().toString(); if (res.length() > 1) { res = res.substring(0, 1).toUpperCase() + res.substring(1); } // (ROOT (S (NP (NNP Jaguar) (NNS shares)) (VP (VBD skyrocketed) (NP (NN yesterday)) (PP (IN // after) (NP (NP (NNP Mr.) (NNP Ridley) (POS 's)) (NN announcement)))) (. .))) res = res.replaceAll("\\s([\\.,!\\?\\-;:])", "$1"); res = res.replaceAll("(\\$)\\s", "$1"); res = res.replaceAll("can not", "cannot"); res = res.replaceAll("\\s*-LRB-\\s*", " ("); res = res.replaceAll("\\s*-RRB-\\s*", ") "); res = res.replaceAll("\\s*([\\.,?!])\\s*", "$1 "); res = res.replaceAll("\\s+''", "''"); // res = res.replaceAll("\"", ""); res = res.replaceAll("``\\s+", "``"); res = res.replaceAll("\\-[LR]CB\\-", ""); // brackets, e.g., [sic] // remove extra spaces res = res.replaceAll("\\s\\s+", " "); res = res.trim(); return res; }
public static void fillInParseAnnotations( boolean verbose, boolean buildGraphs, CoreMap sentence, Tree tree) { // make sure all tree nodes are CoreLabels // TODO: why isn't this always true? something fishy is going on ParserAnnotatorUtils.convertToCoreLabels(tree); // index nodes, i.e., add start and end token positions to all nodes // this is needed by other annotators down stream, e.g., the NFLAnnotator tree.indexSpans(0); sentence.set(TreeAnnotation.class, tree); if (verbose) { System.err.println("Tree is:"); tree.pennPrint(System.err); } if (buildGraphs) { // generate the dependency graph SemanticGraph deps = generateCollapsedDependencies(tree); SemanticGraph uncollapsedDeps = generateUncollapsedDependencies(tree); SemanticGraph ccDeps = generateCCProcessedDependencies(tree); if (verbose) { System.err.println("SDs:"); System.err.println(deps.toString("plain")); } sentence.set(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class, deps); sentence.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, uncollapsedDeps); sentence.set( SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class, ccDeps); } setMissingTags(sentence, tree); }
/**
 * Collects the value of every node in the tree's subtree list.
 *
 * @param tree the tree to read
 * @return the node values, in the order given by {@code tree.subTreeList()}
 */
private static List<String> myMakeObjects(Tree tree) {
  List<String> categories = new LinkedList<>();
  List<Tree> subtrees = tree.subTreeList();
  for (int i = 0; i < subtrees.size(); i++) {
    categories.add(subtrees.get(i).value());
  }
  return categories;
}
/**
 * Command-line entry point: reads trees from a UTF-8 file, runs each through an
 * {@code ATBCorrector} and prints the transformed tree to standard output. The number of trees
 * written is reported on standard error.
 *
 * @param args exactly one element, the name of the file to read; otherwise a usage message is
 *     printed and the JVM exits with status -1
 */
public static void main(String[] args) {
  if (args.length != 1) {
    System.err.println("Usage: java " + ATBCorrector.class.getName() + " filename\n");
    System.exit(-1);
  }

  TreeTransformer tt = new ATBCorrector();
  File f = new File(args[0]);

  // try-with-resources guarantees the underlying reader is closed even when reading or
  // transforming a tree throws part-way through the file.
  try (BufferedReader br =
      new BufferedReader(new InputStreamReader(new FileInputStream(f), "UTF-8"))) {
    TreeReaderFactory trf = new ArabicTreeReaderFactory.ArabicRawTreeReaderFactory();
    TreeReader tr = trf.newTreeReader(br);

    int nTrees = 0;
    for (Tree t; (t = tr.readTree()) != null; nTrees++) {
      Tree fixedT = tt.transformTree(t);
      System.out.println(fixedT.toString());
    }
    tr.close();

    System.err.printf("Wrote %d trees%n", nTrees);
  } catch (UnsupportedEncodingException e) {
    e.printStackTrace();
  } catch (IOException e) {
    e.printStackTrace();
  }
}
/** * Parses a sentence and returns a string representation of the parse tree. * * @param sentence a sentence * @return Tree whose Label is a MapLabel containing correct begin and end character offsets in * keys BEGIN_KEY and END_KEY */ @SuppressWarnings("unchecked") public static String parse(String sentence) { if (tlp == null || parser == null) throw new RuntimeException("Parser has not been initialized"); // parse the sentence to produce stanford Tree log.debug("Parsing sentence"); Tree tree = null; synchronized (parser) { Tokenizer tokenizer = tlp.getTokenizerFactory().getTokenizer(new StringReader(sentence)); List<Word> words = tokenizer.tokenize(); log.debug("Tokenization: " + words); parser.parse(new Sentence(words)); tree = parser.getBestParse(); } // label tree with character extents // log.debug("Setting character extents"); // updateTreeLabels(tree, tree, new MutableInteger(), new MutableInteger(-1)); // log.debug("Creating offset mapping"); // List<RangeMap> mapping = createMapping(sentence); // log.debug(mapping.toString()); // log.debug("Applying offset mapping"); // mapOffsets(tree, mapping); return tree.toString().replaceAll(" \\[[\\S]+\\]", ""); }
private static Tree skip(Tree candidate, Tree parent, String expectedPOS, int skip) { if (skip == 0) return candidate; Tree lastvalid = candidate; // we are allowed to skip non-matching phrases while (skip > 0) { skip--; // we walk up the do { // if we don't have the right POS, just try our parent candidate = candidate.parent(parent); if (candidate == null) { // we are already on top return lastvalid; } else if (expectedPOS.equals(candidate.value())) { // we have found a good match. this does not count as a skip lastvalid = candidate; } } while (skip >= 0 && !expectedPOS.equals(candidate.value())); } return lastvalid; }
private FSArray addTreebankNodeChildrenToIndexes( TreebankNode parent, JCas jCas, List<CoreLabel> tokenAnns, Tree tree) { Tree[] childTrees = tree.children(); // collect all children (except leaves, which are just the words - POS tags are pre-terminals in // a Stanford tree) List<TreebankNode> childNodes = new ArrayList<TreebankNode>(); for (Tree child : childTrees) { if (!child.isLeaf()) { // set node attributes and add children (mutual recursion) TreebankNode node = new TreebankNode(jCas); node.setParent(parent); this.addTreebankNodeToIndexes(node, jCas, child, tokenAnns); childNodes.add(node); } } // convert the child list into an FSArray FSArray childNodeArray = new FSArray(jCas, childNodes.size()); for (int i = 0; i < childNodes.size(); ++i) { childNodeArray.set(i, childNodes.get(i)); } return childNodeArray; }
/**
 * Extracts the words of a sentence that the parser tags as {@code NN} or {@code NNS}.
 *
 * <p>The sentence is split on single spaces, parsed with {@code lp}, and the tagged labeled
 * yield of the parse is scanned. Only the parse is needed for this, so no grammatical structure
 * or typed dependencies are computed.
 *
 * @param string the sentence, with words separated by single spaces
 * @return the matching words in sentence order
 */
public LinkedList<String> getKeyWrodsFromSentence(String string) {
  LinkedList<String> list = new LinkedList<String>();

  String[] sent = string.split(" ");
  List<HasWord> sentence = new ArrayList<HasWord>();
  for (String word : sent) {
    sentence.add(new Word(word));
  }

  Tree parse = lp.parse(sentence);
  for (Label l : parse.taggedLabeledYield()) {
    // The label's string form is split on "-": the first part is compared against the tags,
    // the second is parsed as the index of the word in the input.
    String[] current = l.toString().split("-");
    String type = current[0];
    if (type.equals("NN") || type.equals("NNS")) {
      list.add(sent[Integer.parseInt(current[1])]);
    }
  }
  return list;
}
/** * Build the set of dependencies for evaluation. This set excludes all dependencies for which the * argument is a punctuation tag. */ @Override protected Set<?> makeObjects(Tree tree) { Set<Dependency<Label, Label, Object>> deps = new HashSet<Dependency<Label, Label, Object>>(); for (Tree node : tree.subTreeList()) { if (DEBUG) EncodingPrintWriter.err.println("Considering " + node.label()); // every child with a different head is an argument, as are ones with // the same head after the first one found if (node.isLeaf() || node.children().length < 2) { continue; } // System.err.println("XXX node is " + node + "; label type is " + // node.label().getClass().getName()); String head = ((HasWord) node.label()).word(); boolean seenHead = false; for (int cNum = 0; cNum < node.children().length; cNum++) { Tree child = node.children()[cNum]; String arg = ((HasWord) child.label()).word(); if (DEBUG) EncodingPrintWriter.err.println("Considering " + head + " --> " + arg); if (head.equals(arg) && !seenHead) { seenHead = true; if (DEBUG) EncodingPrintWriter.err.println(" ... is head"); } else if (!punctFilter.accept(arg)) { deps.add(new UnnamedDependency(head, arg)); if (DEBUG) EncodingPrintWriter.err.println(" ... added"); } else if (DEBUG) { if (DEBUG) EncodingPrintWriter.err.println(" ... is punct dep"); } } } if (DEBUG) { EncodingPrintWriter.err.println("Deps: " + deps); } return deps; }
public Tree transformTree(Tree tree) { Label lab = tree.label(); if (tree.isLeaf()) { Tree leaf = tf.newLeaf(lab); leaf.setScore(tree.score()); return leaf; } String s = lab.value(); s = treebankLanguagePack().basicCategory(s); int numKids = tree.numChildren(); List<Tree> children = new ArrayList<Tree>(numKids); for (int cNum = 0; cNum < numKids; cNum++) { Tree child = tree.getChild(cNum); Tree newChild = transformTree(child); // cdm 2007: for just subcategory stripping, null shouldn't happen // if (newChild != null) { children.add(newChild); // } } // if (children.isEmpty()) { // return null; // } CategoryWordTag newLabel = new CategoryWordTag(lab); newLabel.setCategory(s); if (lab instanceof HasTag) { String tag = ((HasTag) lab).tag(); tag = treebankLanguagePack().basicCategory(tag); newLabel.setTag(tag); } Tree node = tf.newTreeNode(newLabel, children); node.setScore(tree.score()); return node; }
/**
 * Creates a string which represents the part of the sentence this <code>tree</code> stands for.
 *
 * <p>The string forms of the leaves are joined with single spaces and the result is trimmed.
 *
 * @param tree A (partial) syntax tree
 * @return The original sentence part
 */
public static String printTree(Tree tree) {
  final StringBuilder text = new StringBuilder();
  String separator = "";
  for (final Tree leaf : tree.getLeaves()) {
    text.append(separator).append(leaf.toString());
    separator = " ";
  }
  // Trimming makes this identical to appending a space after every leaf and trimming afterwards.
  return text.toString().trim();
}
/**
 * Builds a tree from its text form, converts its labels to CoreLabels and percolates head
 * annotations using a {@code BinaryHeadFinder} wrapped around the head finder of a default
 * {@code Options} instance.
 *
 * @param treeText the text passed to {@code Tree.valueOf}
 * @return the converted tree
 */
Tree convertTree(String treeText) {
  HeadFinder headFinder = new BinaryHeadFinder(new Options().tlpParams.headFinder());
  Tree parsed = Tree.valueOf(treeText);
  Trees.convertToCoreLabels(parsed);
  parsed.percolateHeadAnnotations(headFinder);
  return parsed;
}
/**
 * Renders a tree either in its default string form or as plain words.
 *
 * @param tree the tree to render
 * @param plainPrint when true, the values of the leaf labels are returned, each followed by a
 *     space; when false, {@code tree.toString()} is returned
 * @return the rendered tree
 */
private static String toString(Tree tree, boolean plainPrint) {
  if (plainPrint) {
    StringBuilder words = new StringBuilder();
    for (Tree leaf : tree.getLeaves()) {
      CoreLabel token = (CoreLabel) leaf.label();
      words.append(token.value());
      words.append(' ');
    }
    return words.toString();
  }
  return tree.toString();
}
/**
 * Prunes each tree in the list, passing along a running start position.
 *
 * <p>The first tree is pruned with {@code start}; each following tree is pruned with the
 * previous position advanced by the yield size of the preceding tree, measured before that tree
 * was pruned.
 *
 * @param treeList the trees to prune
 * @param start the position passed to {@code prune} for the first tree
 * @return the results of {@code prune}, in input order
 */
private List<Tree> helper(List<Tree> treeList, int start) {
  List<Tree> pruned = new ArrayList<Tree>(treeList.size());
  int offset = start;
  for (Tree subtree : treeList) {
    int width = subtree.yield().size();
    pruned.add(prune(subtree, offset));
    offset += width;
  }
  return pruned;
}
private boolean LexicalAnalyzer(ArrayList<Word> words, int index, String newWord) { String[] sent = toSentence(words); /// lexical analyzer List<CoreLabel> rawWords = Sentence.toCoreLabelList(sent); Tree parse = lp.apply(rawWords); // PrintStream outa = new PrintStream(new FileOutputStream("output1.txt")); // System.setOut(outa); // System.out.println("KKKKKKK"); // parse.pennPrint(); String oldTree = parse.toString(); // String oldTree=baos.toString(); // System.setOut(new PrintStream(new FileOutputStream(FileDescriptor.out))); // System.out.println(oldTree); words.get(index).setNewValue(newWord); sent = toSentence(words); rawWords = Sentence.toCoreLabelList(sent); parse = lp.apply(rawWords); // PrintStream outb = new PrintStream(new FileOutputStream("output2.txt")); // System.setOut(outb); // parse.pennPrint(); String newTree = parse.toString(); oldTree = oldTree.replaceAll(words.get(index).getOrigValue() + "[)]", newWord + ")"); // System.setOut(new PrintStream(new FileOutputStream(FileDescriptor.out))); System.out.println(oldTree + "\n" + newTree); // System.out.println(oldTree.equals(newTree)); if (oldTree.equals(newTree)) { if (index == 0) { String str = words.get(index).getNewValue(); String cap = str.substring(0, 1).toUpperCase() + str.substring(1); words.get(index).setNewValue(cap); } return true; } else { words.get(index).setNewValue(null); return false; } /* catch (FileNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); return false; } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); return false; }*/ // return true; }
/**
 * Tokenizes a sentence with a PTB tokenizer, parses it and returns the tagged yield of the parse.
 *
 * @param sentence the text to parse
 * @param lp the parser applied to the tokens
 * @return the tagged yield of the resulting parse tree
 */
public static ArrayList<TaggedWord> StanfordParse(String sentence, LexicalizedParser lp) {
  TokenizerFactory<CoreLabel> factory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
  StringReader input = new StringReader(sentence);
  List<CoreLabel> tokens = factory.getTokenizer(input).tokenize();
  return lp.apply(tokens).taggedYield();
}
/**
 * Manual check: obtains the annotated trees for a fixed sample sentence from a fresh
 * {@code CoreNlpParser}, Penn-prints each of them, and finishes with blank output lines.
 */
private static void testParseAndRemovePeriods() {
  final String sample = "Now is the time for all good men to come to the aid of their country.";
  List<Tree> trees = new CoreNlpParser().getTextAnnotatedTree(sample);
  for (Tree annotated : trees) {
    annotated.pennPrint();
  }
  System.out.println("\n");
}
/**
 * Assigns consecutive indices to the leaves of a tree, in left-to-right order.
 *
 * @param t the tree whose leaf labels (cast to {@code CoreLabel}) receive the indices
 * @param startIndex the index given to the first leaf
 * @return the index following the one given to the last leaf
 */
private static int reIndexLeaves(Tree t, int startIndex) {
  if (t.isLeaf()) {
    ((CoreLabel) t.label()).setIndex(startIndex);
    return startIndex + 1;
  }

  int next = startIndex;
  for (Tree child : t.children()) {
    next = reIndexLeaves(child, next);
  }
  return next;
}
/**
 * Determines the category of a tree from its root label.
 *
 * <p>An {@code S} with a single {@code VP} child is reported as {@code "NP"} when the first
 * child of that {@code VP} is labelled {@code VBG}, and as {@code "VP"} when it is labelled
 * {@code VBN}. In every other case the string form of the root label is returned.
 *
 * @param t the tree to categorize
 * @return the category
 */
private static String getTreeCategory(Tree t) {
  final String rootLabel = t.label().toString();

  boolean singleVpClause =
      rootLabel.equals("S")
          && t.numChildren() == 1
          && t.getChild(0).label().toString().equals("VP");
  if (singleVpClause) {
    String firstTag = t.getChild(0).getChild(0).label().toString();
    if (firstTag.equals("VBG")) {
      return "NP";
    }
    if (firstTag.equals("VBN")) {
      return "VP";
    }
  }
  return rootLabel;
}
/**
 * Sets the labels on the tree to be the indices of the nodes. Counting starts at the root; each
 * non-leaf node is labelled before its children are visited, and leaves are left untouched.
 *
 * @param tree the (sub-)tree to label
 * @param index the index to give to {@code tree} itself
 * @return the next unused index
 */
static int setIndexLabels(Tree tree, int index) {
  if (tree.isLeaf()) {
    return index;
  }

  tree.label().setValue(Integer.toString(index));
  int next = index + 1;
  for (Tree child : tree.children()) {
    next = setIndexLabels(child, next);
  }
  return next;
}
/**
 * Converts the tree labels to CoreLabels. We need this because we store additional info in the
 * CoreLabel, like token span.
 *
 * <p>A label that is not already a {@code CoreLabel} is replaced by a new {@code CoreLabel}
 * carrying the same value; the conversion is applied recursively to all children.
 *
 * @param tree the tree whose labels are converted in place
 */
public static void convertToCoreLabels(Tree tree) {
  Label current = tree.label();
  boolean alreadyCoreLabel = current instanceof CoreLabel;
  if (!alreadyCoreLabel) {
    CoreLabel replacement = new CoreLabel();
    replacement.setValue(current.value());
    tree.setLabel(replacement);
  }

  Tree[] kids = tree.children();
  for (int i = 0; i < kids.length; i++) {
    convertToCoreLabels(kids[i]);
  }
}
/**
 * Derives a tense for a clause from the POS tag of its first word.
 *
 * <p>The tag is the lower-cased label value of the parent of the first leaf of the tree returned
 * by {@code getPosTree}. {@code md} gives {@code FUTURE}, {@code vbd} and {@code vbn} give
 * {@code PAST}, and every other tag gives {@code PRESENT}.
 *
 * @param clause the clause to analyse
 * @return the tense derived from the first word's tag
 */
public Tense calculateTense(String clause) {
  final Tree posTree = getPosTree(clause);
  final Tree firstWord = posTree.getLeaves().get(0);
  final String pos = firstWord.parent(posTree).label().value().toLowerCase();

  switch (pos) {
    case "md":
      return Tense.FUTURE;
    case "vbd":
    case "vbn":
      return Tense.PAST;
    default:
      return Tense.PRESENT;
  }
}
/**
 * Build the set of dependencies for evaluation, using {@code punctRejectFilter} as the filter
 * passed to {@code Tree.dependencies}.
 *
 * <p>If a head finder is configured, heads are percolated through the tree first. A null tree
 * produces a warning on standard error and an empty set.
 *
 * @param tree the tree to extract dependencies from, may be null
 * @return the dependencies of the tree, or an empty set for a null tree
 */
@Override
protected Set<?> makeObjects(Tree tree) {
  if (tree != null) {
    if (headFinder != null) {
      tree.percolateHeads(headFinder);
    }
    Set<Dependency<Label, Label, Object>> deps = tree.dependencies(punctRejectFilter);
    return deps;
  }

  System.err.println("Warning: null tree");
  return Generics.newHashSet();
}
/**
 * Renders the local structure at the root of a tree as {@code label -> child1 child2 ...}.
 *
 * @param tree the tree whose root and immediate children are rendered
 * @return the root label, {@code " ->"}, then each child label preceded by a space; the empty
 *     string for a leaf
 */
protected static String localize(Tree tree) {
  if (tree.isLeaf()) {
    return "";
  }

  StringBuilder rule = new StringBuilder();
  rule.append(tree.label()).append(" ->");
  for (Tree child : tree.children()) {
    rule.append(' ').append(child.label());
  }
  return rule.toString();
}
/**
 * Recursively collects dependencies between the head child of each node and its other children.
 *
 * <p>Leaves and pre-terminals contribute nothing. For every other node the head child is
 * determined with {@code hf}; each child is processed recursively and, unless it is the head
 * child itself, a dependency built by {@code typer} from (head child, child, root) is added to
 * {@code c}.
 *
 * @param t the node currently being processed
 * @param root the root of the whole tree, passed through to {@code typer}
 * @param hf the head finder used to pick the head child
 * @param c the collection receiving the dependencies
 * @param typer the factory creating dependency objects
 */
private static <E> void dependencyObjectifyHelper(
    Tree t, Tree root, HeadFinder hf, Collection<E> c, DependencyTyper<E> typer) {
  if (t.isLeaf()) {
    return;
  }
  if (t.isPreTerminal()) {
    return;
  }

  Tree head = hf.determineHead(t);
  for (Tree child : t.children()) {
    dependencyObjectifyHelper(child, root, hf, c, typer);
    if (child == head) {
      continue;
    }
    c.add(typer.makeDependency(head, child, root));
  }
}