/**
 * Builds a new list of tagged words whose tags have been normalised by the
 * given language pack.
 *
 * <p>Each element of {@code twList} is cast to {@link TaggedWord}; its word is
 * kept unchanged and its tag is replaced by the result of
 * {@code tlp.basicCategory(tag)}. The input list itself is not modified.
 *
 * @param twList list whose elements are all {@code TaggedWord} instances
 *               (a {@code ClassCastException} results otherwise)
 * @param tlp    language pack used to map each tag to its basic category
 * @return a freshly allocated list, in the same order as {@code twList}
 */
private static List<TaggedWord> cleanTags(List twList, TreebankLanguagePack tlp) {
  List<TaggedWord> cleaned = new ArrayList<TaggedWord>(twList.size());
  for (Object element : twList) {
    TaggedWord original = (TaggedWord) element;
    String basicTag = tlp.basicCategory(original.tag());
    cleaned.add(new TaggedWord(original.word(), basicTag));
  }
  return cleaned;
}
/**
 * Collects the values of all tokens in a sentence whose tag begins with
 * {@code "N"}.
 *
 * <p>The sentence is first tagged via {@code parseSentenceTD}; the resulting
 * tokens are then filtered by tag prefix, preserving their order.
 *
 * @param sentence the raw sentence text to analyse
 * @return the {@code value()} of every token whose tag starts with
 *         {@code "N"}, in sentence order; empty if there are none
 */
public ArrayList<String> getNounsFromSentence(String sentence) {
  ArrayList<String> nouns = new ArrayList<String>();
  ArrayList<TaggedWord> taggedTokens = parseSentenceTD(sentence);
  for (int idx = 0; idx < taggedTokens.size(); idx++) {
    TaggedWord token = taggedTokens.get(idx);
    if (!token.tag().startsWith("N")) {
      continue; // not tagged with an "N..." tag, skip it
    }
    nouns.add(token.value());
  }
  return nouns;
}
/**
 * Updates the model counts from one tagged sentence.
 *
 * <p>The sentence is handed to {@code lex.train} with weight {@code 1.0}.
 * Then, walking the tokens in order, every tag is added to {@code tagIndex};
 * a tag that has no preceding (non-null) tag is counted in {@code initial},
 * and every other tag is counted in {@code ruleCounter} as a
 * (previous tag, current tag) pair.
 *
 * @param sentence the tagged tokens of a single training sentence
 */
@Override
public void train(List<TaggedWord> sentence) {
  lex.train(sentence, 1.0);

  String previousTag = null;
  for (TaggedWord token : sentence) {
    String currentTag = token.tag();
    tagIndex.add(currentTag);
    if (previousTag != null) {
      // Seen a tag before this one: record the transition.
      ruleCounter.incrementCount2D(previousTag, currentTag);
    } else {
      // No predecessor yet: this tag starts the sequence.
      initial.incrementCount(currentTag);
    }
    previousTag = currentTag;
  }
}
/**
 * Converts a sentence into a two-level ("flat") tree of the shape
 * {@code S -> tag* ; tag -> word}.
 *
 * <p>For every word a leaf is created whose label comes from
 * {@code lf.newLabel(word.word())}. That leaf is wrapped in a single-child
 * node: if the word is a {@link TaggedWord} the node is labelled with a
 * {@code StringLabel} holding its tag, otherwise it is labelled with
 * {@code lf.newLabel("WD")}. The root is labelled with a {@code StringLabel}
 * of {@code "S"} and has the wrapped nodes as daughters, in sentence order.
 *
 * @param s  The Sentence to make the Tree from
 * @param lf The LabelFactory used for the word labels and the default
 *           {@code "WD"} tag label
 * @return The one phrasal level Tree
 */
public static Tree toFlatTree(Sentence<?> s, LabelFactory lf) {
  List<Tree> children = new ArrayList<Tree>(s.length());
  for (HasWord token : s) {
    Tree leaf = new LabeledScoredTreeLeaf(lf.newLabel(token.word()));
    List<Tree> leafOnly = Collections.singletonList(leaf);

    Tree preterminal;
    if (!(token instanceof TaggedWord)) {
      // Untagged word: fall back to the placeholder tag "WD".
      preterminal = new LabeledScoredTreeNode(lf.newLabel("WD"), leafOnly);
    } else {
      String tag = ((TaggedWord) token).tag();
      preterminal = new LabeledScoredTreeNode(new StringLabel(tag), leafOnly);
    }
    children.add(preterminal);
  }
  return new LabeledScoredTreeNode(new StringLabel("S"), children);
}