public static List<SentenceBreaker.Sentence> cleanAndBreakSentences(String docText) { // ACE IS EVIL docText = docText.replaceAll("<\\S+>", ""); AlignedSub cleaner = AnalysisUtilities.cleanupDocument(docText); List<SentenceBreaker.Sentence> sentences = SentenceBreaker.getSentences(cleaner); return sentences; }
/**
 * Builds a {@link LabeledSentence} containing one token per leaf of the
 * given tree, in the order returned by {@code getLeaves()}.
 *
 * <p>For each leaf, the token's word is the leaf's label, its POS is the
 * label of the leaf's parent node within {@code sentence}, and its stem is
 * obtained from {@code AnalysisUtilities.getLemma(word, pos)}.
 *
 * @param sentence tree whose leaves supply the tokens
 * @return the populated labeled sentence (empty if the tree has no leaves)
 */
private LabeledSentence generateSupersenseTaggingInput(Tree sentence) {
    LabeledSentence labeled = new LabeledSentence();

    for (Tree leaf : sentence.getLeaves()) {
        String token = leaf.label().toString();

        // The node directly above the leaf carries the part-of-speech label.
        Tree preterminal = leaf.parent(sentence);
        String tag = preterminal.label().toString();

        String lemma = AnalysisUtilities.getInstance().getLemma(token, tag);

        // Every token gets the constant label "0"; presumably a placeholder
        // tag to be filled in later -- confirm against LabeledSentence.addToken.
        labeled.addToken(token, lemma, tag, "0");
    }

    return labeled;
}