public List<CoreMap> getAPIElementSentences(boolean parse) {
  List<CoreMap> sentences = section.sentences; // getAllSentences(parse);
  List<CoreMap> apiSentences = new ArrayList<CoreMap>();
  String formattedAPI =
      apiElement
          .getAPIElementName()
          .replaceAll("\\(", "")
          .replaceAll("\\)", "")
          .replaceAll("\\.", "-")
          .toLowerCase();
  if (sentences == null) {
    // guard against a missing sentence list instead of falling through to an NPE
    System.out.println("WARNING: In getAPIElementSentences, no sentences for " + section.getSubTitle());
    return apiSentences;
  }
  for (CoreMap sent : sentences) {
    if (sent.toString().toLowerCase().contains("clt_" + formattedAPI)) {
      apiSentences.add(sent);
    }
  }
  if (apiSentences.isEmpty()) {
    System.out.println(
        "WARNING: In getAPIElementSentences "
            + apiElement.getAPIElementName()
            + ","
            + section.getSubTitle());
  }
  return apiSentences;
}
public static void main(String[] args) throws IOException, ClassNotFoundException {
  Timing tim = new Timing();
  AnnotationPipeline ap = new AnnotationPipeline();
  boolean verbose = false;
  ap.addAnnotator(new TokenizerAnnotator(verbose, "en"));
  ap.addAnnotator(new WordsToSentencesAnnotator(verbose));
  // ap.addAnnotator(new NERCombinerAnnotator(verbose));
  // ap.addAnnotator(new OldNERAnnotator(verbose));
  // ap.addAnnotator(new NERMergingAnnotator(verbose));
  ap.addAnnotator(new ParserAnnotator(verbose, -1));
  /*
   * ap.addAnnotator(new UpdateSentenceFromParseAnnotator(verbose));
   * ap.addAnnotator(new NumberAnnotator(verbose));
   * ap.addAnnotator(new QuantifiableEntityNormalizingAnnotator(verbose));
   * ap.addAnnotator(new StemmerAnnotator(verbose));
   * ap.addAnnotator(new MorphaAnnotator(verbose));
   */
  // ap.addAnnotator(new SRLAnnotator());
  String text =
      "USAir said in the filings that Mr. Icahn first contacted Mr. Colodny last September"
          + " to discuss the benefits of combining TWA and USAir -- either by TWA's acquisition"
          + " of USAir, or USAir's acquisition of TWA.";
  Annotation a = new Annotation(text);
  ap.annotate(a);
  System.out.println(a.get(CoreAnnotations.TokensAnnotation.class));
  for (CoreMap sentence : a.get(CoreAnnotations.SentencesAnnotation.class)) {
    System.out.println(sentence.get(TreeCoreAnnotations.TreeAnnotation.class));
  }
  if (TIME) {
    System.out.println(ap.timingInformation());
    System.err.println("Total time for AnnotationPipeline: " + tim.toSecondsString() + " sec.");
  }
}
public Map<Integer, Integer> getGeneSpans(String text) {
  Map<Integer, Integer> begin2end = new HashMap<Integer, Integer>();
  Annotation document = new Annotation(text);
  pipeline.annotate(document);
  List<CoreMap> sentences = document.get(SentencesAnnotation.class);
  for (CoreMap sentence : sentences) {
    List<CoreLabel> candidate = new ArrayList<CoreLabel>();
    for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
      String pos = token.get(PartOfSpeechAnnotation.class);
      if (pos.startsWith("NN")) {
        // extend the current run of noun tokens
        candidate.add(token);
      } else if (candidate.size() > 0) {
        // a non-noun token closes the run: record its character span
        int begin = candidate.get(0).beginPosition();
        int end = candidate.get(candidate.size() - 1).endPosition();
        begin2end.put(begin, end);
        candidate.clear();
      }
    }
    // flush a noun run that extends to the end of the sentence
    if (candidate.size() > 0) {
      int begin = candidate.get(0).beginPosition();
      int end = candidate.get(candidate.size() - 1).endPosition();
      begin2end.put(begin, end);
      candidate.clear();
    }
  }
  return begin2end;
}
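// Hedged usage sketch (assumption, not from the original source): getGeneSpans
// reads PartOfSpeechAnnotation off each token, so the `pipeline` field it uses
// must include at least the tokenize, ssplit, and pos annotators. The helper
// name below is hypothetical.
public static StanfordCoreNLP buildPosPipeline() {
  Properties props = new Properties();
  props.setProperty("annotators", "tokenize, ssplit, pos");
  return new StanfordCoreNLP(props);
}
// Example: getGeneSpans("The BRCA1 protein binds damaged DNA.") then maps the
// begin character offset of each noun run to its end offset.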
public static void main(String[] args) {
  SentenceDAO sentenceDAO = new SentenceDAOImpl();
  List<Sentence> sentences = sentenceDAO.findAll();
  Properties props = new Properties();
  props.setProperty("annotators", "tokenize, ssplit, pos, lemma, parse");
  StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
  int i = 0;
  for (Sentence sentence : sentences) {
    if (sentence.getPredicate() == null) {
      try {
        System.out.println(i++);
        String text = sentence.getContent();
        Annotation annotation = new Annotation(text);
        pipeline.annotate(annotation);
        for (CoreMap core : annotation.get(SentencesAnnotation.class)) {
          SemanticGraph graph = core.get(CollapsedCCProcessedDependenciesAnnotation.class);
          sentence.setPredicate(graph.getFirstRoot().lemma());
        }
        sentenceDAO.save(sentence);
      } catch (Exception e) {
        e.printStackTrace();
      }
    }
  }
  // System.out.println(sentence.getWords());
}
public static void addFigerAnnotationToDocument(Annotation d) throws SQLException {
  List<CoreMap> sentences = d.get(CoreAnnotations.SentencesAnnotation.class);
  Set<String> entityIds = new HashSet<String>();
  for (CoreMap sen : sentences) {
    List<Triple<Pair<Integer, Integer>, String, Float>> nelAnnotation =
        sen.get(NamedEntityLinkingAnnotation.class);
    for (Triple<Pair<Integer, Integer>, String, Float> t : nelAnnotation) {
      String id = t.second;
      if (!id.equals("null")) {
        entityIds.add(id);
      }
    }
  }
  Map<String, Set<String>> idTypeMap = bigQuery(entityIds);
  // add type onto sentences
  for (CoreMap sen : sentences) {
    List<Triple<Pair<Integer, Integer>, String, Float>> nelAnnotation =
        sen.get(NamedEntityLinkingAnnotation.class);
    List<Triple<Set<String>, Integer, Integer>> figerData = new ArrayList<>();
    for (Triple<Pair<Integer, Integer>, String, Float> t : nelAnnotation) {
      Integer start = t.first.first;
      Integer end = t.first.second;
      Set<String> types = null;
      if (!t.second.equals("null")) {
        types = idTypeMap.get(GuidMidConversion.convertBackward(t.second));
      }
      Triple<Set<String>, Integer, Integer> figerTrip = new Triple<>(types, start, end);
      figerData.add(figerTrip);
    }
    sen.set(FigerAnnotation.class, figerData);
  }
}
public static List<String> lemmatizeDocument(String documentText) {
  if (pipeline == null) {
    loadModels();
  }
  List<String> lemmas = new LinkedList<>();
  // create an empty Annotation just with the given text
  Annotation document = new Annotation(documentText);
  // run all Annotators on this text
  pipeline.annotate(document);
  // iterate over all of the sentences found
  List<CoreMap> sentences = document.get(SentencesAnnotation.class);
  for (CoreMap sentence : sentences) {
    // iterate over all tokens in a sentence, collecting each token's lemma
    for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
      lemmas.add(token.get(CoreAnnotations.LemmaAnnotation.class));
    }
  }
  return lemmas;
}
/**
 * Lemmatizes the given text and returns the lemmas joined by single spaces.
 *
 * @param t the text to lemmatize
 * @return the space-separated lemmas, or an empty string on failure
 */
public static String lemmatize(String t) {
  if (pipeline == null) {
    loadModels();
  }
  StringBuilder lemma = new StringBuilder();
  try {
    // create an empty Annotation just with the given text
    Annotation document = new Annotation(t);
    // run all Annotators on this text
    pipeline.annotate(document);
    // iterate over all sentences and collect each token's lemma
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
      for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
        lemma.append(' ').append(token.get(CoreAnnotations.LemmaAnnotation.class));
      }
    }
  } catch (Exception e) {
    System.err.println("Stanford Lemmatizer error, exception on word: " + t);
  }
  return lemma.toString().trim();
}
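// A minimal sketch (assumed, not shown in the original snippets) of the static
// state both lemmatizer methods depend on: loadModels() must build a pipeline
// with tokenize, ssplit, pos, and lemma, because LemmaAnnotation is only filled
// once the lemma annotator has run.
private static StanfordCoreNLP pipeline;

private static void loadModels() {
  Properties props = new Properties();
  props.setProperty("annotators", "tokenize, ssplit, pos, lemma");
  pipeline = new StanfordCoreNLP(props);
}
// Example: lemmatize("The cats were running") yields "the cat be run".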
public static void fillInParseAnnotations(
    boolean verbose, boolean buildGraphs, CoreMap sentence, Tree tree) {
  // make sure all tree nodes are CoreLabels
  // TODO: why isn't this always true? something fishy is going on
  ParserAnnotatorUtils.convertToCoreLabels(tree);
  // index nodes, i.e., add start and end token positions to all nodes;
  // this is needed by other annotators downstream, e.g., the NFLAnnotator
  tree.indexSpans(0);
  sentence.set(TreeAnnotation.class, tree);
  if (verbose) {
    System.err.println("Tree is:");
    tree.pennPrint(System.err);
  }
  if (buildGraphs) {
    // generate the dependency graph
    SemanticGraph deps = generateCollapsedDependencies(tree);
    SemanticGraph uncollapsedDeps = generateUncollapsedDependencies(tree);
    SemanticGraph ccDeps = generateCCProcessedDependencies(tree);
    if (verbose) {
      System.err.println("SDs:");
      System.err.println(deps.toString("plain"));
    }
    sentence.set(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class, deps);
    sentence.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, uncollapsedDeps);
    sentence.set(
        SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class, ccDeps);
  }
  setMissingTags(sentence, tree);
}
public List<NLPInfo> analyze(String text) {
  Annotation document = new Annotation(text);
  pipeline.annotate(document);
  List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
  if (sentences == null || sentences.isEmpty()) {
    return null;
  }
  List<NLPInfo> res = new ArrayList<NLPInfo>();
  NLPInfo info;
  for (CoreMap sentence : sentences) {
    info = new NLPInfo();
    NLPToken tokenInfo;
    for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
      tokenInfo = new NLPToken();
      tokenInfo.setWord(token.get(CoreAnnotations.TextAnnotation.class));
      tokenInfo.setTag(token.get(CoreAnnotations.PartOfSpeechAnnotation.class));
      tokenInfo.setNer(token.get(CoreAnnotations.NamedEntityTagAnnotation.class));
      info.appendToken(tokenInfo);
    }
    res.add(info);
  }
  return res;
}
private void findSpeakersInConversation(Dictionaries dict) {
  // a predicate nominative "I" marks its subject mention as the current speaker
  for (List<Mention> l : predictedOrderedMentionsBySentence) {
    for (Mention m : l) {
      if (m.predicateNominatives == null) continue;
      for (Mention a : m.predicateNominatives) {
        if (a.spanToString().toLowerCase().equals("i")) {
          speakers.put(
              m.headWord.get(CoreAnnotations.UtteranceAnnotation.class),
              Integer.toString(m.mentionID));
        }
      }
    }
  }
  // group sentences into paragraphs by utterance index and find each paragraph's speaker
  List<CoreMap> paragraph = new ArrayList<CoreMap>();
  int paragraphUtterIndex = 0;
  String nextParagraphSpeaker = "";
  int paragraphOffset = 0;
  for (CoreMap sent : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
    int currentUtter =
        sent.get(CoreAnnotations.TokensAnnotation.class)
            .get(0)
            .get(CoreAnnotations.UtteranceAnnotation.class);
    if (paragraphUtterIndex != currentUtter) {
      nextParagraphSpeaker =
          findParagraphSpeaker(
              paragraph, paragraphUtterIndex, nextParagraphSpeaker, paragraphOffset, dict);
      paragraphUtterIndex = currentUtter;
      paragraphOffset += paragraph.size();
      paragraph = new ArrayList<CoreMap>();
    }
    paragraph.add(sent);
  }
  // handle the final paragraph
  findParagraphSpeaker(paragraph, paragraphUtterIndex, nextParagraphSpeaker, paragraphOffset, dict);
}
private String findNextParagraphSpeaker(
    List<CoreMap> paragraph, int paragraphOffset, Dictionaries dict) {
  CoreMap lastSent = paragraph.get(paragraph.size() - 1);
  String speaker = "";
  for (CoreLabel w : lastSent.get(CoreAnnotations.TokensAnnotation.class)) {
    if (w.get(CoreAnnotations.LemmaAnnotation.class).equals("report")
        || w.get(CoreAnnotations.LemmaAnnotation.class).equals("say")) {
      String word = w.get(CoreAnnotations.TextAnnotation.class);
      SemanticGraph dependency =
          lastSent.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);
      IndexedWord t = dependency.getNodeByWordPattern(word);
      // the PERSON subject of a reporting verb is taken as the next paragraph's speaker
      for (Pair<GrammaticalRelation, IndexedWord> child : dependency.childPairs(t)) {
        if (child.first().getShortName().equals("nsubj")) {
          int subjectIndex = child.second().index(); // start from 1
          IntTuple headPosition = new IntTuple(2);
          headPosition.set(0, paragraph.size() - 1 + paragraphOffset);
          headPosition.set(1, subjectIndex - 1);
          if (mentionheadPositions.containsKey(headPosition)
              && mentionheadPositions.get(headPosition).nerString.startsWith("PER")) {
            speaker = Integer.toString(mentionheadPositions.get(headPosition).mentionID);
          }
        }
      }
    }
  }
  return speaker;
}
/** Speaker extraction. */
private void findSpeakers(Dictionaries dict) {
  Boolean useMarkedDiscourseBoolean =
      annotation.get(CoreAnnotations.UseMarkedDiscourseAnnotation.class);
  boolean useMarkedDiscourse =
      (useMarkedDiscourseBoolean != null) ? useMarkedDiscourseBoolean : false;
  if (Constants.USE_GOLD_SPEAKER_TAGS || useMarkedDiscourse) {
    for (CoreMap sent : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
      for (CoreLabel w : sent.get(CoreAnnotations.TokensAnnotation.class)) {
        int utterIndex = w.get(CoreAnnotations.UtteranceAnnotation.class);
        speakers.put(utterIndex, w.get(CoreAnnotations.SpeakerAnnotation.class));
      }
    }
  } else {
    if (docType == DocType.CONVERSATION) findSpeakersInConversation(dict);
    else if (docType == DocType.ARTICLE) findSpeakersInArticle(dict);
    // set speaker info to annotation
    for (CoreMap sent : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
      for (CoreLabel w : sent.get(CoreAnnotations.TokensAnnotation.class)) {
        int utterIndex = w.get(CoreAnnotations.UtteranceAnnotation.class);
        if (speakers.containsKey(utterIndex)) {
          w.set(CoreAnnotations.SpeakerAnnotation.class, speakers.get(utterIndex));
        }
      }
    }
  }
}
/** Sets UtteranceAnnotation for quotations; the default UtteranceAnnotation of 0 is assumed. */
private void markQuotations(List<CoreMap> results, boolean normalQuotationType) {
  boolean insideQuotation = false;
  for (CoreMap m : results) {
    for (CoreLabel l : m.get(CoreAnnotations.TokensAnnotation.class)) {
      String w = l.get(CoreAnnotations.TextAnnotation.class);
      boolean noSpeakerInfo =
          !l.containsKey(CoreAnnotations.SpeakerAnnotation.class)
              || l.get(CoreAnnotations.SpeakerAnnotation.class).equals("")
              || l.get(CoreAnnotations.SpeakerAnnotation.class).startsWith("PER");
      if (w.equals("``") || (!insideQuotation && normalQuotationType && w.equals("\""))) {
        insideQuotation = true;
        maxUtter++;
        continue;
      } else if (w.equals("''") || (insideQuotation && normalQuotationType && w.equals("\""))) {
        insideQuotation = false;
      }
      if (insideQuotation) {
        l.set(CoreAnnotations.UtteranceAnnotation.class, maxUtter);
      }
      if (noSpeakerInfo) {
        l.set(
            CoreAnnotations.SpeakerAnnotation.class,
            "PER" + l.get(CoreAnnotations.UtteranceAnnotation.class));
      }
    }
  }
  // if no directed quotes (`` '') were found, retry treating plain " as quotation marks
  if (maxUtter == 0 && !normalQuotationType) markQuotations(results, true);
}
public static final String doCorefResolution(Annotation annotation) {
  Map<Integer, CorefChain> corefs = annotation.get(CorefChainAnnotation.class);
  List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
  List<String> resolved = new ArrayList<String>();
  for (CoreMap sentence : sentences) {
    List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
    for (CoreLabel token : tokens) {
      Integer corefClustId = token.get(CorefCoreAnnotations.CorefClusterIdAnnotation.class);
      CorefChain chain = corefs.get(corefClustId);
      if (chain == null) {
        resolved.add(token.word());
      } else {
        // replace the token with its chain's representative mention,
        // unless the token is already inside that mention
        int sentIdx = chain.getRepresentativeMention().sentNum - 1;
        CoreMap corefSentence = sentences.get(sentIdx);
        List<CoreLabel> corefSentenceTokens = corefSentence.get(TokensAnnotation.class);
        CorefMention reprMent = chain.getRepresentativeMention();
        if (token.index() < reprMent.startIndex || token.index() > reprMent.endIndex) {
          for (int i = reprMent.startIndex; i < reprMent.endIndex; i++) {
            CoreLabel matchedLabel = corefSentenceTokens.get(i - 1);
            resolved.add(matchedLabel.word());
          }
        } else {
          resolved.add(token.word());
        }
      }
    }
  }
  StringBuilder resolvedStr = new StringBuilder();
  for (String str : resolved) {
    resolvedStr.append(str).append(' ');
  }
  System.out.println(resolvedStr);
  return resolvedStr.toString();
}
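// Hedged usage sketch for doCorefResolution (assumed wiring, not from the source):
// the Annotation must already carry CorefChainAnnotation and per-token
// CorefClusterIdAnnotation, which a pipeline with the dcoref annotator provides.
// The method name below is hypothetical.
public static void corefExample() {
  Properties props = new Properties();
  props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
  StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
  Annotation ann = new Annotation("John met Mary. He greeted her.");
  pipeline.annotate(ann);
  // pronouns are replaced by their representative mentions, roughly:
  // "John met Mary . John greeted Mary ."
  System.out.println(doCorefResolution(ann));
}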
public static ArrayList<String[]> extractNounPhrases(
    StanfordCoreNLP pipeline, String text, int searchRange) {
  ArrayList<String[]> wordPairs = new ArrayList<String[]>();
  Annotation document = new Annotation(text);
  pipeline.annotate(document);
  List<CoreMap> sentences = document.get(SentencesAnnotation.class);
  MAX_STEPS = searchRange;
  for (CoreMap sentence : sentences) {
    List<CoreLabel> labels = sentence.get(TokensAnnotation.class);
    // check for negation anywhere in the sentence
    boolean hasNegation = false;
    for (CoreLabel label : labels) {
      if (NEGATIONS.contains(label.lemma().toLowerCase())) {
        hasNegation = true;
      }
    }
    for (int idx = 0; idx < labels.size(); idx++) {
      CoreLabel label = labels.get(idx);
      if (NN_TAGS.contains(label.get(PartOfSpeechAnnotation.class))) {
        // pair each noun with the nearest adjective within MAX_STEPS tokens on either side
        for (int step = 1; step <= MAX_STEPS; step++) {
          CoreLabel leftLabel = labels.get(Math.max(0, idx - step));
          if (JJ_TAGS.contains(leftLabel.tag())) {
            if (hasNegation)
              addPair(
                  wordPairs,
                  NOT_PREFIX + leftLabel.get(LemmaAnnotation.class),
                  label.get(LemmaAnnotation.class));
            else
              addPair(
                  wordPairs,
                  leftLabel.get(LemmaAnnotation.class),
                  label.get(LemmaAnnotation.class));
            break;
          }
          CoreLabel rightLabel = labels.get(Math.min(idx + step, labels.size() - 1));
          if (JJ_TAGS.contains(rightLabel.tag())) {
            if (hasNegation)
              addPair(
                  wordPairs,
                  NOT_PREFIX + rightLabel.get(LemmaAnnotation.class),
                  label.get(LemmaAnnotation.class));
            else
              addPair(
                  wordPairs,
                  rightLabel.get(LemmaAnnotation.class),
                  label.get(LemmaAnnotation.class));
            break;
          }
        }
      }
    }
  }
  return wordPairs;
}
public Object aggregate(Class key, List<? extends CoreMap> in) {
  if (in == null) return null;
  // return the value stored on the last CoreMap in the list (if any)
  for (int i = in.size() - 1; i >= 0; i--) {
    CoreMap cm = in.get(i);
    return cm.get(key);
  }
  return null;
}
public SUTime.Temporal apply(CoreMap chunk) {
  if (tokenPattern != null) {
    return apply(chunk.get(CoreAnnotations.NumerizedTokensAnnotation.class));
    // return apply(chunk.get(CoreAnnotations.TokensAnnotation.class));
  } else {
    return apply(chunk.get(CoreAnnotations.TextAnnotation.class));
  }
}
public Object aggregate(Class key, List<? extends CoreMap> in) {
  if (in == null) return null;
  // return the value stored on the first CoreMap in the list (if any)
  for (CoreMap cm : in) {
    Object obj = cm.get(key);
    return obj;
  }
  return null;
}
/**
 * Sets a running token index across all sentences in the document.
 *
 * @param doc the document whose tokens are (re)indexed
 */
public static void setTokenIndices(Document doc) {
  int tokenIndex = 0;
  for (CoreMap sent : doc.annotation.get(SentencesAnnotation.class)) {
    for (CoreLabel token : sent.get(TokensAnnotation.class)) {
      token.set(TokenBeginAnnotation.class, tokenIndex++);
    }
  }
}
/**
 * TODO(gabor) JavaDoc
 *
 * @param sentence
 * @param pipeline
 */
public static void annotate(CoreMap sentence, AnnotationPipeline pipeline) {
  Annotation ann =
      new Annotation(StringUtils.join(sentence.get(CoreAnnotations.TokensAnnotation.class), " "));
  ann.set(
      CoreAnnotations.TokensAnnotation.class,
      sentence.get(CoreAnnotations.TokensAnnotation.class));
  ann.set(CoreAnnotations.SentencesAnnotation.class, Collections.singletonList(sentence));
  pipeline.annotate(ann);
}
@Override
public int compare(CoreMap sent1, CoreMap sent2) {
  // order primarily by document ID, then by sentence text
  String d1 = sent1.get(CoreAnnotations.DocIDAnnotation.class);
  String d2 = sent2.get(CoreAnnotations.DocIDAnnotation.class);
  if (d1 != null && d2 != null && !d1.equals(d2)) return d1.compareTo(d2);
  String t1 = sent1.get(CoreAnnotations.TextAnnotation.class);
  String t2 = sent2.get(CoreAnnotations.TextAnnotation.class);
  return t1.compareTo(t2);
}
/**
 * Given a set of sentences with annotations from an information extractor class, and the same
 * sentences with gold-standard annotations, print results on how the information extraction
 * performed.
 */
public String printResults(CoreMap goldStandard, CoreMap extractorOutput) {
  StringWriter sw = new StringWriter();
  PrintWriter pw = new PrintWriter(sw, true);
  List<CoreMap> mutableGold = new ArrayList<CoreMap>();
  mutableGold.addAll(goldStandard.get(CoreAnnotations.SentencesAnnotation.class));
  List<CoreMap> mutableOutput = new ArrayList<CoreMap>();
  mutableOutput.addAll(extractorOutput.get(CoreAnnotations.SentencesAnnotation.class));
  printResults(pw, mutableGold, mutableOutput);
  return sw.getBuffer().toString();
}
/**
 * Finds the position of the sentence in the given document that achieves the best ROUGE-N scores
 * with respect to the reference summaries.
 *
 * @param task the document and the corresponding models
 * @return the position of the best sentence in the document
 */
public int getBestSentencePos(Task task) {
  Document document = task.getDocument();
  Annotation documentAnnotation = annotationProvider.getAnnotation(document.getContent());
  RougeN rouge = rougeFactory.make(task.getModels(), annotationProvider);
  BestSentenceSelector sentenceSelector = new BestSentenceSelector(rouge);
  Annotation bestAnnotation = sentenceSelector.select(documentAnnotation);
  CoreMap sentence = bestAnnotation.get(SentencesAnnotation.class).get(0);
  String bestPos = sentence.get(SentencePositionAnnotation.class);
  return Integer.valueOf(bestPos);
}
public Object aggregate(Class key, List<? extends CoreMap> in) {
  if (in == null) return null;
  // concatenate the list-valued annotations across all CoreMaps
  List<T> res = new ArrayList<T>();
  for (CoreMap cm : in) {
    Object obj = cm.get(key);
    if (obj instanceof List) {
      res.addAll((List<T>) obj);
    }
  }
  return res;
}
private List<CoreMap> toCoreMaps(
    CoreMap annotation, List<TimeExpression> timeExpressions, SUTime.TimeIndex timeIndex) {
  if (timeExpressions == null) return null;
  List<CoreMap> coreMaps = new ArrayList<CoreMap>(timeExpressions.size());
  for (TimeExpression te : timeExpressions) {
    CoreMap cm = te.getAnnotation();
    SUTime.Temporal temporal = te.getTemporal();
    if (temporal != null) {
      String origText = annotation.get(CoreAnnotations.TextAnnotation.class);
      String text = cm.get(CoreAnnotations.TextAnnotation.class);
      if (origText != null) {
        // make sure the text is from the original (and not from concatenated tokens)
        ChunkAnnotationUtils.annotateChunkText(cm, annotation);
        text = cm.get(CoreAnnotations.TextAnnotation.class);
      }
      Map<String, String> timexAttributes;
      try {
        timexAttributes = temporal.getTimexAttributes(timeIndex);
        if (options.includeRange) {
          SUTime.Temporal rangeTemporal = temporal.getRange();
          if (rangeTemporal != null) {
            timexAttributes.put("range", rangeTemporal.toString());
          }
        }
      } catch (Exception e) {
        logger.log(
            Level.WARNING, "Failed to get attributes from " + text + ", timeIndex " + timeIndex, e);
        continue;
      }
      Timex timex;
      try {
        timex = Timex.fromMap(text, timexAttributes);
      } catch (Exception e) {
        logger.log(
            Level.WARNING, "Failed to process " + text + " with attributes " + timexAttributes, e);
        continue;
      }
      cm.set(TimexAnnotation.class, timex);
      if (timex != null) {
        coreMaps.add(cm);
      } else {
        logger.warning("No timex expression for: " + text);
      }
    }
  }
  return coreMaps;
}
private void parseThread(ArrayList<Thread> threads) {
  for (Thread t : threads) {
    ThreadVector tv = new ThreadVector(t);
    allThreads.add(tv);
    for (Email e : t.getEmails()) {
      StringBuffer sb = new StringBuffer();
      for (Sentence s : e.getSentences()) {
        // only keep sentences that are the content of this email (not quoted text)
        if (s.getQuotationTimes() == 0) {
          sb.append(s.getText() + " ");
        }
      }
      String content = sb.toString().toLowerCase();
      // create an empty Annotation just with the given text
      Annotation document = new Annotation(content);
      // run all Annotators on this text
      this.pipeline.annotate(document);
      // iterate over all of the sentences found
      List<CoreMap> sentences = document.get(SentencesAnnotation.class);
      for (CoreMap sentence : sentences) {
        List<String> lemmas = new LinkedList<String>();
        // iterate over all tokens in a sentence, collecting each token's lemma
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
          lemmas.add(token.get(LemmaAnnotation.class));
        }
        HashMap<String, Integer> wordCount = countWordsInSentence(lemmas);
        // if it has valid words
        if (wordCount.size() > 0) {
          totalSentenceNumber++;
          for (String word : wordCount.keySet()) {
            if (!dictionaryIndex.containsKey(word)) {
              dictionaryIndex.put(word, dictionaryIndex.size());
              dictionaryDocumentCount.put(word, 1);
            } else {
              dictionaryDocumentCount.put(word, dictionaryDocumentCount.get(word) + 1);
            }
          }
          SentenceVector sv = new SentenceVector(sentence.toString(), wordCount);
          tv.addSentenceVectors(sv);
        }
      }
    }
  }
}
public Object aggregate(Class key, List<? extends CoreMap> in) {
  if (in == null) return null;
  StringBuilder sb = new StringBuilder();
  for (CoreMap cm : in) {
    Object obj = cm.get(key);
    if (obj != null) {
      if (sb.length() > 0) {
        sb.append(delimiter);
      }
      sb.append(obj);
    }
  }
  return sb.toString();
}
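// Note (added, not from the original source): the aggregate(...) variants above
// implement different reduction policies over the same list of CoreMaps: take the
// last value, take the first value, concatenate list values, and join values with
// a delimiter. For example, joining TextAnnotation over sentence CoreMaps whose
// texts are "A", "B", "C" with a space delimiter yields "A B C"; CoreMaps lacking
// the key are skipped.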
public SUTime.Temporal apply(CoreMap chunk) {
  if (tokenPattern != null) {
    if (chunk.containsKey(TimeExpression.ChildrenAnnotation.class)) {
      return apply(chunk.get(TimeExpression.ChildrenAnnotation.class));
    } else {
      return apply(chunk.get(CoreAnnotations.NumerizedTokensAnnotation.class));
      // return apply(chunk.get(CoreAnnotations.TokensAnnotation.class));
    }
  } else if (stringPattern != null) {
    return apply(chunk.get(CoreAnnotations.TextAnnotation.class));
  } else {
    return extract(null);
  }
}
@Override
public void print(Annotation doc, OutputStream target, Options options) throws IOException {
  PrintWriter writer = new PrintWriter(target);
  List<CoreMap> sentences = doc.get(CoreAnnotations.SentencesAnnotation.class);
  for (CoreMap sentence : sentences) {
    SemanticGraph sg =
        sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
    if (sg != null) {
      writer.print(conllUWriter.printSemanticGraph(sg));
    }
  }
  writer.flush();
}
public List<TimeExpression> extractTimeExpressions(CoreMap annotation, String docDateStr) {
  List<CoreMap> mergedNumbers = NumberNormalizer.findAndMergeNumbers(annotation);
  annotation.set(CoreAnnotations.NumerizedTokensAnnotation.class, mergedNumbers);
  // TODO: docDate may not have century....
  SUTime.Time docDate = timexPatterns.parseDateTime(docDateStr);
  List<? extends MatchedExpression> matchedExpressions =
      expressionExtractor.extractExpressions(annotation);
  List<TimeExpression> timeExpressions = new ArrayList<TimeExpression>(matchedExpressions.size());
  for (MatchedExpression expr : matchedExpressions) {
    if (expr instanceof TimeExpression) {
      timeExpressions.add((TimeExpression) expr);
    } else {
      timeExpressions.add(new TimeExpression(expr));
    }
  }
  // Add back nested time expressions for ranges....
  // For now only one level of nesting...
  if (options.includeNested) {
    List<TimeExpression> nestedTimeExpressions = new ArrayList<TimeExpression>();
    for (TimeExpression te : timeExpressions) {
      if (te.isIncludeNested()) {
        List<? extends CoreMap> children =
            te.getAnnotation().get(TimeExpression.ChildrenAnnotation.class);
        if (children != null) {
          for (CoreMap child : children) {
            TimeExpression childTe = child.get(TimeExpression.Annotation.class);
            if (childTe != null) {
              nestedTimeExpressions.add(childTe);
            }
          }
        }
      }
    }
    timeExpressions.addAll(nestedTimeExpressions);
  }
  Collections.sort(timeExpressions, MatchedExpression.EXPR_TOKEN_OFFSETS_NESTED_FIRST_COMPARATOR);
  timeExpressions = filterInvalidTimeExpressions(timeExpressions);
  // Some resolving is done even if docDate is null...
  if (/* docDate != null && */ timeExpressions != null) {
    resolveTimeExpressions(annotation, timeExpressions, docDate);
  }
  // Annotate timex
  return timeExpressions;
}
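// Hedged usage sketch (assumed setup, not from the original source): the extractor
// expects a tokenized, POS-tagged sentence-level CoreMap (NumberNormalizer relies on
// part-of-speech tags) plus a document date string for resolving relative times.
// The method name below is hypothetical.
public void suTimeExample() {
  Properties props = new Properties();
  props.setProperty("annotators", "tokenize, ssplit, pos");
  StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
  Annotation ann = new Annotation("We met last Friday and will meet again next week.");
  pipeline.annotate(ann);
  for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
    for (TimeExpression te : extractTimeExpressions(sentence, "2013-07-14")) {
      System.out.println(te.getText() + " -> " + te.getTemporal());
    }
  }
}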