public List<CoreMap> getAPIElementSentences(boolean parse) { List<CoreMap> sentences = section.sentences; // getAllSentences(parse); List<CoreMap> apiSentences = new ArrayList<CoreMap>(); String formattedAPI = apiElement .getAPIElementName() .replaceAll("\\(", "") .replaceAll("\\)", "") .replaceAll("\\.", "-") .toLowerCase(); if (sentences == null) System.out.println("Warning"); for (CoreMap sent : sentences) { if (sent.toString().toLowerCase().indexOf("clt_" + formattedAPI) != -1) apiSentences.add(sent); } if (apiSentences.isEmpty()) System.out.println( "WARNGING: In getAPIElementSentences " + apiElement.getAPIElementName() + "," + section.getSubTitle()); return apiSentences; }
private void parseThread(ArrayList<Thread> threads) { for (Thread t : threads) { ThreadVector tv = new ThreadVector(t); allThreads.add(tv); for (Email e : t.getEmails()) { StringBuffer sb = new StringBuffer(); for (Sentence s : e.getSentences()) { // if it's the content of this email if (s.getQuotationTimes() == 0) { sb.append(s.getText() + " "); } } String content = sb.toString().toLowerCase(); // create an empty Annotation just with the given text Annotation document = new Annotation(content); // run all Annotators on this text this.pipeline.annotate(document); // Iterate over all of the sentences found List<CoreMap> sentences = document.get(SentencesAnnotation.class); for (CoreMap sentence : sentences) { List<String> lemmas = new LinkedList<String>(); // Iterate over all tokens in a sentence for (CoreLabel token : sentence.get(TokensAnnotation.class)) { // Retrieve and add the lemma for each word into the // list of lemmas lemmas.add(token.get(LemmaAnnotation.class)); } HashMap<String, Integer> wordCount = countWordsInSentence(lemmas); // if it has valid words if (wordCount.size() > 0) { totalSentenceNumber++; for (String word : wordCount.keySet()) { if (!dictionaryIndex.containsKey(word)) { dictionaryIndex.put(word, dictionaryIndex.size()); dictionaryDocumentCount.put(word, 1); } else { dictionaryDocumentCount.put(word, dictionaryDocumentCount.get(word) + 1); } } SentenceVector sv = new SentenceVector(sentence.toString(), wordCount); tv.addSentenceVectors(sv); } } } } }
private void evaluate( String normalizedSentence, String originalSentence, String entity1, String entity2) throws FileNotFoundException, UnsupportedEncodingException { List<RelationInstance> instances = null; try { instances = relationExtraction.extractRelations(normalizedSentence, true); if (instances.size() > 0) { CoreMap sentence = instances.get(0).getAnnotatedSentence(); SemanticGraph dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class); System.out.println("========== Sentence ========="); System.out.println(sentence.toString()); if (dependencies != null) System.out.println(dependencies.toFormattedString()); System.out.println("======= N-ary Instaces ======"); for (RelationInstance instance : instances) { System.out.println(instance); } } instances = extractBinaryRelations(instances); } catch (Exception e) { e.printStackTrace(); System.out.println("Resuming..."); } boolean relationMatched = false; String concatenatedRelations = ""; if (instances != null && instances.size() > 0) { System.out.println("======= Binary Instaces ======"); for (RelationInstance instance : instances) { System.out.println("Instance: " + instance.getOriginalRelation()); boolean containMention1 = false; boolean containMention2 = false; for (Argument arg : instance.getArguments()) { System.out.println("\tArg: [" + arg.getEntityId() + "] - Type: " + arg.getArgumentType()); // .endsWith() (previously .contains()) is a hack for bad annotated entities in the ground // truth, such as Andre [[[Agassi]]]. if (arg.getEntityName().endsWith(PLACEHOLDER_ENTITY1)) { containMention1 = true; } if (arg.getEntityName().endsWith(PLACEHOLDER_ENTITY2)) { containMention2 = true; } } if (containMention1 && containMention2) { if (concatenatedRelations.isEmpty()) { concatenatedRelations = instance.getOriginalRelation(); } else { // concatenatedRelations += " ,, " + instance.getOriginalRelation(); concatenatedRelations += " " + instance.getOriginalRelation(); } relationMatched = true; } } } if (!relationMatched) { ps.println(entity1 + "\t---\t" + entity2 + "\t" + originalSentence); } else { ps.println( entity1 + "\t" + concatenatedRelations.trim() + "\t" + entity2 + "\t" + originalSentence); } }