Exemplos de CoreLabel.getString em Java

Linguagem de programação: Java

Espaço para nome / nome do pacote: edu.stanford.nlp.ling

Classe / Tipo: CoreLabel

Método / Função: getString

Exemplos em hotexamples.com: 2

CoreLabel.getString em Java - 2 exemplos encontrados. Esses são os exemplos do mundo real mais bem avaliados de edu.stanford.nlp.ling.CoreLabel.getString em Java extraídos de projetos de código aberto. Você pode avaliar os exemplos para nos ajudar a melhorar a qualidade deles.

Métodos Frequentes

Exibir Ocultar

get(30)

word(18)

set(13)

tag(6)

value(5)

factory(4)

setValue(4)

setIndex(4)

containsKey(4)

setTag(3)

beginPosition(3)

lemma(3)

originalText(3)

index(3)

endPosition(3)

ner(2)

setLemma(2)

setNER(2)

setWord(2)

getString(2)

toShorterString(2)

toString(2)

remove(1)

setSentIndex(1)

setDocID(1)

Métodos Frequentes

get (30)

word (18)

set (13)

tag (6)

value (5)

factory (4)

setValue (4)

setIndex (4)

containsKey (4)

setTag (3)

Métodos Frequentes

beginPosition (3)

lemma (3)

originalText (3)

index (3)

endPosition (3)

ner (2)

setLemma (2)

setNER (2)

setWord (2)

getString (2)

toShorterString (2)

toString (2)

remove (1)

setSentIndex (1)

setDocID (1)

Métodos Frequentes

toShorterString (2)

toString (2)

remove (1)

setSentIndex (1)

setDocID (1)

Relacionados

TreatmentEquipmentConfig

DBConnectionManager

TileEntityTreasure

CxxCompilationDatabaseUtils

AccountBalancePresenter

ConexionSql

ValueDescriptorImpl

InternalMove

HomePresenter

PsiClass

Related in langs

Eloqua\Client (PHP)

Assets (PHP)

StepDefinition (C#)

HomeController (C#)

belle_sip_channel_queue_message (C++)

const_iterator (C++)

GenTexture (Go)

Plot (Go)

Account (Python)

cat (Python)

Exemplo n.º 1

0

Exibir arquivo

Arquivo: BasicEntityExtractor.java Projeto: automenta/corenlp

/** * for printing labeled sentence in less verbose manner * * @return string for printing */ public static String labeledSentenceToString(List<CoreLabel> labeledSentence, boolean printNer) { StringBuilder sb = new StringBuilder(); sb.append("[ "); for (CoreLabel label : labeledSentence) { String word = label.getString(annotationForWord); String answer = label.getString(AnswerAnnotation.class); String tag = label.getString(PartOfSpeechAnnotation.class); sb.append(word).append('(').append(tag); if (!SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL.equals(answer)) { sb.append(' ').append(answer); } if (printNer) { sb.append(" ner:").append(label.ner()); } sb.append(") "); } sb.append(']'); return sb.toString(); }

Exemplo n.º 2

0

Exibir arquivo

Arquivo: MUCMentionExtractor.java Projeto: taarraas/matrixfactorization

@Override public Document nextDoc() throws Exception { List<List<CoreLabel>> allWords = new ArrayList<List<CoreLabel>>(); List<Tree> allTrees = new ArrayList<Tree>(); List<List<Mention>> allGoldMentions = new ArrayList<List<Mention>>(); List<List<Mention>> allPredictedMentions; List<CoreMap> allSentences = new ArrayList<CoreMap>(); Annotation docAnno = new Annotation(""); Pattern docPattern = Pattern.compile("<DOC>(.*?)</DOC>", Pattern.DOTALL + Pattern.CASE_INSENSITIVE); Pattern sentencePattern = Pattern.compile( "(<s>|<hl>|<dd>|<DATELINE>)(.*?)(</s>|</hl>|</dd>|</DATELINE>)", Pattern.DOTALL + Pattern.CASE_INSENSITIVE); Matcher docMatcher = docPattern.matcher(fileContents); if (!docMatcher.find(currentOffset)) return null; currentOffset = docMatcher.end(); String doc = docMatcher.group(1); Matcher sentenceMatcher = sentencePattern.matcher(doc); String ner = null; // Maintain current document ID. Pattern docIDPattern = Pattern.compile("<DOCNO>(.*?)</DOCNO>", Pattern.DOTALL + Pattern.CASE_INSENSITIVE); Matcher docIDMatcher = docIDPattern.matcher(doc); if (docIDMatcher.find()) currentDocumentID = docIDMatcher.group(1); else currentDocumentID = "documentAfter " + currentDocumentID; while (sentenceMatcher.find()) { String sentenceString = sentenceMatcher.group(2); List<CoreLabel> words = tokenizerFactory.getTokenizer(new StringReader(sentenceString)).tokenize(); // FIXING TOKENIZATION PROBLEMS for (int i = 0; i < words.size(); i++) { CoreLabel w = words.get(i); if (i > 0 && w.word().equals("$")) { if (!words.get(i - 1).word().endsWith("PRP") && !words.get(i - 1).word().endsWith("WP")) continue; words.get(i - 1).set(CoreAnnotations.TextAnnotation.class, words.get(i - 1).word() + "$"); words.remove(i); i--; } else if (w.word().equals("\\/")) { if (words.get(i - 1).word().equals("</COREF>")) continue; w.set( CoreAnnotations.TextAnnotation.class, words.get(i - 1).word() + "\\/" + words.get(i + 1).word()); words.remove(i + 1); words.remove(i - 1); } } // END FIXING TOKENIZATION PROBLEMS List<CoreLabel> sentence = new ArrayList<CoreLabel>(); // MUC accepts embedded coref mentions, so we need to keep a stack for the mentions currently // open Stack<Mention> stack = new Stack<Mention>(); List<Mention> mentions = new ArrayList<Mention>(); allWords.add(sentence); allGoldMentions.add(mentions); for (CoreLabel word : words) { String w = word.get(CoreAnnotations.TextAnnotation.class); // found regular token: WORD/POS if (!w.startsWith("<") && w.contains("\\/") && w.lastIndexOf("\\/") != w.length() - 2) { int i = w.lastIndexOf("\\/"); String w1 = w.substring(0, i); // we do NOT set POS info here. We take the POS tags from the parser! word.set(CoreAnnotations.TextAnnotation.class, w1); word.remove(CoreAnnotations.OriginalTextAnnotation.class); if (Constants.USE_GOLD_NE) { if (ner != null) { word.set(CoreAnnotations.NamedEntityTagAnnotation.class, ner); } else { word.set(CoreAnnotations.NamedEntityTagAnnotation.class, "O"); } } sentence.add(word); } // found the start SGML tag for a NE, e.g., "<ORGANIZATION>" else if (w.startsWith("<") && !w.startsWith("<COREF") && !w.startsWith("</")) { Pattern nerPattern = Pattern.compile("<(.*?)>"); Matcher m = nerPattern.matcher(w); m.find(); ner = m.group(1); } // found the end SGML tag for a NE, e.g., "</ORGANIZATION>" else if (w.startsWith("</") && !w.startsWith("</COREF")) { Pattern nerPattern = Pattern.compile("</(.*?)>"); Matcher m = nerPattern.matcher(w); m.find(); String ner1 = m.group(1); if (ner != null && !ner.equals(ner1)) throw new RuntimeException("Unmatched NE labels in MUC file: " + ner + " v. " + ner1); ner = null; } // found the start SGML tag for a coref mention else if (w.startsWith("<COREF")) { Mention mention = new Mention(); // position of this mention in the sentence mention.startIndex = sentence.size(); // extract GOLD info about this coref chain. needed for eval Pattern idPattern = Pattern.compile("ID=\"(.*?)\""); Pattern refPattern = Pattern.compile("REF=\"(.*?)\""); Matcher m = idPattern.matcher(w); m.find(); mention.mentionID = Integer.parseInt(m.group(1)); m = refPattern.matcher(w); if (m.find()) { mention.originalRef = Integer.parseInt(m.group(1)); } // open mention. keep track of all open mentions using the stack stack.push(mention); } // found the end SGML tag for a coref mention else if (w.equals("</COREF>")) { Mention mention = stack.pop(); mention.endIndex = sentence.size(); // this is a closed mention. add it to the final list of mentions // System.err.printf("Found MENTION: ID=%d, REF=%d\n", mention.mentionID, // mention.originalRef); mentions.add(mention); } else { word.remove(CoreAnnotations.OriginalTextAnnotation.class); if (Constants.USE_GOLD_NE) { if (ner != null) { word.set(CoreAnnotations.NamedEntityTagAnnotation.class, ner); } else { word.set(CoreAnnotations.NamedEntityTagAnnotation.class, "O"); } } sentence.add(word); } } StringBuilder textContent = new StringBuilder(); for (int i = 0; i < sentence.size(); i++) { CoreLabel w = sentence.get(i); w.set(CoreAnnotations.IndexAnnotation.class, i + 1); w.set(CoreAnnotations.UtteranceAnnotation.class, 0); if (i > 0) textContent.append(" "); textContent.append(w.getString(CoreAnnotations.TextAnnotation.class)); } CoreMap sentCoreMap = new Annotation(textContent.toString()); allSentences.add(sentCoreMap); sentCoreMap.set(CoreAnnotations.TokensAnnotation.class, sentence); } // assign goldCorefClusterID Map<Integer, Mention> idMention = Generics.newHashMap(); // temporary use for (List<Mention> goldMentions : allGoldMentions) { for (Mention m : goldMentions) { idMention.put(m.mentionID, m); } } for (List<Mention> goldMentions : allGoldMentions) { for (Mention m : goldMentions) { if (m.goldCorefClusterID == -1) { if (m.originalRef == -1) m.goldCorefClusterID = m.mentionID; else { int ref = m.originalRef; while (true) { Mention m2 = idMention.get(ref); if (m2.goldCorefClusterID != -1) { m.goldCorefClusterID = m2.goldCorefClusterID; break; } else if (m2.originalRef == -1) { m2.goldCorefClusterID = m2.mentionID; m.goldCorefClusterID = m2.goldCorefClusterID; break; } else { ref = m2.originalRef; } } } } } } docAnno.set(CoreAnnotations.SentencesAnnotation.class, allSentences); stanfordProcessor.annotate(docAnno); if (allSentences.size() != allWords.size()) throw new IllegalStateException("allSentences != allWords"); for (int i = 0; i < allSentences.size(); i++) { List<CoreLabel> annotatedSent = allSentences.get(i).get(CoreAnnotations.TokensAnnotation.class); List<CoreLabel> unannotatedSent = allWords.get(i); List<Mention> mentionInSent = allGoldMentions.get(i); for (Mention m : mentionInSent) { m.dependency = allSentences .get(i) .get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class); } if (annotatedSent.size() != unannotatedSent.size()) { throw new IllegalStateException("annotatedSent != unannotatedSent"); } for (int j = 0, sz = annotatedSent.size(); j < sz; j++) { CoreLabel annotatedWord = annotatedSent.get(j); CoreLabel unannotatedWord = unannotatedSent.get(j); if (!annotatedWord .get(CoreAnnotations.TextAnnotation.class) .equals(unannotatedWord.get(CoreAnnotations.TextAnnotation.class))) { throw new IllegalStateException("annotatedWord != unannotatedWord"); } } allWords.set(i, annotatedSent); allTrees.add(allSentences.get(i).get(TreeCoreAnnotations.TreeAnnotation.class)); } // extract predicted mentions if (Constants.USE_GOLD_MENTIONS) allPredictedMentions = allGoldMentions; else allPredictedMentions = mentionFinder.extractPredictedMentions(docAnno, maxID, dictionaries); // add the relevant fields to mentions and order them for coref return arrange(docAnno, allWords, allTrees, allPredictedMentions, allGoldMentions, true); }