/** * improve german sentences; the treetagger splits german sentences incorrectly on some occasions * * @param jcas JCas object supplied by the pipeline */ private void improveGermanSentences(JCas jcas) { /* * these POS tag sequences will decide whether we want to merge two sentences * that have (supposedly wrongfully) been split. */ HashSet<String[]> posRules = new HashSet<String[]>(); posRules.add(new String[] {"CARD", "\\$.", "NN"}); posRules.add(new String[] {"CARD", "\\$.", "NE"}); FSIterator sentIter = jcas.getAnnotationIndex(Sentence.type).iterator(); // compare two sentences at a time in order to have access to all POS tags HashSet<HashSet<Sentence>> toMerge = new HashSet<HashSet<Sentence>>(); Sentence prevSent = null, thisSent = null; while (sentIter.hasNext()) { if (thisSent == null) { thisSent = (Sentence) sentIter.next(); continue; } prevSent = thisSent; thisSent = (Sentence) sentIter.next(); /* * select the last two tokens within the previous sentence as well as the * first of the current one and check for matches. */ Token penultimateToken = null, ultimateToken = null, firstToken = null; FSIterator tokIter = jcas.getAnnotationIndex(Token.type).subiterator(thisSent); if (tokIter.hasNext()) { firstToken = (Token) tokIter.next(); } tokIter = jcas.getAnnotationIndex(Token.type).subiterator(prevSent); while (tokIter.hasNext()) { if (ultimateToken == null) { ultimateToken = (Token) tokIter.next(); continue; } penultimateToken = ultimateToken; ultimateToken = (Token) tokIter.next(); } // check that all tokens for further analysis are present. if not: skip if (penultimateToken == null || ultimateToken == null || firstToken == null) { continue; } // check rules, memorize sentences to be merged for (String[] posRule : posRules) { /* * either one of the pre-defined POS rules fit, or the first token's * covered text begins with lower case characters. 
*/ if ((penultimateToken.getPos() != null && penultimateToken.getPos().matches(posRule[0]) && ultimateToken.getPos() != null && ultimateToken.getPos().matches(posRule[1]) && firstToken.getPos() != null && firstToken.getPos().matches(posRule[2])) || (firstToken.getCoveredText().matches("^[a-z/].*"))) { /* * check whether one of the previous candidate pairs already * contains one of our sentences. */ Boolean candidateExisted = false; for (HashSet<Sentence> mergeCandidate : toMerge) { if (mergeCandidate.contains(thisSent) || mergeCandidate.contains(prevSent)) { // we add both here because sets ignore duplicates mergeCandidate.add(prevSent); mergeCandidate.add(thisSent); candidateExisted = true; break; } } /* * if one of the sentences was not already to be merged with another, * create a new merge candidate set */ if (!candidateExisted) { HashSet<Sentence> newCandidate = new HashSet<Sentence>(); newCandidate.add(prevSent); newCandidate.add(thisSent); toMerge.add(newCandidate); } break; // don't need to do the next rules; already merging. } } } // iterate over the previously collected merge candidates for (HashSet<Sentence> mergeCandidate : toMerge) { // find the earliest beginning and latest end for the set of sentences Integer beginIndex = Integer.MAX_VALUE, endIndex = Integer.MIN_VALUE; Sentence mergedSent = new Sentence(jcas); for (Sentence s : mergeCandidate) { if (s.getBegin() < beginIndex) { beginIndex = s.getBegin(); } if (s.getEnd() > endIndex) { endIndex = s.getEnd(); } s.removeFromIndexes(); } // set values, add to jcas mergedSent.setBegin(beginIndex); mergedSent.setEnd(endIndex); mergedSent.addToIndexes(); } }
/**
 * Based on tokens from the jcas object, adds part of speech (POS) and sentence tags to the
 * jcas object using the treetagger program.
 *
 * <p>The pre-tokenized text is written to a temporary file (one token per line), fed to the
 * external TreeTagger process, and the tagger's output lines are then walked in lock-step
 * with the in-memory token list: output line {@code i} is assumed to correspond to token
 * {@code i} (empty tokens were not written out and are skipped on the fly).
 *
 * @param jcas JCas object supplied by the pipeline
 */
private void doTreeTag(JCas jcas) {
  File tmpDocument = null;
  BufferedWriter tmpFileWriter;
  ArrayList<Token> tokens = new ArrayList<Token>();
  try {
    // create a temporary file and write our pre-existing tokens to it.
    tmpDocument = File.createTempFile("postokens", null);
    tmpFileWriter =
        new BufferedWriter(new OutputStreamWriter(new FileOutputStream(tmpDocument), "UTF-8"));
    // iterate over existing tokens
    FSIterator ai = jcas.getAnnotationIndex(Token.type).iterator();
    while (ai.hasNext()) {
      Token t = (Token) ai.next();
      // every token (including zero-length ones) is kept in the list so that the
      // index-based pairing with the tagger output below stays aligned
      tokens.add(t);
      // zero-length (empty) tokens are NOT written to the tagger input file
      if (!(t.getBegin() == t.getEnd())) {
        tmpFileWriter.write(t.getCoveredText() + ttprops.newLineSeparator);
      }
    }
    // NOTE(review): if write() throws, this writer is never closed (no finally) — leak
    tmpFileWriter.close();
  } catch (IOException e) {
    Logger.printError(
        "Something went wrong creating a temporary file for the treetagger to process.");
    // NOTE(review): System.exit in a pipeline component kills the whole JVM — confirm intended
    System.exit(-1);
  }
  // Possible End-of-Sentence Tags
  HashSet<String> hsEndOfSentenceTag = new HashSet<String>();
  hsEndOfSentenceTag.add("SENT"); // ENGLISH, FRENCH, GREEK,
  hsEndOfSentenceTag.add("$."); // GERMAN, DUTCH
  hsEndOfSentenceTag.add("FS"); // SPANISH
  hsEndOfSentenceTag.add("_Z_Fst"); // ESTONIAN
  hsEndOfSentenceTag.add("_Z_Int"); // ESTONIAN
  hsEndOfSentenceTag.add("_Z_Exc"); // ESTONIAN
  hsEndOfSentenceTag.add("ew"); // CHINESE
  try {
    Process p = ttprops.getTreeTaggingProcess(tmpDocument);
    Logger.printDetail(component, "TreeTagger (pos tagging) with: " + ttprops.parFileName);
    BufferedReader in = new BufferedReader(new InputStreamReader(p.getInputStream(), "UTF-8"));
    // the sentence currently being built; null means "no sentence open"
    Sentence sentence = null;
    // iterate over all the output lines and tokens array (which have the same source and are
    // hence symmetric)
    int i = 0;
    String s = null;
    while ((s = in.readLine()) != null) {
      // grab a token
      // NOTE(review): assumes the tagger never emits more lines than there are tokens;
      // otherwise this throws IndexOutOfBoundsException — confirm against TreeTagger output
      Token token = tokens.get(i++);
      // modified (Aug 29, 2011): Handle empty tokens (such as empty lines) in input file
      while (token.getCoveredText().equals("")) {
        // if part of the configuration, also add sentences to the jcas document
        if ((annotate_sentences)
            && (token.getPos() != null && token.getPos().equals("EMPTYLINE"))) {
          // Establish sentence structure
          if (sentence == null) {
            sentence = new Sentence(jcas);
            sentence.setBegin(token.getBegin());
          }
          // Finish current sentence if end-of-sentence pos was found or document ended
          sentence.setEnd(token.getEnd());
          // only index non-empty sentences
          if (sentence.getBegin() < sentence.getEnd()) {
            sentence.addToIndexes();
          }
          // Make sure current sentence is not active anymore so that a new one might be created
          sentence = null;
          // sentence = new Sentence(jcas);
        }
        // empty tokens are dropped from the index and the next token is consumed
        // without consuming a tagger output line (empties were never written out)
        token.removeFromIndexes();
        token = tokens.get(i++);
      }
      // remove tokens, otherwise they are in the index twice
      token.removeFromIndexes();
      // set part of speech tag and add to indexes again
      if (!(token.getCoveredText().equals(""))) {
        token.setPos(s);
        token.addToIndexes();
      }
      // if part of the configuration, also add sentences to the jcas document
      if (annotate_sentences) {
        // Establish sentence structure
        if (sentence == null) {
          sentence = new Sentence(jcas);
          sentence.setBegin(token.getBegin());
        }
        // Finish current sentence if end-of-sentence pos was found or document ended
        if (hsEndOfSentenceTag.contains(s) || i == tokens.size()) {
          sentence.setEnd(token.getEnd());
          sentence.addToIndexes();
          // Make sure current sentence is not active anymore so that a new one might be created
          sentence = null;
          // sentence = new Sentence(jcas);
        }
      }
    }
    // drain any tokens left over after the tagger output ended (e.g. trailing empty lines)
    while (i < tokens.size()) {
      // NOTE(review): sentence is never reset to null in this loop, so a still-open
      // sentence gets setEnd/addToIndexes called once per remaining token — likely
      // creating duplicate index entries; confirm whether this is intended
      if (!(sentence == null)) {
        sentence.setEnd(tokens.get(tokens.size() - 1).getEnd());
        sentence.addToIndexes();
      }
      Token token = tokens.get(i++);
      if (token.getPos() != null && token.getPos().equals("EMPTYLINE")) {
        token.removeFromIndexes();
      }
    }
    // NOTE(review): in/p are not released in a finally block, so an exception above
    // skips both close() and destroy()
    in.close();
    p.destroy();
  } catch (Exception e) {
    e.printStackTrace();
  } finally {
    // Delete temporary files
    tmpDocument.delete();
  }
}