private void convert(JCas aJCas, PrintWriter aOut) { Type chunkType = JCasUtil.getType(aJCas, Chunk.class); Feature chunkValue = chunkType.getFeatureByBaseName("chunkValue"); for (Sentence sentence : select(aJCas, Sentence.class)) { HashMap<Token, Row> ctokens = new LinkedHashMap<Token, Row>(); // Tokens List<Token> tokens = selectCovered(Token.class, sentence); // Chunks IobEncoder encoder = new IobEncoder(aJCas.getCas(), chunkType, chunkValue); for (int i = 0; i < tokens.size(); i++) { Row row = new Row(); row.id = i + 1; row.token = tokens.get(i); row.chunk = encoder.encode(tokens.get(i)); ctokens.put(row.token, row); } // Write sentence in CONLL 2006 format for (Row row : ctokens.values()) { String pos = UNUSED; if (writePos && (row.token.getPos() != null)) { POS posAnno = row.token.getPos(); pos = posAnno.getPosValue(); } String chunk = UNUSED; if (writeChunk && (row.chunk != null)) { chunk = encoder.encode(row.token); } aOut.printf("%s %s %s\n", row.token.getCoveredText(), pos, chunk); } aOut.println(); } }
private void convert(JCas aJCas, PrintWriter aOut) { Map<Token, Collection<SemanticPredicate>> predIdx = indexCovered(aJCas, Token.class, SemanticPredicate.class); Map<SemanticArgument, Collection<Token>> argIdx = indexCovered(aJCas, SemanticArgument.class, Token.class); for (Sentence sentence : select(aJCas, Sentence.class)) { HashMap<Token, Row> ctokens = new LinkedHashMap<Token, Row>(); // Tokens List<Token> tokens = selectCovered(Token.class, sentence); // Check if we should try to include the FEATS in output List<MorphologicalFeatures> morphology = selectCovered(MorphologicalFeatures.class, sentence); boolean useFeats = tokens.size() == morphology.size(); List<SemanticPredicate> preds = selectCovered(SemanticPredicate.class, sentence); for (int i = 0; i < tokens.size(); i++) { Row row = new Row(); row.id = i + 1; row.token = tokens.get(i); row.args = new SemanticArgument[preds.size()]; if (useFeats) { row.feats = morphology.get(i); } // If there are multiple semantic predicates for the current token, then // we keep only the first Collection<SemanticPredicate> predsForToken = predIdx.get(row.token); if (predsForToken != null && !predsForToken.isEmpty()) { row.pred = predsForToken.iterator().next(); } ctokens.put(row.token, row); } // Dependencies for (Dependency rel : selectCovered(Dependency.class, sentence)) { ctokens.get(rel.getDependent()).deprel = rel; } // Semantic arguments for (int p = 0; p < preds.size(); p++) { FSArray args = preds.get(p).getArguments(); for (SemanticArgument arg : select(args, SemanticArgument.class)) { for (Token t : argIdx.get(arg)) { Row row = ctokens.get(t); row.args[p] = arg; } } } // Write sentence in CONLL 2009 format for (Row row : ctokens.values()) { int id = row.id; String form = row.token.getCoveredText(); String lemma = UNUSED; if (writeLemma && (row.token.getLemma() != null)) { lemma = row.token.getLemma().getValue(); } String plemma = lemma; String pos = UNUSED; if (writePos && (row.token.getPos() != null)) { POS posAnno = row.token.getPos(); pos = posAnno.getPosValue(); } String ppos = pos; String feat = UNUSED; if (writeMorph && (row.feats != null)) { feat = row.feats.getValue(); } String pfeat = feat; int headId = UNUSED_INT; String deprel = UNUSED; if (writeDependency && (row.deprel != null)) { deprel = row.deprel.getDependencyType(); headId = ctokens.get(row.deprel.getGovernor()).id; if (headId == row.id) { // ROOT dependencies may be modeled as a loop, ignore these. headId = 0; } } String head = UNUSED; if (headId != UNUSED_INT) { head = Integer.toString(headId); } String phead = head; String pdeprel = deprel; String fillpred = UNUSED; String pred = UNUSED; StringBuilder apreds = new StringBuilder(); if (writeSemanticPredicate) { if (row.pred != null) { fillpred = "Y"; pred = row.pred.getCategory(); } for (SemanticArgument arg : row.args) { if (apreds.length() > 0) { apreds.append('\t'); } apreds.append(arg != null ? arg.getRole() : UNUSED); } } aOut.printf( "%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n", id, form, lemma, plemma, pos, ppos, feat, pfeat, head, phead, deprel, pdeprel, fillpred, pred, apreds); } aOut.println(); } }