Ejemplo n.º 1
0
  /**
   * Writes every sentence of the given CAS to the output, one token per line.
   *
   * <p>Each line holds three space-separated columns: the token's covered text, its POS value
   * and the chunk label returned by the {@code IobEncoder}. A column is written as
   * {@code UNUSED} when the corresponding {@code writePos}/{@code writeChunk} switch is off or
   * the value is {@code null}. An empty line is written after each sentence.
   *
   * @param aJCas the CAS providing the sentences, tokens and chunk annotations
   * @param aOut the writer receiving the formatted output
   */
  private void convert(JCas aJCas, PrintWriter aOut) {
    Type chunkType = JCasUtil.getType(aJCas, Chunk.class);
    Feature chunkValue = chunkType.getFeatureByBaseName("chunkValue");

    for (Sentence sentence : select(aJCas, Sentence.class)) {
      // Insertion-ordered, so rows are written back in the order the tokens were added.
      HashMap<Token, Row> ctokens = new LinkedHashMap<Token, Row>();

      // Tokens
      List<Token> tokens = selectCovered(Token.class, sentence);

      // Chunks
      // NOTE(review): the encoder is built from loop-invariant arguments only; if it keeps no
      // per-sentence state it could be created once before the sentence loop -- confirm.
      IobEncoder encoder = new IobEncoder(aJCas.getCas(), chunkType, chunkValue);

      for (int i = 0; i < tokens.size(); i++) {
        Token token = tokens.get(i);
        Row row = new Row();
        row.id = i + 1; // 1-based position of the token within the sentence
        row.token = token;
        row.chunk = encoder.encode(token);
        ctokens.put(token, row);
      }

      // Write the sentence: one "text pos chunk" line per token
      for (Row row : ctokens.values()) {
        String pos = UNUSED;
        if (writePos) {
          // Fetch the POS annotation once; it is only needed when POS output is enabled.
          POS posAnno = row.token.getPos();
          if (posAnno != null) {
            pos = posAnno.getPosValue();
          }
        }

        // Reuse the label encoded while building the row instead of encoding the token again.
        String chunk = UNUSED;
        if (writeChunk && (row.chunk != null)) {
          chunk = row.chunk;
        }

        aOut.printf("%s %s %s\n", row.token.getCoveredText(), pos, chunk);
      }

      // Blank line separates sentences
      aOut.println();
    }
  }