Пример #1
0
  /**
   * Writes every sentence of the given CAS to {@code aOut}, one token per line.
   *
   * <p>Each line holds three space-separated columns: the token's covered text, its POS value and
   * its chunk tag as returned by the {@code IobEncoder}. A column is written as {@code UNUSED}
   * when the matching {@code writePos} / {@code writeChunk} switch is off or no value is
   * available for the token. An empty line is written after each sentence.
   *
   * @param aJCas the CAS providing the sentences, tokens and chunks
   * @param aOut the writer receiving the output
   */
  private void convert(JCas aJCas, PrintWriter aOut) {
    Type chunkType = JCasUtil.getType(aJCas, Chunk.class);
    Feature chunkValue = chunkType.getFeatureByBaseName("chunkValue");

    for (Sentence sentence : select(aJCas, Sentence.class)) {
      // Insertion-ordered so that rows are written in token order below.
      HashMap<Token, Row> ctokens = new LinkedHashMap<Token, Row>();

      // Tokens
      List<Token> tokens = selectCovered(Token.class, sentence);

      // Chunks
      // NOTE(review): the encoder is built from CAS-wide arguments only; it could probably be
      // created once before the sentence loop - confirm it keeps no per-sentence state.
      IobEncoder encoder = new IobEncoder(aJCas.getCas(), chunkType, chunkValue);

      for (int i = 0; i < tokens.size(); i++) {
        Row row = new Row();
        row.id = i + 1;
        row.token = tokens.get(i);
        row.chunk = encoder.encode(tokens.get(i));
        ctokens.put(row.token, row);
      }

      // Write one line per token: covered text, POS and chunk tag
      for (Row row : ctokens.values()) {
        String pos = UNUSED;
        if (writePos && (row.token.getPos() != null)) {
          POS posAnno = row.token.getPos();
          pos = posAnno.getPosValue();
        }

        String chunk = UNUSED;
        if (writeChunk && (row.chunk != null)) {
          // Reuse the tag computed when the row was built instead of encoding the token again.
          chunk = row.chunk;
        }

        aOut.printf("%s %s %s\n", row.token.getCoveredText(), pos, chunk);
      }

      // Blank line marks the end of the sentence
      aOut.println();
    }
  }
Пример #2
0
  /**
   * Writes every sentence of the given CAS to {@code aOut} as tab-separated token lines.
   *
   * <p>Columns per line, in order: ID, FORM, LEMMA, PLEMMA, POS, PPOS, FEAT, PFEAT, HEAD, PHEAD,
   * DEPREL, PDEPREL, FILLPRED, PRED, followed by one argument-role column per semantic predicate
   * found in the sentence (only when {@code writeSemanticPredicate} is set). Each "P" column
   * repeats the value of its counterpart. A value that is switched off or not available is
   * written as {@code UNUSED}. An empty line is written after each sentence.
   *
   * <p>Rows without argument columns end directly after the PRED column; no trailing tab is
   * written.
   *
   * @param aJCas the CAS providing sentences, tokens and their annotations
   * @param aOut the writer receiving the output
   */
  private void convert(JCas aJCas, PrintWriter aOut) {
    Map<Token, Collection<SemanticPredicate>> predIdx =
        indexCovered(aJCas, Token.class, SemanticPredicate.class);
    Map<SemanticArgument, Collection<Token>> argIdx =
        indexCovered(aJCas, SemanticArgument.class, Token.class);
    for (Sentence sentence : select(aJCas, Sentence.class)) {
      // Insertion-ordered so that rows are written in token order below.
      Map<Token, Row> ctokens = new LinkedHashMap<Token, Row>();

      // Tokens
      List<Token> tokens = selectCovered(Token.class, sentence);

      // FEATS are only included when there is exactly one MorphologicalFeatures annotation per
      // token, because they are matched to tokens by position.
      List<MorphologicalFeatures> morphology = selectCovered(MorphologicalFeatures.class, sentence);
      boolean useFeats = tokens.size() == morphology.size();

      List<SemanticPredicate> preds = selectCovered(SemanticPredicate.class, sentence);

      for (int i = 0; i < tokens.size(); i++) {
        Row row = new Row();
        row.id = i + 1;
        row.token = tokens.get(i);
        // One slot per predicate of the sentence; slots stay null where the token is no argument.
        row.args = new SemanticArgument[preds.size()];
        if (useFeats) {
          row.feats = morphology.get(i);
        }

        // If there are multiple semantic predicates for the current token, then
        // we keep only the first
        Collection<SemanticPredicate> predsForToken = predIdx.get(row.token);
        if (predsForToken != null && !predsForToken.isEmpty()) {
          row.pred = predsForToken.iterator().next();
        }
        ctokens.put(row.token, row);
      }

      // Dependencies
      // NOTE(review): assumes the dependent (and, further below, the governor) is a token of
      // this sentence; the row lookup yields null otherwise - confirm against the data.
      for (Dependency rel : selectCovered(Dependency.class, sentence)) {
        ctokens.get(rel.getDependent()).deprel = rel;
      }

      // Semantic arguments
      for (int p = 0; p < preds.size(); p++) {
        FSArray args = preds.get(p).getArguments();
        if (args == null) {
          // Predicate without an argument array: its column stays empty for all tokens.
          continue;
        }
        for (SemanticArgument arg : select(args, SemanticArgument.class)) {
          for (Token t : argIdx.get(arg)) {
            Row row = ctokens.get(t);
            // Tokens covered by the argument that are not part of this sentence have no row here.
            if (row != null) {
              row.args[p] = arg;
            }
          }
        }
      }

      // Write the sentence, one tab-separated line per token
      for (Row row : ctokens.values()) {
        int id = row.id;

        String form = row.token.getCoveredText();

        String lemma = UNUSED;
        if (writeLemma && (row.token.getLemma() != null)) {
          lemma = row.token.getLemma().getValue();
        }
        String plemma = lemma;

        String pos = UNUSED;
        if (writePos && (row.token.getPos() != null)) {
          POS posAnno = row.token.getPos();
          pos = posAnno.getPosValue();
        }
        String ppos = pos;

        String feat = UNUSED;
        if (writeMorph && (row.feats != null)) {
          feat = row.feats.getValue();
        }
        String pfeat = feat;

        int headId = UNUSED_INT;
        String deprel = UNUSED;
        if (writeDependency && (row.deprel != null)) {
          deprel = row.deprel.getDependencyType();
          headId = ctokens.get(row.deprel.getGovernor()).id;
          if (headId == row.id) {
            // ROOT dependencies may be modeled as a loop, ignore these.
            headId = 0;
          }
        }

        String head = UNUSED;
        if (headId != UNUSED_INT) {
          head = Integer.toString(headId);
        }

        String phead = head;
        String pdeprel = deprel;

        String fillpred = UNUSED;
        String pred = UNUSED;
        StringBuilder apreds = new StringBuilder();
        if (writeSemanticPredicate) {
          if (row.pred != null) {
            fillpred = "Y";
            pred = row.pred.getCategory();
          }

          for (SemanticArgument arg : row.args) {
            // Every argument column brings its own leading tab, so a row without argument
            // columns does not end in a dangling separator.
            apreds.append('\t');
            apreds.append(arg != null ? arg.getRole() : UNUSED);
          }
        }

        aOut.printf(
            "%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%s\n",
            id, form, lemma, plemma, pos, ppos, feat, pfeat, head, phead, deprel, pdeprel, fillpred,
            pred, apreds);
      }

      // Blank line marks the end of the sentence
      aOut.println();
    }
  }