@Override
  public Vector<Double> calculateScores(JCas aCas) throws ScoringComponentException {
    // 1) how many words of H (extended with multiple relations) can be
    // found in T divided by the length of H
    Vector<Double> scoresVector = new Vector<Double>();

    try {
      JCas tView = aCas.getView("TextView");
      HashMap<String, Integer> tBag = countTokens(tView);

      JCas hView = aCas.getView("HypothesisView");
      HashMap<String, Integer> hBag = countTokens(hView);

      if (null != wnlrSet && wnlrSet.size() != 0) {
        for (WordnetLexicalResource wnlr : wnlrSet) {
          scoresVector.add(calculateSingleLexScoreWithWNRelations(tBag, hBag, wnlr));
        }
      }
      if (null != volrSet && volrSet.size() != 0) {
        for (VerbOceanLexicalResource volr : volrSet) {
          scoresVector.add(calculateSingleLexScoreWithVORelations(tBag, hBag, volr));
        }
      }
    } catch (CASException e) {
      throw new ScoringComponentException(e.getMessage());
    }
    return scoresVector;
  }
コード例 #2
0
  @Override
  public Vector<Double> calculateScores(JCas aCas) throws ScoringComponentException {
    // all the values: (T&H/H), (T&H/T), and ((T&H/H)*(T&H/T)), with four
    // different matching types
    Vector<Double> scoresVector = new Vector<Double>();

    try {
      JCas tView = aCas.getView("TextView");
      JCas hView = aCas.getView("HypothesisView");

      for (int i = 1; i < 5; i++) {
        HashMap<String, Integer> tBag = countDeps(tView, i);
        HashMap<String, Integer> hBag = countDeps(hView, i);
        scoresVector.addAll(calculateSimilarity(tBag, hBag));
      }

    } catch (CASException e) {
      throw new ScoringComponentException(e.getMessage());
    }
    return scoresVector;
  }
コード例 #3
0
  /* (non-Javadoc)
   * @see org.apache.uima.collection.base_cpm.CasObjectProcessor#processCas(org.apache.uima.cas.CAS)
   */
  public void processCas(CAS aCAS) throws ResourceProcessException {
    JCas jcas;
    try {
      jcas = aCAS.getJCas();
    } catch (CASException e) {
      logger.log(Level.SEVERE, e.getMessage());
      throw new ResourceProcessException(e);
    }

    TweetAnnotation tweetAnn =
        (TweetAnnotation) jcas.getAnnotationIndex(TweetAnnotation.type).iterator().next();
    OMTweet answerTweet = evalCorpusReader.next();

    if (!answerTweet.getId().equals(tweetAnn.getId())) {
      logger.log(
          Level.SEVERE,
          "target corpus and evaluation corpus don't match to each other - "
              + answerTweet.getId()
              + ", "
              + tweetAnn.getId());
      throw new ResourceProcessException();
    }

    String[] entity = extractEntityTags(answerTweet.getText());

    String classified = null;
    String prevClassified = null;
    StringBuffer sb = new StringBuffer();
    try {
      sb.append("\n[");
      sb.append(answerTweet.getPolarityString());
      sb.append("=>");
      sb.append(tweetAnn.getPolarity());
      sb.append("] ");
      sb.append(tweetAnn.getCoveredText());
      sb.append('\n');

      FSIterator<Annotation> tokenAnnIter =
          jcas.getAnnotationIndex(TokenAnnotation.type).iterator();
      TokenAnnotation tokenAnn = null;

      int i = 0;
      int prevClassifiedIdx = labelNoneIdx;
      int prevAnswerIdx = labelNoneIdx;
      String classifiedEntityStr = "";
      String answerEntityStr = "";

      while (tokenAnnIter.hasNext()) {
        tokenAnn = (TokenAnnotation) tokenAnnIter.next();

        classified = tokenAnn.getEntityLabel();
        String answer = entity[i];
        boolean correct = false;
        if (classified.equals(answer)) {
          correct = true;
        }

        int classifiedIdx = 0;
        int answerIdx = 0;
        try {
          answerIdx = map.get(answer);
        } catch (Exception e) {
          logger.log(
              Level.SEVERE,
              "wrong annotation on the evaluation corpus - tweet id: "
                  + answerTweet.getId()
                  + ", answerTag="
                  + answer);
          logger.log(Level.SEVERE, e.getMessage());
          answerIdx = map.get(labelNone);
        }
        try {
          classifiedIdx = map.get(classified);
        } catch (Exception e) {
          logger.log(
              Level.SEVERE,
              "wrong annotation from the NER - tweet id: "
                  + answerTweet.getId()
                  + ", classifiedTag="
                  + classified);
          logger.log(Level.SEVERE, e.getMessage());
          classifiedIdx = map.get(labelNone);
        }

        stat[classifiedIdx][0]++;
        stat[answerIdx][1]++;

        if (correct) {
          stat[classifiedIdx][2]++;
        }

        if (classifiedIdx != labelNoneIdx) {
          if (classifiedIdx / 3 != prevClassifiedIdx / 3) {
            classifiedEntityCnt[classifiedIdx / 3]++;
            if (prevClassifiedIdx != labelNoneIdx) {
              sb.append('\t');
              sb.append(classifiedEntityStr);
              sb.append(" -> ");
              sb.append(prevClassified.substring(0, prevClassified.lastIndexOf('_')));
              sb.append('\n');
            }
            classifiedEntityStr = tokenAnn.getCoveredText();
          } else {
            classifiedEntityStr += " " + tokenAnn.getCoveredText();
          }
        } else if (prevClassifiedIdx != labelNoneIdx) {
          sb.append('\t');
          sb.append(classifiedEntityStr);
          sb.append(" -> ");
          sb.append(prevClassified.substring(0, prevClassified.lastIndexOf('_')));
          sb.append('\n');
          classifiedEntityStr = "";
        }
        prevClassifiedIdx = classifiedIdx;

        if (answerIdx != labelNoneIdx) {
          if (answerIdx / 3 != prevAnswerIdx / 3) {
            answerEntityCnt[answerIdx / 3]++;
            answerEntityStr = tokenAnn.getCoveredText();
          } else {
            answerEntityStr += " " + tokenAnn.getCoveredText();
          }
        } else if (prevAnswerIdx != labelNoneIdx) {
          answerEntityStr = "";
        }

        prevAnswerIdx = answerIdx;
        prevClassified = classified;
        i++;
      }
      if (prevClassifiedIdx != labelNoneIdx) {
        sb.append('\t');
        sb.append(classifiedEntityStr);
        sb.append(" -> ");
        sb.append(prevClassified.substring(0, prevClassified.lastIndexOf('_')));
        sb.append('\n');
      }

      // senti
      String answerSenti = answerTweet.getPolarityString();

      boolean correct = false;
      String classifiedSenti = tweetAnn.getPolarity();
      if (classifiedSenti.equals(senti)) {
        correct = true;
      }

      int classifiedIdx = sentiIdx(classifiedSenti);
      int answerIdx = sentiIdx(answerSenti);

      senti[classifiedIdx][0]++;
      senti[answerIdx][1]++;
      if (classifiedIdx == answerIdx) {
        correct = true;
      }

      if (correct) {
        senti[classifiedIdx][2]++;
      }
      cnt++;

      logger.log(Level.INFO, sb.toString());

    } catch (CASRuntimeException e) {
      throw new ResourceProcessException(e);
    }
  }