Esempio n. 1
0
  /* (non-Javadoc)
   * @see com.maalaang.omtwitter.ml.SvmExampleTargetExtractor#extractTargetFromCas(org.apache.uima.cas.CAS)
   */
  public int extractTargetFromJCas(JCas jcas) {
    TweetAnnotation tweetAnn =
        (TweetAnnotation) jcas.getAnnotationIndex(TweetAnnotation.type).iterator().next();
    String polarity = tweetAnn.getPolarity();

    if (polarity.equals(OMTweet.POLARITY_STR_POSITIVE)) {
      return 1;
    } else if (polarity.equals(OMTweet.POLARITY_STR_NEGATIVE)) {
      return -1;
    } else {
      return 0;
    }
  }
  /* (non-Javadoc)
   * @see org.apache.uima.collection.base_cpm.CasObjectProcessor#processCas(org.apache.uima.cas.CAS)
   */
  public void processCas(CAS aCAS) throws ResourceProcessException {
    JCas jcas;
    try {
      jcas = aCAS.getJCas();
    } catch (CASException e) {
      logger.log(Level.SEVERE, e.getMessage());
      throw new ResourceProcessException(e);
    }

    TweetAnnotation tweetAnn =
        (TweetAnnotation) jcas.getAnnotationIndex(TweetAnnotation.type).iterator().next();
    OMTweet answerTweet = evalCorpusReader.next();

    if (!answerTweet.getId().equals(tweetAnn.getId())) {
      logger.log(
          Level.SEVERE,
          "target corpus and evaluation corpus don't match to each other - "
              + answerTweet.getId()
              + ", "
              + tweetAnn.getId());
      throw new ResourceProcessException();
    }

    String[] entity = extractEntityTags(answerTweet.getText());

    String classified = null;
    String prevClassified = null;
    StringBuffer sb = new StringBuffer();
    try {
      sb.append("\n[");
      sb.append(answerTweet.getPolarityString());
      sb.append("=>");
      sb.append(tweetAnn.getPolarity());
      sb.append("] ");
      sb.append(tweetAnn.getCoveredText());
      sb.append('\n');

      FSIterator<Annotation> tokenAnnIter =
          jcas.getAnnotationIndex(TokenAnnotation.type).iterator();
      TokenAnnotation tokenAnn = null;

      int i = 0;
      int prevClassifiedIdx = labelNoneIdx;
      int prevAnswerIdx = labelNoneIdx;
      String classifiedEntityStr = "";
      String answerEntityStr = "";

      while (tokenAnnIter.hasNext()) {
        tokenAnn = (TokenAnnotation) tokenAnnIter.next();

        classified = tokenAnn.getEntityLabel();
        String answer = entity[i];
        boolean correct = false;
        if (classified.equals(answer)) {
          correct = true;
        }

        int classifiedIdx = 0;
        int answerIdx = 0;
        try {
          answerIdx = map.get(answer);
        } catch (Exception e) {
          logger.log(
              Level.SEVERE,
              "wrong annotation on the evaluation corpus - tweet id: "
                  + answerTweet.getId()
                  + ", answerTag="
                  + answer);
          logger.log(Level.SEVERE, e.getMessage());
          answerIdx = map.get(labelNone);
        }
        try {
          classifiedIdx = map.get(classified);
        } catch (Exception e) {
          logger.log(
              Level.SEVERE,
              "wrong annotation from the NER - tweet id: "
                  + answerTweet.getId()
                  + ", classifiedTag="
                  + classified);
          logger.log(Level.SEVERE, e.getMessage());
          classifiedIdx = map.get(labelNone);
        }

        stat[classifiedIdx][0]++;
        stat[answerIdx][1]++;

        if (correct) {
          stat[classifiedIdx][2]++;
        }

        if (classifiedIdx != labelNoneIdx) {
          if (classifiedIdx / 3 != prevClassifiedIdx / 3) {
            classifiedEntityCnt[classifiedIdx / 3]++;
            if (prevClassifiedIdx != labelNoneIdx) {
              sb.append('\t');
              sb.append(classifiedEntityStr);
              sb.append(" -> ");
              sb.append(prevClassified.substring(0, prevClassified.lastIndexOf('_')));
              sb.append('\n');
            }
            classifiedEntityStr = tokenAnn.getCoveredText();
          } else {
            classifiedEntityStr += " " + tokenAnn.getCoveredText();
          }
        } else if (prevClassifiedIdx != labelNoneIdx) {
          sb.append('\t');
          sb.append(classifiedEntityStr);
          sb.append(" -> ");
          sb.append(prevClassified.substring(0, prevClassified.lastIndexOf('_')));
          sb.append('\n');
          classifiedEntityStr = "";
        }
        prevClassifiedIdx = classifiedIdx;

        if (answerIdx != labelNoneIdx) {
          if (answerIdx / 3 != prevAnswerIdx / 3) {
            answerEntityCnt[answerIdx / 3]++;
            answerEntityStr = tokenAnn.getCoveredText();
          } else {
            answerEntityStr += " " + tokenAnn.getCoveredText();
          }
        } else if (prevAnswerIdx != labelNoneIdx) {
          answerEntityStr = "";
        }

        prevAnswerIdx = answerIdx;
        prevClassified = classified;
        i++;
      }
      if (prevClassifiedIdx != labelNoneIdx) {
        sb.append('\t');
        sb.append(classifiedEntityStr);
        sb.append(" -> ");
        sb.append(prevClassified.substring(0, prevClassified.lastIndexOf('_')));
        sb.append('\n');
      }

      // senti
      String answerSenti = answerTweet.getPolarityString();

      boolean correct = false;
      String classifiedSenti = tweetAnn.getPolarity();
      if (classifiedSenti.equals(senti)) {
        correct = true;
      }

      int classifiedIdx = sentiIdx(classifiedSenti);
      int answerIdx = sentiIdx(answerSenti);

      senti[classifiedIdx][0]++;
      senti[answerIdx][1]++;
      if (classifiedIdx == answerIdx) {
        correct = true;
      }

      if (correct) {
        senti[classifiedIdx][2]++;
      }
      cnt++;

      logger.log(Level.INFO, sb.toString());

    } catch (CASRuntimeException e) {
      throw new ResourceProcessException(e);
    }
  }