Ejemplo n.º 1
0
 /**
  * Initializes this component and reports which slot-removal option is in effect.
  *
  * <p>The superclass is initialized first; afterwards the logger is obtained from the supplied
  * context and one INFO message containing {@code removeOption} is written.
  *
  * <p>NOTE(review): an earlier version of this method parsed "class|slot" pairs from
  * {@code classSlotPairsToRemove} (with a special "remove all" value) into a map of class-name
  * regex to slot names. That parsing had been commented out and is not performed here.
  *
  * @param context the UIMA context handed to this component
  * @throws ResourceInitializationException propagated from the superclass initialization
  */
 @Override
 public void initialize(UimaContext context) throws ResourceInitializationException {
   super.initialize(context);

   logger = context.getLogger();

   final String announcement =
       "SlotRemovalFilter_AE initialized. Will remove slots defined by: " + removeOption;
   logger.log(Level.INFO, announcement);
 }
  /**
   * Performs the common part of the initialization and then delegates to the no-argument
   * {@code initialize()}.
   *
   * <p>Steps, in order: initialize the superclass, remember the context, fetch the logger, emit an
   * INFO message naming this component (only when INFO is loggable), and read the optional
   * "remove existing annotations" flag. When that flag is not configured it defaults to
   * {@code false}.
   *
   * @param context the UIMA context this component runs in
   * @throws ResourceInitializationException if the superclass, the parameter lookup or the
   *     delegated {@code initialize()} fails
   */
  public final void initialize(UimaContext context) throws ResourceInitializationException {
    super.initialize(context);

    this.context = context;
    mLogger = context.getLogger();

    final boolean infoEnabled = mLogger.isLoggable(Level.INFO);
    if (infoEnabled) {
      mLogger.log(Level.INFO, "Initializing the " + name + ".");
    }

    final Boolean configuredFlag =
        AnnotatorUtil.getOptionalBooleanParameter(context, UimaUtil.IS_REMOVE_EXISTINGS_ANNOTAIONS);

    // An unset parameter means "keep existing annotations".
    isRemoveExistingAnnotations = (configuredFlag != null) ? configuredFlag : Boolean.FALSE;

    initialize();
  }
  /**
   * Prepares the evaluator: creates the evaluation-corpus reader, reads the configuration
   * parameters and allocates the counter tables.
   *
   * <p>Label index layout built here: every named-entity tag from
   * {@code PARAM_NAMED_ENTITY_TAGS} (whitespace separated) receives three consecutive indices for
   * the suffixes {@code _B}, {@code _M} and {@code _E}; the "none" label receives the last index,
   * which is also stored in {@code labelNoneIdx}. Consequently {@code index / 3} identifies the
   * entity type of any non-"none" label.
   *
   * <p>{@code stat} holds one row of three counters per label, {@code senti} is a 3x3 table, and
   * {@code classifiedEntityCnt} / {@code answerEntityCnt} hold one counter per entity type.
   *
   * @throws ResourceInitializationException if the superclass fails, the evaluation corpus reader
   *     cannot be created, or the named-entity tag parameter is not configured
   */
  @Override
  public void initialize() throws ResourceInitializationException {
    super.initialize();

    logger = getLogger();

    try {
      evalCorpusReader =
          new OMTwitterCorpusFileReader(
              (String) getConfigParameterValue(PARAM_EVALUATION_CORPUS_FILE),
              (String) getConfigParameterValue(PARAM_EVALUATION_CORPUS_DELIM),
              OMTwitterCorpusFile.fieldNameToId(
                  (String) getConfigParameterValue(PARAM_EVALUATION_CORPUS_FIELDS), " "));
    } catch (Exception e) {
      logger.log(Level.SEVERE, e.getMessage());
      throw new ResourceInitializationException(e);
    }

    // Treat an unset flag as "false" instead of storing/unboxing a null.
    Boolean printResultParam = (Boolean) getConfigParameterValue(PARAM_PRINT_RESULT);
    printResult = printResultParam != null && printResultParam;

    String neTagsStr = (String) getConfigParameterValue(PARAM_NAMED_ENTITY_TAGS);
    if (neTagsStr == null) {
      // Name the missing parameter so the failure can be diagnosed from the log/stack trace.
      String problem = "missing required configuration parameter: " + PARAM_NAMED_ENTITY_TAGS;
      logger.log(Level.SEVERE, problem);
      throw new ResourceInitializationException(new IllegalArgumentException(problem));
    }

    labelNone = (String) getConfigParameterValue(PARAM_LABEL_NONE);

    map = new HashMap<String, Integer>();
    int idx = 0;

    // Split on runs of whitespace so doubled/leading/trailing blanks do not create empty tags.
    for (String tag : neTagsStr.trim().split("\\s+")) {
      if (tag.isEmpty()) {
        // Only happens when the whole parameter value is blank.
        continue;
      }
      map.put(tag + "_B", idx++);
      map.put(tag + "_M", idx++);
      map.put(tag + "_E", idx++);
    }

    // The "none" label always takes the last index.
    map.put(labelNone, idx++);
    labelNoneIdx = idx - 1;

    // idx is now the total number of labels: 3 per entity type plus the "none" label.
    stat = new int[idx][3];
    senti = new int[3][3];
    classifiedEntityCnt = new int[idx / 3];
    answerEntityCnt = new int[idx / 3];
  }
  /**
   * Compares the entity labels and the polarity stored in the CAS with the next tweet of the
   * evaluation corpus and updates the running statistics.
   *
   * <p>Per token, {@code stat[label][0]} counts classified labels, {@code stat[label][1]} counts
   * answer labels and {@code stat[label][2]} counts tokens whose classified label equals the answer
   * label. Labels that are not present in the label map are logged and counted as the "none"
   * label. {@code classifiedEntityCnt} / {@code answerEntityCnt} are incremented whenever the
   * entity type ({@code labelIndex / 3}) of a non-"none" token differs from that of the previous
   * token. The {@code senti} table is updated the same way for the tweet polarity, and a textual
   * report of the classified entities is logged at INFO level.
   *
   * <p>NOTE(review): two adjacent entities of the same type are counted as one entity because
   * only the entity type, not the {@code _B} suffix, is used to detect a boundary - confirm this
   * is intended.
   *
   * @param aCAS the CAS holding the analysed tweet
   * @throws ResourceProcessException if the JCas cannot be obtained, the tweet id does not match
   *     the next evaluation tweet, the evaluation tweet has fewer entity tags than the CAS has
   *     tokens, or a {@code CASRuntimeException} occurs
   * @see org.apache.uima.collection.base_cpm.CasObjectProcessor#processCas(org.apache.uima.cas.CAS)
   */
  public void processCas(CAS aCAS) throws ResourceProcessException {
    JCas jcas;
    try {
      jcas = aCAS.getJCas();
    } catch (CASException e) {
      logger.log(Level.SEVERE, e.getMessage());
      throw new ResourceProcessException(e);
    }

    TweetAnnotation tweetAnn =
        (TweetAnnotation) jcas.getAnnotationIndex(TweetAnnotation.type).iterator().next();
    OMTweet answerTweet = evalCorpusReader.next();

    if (!answerTweet.getId().equals(tweetAnn.getId())) {
      String problem =
          "target corpus and evaluation corpus don't match to each other - "
              + answerTweet.getId()
              + ", "
              + tweetAnn.getId();
      logger.log(Level.SEVERE, problem);
      // Carry the diagnosis in the exception as well, not only in the log.
      throw new ResourceProcessException(new IllegalStateException(problem));
    }

    String[] entity = extractEntityTags(answerTweet.getText());

    // Purely local buffer, so the unsynchronized builder is sufficient.
    StringBuilder sb = new StringBuilder();
    try {
      sb.append("\n[");
      sb.append(answerTweet.getPolarityString());
      sb.append("=>");
      sb.append(tweetAnn.getPolarity());
      sb.append("] ");
      sb.append(tweetAnn.getCoveredText());
      sb.append('\n');

      FSIterator<Annotation> tokenAnnIter =
          jcas.getAnnotationIndex(TokenAnnotation.type).iterator();

      int i = 0;
      int prevClassifiedIdx = labelNoneIdx;
      int prevAnswerIdx = labelNoneIdx;
      String prevClassified = null;
      String classifiedEntityStr = "";

      while (tokenAnnIter.hasNext()) {
        TokenAnnotation tokenAnn = (TokenAnnotation) tokenAnnIter.next();

        if (i >= entity.length) {
          // Fail with a descriptive, declared exception instead of a raw index error.
          String problem =
              "evaluation corpus provides only "
                  + entity.length
                  + " entity tags but the tweet has more tokens - tweet id: "
                  + answerTweet.getId();
          logger.log(Level.SEVERE, problem);
          throw new ResourceProcessException(new IllegalStateException(problem));
        }

        String classified = tokenAnn.getEntityLabel();
        String answer = entity[i];
        // Null-safe: a token without a label is handled as an unknown label below.
        boolean correct = classified != null && classified.equals(answer);

        // Unknown labels are reported and counted as the "none" label.
        int answerIdx;
        Integer mappedAnswerIdx = map.get(answer);
        if (mappedAnswerIdx != null) {
          answerIdx = mappedAnswerIdx;
        } else {
          logger.log(
              Level.SEVERE,
              "wrong annotation on the evaluation corpus - tweet id: "
                  + answerTweet.getId()
                  + ", answerTag="
                  + answer);
          answerIdx = labelNoneIdx;
        }

        int classifiedIdx;
        Integer mappedClassifiedIdx = map.get(classified);
        if (mappedClassifiedIdx != null) {
          classifiedIdx = mappedClassifiedIdx;
        } else {
          logger.log(
              Level.SEVERE,
              "wrong annotation from the NER - tweet id: "
                  + answerTweet.getId()
                  + ", classifiedTag="
                  + classified);
          classifiedIdx = labelNoneIdx;
        }

        stat[classifiedIdx][0]++;
        stat[answerIdx][1]++;
        if (correct) {
          stat[classifiedIdx][2]++;
        }

        // Track classified entities: index / 3 is the entity type of a non-"none" label.
        if (classifiedIdx != labelNoneIdx) {
          if (classifiedIdx / 3 != prevClassifiedIdx / 3) {
            classifiedEntityCnt[classifiedIdx / 3]++;
            if (prevClassifiedIdx != labelNoneIdx) {
              // A different entity type starts right after another entity: flush the old one.
              appendClassifiedEntityLine(sb, classifiedEntityStr, prevClassified);
            }
            classifiedEntityStr = tokenAnn.getCoveredText();
          } else {
            classifiedEntityStr += " " + tokenAnn.getCoveredText();
          }
        } else if (prevClassifiedIdx != labelNoneIdx) {
          // The entity ended on the previous token.
          appendClassifiedEntityLine(sb, classifiedEntityStr, prevClassified);
          classifiedEntityStr = "";
        }
        prevClassifiedIdx = classifiedIdx;

        // Count answer entities with the same type-change rule.
        if (answerIdx != labelNoneIdx && answerIdx / 3 != prevAnswerIdx / 3) {
          answerEntityCnt[answerIdx / 3]++;
        }
        prevAnswerIdx = answerIdx;

        prevClassified = classified;
        i++;
      }

      if (prevClassifiedIdx != labelNoneIdx) {
        // The tweet ended inside an entity: flush it.
        appendClassifiedEntityLine(sb, classifiedEntityStr, prevClassified);
      }

      // senti
      String answerSenti = answerTweet.getPolarityString();
      String classifiedSenti = tweetAnn.getPolarity();

      // Compare against the answer polarity (not against the senti counter table).
      boolean correct = classifiedSenti.equals(answerSenti);

      int classifiedIdx = sentiIdx(classifiedSenti);
      int answerIdx = sentiIdx(answerSenti);

      senti[classifiedIdx][0]++;
      senti[answerIdx][1]++;
      if (classifiedIdx == answerIdx) {
        correct = true;
      }

      if (correct) {
        senti[classifiedIdx][2]++;
      }
      cnt++;

      logger.log(Level.INFO, sb.toString());

    } catch (CASRuntimeException e) {
      throw new ResourceProcessException(e);
    }
  }

  /**
   * Appends one report line of the form {@code "\t<entity text> -> <entity type>\n"}, where the
   * entity type is the given label without its trailing {@code _X} position suffix.
   */
  private void appendClassifiedEntityLine(StringBuilder report, String entityText, String label) {
    report.append('\t');
    report.append(entityText);
    report.append(" -> ");
    report.append(label.substring(0, label.lastIndexOf('_')));
    report.append('\n');
  }