@Override
  public Vector<Double> calculateScores(JCas aCas) throws ScoringComponentException {
    // 1) the number of words of H (extended with multiple relations) that can be
    // found in T, divided by the length of H
    Vector<Double> scoresVector = new Vector<Double>();

    try {
      JCas tView = aCas.getView("TextView");
      HashMap<String, Integer> tBag = countTokens(tView);

      JCas hView = aCas.getView("HypothesisView");
      HashMap<String, Integer> hBag = countTokens(hView);

      if (null != wnlrSet && wnlrSet.size() != 0) {
        for (WordnetLexicalResource wnlr : wnlrSet) {
          scoresVector.add(calculateSingleLexScoreWithWNRelations(tBag, hBag, wnlr));
        }
      }
      if (null != volrSet && volrSet.size() != 0) {
        for (VerbOceanLexicalResource volr : volrSet) {
          scoresVector.add(calculateSingleLexScoreWithVORelations(tBag, hBag, volr));
        }
      }
    } catch (CASException e) {
      throw new ScoringComponentException(e.getMessage());
    }
    return scoresVector;
  }
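The countTokens helper is referenced above but not shown. A minimal sketch of what it might look like, assuming it builds a word-frequency bag from the Token annotations of the given view (the Token type and uimaFIT's JCasUtil are assumptions, not part of the original example):

  // Hypothetical helper, not part of the original example: counts how often each
  // token surface form occurs in the given view.
  private HashMap<String, Integer> countTokens(JCas view) {
    HashMap<String, Integer> bag = new HashMap<String, Integer>();
    for (Token token : JCasUtil.select(view, Token.class)) {
      String word = token.getCoveredText();
      Integer count = bag.get(word);
      bag.put(word, count == null ? 1 : count + 1);
    }
    return bag;
  }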
Code example #2
  @Override
  public Vector<Double> calculateScores(JCas aCas) throws ScoringComponentException {
    // all the values: (T&H/H), (T&H/T), and ((T&H/H)*(T&H/T)), with four
    // different matching types
    Vector<Double> scoresVector = new Vector<Double>();

    try {
      JCas tView = aCas.getView("TextView");
      JCas hView = aCas.getView("HypothesisView");

      for (int i = 1; i < 5; i++) {
        HashMap<String, Integer> tBag = countDeps(tView, i);
        HashMap<String, Integer> hBag = countDeps(hView, i);
        scoresVector.addAll(calculateSimilarity(tBag, hBag));
      }

    } catch (CASException e) {
      throw new ScoringComponentException(e.getMessage());
    }
    return scoresVector;
  }
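The calculateSimilarity helper is likewise not shown. A minimal sketch, assuming it computes the three values named in the comment, (T&H)/H, (T&H)/T and their product, from the two dependency bags (the counting scheme here is an assumption):

  // Hypothetical helper, not part of the original example: measures the overlap
  // between the text bag (tBag) and the hypothesis bag (hBag).
  private Vector<Double> calculateSimilarity(HashMap<String, Integer> tBag,
      HashMap<String, Integer> hBag) {
    double tSize = 0d;
    double hSize = 0d;
    double overlap = 0d;
    for (Integer freq : tBag.values()) {
      tSize += freq;
    }
    for (Map.Entry<String, Integer> entry : hBag.entrySet()) {
      hSize += entry.getValue();
      Integer inT = tBag.get(entry.getKey());
      if (inT != null) {
        overlap += Math.min(entry.getValue(), inT);
      }
    }
    Vector<Double> scores = new Vector<Double>();
    scores.add(hSize == 0d ? 0d : overlap / hSize); // (T&H)/H
    scores.add(tSize == 0d ? 0d : overlap / tSize); // (T&H)/T
    scores.add(scores.get(0) * scores.get(1));      // (T&H/H)*(T&H/T)
    return scores;
  }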
Code example #3
	public void entityProcessComplete(CAS aCas, EntityProcessStatus aStatus) {
		if (aStatus != null) {
			if (aStatus.isException()) {
				System.err.println("Error on process CAS call to remote service:");
				List<Exception> exceptions = aStatus.getExceptions();
				for (Exception exception : exceptions) {
					exception.printStackTrace();
				}
			}
			
			try {
				JCas cas = aCas.getJCas();

				for(Token token : JCasUtil.select(cas, Token.class)) {
					System.out.println(token.getCoveredText() + " " + token.getPos().getPosValue());
				}

			} catch (CASException e) {
				e.printStackTrace();
			}
		}
	}
  /**
   * This CasConsumer uses the annotated tags and features to write the output file and to
   * evaluate and print precision, recall and the F1 measure.
   *
   * @param arg0 the CAS to process
   * @throws ResourceProcessException
   */
  @Override
  public void processCas(CAS arg0) throws ResourceProcessException {
    /** convert arg0 to a JCas */
    JCas jcas;
    try {
      jcas = arg0.getJCas();
    } catch (CASException e1) {
      // rethrow rather than continue with a null JCas
      throw new ResourceProcessException(e1);
    }
    FSIterator<Annotation> ite = jcas.getAnnotationIndex(WordTag.type).iterator();

    while (ite.hasNext()) {
      WordTag wordTag = (WordTag) ite.next();

      /** collect features */
      String id = wordTag.getId();
      int begin = wordTag.getBegin0();
      int end = wordTag.getEnd0();
      String name = wordTag.getName();

      /** organize string for output */
      report.append(id);
      report.append("|");
      report.append(begin);
      report.append(" ");
      report.append(end);
      report.append("|");
      report.append(name);
      report.append("\n");

      /** count the number of output lines */
      count++;
    }

    result = report.toString();
    File sampleOut = new File("src/main/resources/data/sample.out");
    try {
      testRecall = FileUtils.file2String(sampleOut);
    } catch (IOException e1) {
      // rethrow rather than continue with a null gold-standard string
      throw new ResourceProcessException(e1);
    }

    /** split strings from file into sentences */
    String[] resultSplit = result.split("\n");
    String[] recallSplit = testRecall.split("\n");
    PrecisionRecallCalculator(recallSplit, resultSplit);

    /** write the output file to the project root */
    String path = "hw1-longh.out";
    File dirFile = new File(path);

    /** delete any existing output file so the appending writer starts from a clean file */
    if (dirFile.exists()) {
      dirFile.delete();
    }

    try (BufferedWriter bw1 = new BufferedWriter(new FileWriter(path, true))) {
      /** write file */
      bw1.write(report.toString());
      bw1.flush();
    } catch (IOException e) {
      e.printStackTrace();
    }
  }
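PrecisionRecallCalculator is only called in this example, not defined. A minimal sketch of what such a helper might do, assuming each output line is treated as one prediction and compared against the gold-standard lines read from sample.out (the set-based matching is an assumption):

  // Hypothetical helper, not part of the original example: compares system output
  // lines against gold lines and prints precision, recall and F1.
  private void PrecisionRecallCalculator(String[] goldLines, String[] systemLines) {
    HashSet<String> gold = new HashSet<String>(Arrays.asList(goldLines));
    int truePositives = 0;
    for (String line : systemLines) {
      if (gold.contains(line)) {
        truePositives++;
      }
    }
    double precision = systemLines.length == 0 ? 0d : (double) truePositives / systemLines.length;
    double recall = goldLines.length == 0 ? 0d : (double) truePositives / goldLines.length;
    double f1 = precision + recall == 0d ? 0d : 2 * precision * recall / (precision + recall);
    System.out.println("Precision: " + precision + " Recall: " + recall + " F1: " + f1);
  }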
Code example #5
  /* (non-Javadoc)
   * @see org.apache.uima.collection.base_cpm.CasObjectProcessor#processCas(org.apache.uima.cas.CAS)
   */
  public void processCas(CAS aCAS) throws ResourceProcessException {
    JCas jcas;
    try {
      jcas = aCAS.getJCas();
    } catch (CASException e) {
      logger.log(Level.SEVERE, e.getMessage());
      throw new ResourceProcessException(e);
    }

    TweetAnnotation tweetAnn =
        (TweetAnnotation) jcas.getAnnotationIndex(TweetAnnotation.type).iterator().next();
    OMTweet answerTweet = evalCorpusReader.next();

    if (!answerTweet.getId().equals(tweetAnn.getId())) {
      logger.log(
          Level.SEVERE,
          "target corpus and evaluation corpus don't match to each other - "
              + answerTweet.getId()
              + ", "
              + tweetAnn.getId());
      throw new ResourceProcessException();
    }

    String[] entity = extractEntityTags(answerTweet.getText());

    String classified = null;
    String prevClassified = null;
    StringBuffer sb = new StringBuffer();
    try {
      sb.append("\n[");
      sb.append(answerTweet.getPolarityString());
      sb.append("=>");
      sb.append(tweetAnn.getPolarity());
      sb.append("] ");
      sb.append(tweetAnn.getCoveredText());
      sb.append('\n');

      FSIterator<Annotation> tokenAnnIter =
          jcas.getAnnotationIndex(TokenAnnotation.type).iterator();
      TokenAnnotation tokenAnn = null;

      int i = 0;
      int prevClassifiedIdx = labelNoneIdx;
      int prevAnswerIdx = labelNoneIdx;
      String classifiedEntityStr = "";
      String answerEntityStr = "";

      while (tokenAnnIter.hasNext()) {
        tokenAnn = (TokenAnnotation) tokenAnnIter.next();

        classified = tokenAnn.getEntityLabel();
        String answer = entity[i];
        boolean correct = false;
        if (classified.equals(answer)) {
          correct = true;
        }

        int classifiedIdx = 0;
        int answerIdx = 0;
        try {
          answerIdx = map.get(answer);
        } catch (Exception e) {
          logger.log(
              Level.SEVERE,
              "wrong annotation on the evaluation corpus - tweet id: "
                  + answerTweet.getId()
                  + ", answerTag="
                  + answer);
          logger.log(Level.SEVERE, e.getMessage());
          answerIdx = map.get(labelNone);
        }
        try {
          classifiedIdx = map.get(classified);
        } catch (Exception e) {
          logger.log(
              Level.SEVERE,
              "wrong annotation from the NER - tweet id: "
                  + answerTweet.getId()
                  + ", classifiedTag="
                  + classified);
          logger.log(Level.SEVERE, e.getMessage());
          classifiedIdx = map.get(labelNone);
        }

        stat[classifiedIdx][0]++;
        stat[answerIdx][1]++;

        if (correct) {
          stat[classifiedIdx][2]++;
        }

        if (classifiedIdx != labelNoneIdx) {
          if (classifiedIdx / 3 != prevClassifiedIdx / 3) {
            classifiedEntityCnt[classifiedIdx / 3]++;
            if (prevClassifiedIdx != labelNoneIdx) {
              sb.append('\t');
              sb.append(classifiedEntityStr);
              sb.append(" -> ");
              sb.append(prevClassified.substring(0, prevClassified.lastIndexOf('_')));
              sb.append('\n');
            }
            classifiedEntityStr = tokenAnn.getCoveredText();
          } else {
            classifiedEntityStr += " " + tokenAnn.getCoveredText();
          }
        } else if (prevClassifiedIdx != labelNoneIdx) {
          sb.append('\t');
          sb.append(classifiedEntityStr);
          sb.append(" -> ");
          sb.append(prevClassified.substring(0, prevClassified.lastIndexOf('_')));
          sb.append('\n');
          classifiedEntityStr = "";
        }
        prevClassifiedIdx = classifiedIdx;

        if (answerIdx != labelNoneIdx) {
          if (answerIdx / 3 != prevAnswerIdx / 3) {
            answerEntityCnt[answerIdx / 3]++;
            answerEntityStr = tokenAnn.getCoveredText();
          } else {
            answerEntityStr += " " + tokenAnn.getCoveredText();
          }
        } else if (prevAnswerIdx != labelNoneIdx) {
          answerEntityStr = "";
        }

        prevAnswerIdx = answerIdx;
        prevClassified = classified;
        i++;
      }
      if (prevClassifiedIdx != labelNoneIdx) {
        sb.append('\t');
        sb.append(classifiedEntityStr);
        sb.append(" -> ");
        sb.append(prevClassified.substring(0, prevClassified.lastIndexOf('_')));
        sb.append('\n');
      }

      // senti
      String answerSenti = answerTweet.getPolarityString();

      boolean correct = false;
      String classifiedSenti = tweetAnn.getPolarity();
      if (classifiedSenti.equals(answerSenti)) {
        correct = true;
      }

      int classifiedIdx = sentiIdx(classifiedSenti);
      int answerIdx = sentiIdx(answerSenti);

      senti[classifiedIdx][0]++;
      senti[answerIdx][1]++;
      if (classifiedIdx == answerIdx) {
        correct = true;
      }

      if (correct) {
        senti[classifiedIdx][2]++;
      }
      cnt++;

      logger.log(Level.INFO, sb.toString());

    } catch (CASRuntimeException e) {
      throw new ResourceProcessException(e);
    }
  }
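The sentiIdx helper and the polarity label strings are not shown in this example. A minimal sketch under the assumption that the polarities are the strings "positive", "negative" and "neutral" and that they index the rows of the senti[][] statistics table:

  // Hypothetical helper, not part of the original example: maps a polarity string
  // to a row index of the senti[][] table. The label strings are assumptions.
  private int sentiIdx(String polarity) {
    if ("positive".equals(polarity)) {
      return 0;
    }
    if ("negative".equals(polarity)) {
      return 1;
    }
    return 2; // neutral or anything unrecognized
  }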