@Override public Vector<Double> calculateScores(JCas aCas) throws ScoringComponentException { // 1) how many words of H (extended with multiple relations) can be // found in T divided by the length of H Vector<Double> scoresVector = new Vector<Double>(); try { JCas tView = aCas.getView("TextView"); HashMap<String, Integer> tBag = countTokens(tView); JCas hView = aCas.getView("HypothesisView"); HashMap<String, Integer> hBag = countTokens(hView); if (null != wnlrSet && wnlrSet.size() != 0) { for (WordnetLexicalResource wnlr : wnlrSet) { scoresVector.add(calculateSingleLexScoreWithWNRelations(tBag, hBag, wnlr)); } } if (null != volrSet && volrSet.size() != 0) { for (VerbOceanLexicalResource volr : volrSet) { scoresVector.add(calculateSingleLexScoreWithVORelations(tBag, hBag, volr)); } } } catch (CASException e) { throw new ScoringComponentException(e.getMessage()); } return scoresVector; }
@Override public Vector<Double> calculateScores(JCas aCas) throws ScoringComponentException { // all the values: (T&H/H), (T&H/T), and ((T&H/H)*(T&H/T)), with four // different matching types Vector<Double> scoresVector = new Vector<Double>(); try { JCas tView = aCas.getView("TextView"); JCas hView = aCas.getView("HypothesisView"); for (int i = 1; i < 5; i++) { HashMap<String, Integer> tBag = countDeps(tView, i); HashMap<String, Integer> hBag = countDeps(hView, i); scoresVector.addAll(calculateSimilarity(tBag, hBag)); } } catch (CASException e) { throw new ScoringComponentException(e.getMessage()); } return scoresVector; }
/**
 * Callback invoked once a CAS has been processed.
 *
 * <p>Does nothing when no status is supplied. Otherwise any exceptions carried
 * by the status are reported on standard error, and then every {@code Token}
 * in the CAS is printed to standard output as "coveredText posValue" (this
 * happens even when the status reported an exception).
 *
 * @param aCas    the processed CAS
 * @param aStatus the processing status; may be {@code null}
 */
public void entityProcessComplete(CAS aCas, EntityProcessStatus aStatus) {
    if (aStatus == null) {
        return;
    }

    if (aStatus.isException()) {
        System.err.println("Error on process CAS call to remote service:");
        List<Exception> failures = aStatus.getExceptions();
        for (Throwable failure : failures) {
            failure.printStackTrace();
        }
    }

    try {
        JCas jcas = aCas.getJCas();
        for (Token tok : JCasUtil.select(jcas, Token.class)) {
            String text = tok.getCoveredText();
            String pos = tok.getPos().getPosValue();
            System.out.println(text + " " + pos);
        }
    } catch (CASException e) {
        e.printStackTrace();
    }
}
/** * CasConsumer would use tags and features to write output file, evaluate and print precision, * recall and F-1 measure. * * @param arg0 * @throws ResourceProcessException */ @Override public void processCas(CAS arg0) throws ResourceProcessException { /** convert type of arg0 */ JCas jcas = null; try { jcas = arg0.getJCas(); } catch (CASException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } // TODO Auto-generated method stub FSIterator<Annotation> ite = jcas.getAnnotationIndex(WordTag.type).iterator(); while (ite.hasNext()) { /** collect features */ String id = ((WordTag) ite.get()).getId(); int begin = ((WordTag) ite.get()).getBegin0(); int end = ((WordTag) ite.get()).getEnd0(); String name = ((WordTag) ite.get()).getName(); /** organize string for output */ report.append(id); report.append("|"); report.append(begin); report.append(" "); report.append(end); report.append("|"); report.append(name); report.append("\n"); /** count the length of output string */ count++; ite.next(); } result = report.toString(); File sampleOut = new File("src/main/resources/data/sample.out"); try { testRecall = FileUtils.file2String(sampleOut); } catch (IOException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } /** split strings from file into sentences */ String[] resultSplit = result.split("\n"); String[] recallSplit = testRecall.split("\n"); PrecisionRecallCalculator(recallSplit, resultSplit); /** write the output file to the project root */ String path = "hw1-longh.out"; File dirFile = new File(path); /** make sure no conflict */ if (dirFile.exists()) { dirFile.delete(); } try { /** write file */ BufferedWriter bw1 = new BufferedWriter(new FileWriter(path, true)); bw1.write(report.toString()); bw1.flush(); bw1.close(); } catch (IOException e) { e.printStackTrace(); } }
/* (non-Javadoc) * @see org.apache.uima.collection.base_cpm.CasObjectProcessor#processCas(org.apache.uima.cas.CAS) */ public void processCas(CAS aCAS) throws ResourceProcessException { JCas jcas; try { jcas = aCAS.getJCas(); } catch (CASException e) { logger.log(Level.SEVERE, e.getMessage()); throw new ResourceProcessException(e); } TweetAnnotation tweetAnn = (TweetAnnotation) jcas.getAnnotationIndex(TweetAnnotation.type).iterator().next(); OMTweet answerTweet = evalCorpusReader.next(); if (!answerTweet.getId().equals(tweetAnn.getId())) { logger.log( Level.SEVERE, "target corpus and evaluation corpus don't match to each other - " + answerTweet.getId() + ", " + tweetAnn.getId()); throw new ResourceProcessException(); } String[] entity = extractEntityTags(answerTweet.getText()); String classified = null; String prevClassified = null; StringBuffer sb = new StringBuffer(); try { sb.append("\n["); sb.append(answerTweet.getPolarityString()); sb.append("=>"); sb.append(tweetAnn.getPolarity()); sb.append("] "); sb.append(tweetAnn.getCoveredText()); sb.append('\n'); FSIterator<Annotation> tokenAnnIter = jcas.getAnnotationIndex(TokenAnnotation.type).iterator(); TokenAnnotation tokenAnn = null; int i = 0; int prevClassifiedIdx = labelNoneIdx; int prevAnswerIdx = labelNoneIdx; String classifiedEntityStr = ""; String answerEntityStr = ""; while (tokenAnnIter.hasNext()) { tokenAnn = (TokenAnnotation) tokenAnnIter.next(); classified = tokenAnn.getEntityLabel(); String answer = entity[i]; boolean correct = false; if (classified.equals(answer)) { correct = true; } int classifiedIdx = 0; int answerIdx = 0; try { answerIdx = map.get(answer); } catch (Exception e) { logger.log( Level.SEVERE, "wrong annotation on the evaluation corpus - tweet id: " + answerTweet.getId() + ", answerTag=" + answer); logger.log(Level.SEVERE, e.getMessage()); answerIdx = map.get(labelNone); } try { classifiedIdx = map.get(classified); } catch (Exception e) { logger.log( Level.SEVERE, "wrong annotation from 
the NER - tweet id: " + answerTweet.getId() + ", classifiedTag=" + classified); logger.log(Level.SEVERE, e.getMessage()); classifiedIdx = map.get(labelNone); } stat[classifiedIdx][0]++; stat[answerIdx][1]++; if (correct) { stat[classifiedIdx][2]++; } if (classifiedIdx != labelNoneIdx) { if (classifiedIdx / 3 != prevClassifiedIdx / 3) { classifiedEntityCnt[classifiedIdx / 3]++; if (prevClassifiedIdx != labelNoneIdx) { sb.append('\t'); sb.append(classifiedEntityStr); sb.append(" -> "); sb.append(prevClassified.substring(0, prevClassified.lastIndexOf('_'))); sb.append('\n'); } classifiedEntityStr = tokenAnn.getCoveredText(); } else { classifiedEntityStr += " " + tokenAnn.getCoveredText(); } } else if (prevClassifiedIdx != labelNoneIdx) { sb.append('\t'); sb.append(classifiedEntityStr); sb.append(" -> "); sb.append(prevClassified.substring(0, prevClassified.lastIndexOf('_'))); sb.append('\n'); classifiedEntityStr = ""; } prevClassifiedIdx = classifiedIdx; if (answerIdx != labelNoneIdx) { if (answerIdx / 3 != prevAnswerIdx / 3) { answerEntityCnt[answerIdx / 3]++; answerEntityStr = tokenAnn.getCoveredText(); } else { answerEntityStr += " " + tokenAnn.getCoveredText(); } } else if (prevAnswerIdx != labelNoneIdx) { answerEntityStr = ""; } prevAnswerIdx = answerIdx; prevClassified = classified; i++; } if (prevClassifiedIdx != labelNoneIdx) { sb.append('\t'); sb.append(classifiedEntityStr); sb.append(" -> "); sb.append(prevClassified.substring(0, prevClassified.lastIndexOf('_'))); sb.append('\n'); } // senti String answerSenti = answerTweet.getPolarityString(); boolean correct = false; String classifiedSenti = tweetAnn.getPolarity(); if (classifiedSenti.equals(senti)) { correct = true; } int classifiedIdx = sentiIdx(classifiedSenti); int answerIdx = sentiIdx(answerSenti); senti[classifiedIdx][0]++; senti[answerIdx][1]++; if (classifiedIdx == answerIdx) { correct = true; } if (correct) { senti[classifiedIdx][2]++; } cnt++; logger.log(Level.INFO, sb.toString()); } catch 
(CASRuntimeException e) { throw new ResourceProcessException(e); } }