/** Find NGrams that belongs to the question and returns a list of NGram objects. */ public ArrayList<NGram> getQstNGramList(JCas aJCas) throws RuntimeException { FSIndex qstIndex = aJCas.getAnnotationIndex(Question.type); Iterator qstIter = qstIndex.iterator(); Boolean flag = true; ArrayList<NGram> qstNGramList = new ArrayList<NGram>(); // while (qstIter.hasNext()) { if (!flag) { throw new RuntimeException("multiple questions in a file"); } flag = !flag; Question qst = (Question) qstIter.next(); int qstBegin = qst.getBegin(); int qstEnd = qst.getEnd(); FSIndex nGramIndex = aJCas.getAnnotationIndex(NGram.type); Iterator nGramIter = nGramIndex.iterator(); while (nGramIter.hasNext()) { NGram ngram = (NGram) nGramIter.next(); int begin = ngram.getBegin(); int end = ngram.getEnd(); if (begin >= qstBegin && end <= qstEnd) { qstNGramList.add(ngram); } } } return qstNGramList; }
/** Find NGrams that belongs to the answer and returns a list of NGram objects. */ public ArrayList[] getAswNGramList(JCas aJCas) { FSIndex aswIndex = aJCas.getAnnotationIndex(Answer.type); Iterator aswIter = aswIndex.iterator(); ArrayList nGramAndAns[] = new ArrayList[2]; ArrayList<ArrayList<NGram>> aswNGramList = new ArrayList<ArrayList<NGram>>(); // ArrayList<Answer> aswList = new ArrayList<Answer>(); int i = 0; while (aswIter.hasNext()) { Answer asw = (Answer) aswIter.next(); // System.out.println("answer:"+asw.getCoveredText()+" i:"+(i++)); ArrayList<NGram> nGramList = new ArrayList<NGram>(); int aswBegin = asw.getBegin(); int aswEnd = asw.getEnd(); FSIndex nGramIndex = aJCas.getAnnotationIndex(NGram.type); Iterator nGramIter = nGramIndex.iterator(); while (nGramIter.hasNext()) { NGram ngram = (NGram) nGramIter.next(); int begin = ngram.getBegin(); int end = ngram.getEnd(); if (begin >= aswBegin && end <= aswEnd) { nGramList.add(ngram); } } aswList.add(asw); aswNGramList.add(nGramList); } nGramAndAns[0] = aswNGramList; nGramAndAns[1] = aswList; return nGramAndAns; }
/** * Get all answers and their corresponding NGrams. Calculate precision and assign scores with the * precision value */ public void process(JCas aJCas) throws AnalysisEngineProcessException { // TODO Auto-generated method stub ArrayList<NGram> qstNGramList = getQstNGramList(aJCas); ArrayList[] nGramAndAnswer = getAswNGramList(aJCas); ArrayList<ArrayList<NGram>> aswNGramList = nGramAndAnswer[0]; ArrayList<Answer> ansList = nGramAndAnswer[1]; // iterate and find match number for (int i = 0; i < aswNGramList.size(); i++) { // every answer sentence compare with qst ArrayList<NGram> nGramList = aswNGramList.get(i); Answer asw = ansList.get(i); double matchNum = 0; for (NGram aswNGram : nGramList) { for (NGram qstNGram : qstNGramList) { String aswStr = aswNGram.getCoveredText(); String qst = qstNGram.getCoveredText(); if (aswStr.equals(qst)) matchNum++; } } int begin = asw.getBegin(); int end = asw.getEnd(); double gramNum = (double) nGramList.size(); double score = matchNum / gramNum; AnswerScore aswScore = new AnswerScore(aJCas, begin, end); aswScore.setAnswer(asw); aswScore.setScore(score); aswScore.addToIndexes(); } // System.out.println("ok"); // evaluateScore(aJCas); printNamedEntity(aJCas); /** ************************************************ */ /* FSIndex tokenIndex = aJCas.getAnnotationIndex(Token.type); Iterator tokenIter = tokenIndex.iterator(); while(tokenIter.hasNext()){ Token tkn =(Token) tokenIter.next(); System.out.println(tkn.getCoveredText()); } System.out.println("xxxxx"); /***************************************************/ }
/** calculate score using NGram Overlap Method @Override */ public double calculateScore(Question question, Answer answer, JCas aJCas) { // TODO Auto-generated method stub double score = 0; Sentence questionSentence = question.getSentence(); Sentence answerSentence = (Sentence) answer.getSentence(); FSArray questionNgramArray = questionSentence.getNGramArray(); FSArray answerNgramArray = answerSentence.getNGramArray(); int overlapCount = 0; int totalWeight = 0; for (int i = 0; i < questionNgramArray.size(); i++) { boolean isFound = false; totalWeight = 0; for (int j = 0; j < answerNgramArray.size(); j++) { NGram questionToken = (NGram) questionNgramArray.get(i); NGram answerToken = (NGram) answerNgramArray.get(j); if (questionToken.getCoveredText().equals(answerToken.getCoveredText()) && !isFound) { // overlapCount++; overlapCount += answerToken.getElements().size(); isFound = true; // break; } totalWeight += answerToken.getElements().size(); } } // score = (float) overlapCount / (float) answerNgramArray.size(); score = (float) overlapCount / (float) totalWeight; return score; }
/**
 * Creates trigram NGram annotations from the tokens found under an annotation.
 *
 * <p>Walks the Token sub-iterator of {@code annotation} with a three-token sliding window. Each
 * time the two trailing tokens pass {@code tokenIsInAnnotation}, an NGram spanning the window is
 * created with the three tokens as its elements and added to the CAS indexes.
 *
 * @param annotation the annotation whose tokens are combined into trigrams
 * @param aJCas the CAS view that supplies the tokens and receives the new NGram annotations
 */
public void extractNgramsFromAnnotation(Annotation annotation, JCas aJCas) {
  AnnotationIndex<Annotation> tokenIndex = aJCas.getAnnotationIndex(Token.type);

  // Sliding window over the last three tokens; the older slots stay null until filled.
  Annotation oldest = null;
  Annotation middle = null;
  Annotation newest = null;

  for (Iterator<Annotation> cursor = tokenIndex.subiterator(annotation); cursor.hasNext(); ) {
    oldest = middle;
    middle = newest;
    newest = cursor.next();

    // During the first two steps the trailing slots are still null; the helper is expected
    // to reject those - TODO confirm against tokenIsInAnnotation.
    if (!tokenIsInAnnotation(annotation, middle) || !tokenIsInAnnotation(annotation, oldest)) {
      continue;
    }

    NGram trigram = new NGram(aJCas);
    FSArray members = new FSArray(aJCas, 3);
    members.set(0, oldest);
    members.set(1, middle);
    members.set(2, newest);

    // The trigram covers everything from the first token's start to the last token's end.
    trigram.setBegin(oldest.getBegin());
    trigram.setEnd(newest.getEnd());
    trigram.setElements(members);
    trigram.setElementType("edu.cmu.deiis.types.Token");
    trigram.setConfidence(1D);
    trigram.setCasProcessorId(PROCESSOR_ID);
    trigram.addToIndexes();
  }
}