/**
 * Collects every NGram annotation whose span lies inside the Question annotation.
 *
 * <p>At most one Question is tolerated: as soon as the Question index yields a second entry the
 * method aborts. When the CAS holds no Question at all, the returned list is empty.
 *
 * @param aJCas the CAS whose Question and NGram annotation indexes are read
 * @return the NGrams satisfying {@code begin >= question.begin && end <= question.end}, in the
 *     order the NGram index iterator yields them
 * @throws RuntimeException if more than one Question annotation is present
 */
 public ArrayList<NGram> getQstNGramList(JCas aJCas) throws RuntimeException {
   ArrayList<NGram> covered = new ArrayList<NGram>();
   boolean questionSeen = false;
   for (Iterator questions = aJCas.getAnnotationIndex(Question.type).iterator();
       questions.hasNext(); ) {
     // A second question is rejected before it is even fetched from the iterator.
     if (questionSeen) {
       throw new RuntimeException("multiple questions in a file");
     }
     questionSeen = true;

     Question question = (Question) questions.next();
     int lowerBound = question.getBegin();
     int upperBound = question.getEnd();

     Iterator candidates = aJCas.getAnnotationIndex(NGram.type).iterator();
     while (candidates.hasNext()) {
       NGram candidate = (NGram) candidates.next();
       // Skip n-grams that stick out of the question span on either side.
       if (candidate.getBegin() < lowerBound || candidate.getEnd() > upperBound) {
         continue;
       }
       covered.add(candidate);
     }
   }
   return covered;
 }
  /**
   * Groups the NGram annotations by the Answer annotation whose span contains them.
   *
   * <p>The result is a raw two-slot array holding two parallel lists:
   *
   * <ul>
   *   <li>slot 0: an {@code ArrayList<ArrayList<NGram>>}; entry {@code k} holds the NGrams with
   *       {@code begin >= answer.begin && end <= answer.end} for the k-th answer;
   *   <li>slot 1: an {@code ArrayList<Answer>} with the answers themselves, in the same order.
   * </ul>
   *
   * @param aJCas the CAS whose Answer and NGram annotation indexes are read
   * @return the two parallel lists described above; both are empty when there is no Answer
   */
  public ArrayList[] getAswNGramList(JCas aJCas) {
    ArrayList<ArrayList<NGram>> nGramsPerAnswer = new ArrayList<ArrayList<NGram>>();
    ArrayList<Answer> answers = new ArrayList<Answer>();

    for (Iterator answerIt = aJCas.getAnnotationIndex(Answer.type).iterator();
        answerIt.hasNext(); ) {
      Answer answer = (Answer) answerIt.next();
      int lowerBound = answer.getBegin();
      int upperBound = answer.getEnd();

      ArrayList<NGram> contained = new ArrayList<NGram>();
      for (Iterator nGramIt = aJCas.getAnnotationIndex(NGram.type).iterator();
          nGramIt.hasNext(); ) {
        NGram candidate = (NGram) nGramIt.next();
        boolean outside = candidate.getBegin() < lowerBound || candidate.getEnd() > upperBound;
        if (!outside) {
          contained.add(candidate);
        }
      }

      // Both lists grow in lock-step so that index k refers to the same answer in each.
      answers.add(answer);
      nGramsPerAnswer.add(contained);
    }

    return new ArrayList[] {nGramsPerAnswer, answers};
  }
  /**
   * Scores every answer by its n-gram overlap with the question and indexes the result.
   *
   * <p>For each answer, every (answer n-gram, question n-gram) pair with equal covered text counts
   * as one match. The score is the match count divided by the number of n-grams in the answer. An
   * {@code AnswerScore} annotation spanning the answer, linked to it and carrying the score, is
   * added to the CAS indexes. An answer that contains no n-grams receives a score of 0 instead of
   * the NaN that {@code 0.0 / 0.0} would produce.
   *
   * <p>After scoring, {@code printNamedEntity(aJCas)} is invoked.
   *
   * @param aJCas the CAS holding the Question, Answer and NGram annotations
   * @throws AnalysisEngineProcessException declared by the signature; this body does not throw it
   *     directly
   * @throws RuntimeException propagated from {@code getQstNGramList} when the CAS holds more than
   *     one question
   */
  public void process(JCas aJCas) throws AnalysisEngineProcessException {
    ArrayList<NGram> qstNGramList = getQstNGramList(aJCas);

    // getAswNGramList returns raw parallel lists: [0] n-grams per answer, [1] the answers.
    ArrayList[] nGramAndAnswer = getAswNGramList(aJCas);
    @SuppressWarnings("unchecked")
    ArrayList<ArrayList<NGram>> aswNGramList = nGramAndAnswer[0];
    @SuppressWarnings("unchecked")
    ArrayList<Answer> ansList = nGramAndAnswer[1];

    // The question texts are the same for every answer, so read them from the CAS only once.
    ArrayList<String> qstTexts = new ArrayList<String>(qstNGramList.size());
    for (NGram qstNGram : qstNGramList) {
      qstTexts.add(qstNGram.getCoveredText());
    }

    for (int i = 0; i < aswNGramList.size(); i++) {
      ArrayList<NGram> nGramList = aswNGramList.get(i);
      Answer asw = ansList.get(i);

      // Count matching pairs; a text repeated in the question is counted once per occurrence.
      int matchNum = 0;
      for (NGram aswNGram : nGramList) {
        String aswStr = aswNGram.getCoveredText();
        for (String qstStr : qstTexts) {
          if (aswStr.equals(qstStr)) {
            matchNum++;
          }
        }
      }

      // Guard the division: an answer without n-grams would otherwise yield NaN.
      double score = nGramList.isEmpty() ? 0.0 : (double) matchNum / nGramList.size();

      AnswerScore aswScore = new AnswerScore(aJCas, asw.getBegin(), asw.getEnd());
      aswScore.setAnswer(asw);
      aswScore.setScore(score);
      aswScore.addToIndexes();
    }

    printNamedEntity(aJCas);
  }
  /**
   * Calculates an answer's score with the weighted n-gram overlap method.
   *
   * <p>The weight of an n-gram is the size of its element array. Each question n-gram contributes
   * the weight of the first answer n-gram with identical covered text, if any. The sum of those
   * contributions is divided by the total weight of all answer n-grams.
   *
   * <p>When the total answer weight is zero (for example, the answer sentence has no n-grams) the
   * score is 0 rather than NaN. An empty question n-gram array likewise yields 0.
   *
   * <p>NOTE(review): the previous comment carried a stray {@code @Override} tag; confirm whether
   * this method implements an interface method and annotate it accordingly.
   *
   * @param question the question whose sentence supplies the reference n-grams
   * @param answer the answer whose sentence supplies the candidate n-grams
   * @param aJCas not used by this implementation
   * @return the overlap weight divided by the total answer weight, or 0 when that total is zero
   */
  public double calculateScore(Question question, Answer answer, JCas aJCas) {
    Sentence questionSentence = question.getSentence();
    Sentence answerSentence = (Sentence) answer.getSentence();
    FSArray questionNgramArray = questionSentence.getNGramArray();
    FSArray answerNgramArray = answerSentence.getNGramArray();

    // The denominator depends only on the answer, so compute it once up front instead of
    // resetting and re-accumulating it for every question n-gram.
    int totalWeight = 0;
    for (int j = 0; j < answerNgramArray.size(); j++) {
      NGram answerNgram = (NGram) answerNgramArray.get(j);
      totalWeight += answerNgram.getElements().size();
    }

    int overlapCount = 0;
    for (int i = 0; i < questionNgramArray.size(); i++) {
      String questionText = ((NGram) questionNgramArray.get(i)).getCoveredText();
      for (int j = 0; j < answerNgramArray.size(); j++) {
        NGram answerNgram = (NGram) answerNgramArray.get(j);
        if (questionText.equals(answerNgram.getCoveredText())) {
          overlapCount += answerNgram.getElements().size();
          break; // only the first matching answer n-gram counts for this question n-gram
        }
      }
    }

    // Without this guard 0f / 0f would hand NaN back to the caller.
    if (totalWeight == 0) {
      return 0;
    }
    // Float division is kept on purpose so that existing scores stay bit-for-bit identical.
    return (float) overlapCount / (float) totalWeight;
  }
示例#5
0
  /**
   * Emits one trigram NGram annotation per window of three successive Tokens of an annotation.
   *
   * <p>The Tokens are taken from the Token index's subiterator for {@code annotation}. A window is
   * turned into an NGram only when {@code tokenIsInAnnotation} accepts both of its two older
   * tokens. NOTE(review): presumably that helper returns false for {@code null}, which is what
   * keeps the first two iterations from emitting anything; confirm against the helper.
   *
   * <p>Each NGram spans from the begin of the oldest token to the end of the newest one, holds the
   * three tokens as its elements, and is added to the CAS indexes.
   *
   * @param annotation the annotation whose tokens are combined into trigrams
   * @param aJCas the CAS that supplies the tokens and receives the new NGram annotations
   */
  public void extractNgramsFromAnnotation(Annotation annotation, JCas aJCas) {
    AnnotationIndex<Annotation> tokenIndex = aJCas.getAnnotationIndex(Token.type);

    // Sliding window over the last three tokens seen; a slot stays null until it is filled.
    Annotation first = null;
    Annotation second = null;
    Annotation third = null;

    for (Iterator<Annotation> covered = tokenIndex.subiterator(annotation); covered.hasNext(); ) {
      first = second;
      second = third;
      third = covered.next();

      boolean windowIncomplete =
          !tokenIsInAnnotation(annotation, second) || !tokenIsInAnnotation(annotation, first);
      if (windowIncomplete) {
        continue;
      }

      NGram trigram = new NGram(aJCas);
      FSArray members = new FSArray(aJCas, 3);

      trigram.setBegin(first.getBegin());
      trigram.setEnd(third.getEnd());

      members.set(0, first);
      members.set(1, second);
      members.set(2, third);
      trigram.setElements(members);
      trigram.setElementType("edu.cmu.deiis.types.Token");

      trigram.setConfidence(1.0);
      trigram.setCasProcessorId(PROCESSOR_ID);

      // Index last, once begin/end and every feature have been set.
      trigram.addToIndexes();
    }
  }