Code Example #1
 /**
  * Returns the percentage of compound-noun entries that were found in the dictionary.
  */
 public static double countFoundNouns(AnalysisOutput o) {
   if (o.getCNounList().isEmpty()) return 0; // guard against division by zero
   int count = 0;
   for (int i = 0; i < o.getCNounList().size(); i++) {
     if (o.getCNounList().get(i).isExist()) count++;
   }
   // use floating-point division so the percentage is not truncated to an integer
   return (count * 100.0) / o.getCNounList().size();
 }
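For reference, here is a minimal, self-contained sketch of the same percentage arithmetic. The Entry record is a hypothetical stand-in for the analyzer's CompoundEntry; only the dictionary-lookup flag matters here.

import java.util.List;

public class FoundNounRatioDemo {
  // Hypothetical stand-in for CompoundEntry: only the "found in dictionary" flag is needed.
  record Entry(boolean exists) {}

  // Same arithmetic as countFoundNouns: percentage of entries marked as found.
  static double ratio(List<Entry> entries) {
    if (entries.isEmpty()) return 0;
    int found = 0;
    for (Entry e : entries) {
      if (e.exists()) found++;
    }
    return (found * 100.0) / entries.size();
  }

  public static void main(String[] args) {
    // 2 of 3 entries found -> prints 66.66666666666667
    System.out.println(ratio(List.of(new Entry(true), new Entry(true), new Entry(false))));
  }
}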
Code Example #2
 /**
  * Returns the start offset of the current decompounds entry.
  *
  * @param output morphological analysis output
  * @param index the index of the current decompounds entry
  * @return the start offset of the current decompounds entry
  */
 private int getStartOffset(AnalysisOutput output, int index) {
   int sOffset = 0;
   for (int i = 0; i < index; i++) {
     sOffset += output.getCNounList().get(i).getWord().length();
   }
   return sOffset;
 }
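A small sketch of the offset arithmetic above, assuming a hypothetical decomposition of the compound noun 운동화 into 운동 + 화; the real entries come from AnalysisOutput.getCNounList().

import java.util.List;

public class StartOffsetDemo {
  // Same accumulation as getStartOffset: sum the lengths of the preceding decompound words.
  static int startOffset(List<String> words, int index) {
    int sOffset = 0;
    for (int i = 0; i < index; i++) {
      sOffset += words.get(i).length();
    }
    return sOffset;
  }

  public static void main(String[] args) {
    List<String> parts = List.of("운동", "화"); // hypothetical decomposition of "운동화"
    System.out.println(startOffset(parts, 0)); // 0
    System.out.println(startOffset(parts, 1)); // 2 (length of "운동")
  }
}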
Code Example #3
  private void extractKeyword(
      List<AnalysisOutput> outputs, int startoffset, Map<String, KoreanToken> map, int position) {

    int maxDecompounds = 0;
    int maxStem = 0;

    for (AnalysisOutput output : outputs) {
      if (queryMode
          && hasOrigin
          && output.getScore() == AnalysisOutput.SCORE_ANALYSIS
          && output.getCNounList().size() < 2) break;
      if (output.getPos() == PatternConstants.POS_VERB) continue; // extract keywords from nouns only
      if (!originCNoun && output.getCNounList().size() > 0) continue; // except compound nouns
      int inc = map.size() > 0 ? 0 : 1;
      map.put(
          position + ":" + output.getStem(), new KoreanToken(output.getStem(), startoffset, inc));

      if (output.getStem().length() > maxStem) maxStem = output.getStem().length();
      if (output.getCNounList().size() > maxDecompounds)
        maxDecompounds = output.getCNounList().size();

      // extract the first stem as the keyword for the query processing
      if (queryMode) break;
    }

    if (maxDecompounds > 1) {
      for (int i = 0; i < maxDecompounds; i++) {
        position += i;

        int cPosition = position;
        for (AnalysisOutput output : outputs) {
          if (output.getPos() == PatternConstants.POS_VERB || output.getCNounList().size() <= i)
            continue;

          CompoundEntry cEntry = output.getCNounList().get(i);
          int cStartoffset = getStartOffset(output, i) + startoffset;
          int inc = i == 0 ? 0 : 1;
          map.put(
              (cPosition) + ":" + cEntry.getWord(),
              new KoreanToken(cEntry.getWord(), cStartoffset, inc));

          if (bigrammable && !cEntry.isExist())
            cPosition = addBiagramToMap(cEntry.getWord(), cStartoffset, map, cPosition);

          // extract the words derived from the first stem as the keyword for the query processing
          if (queryMode) break;
        }
      }
    } else {
      for (AnalysisOutput output : outputs) {
        if (output.getPos() == PatternConstants.POS_VERB) continue;

        if (bigrammable && output.getScore() < AnalysisOutput.SCORE_COMPOUNDS)
          addBiagramToMap(output.getStem(), startoffset, map, position);
      }
    }
  }
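To illustrate the map layout that extractKeyword builds, the sketch below replays a hypothetical analysis of 운동화 with a plain Token record standing in for KoreanToken(term, startOffset, positionIncrement): the full stem enters at the current position, the first decompound part shares that position (increment 0), and later parts advance it (increment 1). Keys combine position and term, so decompound parts at later positions do not collide with the full stem.

import java.util.LinkedHashMap;
import java.util.Map;

public class KeywordMapDemo {
  // Hypothetical stand-in for KoreanToken(term, startOffset, positionIncrement).
  record Token(String term, int startOffset, int increment) {}

  public static void main(String[] args) {
    Map<String, Token> map = new LinkedHashMap<>();
    int position = 0;

    // Full stem "운동화" at the current position (first entry in the map, so increment 1).
    map.put(position + ":운동화", new Token("운동화", 0, 1));
    // First decompound part "운동" shares the position (increment 0) and the start offset.
    map.put(position + ":운동", new Token("운동", 0, 0));
    // Second decompound part "화" starts after "운동" (offset 2) and advances the position.
    map.put((position + 1) + ":화", new Token("화", 2, 1));

    map.forEach((key, token) -> System.out.println(key + " -> " + token));
  }
}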