/**
 * Returns the percentage (0 to 100) of entries in the compound noun list of {@code o} whose
 * {@code isExist()} flag is set.
 *
 * <p>The ratio is computed in floating point, so partial percentages such as 33.33 are kept
 * rather than truncated to a whole number. An empty compound noun list yields {@code 0.0}
 * instead of failing with a division by zero.
 *
 * @param o the analysis output whose compound noun list is inspected
 * @return the share of entries reporting {@code isExist()}, as a percentage; {@code 0.0} when
 *     the list is empty
 */
public static double countFoundNouns(AnalysisOutput o) {
  final int total = o.getCNounList().size();
  if (total == 0) {
    // Nothing to count; avoid dividing by zero.
    return 0.0;
  }

  int found = 0;
  for (int i = 0; i < total; i++) {
    if (o.getCNounList().get(i).isExist()) {
      found++;
    }
  }

  // Multiply by a double literal so the division is not truncated to an integer.
  return (found * 100.0) / total;
}
/**
 * Computes the start offset of one entry in the compound noun list, relative to the beginning
 * of the list: the summed word lengths of all entries that precede it.
 *
 * @param output morphological analysis output that holds the compound noun list
 * @param index the index of the entry whose start offset is wanted
 * @return the total length of the words at indices {@code 0} to {@code index - 1}; {@code 0}
 *     when {@code index} is not positive
 */
private int getStartOffset(AnalysisOutput output, int index) {
  int precedingLength = 0;
  int cursor = 0;
  while (cursor < index) {
    precedingLength += output.getCNounList().get(cursor).getWord().length();
    cursor++;
  }
  return precedingLength;
}
private void extractKeyword( List<AnalysisOutput> outputs, int startoffset, Map<String, KoreanToken> map, int position) { int maxDecompounds = 0; int maxStem = 0; for (AnalysisOutput output : outputs) { if (queryMode && hasOrigin && output.getScore() == AnalysisOutput.SCORE_ANALYSIS && output.getCNounList().size() < 2) break; if (output.getPos() == PatternConstants.POS_VERB) continue; // extract keywords from only noun if (!originCNoun && output.getCNounList().size() > 0) continue; // except compound nound int inc = map.size() > 0 ? 0 : 1; map.put( position + ":" + output.getStem(), new KoreanToken(output.getStem(), startoffset, inc)); if (output.getStem().length() > maxStem) maxStem = output.getStem().length(); if (output.getCNounList().size() > maxDecompounds) maxDecompounds = output.getCNounList().size(); // extract the first stem as the keyword for the query processing if (queryMode) break; } if (maxDecompounds > 1) { for (int i = 0; i < maxDecompounds; i++) { position += i; int cPosition = position; for (AnalysisOutput output : outputs) { if (output.getPos() == PatternConstants.POS_VERB || output.getCNounList().size() <= i) continue; CompoundEntry cEntry = output.getCNounList().get(i); int cStartoffset = getStartOffset(output, i) + startoffset; int inc = i == 0 ? 0 : 1; map.put( (cPosition) + ":" + cEntry.getWord(), new KoreanToken(cEntry.getWord(), cStartoffset, inc)); if (bigrammable && !cEntry.isExist()) cPosition = addBiagramToMap(cEntry.getWord(), cStartoffset, map, cPosition); // extract the words derived from the first stem as the keyword for the query processing if (queryMode) break; } } } else { for (AnalysisOutput output : outputs) { if (output.getPos() == PatternConstants.POS_VERB) continue; if (bigrammable && output.getScore() < AnalysisOutput.SCORE_COMPOUNDS) addBiagramToMap(output.getStem(), startoffset, map, position); } } }