public static boolean analysisNSMXMJ(AnalysisOutput o, List<AnalysisOutput> candidates) throws MorphException { int idxVbSfix = VerbUtil.endsWithVerbSuffix(o.getStem()); if (idxVbSfix == -1) return false; o.setVsfx(o.getStem().substring(idxVbSfix)); o.setStem(o.getStem().substring(0, idxVbSfix)); o.setPatn(PatternConstants.PTN_NSMXMJ); o.setPos(PatternConstants.POS_NOUN); WordEntry entry = DictionaryUtil.getWordExceptVerb(o.getStem()); if (entry != null) { if (entry.getFeature(WordEntry.IDX_NOUN) == '0') return false; else if (o.getVsfx().equals("하") && entry.getFeature(WordEntry.IDX_DOV) != '1') return false; else if (o.getVsfx().equals("되") && entry.getFeature(WordEntry.IDX_BEV) != '1') return false; else if (o.getVsfx().equals("내") && entry.getFeature(WordEntry.IDX_NE) != '1') return false; o.setScore(AnalysisOutput.SCORE_CORRECT); // '입니다'인 경우 인명 등 미등록어가 많이 발생되므로 분석성공으로 가정한다. } else { o.setScore(AnalysisOutput.SCORE_ANALYSIS); // '입니다'인 경우 인명 등 미등록어가 많이 발생되므로 분석성공으로 가정한다. } candidates.add(o); return true; }
/** * 용언 + '아/어' + 보조용언 + '음/기' + 조사(PTN_VMXMJ) * * @param o the analyzed output * @param candidates candidates * @throws MorphException throw exception */ public static boolean analysisVMXMJ(AnalysisOutput o, List<AnalysisOutput> candidates) throws MorphException { int idxXVerb = VerbUtil.endsWithXVerb(o.getStem()); if (idxXVerb != -1) { // 2. 사랑받아보다 String eogan = o.getStem().substring(0, idxXVerb); o.setXverb(o.getStem().substring(idxXVerb)); String[] stomis = null; if (eogan.endsWith("아") || eogan.endsWith("어")) stomis = EomiUtil.splitEomi( eogan.substring(0, eogan.length() - 1), eogan.substring(eogan.length() - 1)); else stomis = EomiUtil.splitEomi(eogan, ""); if (stomis[0] == null) return false; String[] irrs = IrregularUtil.restoreIrregularVerb(stomis[0], stomis[1]); if (irrs != null) { o.setStem(irrs[0]); o.addElist(irrs[1]); } else { o.setStem(stomis[0]); o.addElist(stomis[1]); } if (DictionaryUtil.getVerb(o.getStem()) != null) { o.setPatn(PatternConstants.PTN_VMXMJ); o.setPos(PatternConstants.POS_VERB); o.setScore(AnalysisOutput.SCORE_CORRECT); candidates.add(o); return true; } else if (analysisNSMXMJ(o, candidates)) { return true; } } return false; }
/**
 * Handles the case where the stem part ends in 음/기.
 *
 * <p>The stem must be at least two characters long and its last syllable must be either '기'
 * or a syllable whose final consonant is 'ㅁ'. The ending and the prefinal ending are split
 * off, then the VMJ, NSMJ and VMXMJ analyses are tried in that order, each on its own clone
 * of {@code o}. If none of them succeeds, {@code o} itself is accepted as PTN_VMJ when its
 * stem is a dictionary verb.
 *
 * @param o the analyzed output; its stem, ending list and prefinal ending are modified
 * @param candidates list that receives the successful analysis result
 * @return {@code true} if a candidate was added, {@code false} otherwise
 * @throws MorphException throw exception
 */
public static boolean analysisMJ(AnalysisOutput o, List<AnalysisOutput> candidates)
    throws MorphException {
  final String stem = o.getStem();
  final int strlen = stem.length();
  if (strlen < 2) {
    return false;
  }

  final char last = stem.charAt(strlen - 1);
  final char[] chrs = MorphUtil.decompose(last);

  // Only '기' or a syllable with final consonant 'ㅁ' can be a nominalizing ending here.
  if (last != '기' && !(chrs.length == 3 && chrs[2] == 'ㅁ')) {
    return false;
  }

  // '기' and '음' are whole-syllable endings and are cut off up front. Any other syllable
  // ending in 'ㅁ' is left attached and passed to splitEomi with an empty ending.
  String start = stem;
  String end = "";
  if (last == '기') {
    start = stem.substring(0, strlen - 1);
    end = "기";
  } else if (last == '음') {
    start = stem.substring(0, strlen - 1);
    end = "음";
  }

  final String[] eomis = EomiUtil.splitEomi(start, end);
  if (eomis[0] == null) {
    return false;
  }

  final String[] pomis = EomiUtil.splitPomi(eomis[0]);
  o.setStem(pomis[0]);
  o.addElist(eomis[1]);
  o.setPomi(pomis[1]);

  try {
    // Each attempt receives its own clone, so o itself is not touched by these calls.
    if (analysisVMJ(o.clone(), candidates)) {
      return true;
    }
    if (analysisNSMJ(o.clone(), candidates)) {
      return true;
    }
    if (analysisVMXMJ(o.clone(), candidates)) {
      return true;
    }
  } catch (CloneNotSupportedException e) {
    throw new MorphException(e.getMessage(), e);
  }

  // Last resort: accept the unrestored stem if it is a dictionary verb.
  if (DictionaryUtil.getVerb(o.getStem()) != null) {
    o.setPos(PatternConstants.POS_VERB);
    o.setPatn(PatternConstants.PTN_VMJ);
    o.setScore(AnalysisOutput.SCORE_CORRECT);
    candidates.add(o);
    return true;
  }

  return false;
}
/**
 * Checks whether the last syllable of the stem is a noun-forming suffix (등, 상, ...), i.e.
 * whether it is contained in {@code DNouns}.
 *
 * <p>When it is, the suffix is moved from the stem into the noun-suffix field of
 * {@code output}. If the shortened stem is found by {@code DictionaryUtil.getAllNoun}, the
 * compound-noun list is filled in (the entry's compounds when its noun feature is '2', an
 * empty list otherwise) and the score is set to SCORE_CORRECT. The method returns
 * {@code true} even when the shortened stem is not in the dictionary.
 *
 * @param output the analysis output to inspect; modified in place when a suffix is found
 * @return {@code true} if the last syllable is a noun-forming suffix; {@code false} if it is
 *     not, or if the stem is {@code null} or empty
 * @throws MorphException declared for the dictionary lookup
 */
public static boolean confirmDNoun(AnalysisOutput output) throws MorphException {
  final String stem = output.getStem();
  // An empty stem has no last syllable; without this guard substring(-1) would throw.
  if (stem == null || stem.length() == 0) {
    return false;
  }

  final int strlen = stem.length();
  final String d = stem.substring(strlen - 1);
  if (!DNouns.contains(d)) {
    return false;
  }

  final String s = stem.substring(0, strlen - 1);
  output.setNsfx(d);
  output.setStem(s);

  final WordEntry cnoun = DictionaryUtil.getAllNoun(s);
  if (cnoun != null) {
    if (cnoun.getFeature(WordEntry.IDX_NOUN) == '2') {
      output.setCNoun(cnoun.getCompounds());
    } else {
      output.setCNoun(Collections.EMPTY_LIST);
    }
    output.setScore(AnalysisOutput.SCORE_CORRECT);
  }

  return true;
}
/**
 * Predicate + '음/기' + particle (PTN_VMJ).
 *
 * <p>Attempts to restore an irregular conjugation from the current stem and the first entry
 * of the ending list, then accepts the result if the (possibly restored) stem is a dictionary
 * verb.
 *
 * @param o the analyzed output; its stem and first ending may be replaced by the restored
 *     forms, and pattern, pos and score are set on success
 * @param candidates list that receives {@code o} when the analysis succeeds
 * @return {@code true} if {@code o} was added to {@code candidates}, {@code false} otherwise
 * @throws MorphException throw exception
 */
public static boolean analysisVMJ(AnalysisOutput o, List<AnalysisOutput> candidates)
    throws MorphException {
  final String[] restored =
      IrregularUtil.restoreIrregularVerb(o.getStem(), o.getElist().get(0));
  if (restored != null) {
    o.setStem(restored[0]);
    o.setElist(restored[1], 0);
  }

  final boolean knownVerb = DictionaryUtil.getVerb(o.getStem()) != null;
  if (!knownVerb) {
    return false;
  }

  o.setPatn(PatternConstants.PTN_VMJ);
  o.setPos(PatternConstants.POS_VERB);
  o.setScore(AnalysisOutput.SCORE_CORRECT);
  candidates.add(o);
  return true;
}
/**
 * Separates unit nouns out of a compound noun. The word at the last position of the returned
 * list is the longest one.
 *
 * <p>Apart from the special cases handled up front, the result holds one entry for every
 * prefix of {@code str} (two characters or longer) that {@code DictionaryUtil.getAllNoun}
 * finds. The scan stops at the first prefix for which
 * {@code DictionaryUtil.findWithPrefix} yields nothing.
 *
 * @param str the compound noun (or the remaining part of it)
 * @param pos the analysing start point
 * @param o the analysis result; its stem and noun suffix are updated when {@code str} is a
 *     one-character suffix followed by a one-character {@code DNouns} member
 * @return the list of unit nouns (possibly empty), or {@code null} when {@code str} is a
 *     one-character suffix followed by a particle, or when {@code pos >= 2} and {@code str}
 *     as a whole is a particle
 * @throws MorphException throw exception
 */
private static List<WordEntry> findNouns(String str, int pos, AnalysisOutput o)
    throws MorphException {
  final List<WordEntry> found = new ArrayList<WordEntry>();
  final int len = str.length();

  // Two-character input whose first character is a known suffix.
  if (len == 2 && DictionaryUtil.existSuffix(str.substring(0, 1))) {
    if (DNouns.contains(str.substring(1))) {
      // suffix + noun-forming suffix: move the last character of the stem to nsfx.
      o.setStem(o.getStem().substring(0, o.getStem().length() - 1));
      o.setNsfx(str.substring(1));
      found.add(new WordEntry(str.substring(0, 1)));
      return found;
    }
    if (DictionaryUtil.existJosa(str.substring(1))) {
      // suffix + particle: not a noun sequence.
      return null;
    }
  }

  if (pos >= 2 && DictionaryUtil.existJosa(str)) {
    return null;
  }

  // A single character is accepted only if it is a suffix or a noun-forming suffix.
  if (len == 1 && (DictionaryUtil.existSuffix(str) || DNouns.contains(str))) {
    found.add(new WordEntry(str));
    return found;
  }

  // Collect every dictionary noun that is a prefix of str, shortest first.
  for (int end = 2; end <= len; end++) {
    final String prefix = str.substring(0, end);
    if (!DictionaryUtil.findWithPrefix(prefix).hasNext()) {
      // No dictionary word starts with this prefix, so longer prefixes cannot match.
      break;
    }
    final WordEntry entry = DictionaryUtil.getAllNoun(prefix);
    if (entry != null) {
      found.add(entry);
    }
  }

  return found;
}