/**
 * Runs the chunker's {@code disambiguate} step on the analyzed sentence
 * "Для годиться." and checks the readings of two of the resulting tokens:
 * the token at index 1 must contain an opening {@code <adv>} marker and the
 * token at index 4 must contain a closing {@code </adv>} marker.
 *
 * @throws Exception if analysis or disambiguation fails
 */
public void testChunker() throws Exception {
  JLanguageTool languageTool = new JLanguageTool(new Ukrainian());
  AnalyzedSentence rawSentence = languageTool.getAnalyzedSentence("Для годиться.");
  AnalyzedTokenReadings[] chunkedTokens = chunker.disambiguate(rawSentence).getTokens();

  // Opening marker is expected on the token at index 1.
  String openingReadings = chunkedTokens[1].getReadings().toString();
  assertTrue(openingReadings.contains("<adv>"));

  // Closing marker is expected on the token at index 4.
  String closingReadings = chunkedTokens[4].getReadings().toString();
  assertTrue(closingReadings.contains("</adv>"));
}
/**
 * Flags "wieder" when it appears after a form of "spiegeln" in the same
 * sentence and no "wider" has been seen or directly follows, suggesting
 * "wider" as the replacement.
 *
 * @param sentence the analyzed sentence to scan
 * @return the matches found, converted via {@code toRuleMatchArray}
 */
@Override
public RuleMatch[] match(AnalyzedSentence sentence) {
  final List<RuleMatch> found = new ArrayList<>();
  final AnalyzedTokenReadings[] readings = sentence.getTokens();
  boolean sawSpiegeln = false;
  boolean sawWieder = false;
  boolean sawWider = false;

  for (int idx = 0; idx < readings.length; idx++) {
    final String word = readings[idx].getToken();
    // Tokens that are empty after trimming are skipped entirely.
    if (word.trim().isEmpty()) {
      continue;
    }

    final boolean isSpiegelnForm = word.equalsIgnoreCase("spiegelt")
        || word.equalsIgnoreCase("spiegeln")
        || word.equalsIgnoreCase("spiegelte")
        || word.equalsIgnoreCase("spiegelten")
        || word.equalsIgnoreCase("spiegelst");
    if (isSpiegelnForm) {
      sawSpiegeln = true;
    } else if (sawSpiegeln && word.equalsIgnoreCase("wieder")) {
      sawWieder = true;
    } else if (sawSpiegeln && word.equalsIgnoreCase("wider")) {
      sawWider = true;
    }

    // Only a "spiegeln ... wieder" sequence without any "wider" so far is suspicious.
    if (!sawSpiegeln || !sawWieder || sawWider) {
      continue;
    }

    // Look ahead at the next two tokens for a (lower-case) "wider".
    // NOTE(review): the single length check guards both indexes, so a "wider"
    // at idx + 1 that is also the last token is not detected here - confirm
    // whether that is intended.
    final boolean widerFollows = readings.length > idx + 2
        && (readings[idx + 1].getToken().equals("wider")
            || readings[idx + 2].getToken().equals("wider"));
    if (widerFollows) {
      continue;
    }

    final String shortMsg = "'wider' in 'widerspiegeln' wird mit 'i' geschrieben";
    final String msg = "'wider' in 'widerspiegeln' wird mit 'i' statt mit 'ie' "
        + "geschrieben, z.B. 'Das spiegelt die Situation gut wider.'";
    final int start = readings[idx].getStartPos();
    final RuleMatch hit = new RuleMatch(this, start, start + word.length(), msg, shortMsg);
    hit.setSuggestedReplacement("wider");
    found.add(hit);

    // Start over so a later "spiegeln ... wieder" in the same sentence is also reported.
    sawSpiegeln = false;
    sawWieder = false;
    sawWider = false;
  }
  return toRuleMatchArray(found);
}
@Override public RuleMatch[] match(AnalyzedSentence sentence) throws IOException { final List<RuleMatch> ruleMatches = new ArrayList<>(); if (needsInit) { init(); } if (hunspellDict == null) { // some languages might not have a dictionary, be silent about it return toRuleMatchArray(ruleMatches); } final String[] tokens = tokenizeText(getSentenceTextWithoutUrlsAndImmunizedTokens(sentence)); // starting with the first token to skip the zero-length START_SENT int len = sentence.getTokens()[1].getStartPos(); for (int i = 0; i < tokens.length; i++) { String word = tokens[i]; if (ignoreWord(Arrays.asList(tokens), i) || ignoreWord(word)) { len += word.length() + 1; continue; } if (isMisspelled(word)) { final RuleMatch ruleMatch = new RuleMatch( this, len, len + word.length(), messages.getString("spelling"), messages.getString("desc_spelling_short")); final List<String> suggestions = getSuggestions(word); suggestions.addAll(0, getAdditionalTopSuggestions(suggestions, word)); suggestions.addAll(getAdditionalSuggestions(suggestions, word)); if (!suggestions.isEmpty()) { filterSuggestions(suggestions); ruleMatch.setSuggestedReplacements(suggestions); } ruleMatches.add(ruleMatch); } len += word.length() + 1; } return toRuleMatchArray(ruleMatches); }