/**
 * Renders the maintainers of all non-variant languages as an HTML table.
 *
 * <p>Languages are keyed by the string that {@code messages} returns for their
 * short name, so the rows come out in the natural order of those strings.
 * Languages without maintainer information are left out. Within a row the
 * names are comma-separated, with a line break inserted after every third name.
 *
 * @return the HTML markup of the table
 */
private String getMaintainers() {
  final TreeMap<String, Language> languagesByName = new TreeMap<>();
  for (final Language candidate : Language.REAL_LANGUAGES) {
    if (candidate.isVariant() || candidate.getMaintainers() == null) {
      continue;
    }
    languagesByName.put(messages.getString(candidate.getShortName()), candidate);
  }

  final StringBuilder html = new StringBuilder();
  html.append("<table border=0 cellspacing=0 cellpadding=0>");
  for (String displayName : languagesByName.keySet()) {
    html.append("<tr valign=\"top\"><td>")
        .append(displayName)
        .append(":</td>")
        .append("<td> </td>")
        .append("<td>");
    int position = 0;
    for (Contributor maintainer : languagesByName.get(displayName).getMaintainers()) {
      if (position > 0) {
        html.append(", ");
        // start a new line after each group of three names
        if (position % 3 == 0) {
          html.append("<br>");
        }
      }
      html.append(maintainer.getName());
      position++;
    }
    html.append("</td></tr>");
  }
  html.append("</table>");
  return html.toString();
}
/**
 * Handles Dutch sentences that start with an apostrophe followed by a
 * single-letter contraction (one of {@code k, m, n, r, s, t}). In that case
 * the token at index 3 is the one whose case should be checked.
 *
 * @param firstToken the first token after the sentence start
 * @param secondToken the token following {@code firstToken}, may be {@code null}
 * @param tokens all tokens of the sentence without whitespace
 * @return the token at index 3 if the special case applies, otherwise {@code null}
 */
private String dutchSpecialCase(
    final String firstToken, final String secondToken, final AnalyzedTokenReadings[] tokens) {
  if (!language.getShortName().equals("nl")) {
    return null;
  }
  // tokens[3] is read below, so at least four tokens are required; the former
  // ">= 3" check allowed an ArrayIndexOutOfBoundsException for three tokens.
  if (tokens.length > 3
      && firstToken.equals("'")
      && secondToken != null
      && secondToken.matches("k|m|n|r|s|t")) {
    return tokens[3].getToken();
  }
  return null;
}
/**
 * Looks up the language whose short name equals the given code. If there is
 * no such language, prints the codes of the languages inspected and calls
 * {@code exitWithUsageMessage()}.
 *
 * @param lang the short language code to look up
 * @return the matching language, or {@code null} if none was found and
 *     {@code exitWithUsageMessage()} returned
 */
private static Language getLanguageOrExit(final String lang) {
  final List<String> knownCodes = new ArrayList<>();
  for (final Language candidate : Languages.get()) {
    final String code = candidate.getShortName();
    knownCodes.add(code);
    if (lang.equals(code)) {
      return candidate;
    }
  }
  System.out.println(
      "Unknown language '" + lang + "'. Supported languages are: " + knownCodes);
  exitWithUsageMessage();
  return null;
}
/**
 * Handles Dutch sentences that start with an apostrophe followed by a token
 * matching {@code DUTCH_SPECIAL_CASE}. In that case the token at index 3 is
 * the one whose case should be checked.
 *
 * @param firstToken the first token after the sentence start
 * @param secondToken the token following {@code firstToken}, may be {@code null}
 * @param tokens all tokens of the sentence without whitespace
 * @return the token at index 3 if the special case applies, otherwise {@code null}
 */
private String dutchSpecialCase(
    final String firstToken, final String secondToken, final AnalyzedTokenReadings[] tokens) {
  if (!language.getShortName().equals("nl")) {
    return null;
  }
  // tokens[3] is read below, so at least four tokens are required; the former
  // ">= 3" check allowed an ArrayIndexOutOfBoundsException for three tokens.
  if (tokens.length > 3
      && firstToken.equals("'")
      && secondToken != null
      && DUTCH_SPECIAL_CASE.matcher(secondToken).matches()) {
    return tokens[3].getToken();
  }
  return null;
}
/**
 * Returns the most recent value of the given date column in
 * {@code feed_matches} for the given language.
 *
 * @param dateField name of the timestamp column to read; it is concatenated
 *     into the SQL text (column names cannot be bound as parameters), so it
 *     must never come from untrusted input
 * @param language the language whose short name is matched against {@code language_code}
 * @return the latest date, or {@code new Date(0)} if there is no row or the
 *     first row's value is {@code NULL}
 * @throws RuntimeException if the query fails for any reason
 */
private Date getLatestDate(String dateField, Language language) {
  try {
    String sql =
        "SELECT " + dateField + " FROM feed_matches WHERE language_code = ? ORDER BY "
            + dateField + " DESC";
    try (PreparedStatement prepSt = conn.prepareStatement(sql)) {
      prepSt.setString(1, language.getShortName());
      // close the result set deterministically instead of relying on the statement
      try (ResultSet resultSet = prepSt.executeQuery()) {
        if (resultSet.next()) {
          // read the column once rather than once for the null check and once for the value
          final java.sql.Timestamp latest = resultSet.getTimestamp(dateField);
          if (latest != null) {
            return new Date(latest.getTime());
          }
        }
      }
    }
  } catch (Exception e) {
    throw new RuntimeException("Could not get latest " + dateField + " from database", e);
  }
  return new Date(0);
}
@Nullable private static MorfologikMultiSpeller getSpeller(Language language) { if (!language.getShortName().equals(Locale.GERMAN.getLanguage())) { throw new RuntimeException("Language is not a variant of German: " + language); } try { String morfoFile = "/de/hunspell/de_" + language.getCountries()[0] + ".dict"; if (JLanguageTool.getDataBroker().resourceExists(morfoFile)) { // spell data will not exist in LibreOffice/OpenOffice context try (InputStream stream = JLanguageTool.getDataBroker() .getFromResourceDirAsStream("/de/hunspell/spelling.txt"); BufferedReader br = new BufferedReader(new InputStreamReader(stream, "utf-8"))) { return new MorfologikMultiSpeller(morfoFile, new ExpandingReader(br), MAX_EDIT_DISTANCE); } } else { return null; } } catch (IOException e) { throw new RuntimeException("Could not set up morfologik spell checker", e); } }
/**
 * Reads the {@code grammar.xml} of the given language, calls {@code simplify}
 * for every active pattern rule that {@code isSimple} accepts, and prints the
 * resulting XML lines to standard output. Progress and a summary go to
 * standard error.
 *
 * @param lang the language whose grammar file is processed
 * @throws IOException if the grammar file cannot be read
 * @throws RuntimeException if the hard-coded base path does not exist
 */
private void run(Language lang) throws IOException {
  File basePath = new File("/lt/git/languagetool/languagetool-language-modules");
  if (!basePath.exists()) {
    throw new RuntimeException("basePath does not exist: " + basePath);
  }
  String langCode = lang.getShortName();
  File xml =
      new File(
          basePath,
          "/" + langCode + "/src/main/resources/org/languagetool/rules/" + langCode
              + "/grammar.xml");
  // The reader was previously never closed; release the file handle as soon as
  // the lines have been read.
  final List<String> xmlLines;
  try (FileReader reader = new FileReader(xml)) {
    xmlLines = IOUtils.readLines(reader);
  }
  JLanguageTool tool = new JLanguageTool(lang);
  int totalRules = 0; // counts pattern rules only
  for (Rule rule : tool.getAllActiveRules()) {
    if (!(rule instanceof PatternRule)) {
      continue;
    }
    PatternRule patternRule = (PatternRule) rule;
    String id = patternRule.getFullId();
    if (isSimple(patternRule)) {
      System.err.println("Simplifying: " + id);
      simplify(patternRule, xmlLines);
    } else {
      System.err.println("Can't simplify: " + id);
    }
    totalRules++;
  }
  System.err.println("touchedRulesCount: " + touchedRulesCount + " out of " + totalRules);
  for (String xmlLine : xmlLines) {
    System.out.println(xmlLine);
  }
}
@Override public final RuleMatch[] match(final AnalyzedSentence text) { final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>(); final AnalyzedTokenReadings[] tokens = text.getTokensWithoutWhitespace(); if (tokens.length < 2) { return toRuleMatchArray(ruleMatches); } int matchTokenPos = 1; // 0 = SENT_START final String firstToken = tokens[matchTokenPos].getToken(); String secondToken = null; String thirdToken = null; // ignore quote characters: if (tokens.length >= 3 && ("'".equals(firstToken) || "\"".equals(firstToken) || "„".equals(firstToken))) { matchTokenPos = 2; secondToken = tokens[matchTokenPos].getToken(); } final String firstDutchToken = dutchSpecialCase(firstToken, secondToken, tokens); if (firstDutchToken != null) { thirdToken = firstDutchToken; matchTokenPos = 3; } String checkToken = firstToken; if (thirdToken != null) { checkToken = thirdToken; } else if (secondToken != null) { checkToken = secondToken; } String lastToken = tokens[tokens.length - 1].getToken(); if (lastToken.matches("[ \"'„»«“]") && tokens.length >= 2) { // ignore trailing whitespace or quote lastToken = tokens[tokens.length - 2].getToken(); } boolean preventError = false; // TODO: why do only *these* languages have that special case? final String langCode = language.getShortName(); final boolean languageHasSpecialCases = langCode.equals("ru") || langCode.equals("pl") || langCode.equals("uk") || langCode.equals("be") || langCode.equals(Locale.ENGLISH.getLanguage()) || langCode.equals(Locale.ITALIAN.getLanguage()) || langCode.equals(Locale.GERMAN.getLanguage()); if (languageHasSpecialCases) { // fix for lists; note - this will not always work for the last point in OOo, // as OOo might serve paragraphs in any order. 
if (";".equals(lastParagraphString) || ";".equals(lastToken) || ",".equals(lastParagraphString) || ",".equals(lastToken)) { preventError = true; } // fix for words in table (not sentences); note - this will not always work for the last point // in OOo, // as OOo might serve paragraphs in any order. if (!lastToken.matches("[.?!…]")) { preventError = true; } } lastParagraphString = lastToken; if (checkToken.length() > 0) { final char firstChar = checkToken.charAt(0); if (!preventError && Character.isLowerCase(firstChar)) { final RuleMatch ruleMatch = new RuleMatch( this, tokens[matchTokenPos].getStartPos(), tokens[matchTokenPos].getStartPos() + tokens[matchTokenPos].getToken().length(), messages.getString("incorrect_case")); ruleMatch.setSuggestedReplacement(StringTools.uppercaseFirstChar(checkToken)); ruleMatches.add(ruleMatch); } } return toRuleMatchArray(ruleMatches); }
/**
 * Returns the name of the prohibit file: the language's short name followed
 * by {@code SPELLING_PROHIBIT_FILE}. The file lists words that must not be
 * accepted, even when the spell checker would accept them.
 *
 * @return the prohibit file name for this rule's language
 * @since 2.8
 */
protected String getProhibitFileName() {
  final String langCode = language.getShortName();
  return langCode + SPELLING_PROHIBIT_FILE;
}
/**
 * Returns the name of the spelling file: the language's short name followed
 * by {@code SPELLING_FILE}. The file lists words that are accepted and used
 * for suggestions, even when the spell checker would not accept them.
 *
 * @return the spelling file name for this rule's language
 * @since 2.9
 */
protected String getSpellingFileName() {
  final String langCode = language.getShortName();
  return langCode + SPELLING_FILE;
}
/**
 * Returns the name of the ignore file: the language's short name followed by
 * {@code SPELLING_IGNORE_FILE}. The file lists words that are accepted, even
 * when the spell checker would not accept them. In contrast to the file named
 * by {@link #getSpellingFileName()}, its words are not used for creating
 * suggestions for misspelled words.
 *
 * @return the ignore file name for this rule's language
 * @since 2.7
 */
protected String getIgnoreFileName() {
  final String langCode = language.getShortName();
  return langCode + SPELLING_IGNORE_FILE;
}