Esempio n. 1
0
 /**
  * Builds an HTML table listing the maintainers of every non-variant language in
  * {@code Language.REAL_LANGUAGES} that has a non-null maintainer list.
  *
  * <p>Each row holds the language name looked up in {@code messages} (by short name), followed
  * by the comma-separated maintainer names. Rows are sorted by that looked-up name.
  *
  * @return the HTML markup of the table
  */
 private String getMaintainers() {
   // TreeMap: iteration order follows the natural ordering of the name used as key
   final TreeMap<String, Language> languagesByName = new TreeMap<>();
   for (final Language candidate : Language.REAL_LANGUAGES) {
     if (candidate.isVariant() || candidate.getMaintainers() == null) {
       continue;
     }
     languagesByName.put(messages.getString(candidate.getShortName()), candidate);
   }

   final StringBuilder html = new StringBuilder("<table border=0 cellspacing=0 cellpadding=0>");
   for (final String displayName : languagesByName.keySet()) {
     html.append("<tr valign=\"top\"><td>")
         .append(displayName)
         .append(":</td>")
         .append("<td>&nbsp;</td>")
         .append("<td>");
     int position = 0;
     for (final Contributor maintainer : languagesByName.get(displayName).getMaintainers()) {
       if (position > 0) {
         // separator before every name but the first; line break before every 4th, 7th, ... name
         html.append(", ").append(position % 3 == 0 ? "<br>" : "");
       }
       html.append(maintainer.getName());
       position++;
     }
     html.append("</td></tr>");
   }
   return html.append("</table>").toString();
 }
 /**
  * Special handling for Dutch ("nl") sentences that start with an apostrophe followed by one of
  * the single letters k, m, n, r, s or t.
  *
  * @param firstToken the first token of the sentence (after the sentence start marker)
  * @param secondToken the token following {@code firstToken}, may be {@code null}
  * @param tokens all tokens of the sentence; index 3 is read when the special case applies
  * @return the token at index 3 if the language is Dutch and the special case applies,
  *     {@code null} otherwise
  */
 private String dutchSpecialCase(
     final String firstToken, final String secondToken, final AnalyzedTokenReadings[] tokens) {
   if (!language.getShortName().equals("nl")) {
     return null;
   }
   // tokens[3] is read below, so at least four tokens are needed; the previous
   // "length >= 3" check allowed an ArrayIndexOutOfBoundsException for 3-token input
   if (tokens.length > 3
       && "'".equals(firstToken)
       && secondToken != null
       && secondToken.matches("k|m|n|r|s|t")) {
     return tokens[3].getToken();
   }
   return null;
 }
 /**
  * Looks up the language whose short name equals {@code lang}.
  *
  * <p>If no language from {@code Languages.get()} matches, a message listing all short names is
  * printed to standard output and {@code exitWithUsageMessage()} is called.
  *
  * @param lang the short name to look for
  * @return the matching language, or {@code null} if none matched and
  *     {@code exitWithUsageMessage()} returned
  */
 private static Language getLanguageOrExit(final String lang) {
   // collected only for the error message; a successful lookup returns before it is used
   final List<String> knownCodes = new ArrayList<>();
   for (final Language candidate : Languages.get()) {
     final String code = candidate.getShortName();
     if (lang.equals(code)) {
       return candidate;
     }
     knownCodes.add(code);
   }
   System.out.println(
       "Unknown language '" + lang + "'. Supported languages are: " + knownCodes);
   exitWithUsageMessage();
   return null;
 }
 /**
  * Special handling for Dutch ("nl") sentences that start with an apostrophe followed by a token
  * matching {@code DUTCH_SPECIAL_CASE}.
  *
  * @param firstToken the first token of the sentence (after the sentence start marker)
  * @param secondToken the token following {@code firstToken}, may be {@code null}
  * @param tokens all tokens of the sentence; index 3 is read when the special case applies
  * @return the token at index 3 if the language is Dutch and the special case applies,
  *     {@code null} otherwise
  */
 private String dutchSpecialCase(
     final String firstToken, final String secondToken, final AnalyzedTokenReadings[] tokens) {
   if (!language.getShortName().equals("nl")) {
     return null;
   }
   // tokens[3] is read below, so at least four tokens are needed; the previous
   // "length >= 3" check allowed an ArrayIndexOutOfBoundsException for 3-token input
   if (tokens.length > 3
       && "'".equals(firstToken)
       && secondToken != null
       && DUTCH_SPECIAL_CASE.matcher(secondToken).matches()) {
     return tokens[3].getToken();
   }
   return null;
 }
Esempio n. 5
0
 /**
  * Returns the most recent value of the given date column in {@code feed_matches} for a language.
  *
  * @param dateField name of the date column to read; it is concatenated into the SQL statement
  *     (column names cannot be bound as parameters), so it must never come from untrusted input
  * @param language the language whose short name is matched against {@code language_code}
  * @return the value of the first row ordered by {@code dateField} descending, or
  *     {@code new Date(0)} if there is no row or the value is SQL NULL
  * @throws RuntimeException if anything goes wrong while querying the database
  */
 private Date getLatestDate(String dateField, Language language) {
   final String sql =
       "SELECT "
           + dateField
           + " FROM feed_matches WHERE language_code = ? ORDER BY "
           + dateField
           + " DESC";
   try (PreparedStatement prepSt = conn.prepareStatement(sql)) {
     prepSt.setString(1, language.getShortName());
     // close the result set explicitly instead of relying on the statement to do it
     try (ResultSet resultSet = prepSt.executeQuery()) {
       if (resultSet.next()) {
         // read the column only once: re-reading a column is not portable across JDBC drivers
         final java.sql.Timestamp latest = resultSet.getTimestamp(dateField);
         if (latest != null) {
           return new Date(latest.getTime());
         }
       }
     }
   } catch (Exception e) {
     throw new RuntimeException("Could not get latest " + dateField + " from database", e);
   }
   return new Date(0);
 }
 /**
  * Creates the Morfologik-based speller for a German language variant.
  *
  * <p>The dictionary resource is {@code /de/hunspell/de_XX.dict}, where {@code XX} is the first
  * entry of {@code language.getCountries()}; additional words are read from
  * {@code /de/hunspell/spelling.txt}.
  *
  * @param language the language, whose short name must equal the German language code
  * @return the speller, or {@code null} if the dictionary resource does not exist
  * @throws RuntimeException if the language is not German or if reading the resources fails
  */
 @Nullable
 private static MorfologikMultiSpeller getSpeller(Language language) {
   final boolean isGerman = language.getShortName().equals(Locale.GERMAN.getLanguage());
   if (!isGerman) {
     throw new RuntimeException("Language is not a variant of German: " + language);
   }
   try {
     final String dictPath = "/de/hunspell/de_" + language.getCountries()[0] + ".dict";
     if (!JLanguageTool.getDataBroker().resourceExists(dictPath)) {
       // spell data will not exist in LibreOffice/OpenOffice context
       return null;
     }
     try (InputStream spellingStream =
             JLanguageTool.getDataBroker()
                 .getFromResourceDirAsStream("/de/hunspell/spelling.txt");
         BufferedReader spellingReader =
             new BufferedReader(new InputStreamReader(spellingStream, "utf-8"))) {
       return new MorfologikMultiSpeller(
           dictPath, new ExpandingReader(spellingReader), MAX_EDIT_DISTANCE);
     }
   } catch (IOException e) {
     throw new RuntimeException("Could not set up morfologik spell checker", e);
   }
 }
 /**
  * Reads the {@code grammar.xml} of the given language, passes every active pattern rule that
  * {@code isSimple} accepts to {@code simplify}, and prints the XML lines to standard output.
  * Progress and a summary are written to standard error.
  *
  * @param lang the language whose grammar file is processed
  * @throws IOException if the grammar file cannot be read
  * @throws RuntimeException if the hard-coded base path does not exist
  */
 private void run(Language lang) throws IOException {
   File basePath = new File("/lt/git/languagetool/languagetool-language-modules");
   if (!basePath.exists()) {
     throw new RuntimeException("basePath does not exist: " + basePath);
   }
   String langCode = lang.getShortName();
   File xml =
       new File(
           basePath,
           "/"
               + langCode
               + "/src/main/resources/org/languagetool/rules/"
               + langCode
               + "/grammar.xml");
   // close the reader once the lines are loaded; it was previously leaked
   final List<String> xmlLines;
   try (FileReader xmlReader = new FileReader(xml)) {
     xmlLines = IOUtils.readLines(xmlReader);
   }
   JLanguageTool tool = new JLanguageTool(lang);
   int totalRules = 0;
   for (Rule rule : tool.getAllActiveRules()) {
     if (!(rule instanceof PatternRule)) {
       continue;
     }
     PatternRule patternRule = (PatternRule) rule;
     String id = patternRule.getFullId();
     if (isSimple(patternRule)) {
       System.err.println("Simplifying: " + id);
       // presumably rewrites the matching entries of xmlLines in place - confirm in simplify()
       simplify(patternRule, xmlLines);
     } else {
       System.err.println("Can't simplify: " + id);
     }
     totalRules++;
   }
   System.err.println("touchedRulesCount: " + touchedRulesCount + " out of " + totalRules);
   for (String xmlLine : xmlLines) {
     System.out.println(xmlLine);
   }
 }
  /**
   * Reports a sentence whose first word starts with a lowercase letter and suggests the
   * capitalized form.
   *
   * <p>A leading quote character is skipped, the Dutch special case is delegated to
   * {@code dutchSpecialCase}, and for some languages no error is reported when the sentence
   * looks like a list item or table cell. The last token of the sentence is stored in
   * {@code lastParagraphString} on every call that gets past the length check.
   *
   * @param text the sentence to check
   * @return an array with at most one match
   */
  @Override
  public final RuleMatch[] match(final AnalyzedSentence text) {
    final List<RuleMatch> found = new ArrayList<RuleMatch>();
    final AnalyzedTokenReadings[] tokens = text.getTokensWithoutWhitespace();
    if (tokens.length < 2) {
      return toRuleMatchArray(found);
    }

    // index 0 is SENT_START, so the first real token sits at index 1
    int matchTokenPos = 1;
    final String firstToken = tokens[1].getToken();

    // ignore quote characters:
    final boolean startsWithQuote =
        tokens.length >= 3
            && ("'".equals(firstToken) || "\"".equals(firstToken) || "„".equals(firstToken));
    String secondToken = null;
    if (startsWithQuote) {
      matchTokenPos = 2;
      secondToken = tokens[2].getToken();
    }

    final String thirdToken = dutchSpecialCase(firstToken, secondToken, tokens);
    if (thirdToken != null) {
      matchTokenPos = 3;
    }

    // the token to inspect is the last one that was resolved above
    final String checkToken =
        thirdToken != null ? thirdToken : (secondToken != null ? secondToken : firstToken);

    final int lastIndex = tokens.length - 1;
    String lastToken = tokens[lastIndex].getToken();
    // ignore trailing whitespace or quote (lastIndex >= 1 is guaranteed by the guard above)
    if (lastToken.matches("[ \"'„»«“]")) {
      lastToken = tokens[lastIndex - 1].getToken();
    }

    // TODO: why do only *these* languages have that special case?
    final String langCode = language.getShortName();
    final boolean languageHasSpecialCases =
        langCode.equals("ru")
            || langCode.equals("pl")
            || langCode.equals("uk")
            || langCode.equals("be")
            || langCode.equals(Locale.ENGLISH.getLanguage())
            || langCode.equals(Locale.ITALIAN.getLanguage())
            || langCode.equals(Locale.GERMAN.getLanguage());

    boolean preventError = false;
    if (languageHasSpecialCases) {
      // fix for lists; note - this will not always work for the last point in OOo,
      // as OOo might serve paragraphs in any order.
      final boolean looksLikeListItem =
          ";".equals(lastParagraphString)
              || ";".equals(lastToken)
              || ",".equals(lastParagraphString)
              || ",".equals(lastToken);
      // fix for words in table (not sentences); same OOo caveat as above
      final boolean lacksSentenceEnd = !lastToken.matches("[.?!…]");
      preventError = looksLikeListItem || lacksSentenceEnd;
    }

    // must happen after the checks above, which still need the previous value
    lastParagraphString = lastToken;

    final boolean startsLowercase =
        checkToken.length() > 0 && Character.isLowerCase(checkToken.charAt(0));
    if (startsLowercase && !preventError) {
      final AnalyzedTokenReadings matchToken = tokens[matchTokenPos];
      final int fromPos = matchToken.getStartPos();
      final int toPos = fromPos + matchToken.getToken().length();
      final RuleMatch ruleMatch =
          new RuleMatch(this, fromPos, toPos, messages.getString("incorrect_case"));
      ruleMatch.setSuggestedReplacement(StringTools.uppercaseFirstChar(checkToken));
      found.add(ruleMatch);
    }
    return toRuleMatchArray(found);
  }
 /**
  * Returns the name of the prohibit file, i.e. the list of words that must be rejected even
  * though the spell checker itself would accept them.
  *
  * @return the language's short name followed by {@code SPELLING_PROHIBIT_FILE}
  * @since 2.8
  */
 protected String getProhibitFileName() {
   final String langCode = language.getShortName();
   return langCode + SPELLING_PROHIBIT_FILE;
 }
 /**
  * Returns the name of the spelling file, i.e. the list of words that are accepted and offered
  * as suggestions even though the spell checker itself would not accept them.
  *
  * @return the language's short name followed by {@code SPELLING_FILE}
  * @since 2.9
  */
 protected String getSpellingFileName() {
   final String langCode = language.getShortName();
   return langCode + SPELLING_FILE;
 }
 /**
  * Returns the name of the ignore file, i.e. the list of words that are accepted even though the
  * spell checker itself would not accept them. In contrast to the words from
  * {@link #getSpellingFileName()}, these words are not used to create suggestions for
  * misspelled words.
  *
  * @return the language's short name followed by {@code SPELLING_IGNORE_FILE}
  * @since 2.7
  */
 protected String getIgnoreFileName() {
   final String langCode = language.getShortName();
   return langCode + SPELLING_IGNORE_FILE;
 }