예제 #1
0
 /**
  * Renders the maintainers of all non-variant languages as an HTML table.
  *
  * <p>Languages are ordered by the localized string looked up for their short name. Languages
  * whose maintainer list is {@code null} are left out. A line break is inserted after every
  * third maintainer name.
  *
  * @return the HTML markup of the table
  */
 private String getMaintainers() {
   final TreeMap<String, Language> languagesByName = new TreeMap<>();
   for (final Language candidate : Language.REAL_LANGUAGES) {
     if (candidate.isVariant() || candidate.getMaintainers() == null) {
       continue;
     }
     languagesByName.put(messages.getString(candidate.getShortName()), candidate);
   }
   final StringBuilder html = new StringBuilder("<table border=0 cellspacing=0 cellpadding=0>");
   for (final String languageName : languagesByName.keySet()) {
     html.append("<tr valign=\"top\"><td>")
         .append(languageName)
         .append(":</td>")
         .append("<td>&nbsp;</td>")
         .append("<td>");
     int position = 0;
     for (final Contributor maintainer : languagesByName.get(languageName).getMaintainers()) {
       if (position > 0) {
         html.append(", ");
         // start a new line after every group of three names
         if (position % 3 == 0) {
           html.append("<br>");
         }
       }
       html.append(maintainer.getName());
       position++;
     }
     html.append("</td></tr>");
   }
   return html.append("</table>").toString();
 }
 /**
  * Checks, for every entry of {@code Language.LANGUAGES}, that the number of unpaired-rule start
  * symbols equals the number of unpaired-rule end symbols.
  */
 public void testStartSymbolCountEqualsEndSymbolCount() throws IOException {
   for (Language candidate : Language.LANGUAGES) {
     final int openingCount = candidate.getUnpairedRuleStartSymbols().length;
     final int closingCount = candidate.getUnpairedRuleEndSymbols().length;
     final String failureMessage = "Different number of start and end symbols for " + candidate;
     assertEquals(failureMessage, openingCount, closingCount);
   }
 }
예제 #3
0
 /**
  * Lists the entries offered as mother tongue: the {@code NO_MOTHER_TONGUE} placeholder first,
  * followed by the translated name of every language in {@code Language.REAL_LANGUAGES}.
  *
  * @return the entries in the order described above
  */
 private Object[] getPossibleMotherTongues() {
   final List<Object> choices = new ArrayList<>();
   choices.add(NO_MOTHER_TONGUE);
   for (final Language candidate : Language.REAL_LANGUAGES) {
     choices.add(candidate.getTranslatedName(messages));
   }
   return choices.toArray(new Object[0]);
 }
예제 #4
0
 /**
  * Looks up the language whose translated name equals the given name.
  *
  * @param languageName localized name, e.g. <code>English</code> or <code>German</code>; the
  *     comparison is case-sensitive
  * @return the first matching entry of {@code Language.REAL_LANGUAGES}, or <code>null</code> if
  *     none matches
  */
 private Language getLanguageForLocalizedName(final String languageName) {
   Language found = null;
   for (final Language candidate : Language.REAL_LANGUAGES) {
     if (languageName.equals(candidate.getTranslatedName(messages))) {
       found = candidate;
       break;
     }
   }
   return found;
 }
예제 #5
0
 /**
  * Runs the given rules against a sentence index and prints every matching sentence.
  *
  * @param args {@code args[0]}: comma-separated rule ids, {@code args[1]}: rule file, {@code
  *     args[2]}: language short name, {@code args[3]}: index directory, optional {@code args[4]}:
  *     {@code --no_limit} to switch off the search limit
  */
 public static void main(String[] args) throws Exception {
   ensureCorrectUsageOrExit(args);
   final long overallStart = System.currentTimeMillis();
   final String[] requestedIds = args[0].split(",");
   final File rulesFile = new File(args[1]);
   final Language language = Language.getLanguageForShortName(args[2]);
   final File indexLocation = new File(args[3]);
   final boolean noLimitRequested = args.length > 4 && "--no_limit".equals(args[4]);
   if (noLimitRequested) {
     limitSearch = false;
   }
   final Searcher searcher = new Searcher(new SimpleFSDirectory(indexLocation));
   if (!limitSearch) {
     searcher.setMaxHits(100000);
   }
   for (String requestedId : requestedIds) {
     final long ruleStart = System.currentTimeMillis();
     for (PatternRule patternRule : searcher.getRuleById(requestedId, rulesFile)) {
       final SearcherResult result = searcher.findRuleMatchesOnIndex(patternRule, language);
       if (result.getMatchingSentences().size() == 0) {
         System.out.println("[no matches]");
       }
       int hitNumber = 0;
       for (MatchingSentence hit : result.getMatchingSentences()) {
         hitNumber++;
         System.out.println(
             hitNumber + ": " + hit.getSentence() + " (Source: " + hit.getSource() + ")");
       }
       final long ruleMillis = System.currentTimeMillis() - ruleStart;
       System.out.println("Time: " + ruleMillis + "ms");
       System.out.println("==============================================================");
     }
   }
   final long overallMillis = System.currentTimeMillis() - overallStart;
   System.out.println("Total time: " + overallMillis + "ms");
 }
 /**
  * Finds the language with the given short name among {@code Languages.get()}.
  *
  * <p>If there is no such language, a message listing the short names that were inspected is
  * printed and {@code exitWithUsageMessage()} is called.
  *
  * @param lang the short name to look for
  * @return the matching language; {@code null} only if {@code exitWithUsageMessage()} returns
  */
 private static Language getLanguageOrExit(final String lang) {
   final List<String> knownCodes = new ArrayList<>();
   Language match = null;
   for (final Language candidate : Languages.get()) {
     final String code = candidate.getShortName();
     knownCodes.add(code);
     if (lang.equals(code)) {
       match = candidate;
       break;
     }
   }
   if (match == null) {
     System.out.println(
         "Unknown language '" + lang + "'. Supported languages are: " + knownCodes);
     exitWithUsageMessage();
   }
   return match;
 }
 /**
  * Handles Dutch sentences that start with an apostrophe followed by a single letter out of
  * {@code k, m, n, r, s, t}: in that case the token at index 3 is the one to inspect.
  *
  * @param firstToken the first token of the sentence
  * @param secondToken the token after {@code firstToken}
  * @param tokens all tokens of the sentence
  * @return the token at index 3 if the rule's language is {@code nl} and the pattern applies,
  *     otherwise {@code null}
  */
 private String dutchSpecialCase(
     final String firstToken, final String secondToken, final AnalyzedTokenReadings[] tokens) {
   if (!language.getShortName().equals("nl")) {
     return null;
   }
   // tokens[3] is read below, so at least four tokens are needed; the previous ">= 3" check
   // caused an ArrayIndexOutOfBoundsException for three-token sentences
   if (tokens.length > 3 && firstToken.equals("'") && secondToken.matches("k|m|n|r|s|t")) {
     return tokens[3].getToken();
   }
   return null;
 }
 /**
  * Handles Dutch sentences that start with an apostrophe followed by a token matching {@code
  * DUTCH_SPECIAL_CASE}: in that case the token at index 3 is the one to inspect.
  *
  * @param firstToken the first token of the sentence
  * @param secondToken the token after {@code firstToken}
  * @param tokens all tokens of the sentence
  * @return the token at index 3 if the rule's language is {@code nl} and the pattern applies,
  *     otherwise {@code null}
  */
 private String dutchSpecialCase(
     final String firstToken, final String secondToken, final AnalyzedTokenReadings[] tokens) {
   if (!language.getShortName().equals("nl")) {
     return null;
   }
   // tokens[3] is read below, so at least four tokens are needed; the previous ">= 3" check
   // caused an ArrayIndexOutOfBoundsException for three-token sentences
   if (tokens.length > 3
       && firstToken.equals("'")
       && DUTCH_SPECIAL_CASE.matcher(secondToken).matches()) {
     return tokens[3].getToken();
   }
   return null;
 }
예제 #9
0
 /**
  * Creates the Morfologik-based speller for a German language variant.
  *
  * @param language a language whose short name is {@code de}; its first country selects the
  *     dictionary file
  * @return the speller, or {@code null} if the dictionary resource does not exist
  * @throws RuntimeException if the language's short name is not {@code de}, or if reading the
  *     spelling data fails
  */
 @Nullable
 private static MorfologikMultiSpeller getSpeller(Language language) {
   final boolean isGerman = language.getShortName().equals(Locale.GERMAN.getLanguage());
   if (!isGerman) {
     throw new RuntimeException("Language is not a variant of German: " + language);
   }
   try {
     final String binaryDict = "/de/hunspell/de_" + language.getCountries()[0] + ".dict";
     if (!JLanguageTool.getDataBroker().resourceExists(binaryDict)) {
       // spell data will not exist in LibreOffice/OpenOffice context
       return null;
     }
     try (InputStream plainWords =
             JLanguageTool.getDataBroker()
                 .getFromResourceDirAsStream("/de/hunspell/spelling.txt");
         BufferedReader reader = new BufferedReader(new InputStreamReader(plainWords, "utf-8"))) {
       return new MorfologikMultiSpeller(binaryDict, new ExpandingReader(reader), MAX_EDIT_DISTANCE);
     }
   } catch (IOException e) {
     throw new RuntimeException("Could not set up morfologik spell checker", e);
   }
 }
예제 #10
0
 /**
  * Reads the most recent value of a date column from {@code feed_matches} for one language.
  *
  * @param dateField name of the date column; it is concatenated into the SQL text because a
  *     column name cannot be bound as a statement parameter, so it must never come from
  *     untrusted input
  * @param language the language whose short name is matched against {@code language_code}
  * @return the latest non-null value of the column, or {@code new Date(0)} if there is no row or
  *     the first row's value is {@code null}
  * @throws RuntimeException if the query fails
  */
 private Date getLatestDate(String dateField, Language language) {
   try {
     String sql =
         "SELECT "
             + dateField
             + " FROM feed_matches WHERE language_code = ? ORDER BY "
             + dateField
             + " DESC";
     try (PreparedStatement prepSt = conn.prepareStatement(sql)) {
       prepSt.setString(1, language.getShortName());
       // close the result set explicitly instead of relying on the statement to do it
       try (ResultSet resultSet = prepSt.executeQuery()) {
         if (resultSet.next()) {
           // read the column once; only the first (= latest) row is of interest
           final java.sql.Timestamp latest = resultSet.getTimestamp(dateField);
           if (latest != null) {
             return new Date(latest.getTime());
           }
         }
       }
     }
   } catch (Exception e) {
     throw new RuntimeException("Could not get latest " + dateField + " from database", e);
   }
   return new Date(0);
 }
예제 #11
0
 /**
  * Tries to simplify every active pattern rule of the given language and prints the resulting
  * grammar file to standard output. Progress messages go to standard error.
  *
  * @param lang the language whose {@code grammar.xml} is processed
  * @throws IOException if the grammar file cannot be read
  * @throws RuntimeException if the hard-coded base path does not exist
  */
 private void run(Language lang) throws IOException {
   File basePath = new File("/lt/git/languagetool/languagetool-language-modules");
   if (!basePath.exists()) {
     throw new RuntimeException("basePath does not exist: " + basePath);
   }
   String langCode = lang.getShortName();
   File xml =
       new File(
           basePath,
           "/"
               + langCode
               + "/src/main/resources/org/languagetool/rules/"
               + langCode
               + "/grammar.xml");
   final List<String> xmlLines;
   // close the reader once all lines are read; it used to be leaked
   // NOTE(review): FileReader decodes with the platform default charset - confirm that this is
   // intended for grammar.xml
   try (FileReader reader = new FileReader(xml)) {
     xmlLines = IOUtils.readLines(reader);
   }
   JLanguageTool tool = new JLanguageTool(lang);
   int totalRules = 0;
   for (Rule rule : tool.getAllActiveRules()) {
     if (!(rule instanceof PatternRule)) {
       continue;
     }
     PatternRule patternRule = (PatternRule) rule;
     String id = patternRule.getFullId();
     if (isSimple(patternRule)) {
       System.err.println("Simplifying: " + id);
       simplify(patternRule, xmlLines);
     } else {
       System.err.println("Can't simplify: " + id);
     }
     totalRules++;
   }
   System.err.println("touchedRulesCount: " + touchedRulesCount + " out of " + totalRules);
   for (String xmlLine : xmlLines) {
     System.out.println(xmlLine);
   }
 }
 /**
  * Returns the name of the spelling file: the language's short name followed by {@code
  * SPELLING_FILE}. The file lists words that are accepted and used for suggestions even when
  * the spell checker would not accept them.
  *
  * @since 2.9
  */
 protected String getSpellingFileName() {
   final String languageCode = language.getShortName();
   return languageCode + SPELLING_FILE;
 }
예제 #13
0
  /**
   * Takes an XML file named <tt>rules-xx-language.xml</tt>, e.g. <tt>rules-de-German.xml</tt> and
   * builds a Language object for that language.
   *
   * <p>The middle part of the file name is the language code. It must have the length of a
   * two-letter code ({@code en}), a three-letter code ({@code ast}) or a code with country
   * ({@code en_US}). If the code is already supported, a fresh instance of the existing language
   * class is created and the file is added as an external rule file; otherwise a minimal
   * anonymous language backed only by this file is returned.
   *
   * @param file the rule file, must not be {@code null}
   * @param isAdditional value reported by {@code isExternal()} of the anonymous language
   * @return the language built for the file
   * @throws RuleFilenameException if the file name does not follow the expected pattern
   */
  private static Language makeLanguage(final File file, final boolean isAdditional)
      throws IllegalAccessException, InstantiationException {
    Objects.requireNonNull(file, "file cannot be null");
    if (!file.getName().endsWith(".xml")) {
      throw new RuleFilenameException(file);
    }
    final String[] parts = file.getName().split("-");
    final boolean startsWithRules = parts[0].equals("rules");
    final boolean secondPartHasCorrectLength =
        parts.length == 3
            && (parts[1].length() == "en".length()
                || parts[1].length() == "ast".length()
                || parts[1].length() == "en_US".length());
    if (!startsWithRules || !secondPartHasCorrectLength) {
      throw new RuleFilenameException(file);
    }
    // TODO: when the XML file is mergeable with
    // other rules (check this in the XML Rule Loader by using rules[@integrate='add']?),
    // subclass the existing language,
    // and adjust the settings if any are set in the rule file default configuration set

    Language newLanguage;
    if (Language.isLanguageSupported(parts[1])) {
      newLanguage = Language.getLanguageForShortName(parts[1]).getClass().newInstance();
      newLanguage.addExternalRuleFile(file.getAbsolutePath());
      newLanguage.setName(parts[2].replace(".xml", ""));
      newLanguage.makeExternal();
    } else {
      newLanguage =
          new Language() {
            @Override
            public Locale getLocale() {
              return new Locale(getShortName());
            }

            @Override
            public Contributor[] getMaintainers() {
              return null;
            }

            @Override
            public String getShortName() {
              if (parts[1].length() == 2) {
                return parts[1];
              }
              return parts[1].split("_")[0]; // en as in en_US
            }

            @Override
            public String[] getCountries() {
              final String[] codeParts = parts[1].split("_");
              // codes without a country part (e.g. "en" or "ast") have no second element;
              // indexing it unconditionally used to throw for three-letter codes
              if (parts[1].length() == 2 || codeParts.length < 2) {
                return new String[] {""};
              }
              return new String[] {codeParts[1]}; // US as in en_US
            }

            @Override
            public String getName() {
              return parts[2].replace(".xml", "");
            }

            @Override
            public void setName(final String name) {
              // cannot be changed for this language
            }

            @Override
            public List<Class<? extends Rule>> getRelevantRules() {
              return Collections.emptyList();
            }

            @Override
            public List<String> getRuleFileNames() {
              final List<String> ruleFiles = new ArrayList<>();
              ruleFiles.add(file.getAbsolutePath());
              return ruleFiles;
            }

            @Override
            public boolean isExternal() {
              return isAdditional;
            }
          };
    }
    return newLanguage;
  }
예제 #14
0
 /**
  * Command-line entry point: checks a Wikipedia Atom feed once or in a loop.
  *
  * <p>The language code is taken from the host name of the feed URL (the text between {@code //}
  * and the first dot). With a sleep time of {@code -1} the feed is checked exactly once and the
  * process exits with status 1 if that check fails. With any other value the check is repeated
  * forever, sleeping the given number of milliseconds between runs; failed runs are logged and
  * retried after the same pause.
  *
  * @param args {@code <atomFeedUrl> <sleepTime> [database.properties]}
  * @throws InterruptedException if the thread is interrupted while sleeping between runs
  */
 public static void main(String[] args) throws IOException, InterruptedException {
   if (args.length != 2 && args.length != 3) {
     System.out.println(
         "Usage: "
             + AtomFeedCheckerCmd.class.getSimpleName()
             + " <atomFeedUrl> <sleepTime> [database.properties]");
     System.out.println("  <atomFeedUrl> is a Wikipedia URL to the latest changes, for example:");
     System.out.println(
         "    https://de.wikipedia.org/w/index.php?title=Spezial:Letzte_%C3%84nderungen&feed=atom&namespace=0");
     System.out.println(
         "  <sleepTime> -1: don't loop at all (run once), 0: run in loop, other number: run in loop and");
     System.out.println("    wait this many milliseconds between runs");
     System.out.println(
         "  [database.properties] (optional) is a file that defines dbUrl, dbUser, and dbPassword,");
     System.out.println("    used to write the results to a database via JDBC");
     System.out.println("");
     System.out.println(
         "  When the database.properties file is specified, this command will store all feed changes that");
     System.out.println(
         "  cause LanguageTool rule matches to the database. If an error is then fixed later, this will");
     System.out.println(
         "  usually also be detected and the rule match in the database will be marked as fixed. One case");
     System.out.println(
         "  where this does not work is if the context of the error gets modified before the error is fixed.");
     System.out.println("");
     System.out.println(
         "  Run this command regularly so that you don't miss any changes from the feed.");
     System.out.println(
         "  As the feed may contain only the latest 50 changes, running it more often than");
     System.out.println("  once per minute may be needed for active Wikipedias.");
     System.exit(1);
   }
   String url = args[0];
   String langCode = url.substring(url.indexOf("//") + 2, url.indexOf("."));
   System.out.println("Using URL: " + url);
   System.out.println("Language code: " + langCode);
   int sleepTimeMillis = Integer.parseInt(args[1]);
   System.out.println("Sleep time: " + sleepTimeMillis + "ms (-1 = don't loop)");
   DatabaseConfig databaseConfig = null;
   if (args.length == 3) {
     String propFile = args[2];
     databaseConfig = new DatabaseConfig(propFile);
     System.out.println("Writing results to database at: " + databaseConfig.getUrl());
   }
   Language language = Language.getLanguageForShortName(langCode);
   AtomFeedChecker atomFeedChecker = new AtomFeedChecker(language, databaseConfig);
   final boolean runOnce = sleepTimeMillis == -1;
   while (true) {
     long startTime = System.currentTimeMillis();
     try {
       atomFeedChecker.runCheck(url);
       System.out.println("Run time: " + (System.currentTimeMillis() - startTime) + "ms");
     } catch (Exception e) {
       // e.g. 50x HTTP errors, network problems
       e.printStackTrace();
       if (runOnce) {
         // there is no retry in run-once mode; sleeping here would call Thread.sleep(-1),
         // which throws IllegalArgumentException
         System.exit(1);
       }
     }
     if (runOnce) {
       // don't loop at all
       break;
     }
     // the pause sits outside the try block so that an interrupt ends the loop instead of
     // being caught above and followed by yet another sleep
     System.out.println("Sleeping " + sleepTimeMillis + "ms...");
     Thread.sleep(sleepTimeMillis);
   }
 }
예제 #15
0
  /**
   * Completes the token currently being parsed: creates or updates {@code tokenElement} from the
   * text collected in {@code elements}, transfers the pending per-token settings to it, stores it
   * in {@code elementList} (or attaches it to the list's last entry when inside an and/or group)
   * and finally calls {@code resetToken()}.
   *
   * <p>The method reads and resets several parser-state fields, so the order of the statements
   * matters.
   */
  protected void finalizeTokens() {
    // a new element is created unless an exception was set and an element already exists;
    // in that case only the element's string is updated
    if (!exceptionSet || tokenElement == null) {
      tokenElement =
          new Element(
              StringTools.trimWhitespace(elements.toString()),
              caseSensitive,
              regExpression,
              tokenInflected);
      tokenElement.setNegation(tokenNegated);
    } else {
      tokenElement.setStringElement(StringTools.trimWhitespace(elements.toString()));
    }

    // each pending setting is applied only if it differs from its neutral value and is then
    // set back to that value
    if (skipPos != 0) {
      tokenElement.setSkipNext(skipPos);
      skipPos = 0;
    }
    if (minOccurrence != 1) {
      tokenElement.setMinOccurrence(minOccurrence);
      minOccurrence = 1;
    }
    if (maxOccurrence != 1) {
      tokenElement.setMaxOccurrence(maxOccurrence);
      maxOccurrence = 1;
    }
    if (posToken != null) {
      tokenElement.setPosElement(posToken, posRegExp, posNegation);
      posToken = null;
    }
    if (chunkTag != null) {
      tokenElement.setChunkElement(chunkTag);
      chunkTag = null;
    }

    // note: unlike the fields above, tokenReference is not reset here
    if (tokenReference != null) {
      tokenElement.setMatch(tokenReference);
    }

    // the first token of an and/or group is added to the list as a regular entry; later tokens
    // of the same group are attached to that last entry instead
    if (inAndGroup && andGroupCounter > 0) {
      elementList.get(elementList.size() - 1).setAndGroupElement(tokenElement);
    } else if (inOrGroup && orGroupCounter > 0) {
      elementList.get(elementList.size() - 1).setOrGroupElement(tokenElement);
    } else {
      elementList.add(tokenElement);
    }
    if (inAndGroup) {
      andGroupCounter++;
    }
    if (inOrGroup) {
      orGroupCounter++;
    }

    if (inUnification) {
      tokenElement.setUnification(equivalenceFeatures);
    }

    tokenElement.setInsideMarker(inMarker);

    // inside a unification definition the element is registered as an equivalence with the
    // language's unifier configuration and the element list is emptied
    if (inUnificationDef) {
      language.getUnifierConfiguration().setEquivalence(uFeature, uType, tokenElement);
      elementList.clear();
    }
    if (tokenSpaceBeforeSet) {
      tokenElement.setWhitespaceBefore(tokenSpaceBefore);
    }
    resetToken();
  }
 /**
  * Returns the name of the ignore file: the language's short name followed by {@code
  * SPELLING_IGNORE_FILE}. The file lists words that are accepted even when the spell checker
  * would not accept them. Unlike the words from {@link #getSpellingFileName()}, they are not
  * used for creating suggestions for misspelled words.
  *
  * @since 2.7
  */
 protected String getIgnoreFileName() {
   final String languageCode = language.getShortName();
   return languageCode + SPELLING_IGNORE_FILE;
 }
 /**
  * Tells whether the word is in {@code wordsToBeIgnored}, either as given or - only if {@code
  * convertsCase} is set - in its lowercase form according to the language's locale.
  */
 private boolean isIgnoredNoCase(String word) {
   if (wordsToBeIgnored.contains(word)) {
     return true;
   }
   if (!convertsCase) {
     return false;
   }
   final String lowercased = word.toLowerCase(language.getLocale());
   return wordsToBeIgnored.contains(lowercased);
 }
예제 #18
0
 /**
  * Whether this rule can be used for text in the given language. Note that this just checks if
  * this rule's class is in the list of hard-coded rules for the given language, thus it will
  * never return {@code true} for {@link org.languagetool.rules.patterns.PatternRule}s.
  *
  * @param language the language whose relevant rules are inspected
  * @return {@code true} if the language's relevant rules are non-null and contain this rule's
  *     class
  */
 public final boolean supportsLanguage(final Language language) {
   final List<Class<? extends Rule>> hardCodedRules = language.getRelevantRules();
   if (hardCodedRules == null) {
     return false;
   }
   return hardCodedRules.contains(this.getClass());
 }
  /**
   * Reports a sentence whose first relevant token starts with a lowercase letter.
   *
   * <p>A leading quote character is skipped, and for Dutch the result of {@code
   * dutchSpecialCase} takes precedence. For a fixed set of languages no error is reported if the
   * sentence or the previously checked one ends in {@code ;} or {@code ,}, or if the sentence
   * does not end in one of {@code . ? ! …}. The last token is remembered in {@code
   * lastParagraphString} for the next call.
   *
   * @param text the analyzed sentence
   * @return zero or one rule match
   */
  @Override
  public final RuleMatch[] match(final AnalyzedSentence text) {
    final List<RuleMatch> found = new ArrayList<RuleMatch>();
    final AnalyzedTokenReadings[] tokens = text.getTokensWithoutWhitespace();
    if (tokens.length < 2) {
      return toRuleMatchArray(found);
    }

    // index 0 holds SENT_START, so the sentence proper starts at index 1
    int errorTokenIndex = 1;
    final String leadingToken = tokens[errorTokenIndex].getToken();
    String tokenAfterQuote = null;
    final boolean startsWithQuote =
        "'".equals(leadingToken) || "\"".equals(leadingToken) || "„".equals(leadingToken);
    if (tokens.length >= 3 && startsWithQuote) {
      // ignore quote characters:
      errorTokenIndex = 2;
      tokenAfterQuote = tokens[errorTokenIndex].getToken();
    }
    final String dutchToken = dutchSpecialCase(leadingToken, tokenAfterQuote, tokens);
    if (dutchToken != null) {
      errorTokenIndex = 3;
    }

    // most specific candidate wins: Dutch special case, then token after a quote, then first token
    final String tokenToCheck;
    if (dutchToken != null) {
      tokenToCheck = dutchToken;
    } else if (tokenAfterQuote != null) {
      tokenToCheck = tokenAfterQuote;
    } else {
      tokenToCheck = leadingToken;
    }

    String sentenceEnd = tokens[tokens.length - 1].getToken();
    if (sentenceEnd.matches("[ \"'„»«“]") && tokens.length >= 2) {
      // ignore trailing whitespace or quote
      sentenceEnd = tokens[tokens.length - 2].getToken();
    }

    // TODO: why do only *these* languages have that special case?
    final String code = language.getShortName();
    final boolean hasSpecialCases =
        code.equals("ru")
            || code.equals("pl")
            || code.equals("uk")
            || code.equals("be")
            || code.equals(Locale.ENGLISH.getLanguage())
            || code.equals(Locale.ITALIAN.getLanguage())
            || code.equals(Locale.GERMAN.getLanguage());
    boolean suppressError = false;
    if (hasSpecialCases) {
      // fix for lists; note - this will not always work for the last point in OOo,
      // as OOo might serve paragraphs in any order.
      final boolean insideList =
          ";".equals(lastParagraphString)
              || ";".equals(sentenceEnd)
              || ",".equals(lastParagraphString)
              || ",".equals(sentenceEnd);
      // fix for words in table (not sentences); the same OOo caveat applies
      final boolean lacksEndPunctuation = !sentenceEnd.matches("[.?!…]");
      suppressError = insideList || lacksEndPunctuation;
    }

    lastParagraphString = sentenceEnd;

    if (tokenToCheck.length() > 0
        && !suppressError
        && Character.isLowerCase(tokenToCheck.charAt(0))) {
      final AnalyzedTokenReadings errorToken = tokens[errorTokenIndex];
      final RuleMatch lowercaseStart =
          new RuleMatch(
              this,
              errorToken.getStartPos(),
              errorToken.getStartPos() + errorToken.getToken().length(),
              messages.getString("incorrect_case"));
      lowercaseStart.setSuggestedReplacement(StringTools.uppercaseFirstChar(tokenToCheck));
      found.add(lowercaseStart);
    }
    return toRuleMatchArray(found);
  }
 /**
  * Returns the name of the prohibit file: the language's short name followed by {@code
  * SPELLING_PROHIBIT_FILE}. The file lists words that are not accepted even when the spell
  * checker would accept them.
  *
  * @since 2.8
  */
 protected String getProhibitFileName() {
   final String languageCode = language.getShortName();
   return languageCode + SPELLING_PROHIBIT_FILE;
 }