/**
 * Asserts that {@code rule} reports exactly one match for the sentence and that the
 * message of that match contains the given substring.
 *
 * @param s sentence that is analyzed by {@code langTool} and then handed to {@code rule}
 * @param expectedErrorSubstring text that has to occur in the message of the match
 * @throws IOException declared because analyzing or matching the sentence may throw it
 */
private void assertBadWithMessage(String s, String expectedErrorSubstring) throws IOException {
   final int matchCount = rule.match(langTool.getAnalyzedSentence(s)).length;
   assertEquals(1, matchCount);
   // The sentence is analyzed and matched a second time to obtain the message text.
   final String actualMessage = rule.match(langTool.getAnalyzedSentence(s))[0].getMessage();
   final String failureText =
       "Got error '" + actualMessage + "', expected substring '" + expectedErrorSubstring + "'";
   final boolean containsExpected = actualMessage.contains(expectedErrorSubstring);
   assertTrue(failureText, containsExpected);
 }
  /**
   * Exercises {@code rule.compareLists} with index ranges over the non-whitespace tokens of
   * two German sentences, covering ranges that must match as well as ranges that must not.
   */
  public void testCompareLists() throws IOException {
    AnalyzedSentence shortSentence = langTool.getAnalyzedSentence("Hier ein Test");
    // The leading "" in the expected arrays lines up with token index 0
    // (presumably the sentence-start token — confirm).
    String[] firstThree = {"", "Hier", "ein"};
    String[] hierEin = {"Hier", "ein"};
    String[] wholeShortSentence = {"", "Hier", "ein", "Test"};

    assertTrue(rule.compareLists(shortSentence.getTokensWithoutWhitespace(), 0, 2, firstThree));
    assertTrue(rule.compareLists(shortSentence.getTokensWithoutWhitespace(), 1, 2, hierEin));
    assertTrue(
        rule.compareLists(shortSentence.getTokensWithoutWhitespace(), 0, 3, wholeShortSentence));
    // Same expected words, but with end index 4 the comparison has to fail.
    assertFalse(
        rule.compareLists(shortSentence.getTokensWithoutWhitespace(), 0, 4, wholeShortSentence));

    AnalyzedSentence longSentence = langTool.getAnalyzedSentence("das Heilige Römische Reich");
    String[] wholeLongSentence = {"", "das", "Heilige", "Römische", "Reich"};

    assertTrue(
        rule.compareLists(longSentence.getTokensWithoutWhitespace(), 0, 4, wholeLongSentence));
    // The range 8..11 is expected not to match.
    assertFalse(
        rule.compareLists(longSentence.getTokensWithoutWhitespace(), 8, 11, wholeLongSentence));
  }
 /**
  * Searches Twitter for each command-line argument, prints and collects the texts of the
  * tweets accepted by {@code isEligible}, and then prints the LanguageTool matches found
  * in every collected text.
  *
  * @param args search terms, one Twitter query per argument
  * @throws TwitterException declared for the Twitter search call
  * @throws IOException declared for the LanguageTool check
  */
 public static void main(String[] args) throws TwitterException, IOException {
   Twitter client = TwitterFactory.getSingleton();
   JLanguageTool checker = new JLanguageTool(new AmericanEnglish());
   List<String> collectedTexts = new ArrayList<String>();

   for (String searchTerm : args) {
     Query nextPage = new Query(searchTerm);
     int eligibleCount = 0;
     QueryResult page;
     do {
       page = client.search(nextPage);
       for (Status status : page.getTweets()) {
         if (!isEligible(status)) {
           continue;
         }
         System.out.println("@" + status.getUser().getScreenName() + " - " + status.getText());
         System.out.println(status.getLang());
         collectedTexts.add(status.getText());
         eligibleCount++;
       }
       // The follow-up query is always fetched; paging stops when there is none or once
       // at least 5 eligible tweets were seen for this search term.
       nextPage = page.nextQuery();
     } while (nextPage != null && eligibleCount < 5);
   }

   for (String text : collectedTexts) {
     for (RuleMatch hit : checker.check(text)) {
       String location =
           "Potential error at line " + hit.getLine() + ", column " + hit.getColumn();
       System.out.println(location + ": " + hit.getMessage());
       System.out.println("Suggested correction: " + hit.getSuggestedReplacements());
     }
   }
 }
 /**
  * Regression test: a German text with a {@code \r\n} line break must yield exactly one
  * match when checked with the default pattern rules activated.
  */
 public void testRegression() throws IOException {
   JLanguageTool germanChecker = new JLanguageTool(new German());
   germanChecker.activateDefaultPatternRules();
   // used to be not detected > 1.0.1:
   String text = "Und so.\r\nDie Bier.";
   List<RuleMatch> found = germanChecker.check(text);
   int foundCount = found.size();
   assertEquals(1, foundCount);
 }
 /**
  * Splits the text into sentences, analyzes each one and prints the result of
  * {@code getSentence} for it to standard output, one line per sentence.
  *
  * @param contents text to split and analyze
  * @param lt LanguageTool instance used for sentence splitting and analysis
  * @throws IOException declared because sentence analysis may throw it
  */
 private static void tagText(final String contents, final JLanguageTool lt) throws IOException {
   for (final String part : lt.sentenceTokenize(contents)) {
     final AnalyzedSentence analyzed = lt.getAnalyzedSentence(part);
     System.out.println(getSentence(analyzed));
   }
 }
  /**
   * Checks the start and end position of the first match that the accentuation rule
   * reports for a Catalan sample sentence.
   */
  public void testPositions() throws IOException {
    final AccentuationCheckRule accentRule =
        new AccentuationCheckRule(TestTools.getEnglishMessages());
    final JLanguageTool catalanTool = new JLanguageTool(new Catalan());

    final RuleMatch[] found =
        accentRule.match(
            catalanTool.getAnalyzedSentence("Són circumstancies extraordinàries."));
    final RuleMatch first = found[0];
    assertEquals(4, first.getFromPos());
    assertEquals(18, first.getToPos());
  }
  /**
   * Runs {@code chunker.disambiguate} on a Ukrainian sample sentence and checks that the
   * readings of token 1 contain {@code <adv>} and those of token 4 contain {@code </adv>}.
   */
  public void testChunker() throws Exception {
    JLanguageTool ukrainianTool = new JLanguageTool(new Ukrainian());
    AnalyzedTokenReadings[] chunked =
        chunker.disambiguate(ukrainianTool.getAnalyzedSentence("Для  годиться.")).getTokens();

    String openingReadings = chunked[1].getReadings().toString();
    assertTrue(openingReadings.contains("<adv>"));
    String closingReadings = chunked[4].getReadings().toString();
    assertTrue(closingReadings.contains("</adv>"));
  }
 /**
  * Builds a {@code CombiningTagger} from the two manual taggers loaded from
  * {@code /xx/added1.txt} and {@code /xx/added2.txt}.
  *
  * @param overwrite passed through unchanged to the {@code CombiningTagger} constructor
  * @throws IOException declared because loading the tagger data may throw it
  */
 private CombiningTagger getCombiningTagger(boolean overwrite) throws IOException {
   ManualTagger primary = manualTaggerFor("/xx/added1.txt");
   ManualTagger secondary = manualTaggerFor("/xx/added2.txt");
   return new CombiningTagger(primary, secondary, overwrite);
 }

 /** Creates a {@code ManualTagger} from the stream of the given resource-dir path. */
 private ManualTagger manualTaggerFor(String resourcePath) throws IOException {
   return new ManualTagger(
       JLanguageTool.getDataBroker().getFromResourceDirAsStream(resourcePath));
 }
// Example #9
// 0
 /**
  * Creates a LanguageTool instance for the language in which every rule that was active
  * after construction is disabled and the given pattern rule is added.
  *
  * @param lang language to create the instance for
  * @param patternRule rule added after the active rules have been disabled
  * @return the configured instance
  * @throws IOException declared because creating the instance may throw it
  */
 private JLanguageTool getLanguageToolWithOneRule(Language lang, PatternRule patternRule)
     throws IOException {
   final JLanguageTool singleRuleTool = new JLanguageTool(lang);
   for (Rule active : singleRuleTool.getAllActiveRules()) {
     final String activeId = active.getId();
     singleRuleTool.disableRule(activeId);
   }
   singleRuleTool.addRule(patternRule);
   return singleRuleTool;
 }
 /**
  * Re-creates {@code langTool} for the given language, disables every rule returned by
  * {@code getAllRules()} and adds a {@code GenericUnpairedBracketsRule} for that language.
  *
  * @param language language for both the tool and the bracket rule
  * @throws IOException declared because creating the tool may throw it
  */
 private void setUpRule(Language language) throws IOException {
   langTool = new JLanguageTool(language);
   for (Rule existing : langTool.getAllRules()) {
     final String existingId = existing.getId();
     langTool.disableRule(existingId);
   }
   langTool.addRule(new GenericUnpairedBracketsRule(TestTools.getEnglishMessages(), language));
 }
  /**
   * Feeds Catalan sample sentences to {@code assertCorrect} / {@code assertIncorrect} and
   * then checks match counts and the first three suggestions for selected sentences.
   */
  public void testRule() throws IOException {
    // correct sentences:
    final String[] correctSentences = {
      // disabled: "els etiquetadors sobre els etiquetats.",
      "tot tenyit amb llum de nostàlgia",
      "Ho van fer per duplicat.",
      "Assecat el braç del riu",
      "el llibre empaquetat",
      "un resultat equilibrat",
      "el nostre equip era bastant equilibrat",
      "un llibre ben empaquetat",
      "l'informe filtrat pel ministre",
      "L'informe filtrat és terrible",
      "ha liderat la batalla",
      "Els tinc empaquetats",
      "amb tractament unitari i equilibrat",
      "Processat després de la mort de Carles II",
      "Processat diverses vegades",
      "moltes vegades empaquetat amb pressa",
      "és llavors embotellat i llançat al mercat",
      "la comercialització de vi embotellat amb les firmes comercials",
      "eixia al mercat el vi blanc embotellat amb la marca",
      "que arribi a un equilibrat matrimoni",
      "És un cafè amb molt de cos i molt equilibrat.",
      "i per tant etiquetat com a observat",
      "Molt equilibrat en les seves característiques",
      "filtrat per Wikileaks",
      "una vegada filtrat",
      "no equilibrat",
    };
    for (String sentence : correctSentences) {
      assertCorrect(sentence);
    }

    // errors:
    final String[] incorrectSentences = {
      "Assecat del braç del riu",
      "Cal vigilar el filtrat del vi",
      "El procés d'empaquetat",
      "Els equilibrats de les rodes",
      // disabled: "Duplicat de claus",
      "El procés d'etiquetat de les ampolles",
      "El rentat de cotes",
    };
    for (String sentence : incorrectSentences) {
      assertIncorrect(sentence);
    }

    RuleMatch[] matches = rule.match(langTool.getAnalyzedSentence("El repicat i el rejuntat."));
    assertEquals(2, matches.length);

    // Suggestions are compared position by position, in the listed order.
    matches = rule.match(langTool.getAnalyzedSentence("El procés de relligat dels llibres."));
    assertEquals(1, matches.length);
    final String[] relligatSuggestions = {"relligadura", "relligament", "relligada"};
    for (int i = 0; i < relligatSuggestions.length; i++) {
      assertEquals(relligatSuggestions[i], matches[0].getSuggestedReplacements().get(i));
    }

    matches = rule.match(langTool.getAnalyzedSentence("Els rentats de cervell."));
    assertEquals(1, matches.length);
    final String[] rentatsSuggestions = {"rentades", "rentatges", "rentaments"};
    for (int i = 0; i < rentatsSuggestions.length; i++) {
      assertEquals(rentatsSuggestions[i], matches[0].getSuggestedReplacements().get(i));
    }
  }
  /**
   * Checks {@code DifferentPunctuationRule} on source/target sentence pairs: pairs with the
   * same ending (or no ending punctuation) give no match, a "." versus "!" pair gives one.
   */
  @Test
  public void testRule() throws IOException {
    DifferentPunctuationRule punctuationRule = new DifferentPunctuationRule();
    JLanguageTool sourceTool = new JLanguageTool(TestTools.getDemoLanguage());
    JLanguageTool targetTool = new JLanguageTool(new FakeLanguage());
    punctuationRule.setSourceLanguage(TestTools.getDemoLanguage());

    // correct sentences:
    RuleMatch[] bothExclamation =
        punctuationRule.match(
            sourceTool.getAnalyzedSentence("This is a test sentence!"),
            targetTool.getAnalyzedSentence("C'est la vie!"));
    assertEquals(0, bothExclamation.length);

    RuleMatch[] noPunctuation =
        punctuationRule.match(
            sourceTool.getAnalyzedSentence("one sentence"),
            targetTool.getAnalyzedSentence("jedno zdanie"));
    assertEquals(0, noPunctuation.length);

    // incorrect sentences:
    RuleMatch[] differentEnding =
        punctuationRule.match(
            sourceTool.getAnalyzedSentence("This this is a test sentence."),
            targetTool.getAnalyzedSentence("This this is a test sentence!"));
    assertEquals(1, differentEnding.length);
  }
  /**
   * Checks the number of matches for multi-sentence Catalan texts with the
   * {@code CA_UNPAIRED_BRACKETS} rule enabled: brackets and quotes spanning sentences,
   * a missing closing bracket, and a parenthesis interrupted by a paragraph break.
   */
  public void testMultipleSentences() throws IOException {
    final JLanguageTool catalanTool = new JLanguageTool(new Catalan());
    catalanTool.enableRule("CA_UNPAIRED_BRACKETS");

    final String closedBrackets =
        "Aquesta és una sentència múltiple amb claudàtors: "
            + "[Ací hi ha un claudàtor. Amb algun text.] i ací continua.\n";
    assertEquals(0, catalanTool.check(closedBrackets).size());

    final String quoteBeforeDot = "\"Sóc la teva filla. El corcó no et rosegarà més.\"\n\n";
    assertEquals(0, catalanTool.check(quoteBeforeDot).size());

    final String quoteAfterDot = "\"Sóc la teva filla. El corcó no et rosegarà més\".\n\n";
    assertEquals(0, catalanTool.check(quoteAfterDot).size());

    final String unclosedBracket =
        "Aquesta és una sentència múltiple amb claudàtors: "
            + "[Ací hi ha un claudàtor. Amb algun text. I ací continua.\n\n";
    assertEquals(1, catalanTool.check(unclosedBracket).size());

    // This text is still checked, but its expected count of 0 is currently not asserted.
    catalanTool.check("«Els manaments diuen: \"No desitjaràs la dona del teu veí\"»");

    // now with a paragraph end inside - we get two alarms because of paragraph
    // resetting
    final String paragraphInside =
        "Aquesta és una sentència múltiple amb parèntesis "
            + "(Ací hi ha un parèntesi. \n\n Amb algun text.) i ací continua.";
    assertEquals(2, catalanTool.check(paragraphInside).size());
  }
  /**
   * Checks {@code MixedAlphabetsRule} for Ukrainian: a plain word gives no match, while a
   * word containing the characters of {@code "смi\u00ADття"} gives one match whose only
   * suggestion is the plain word.
   */
  @Test
  public void testRule() throws IOException {
    final MixedAlphabetsRule mixedRule = new MixedAlphabetsRule(TestTools.getMessages("uk"));
    final JLanguageTool ukrainianTool = new JLanguageTool(new Ukrainian());

    // correct sentences:
    final int cleanMatchCount = mixedRule.match(ukrainianTool.getAnalyzedSentence("сміття")).length;
    assertEquals(0, cleanMatchCount);

    // incorrect sentences:
    RuleMatch[] found = mixedRule.match(ukrainianTool.getAnalyzedSentence("смi\u00ADття"));
    assertEquals(1, found.length);
    // the single match has to suggest exactly the corrected word
    assertEquals(Arrays.asList("сміття"), found[0].getSuggestedReplacements());
  }
 /**
  * Reads standard input line by line and passes the collected text to {@code tagText}.
  *
  * <p>If the language's sentence tokenizer reports that single line breaks mark a
  * paragraph, every line is tagged on its own. Otherwise lines are buffered and the buffer
  * is tagged when an empty line is read or the buffer reaches the size limit. Whatever is
  * left in the buffer is tagged at the end, also when reading or tagging failed.
  *
  * <p>Both readers are closed in every case; previously they stayed open when an
  * exception was thrown. The stream is decoded with the platform default charset.
  *
  * @param lt LanguageTool instance providing the tokenizer setting and used for tagging
  * @throws IOException if reading standard input or tagging the text fails
  */
 private static void runOnStdIn(final JLanguageTool lt) throws IOException {
   final int maxBufferedChars = 64_000;
   StringBuilder sb = new StringBuilder();
   // try-with-resources closes both readers even if readLine() or tagText() throws.
   try (InputStreamReader isr = new InputStreamReader(new BufferedInputStream(System.in));
       BufferedReader br = new BufferedReader(isr)) {
     try {
       String line;
       while ((line = br.readLine()) != null) {
         sb.append(line);
         sb.append('\n');
         if (lt.getLanguage().getSentenceTokenizer().singleLineBreaksMarksPara()) {
           tagText(sb.toString(), lt);
           sb = new StringBuilder();
         } else if ("".equals(line) || sb.length() >= maxBufferedChars) {
           tagText(sb.toString(), lt);
           sb = new StringBuilder();
         }
       }
     } finally {
       // Flush the remainder before the readers are closed, as the original code did.
       if (sb.length() > 0) {
         tagText(sb.toString(), lt);
       }
     }
   }
 }
// Example #16
// 0
  /**
   * Sets up the hunspell dictionary and the non-word pattern for {@code language}.
   *
   * <p>The dictionary is looked up as {@code /<short>/hunspell/<short>[_<country>].dic},
   * using the first country if there is one. If the resource exists and a dictionary path
   * can be determined, the dictionary is loaded, its word characters (if any) are turned
   * into a negative lookahead that is put in front of {@code NON_ALPHABETIC}, and the
   * ignore words are added. Finally {@code needsInit} is cleared.
   *
   * @throws IOException declared because resolving or loading the dictionary may throw it
   */
  @Override
  protected void init() throws IOException {
    super.init();
    final String shortName = language.getShortName();
    final String langCountry =
        language.getCountries().length > 0
            ? shortName + "_" + language.getCountries()[0]
            : shortName;
    final String shortDicPath = "/" + shortName + "/hunspell/" + langCountry + ".dic";

    String wordChars = "";
    // set dictionary only if there are dictionary files:
    if (JLanguageTool.getDataBroker().resourceExists(shortDicPath)) {
      final String path = getDictionaryPath(langCountry, shortDicPath);
      if (!"".equals(path)) {
        hunspellDict = Hunspell.getInstance().getDictionary(path);
        if (!"".equals(hunspellDict.getWordChars())) {
          // "-" is escaped because the characters end up inside a regex character class
          final String escapedChars = hunspellDict.getWordChars().replace("-", "\\-");
          wordChars = "(?![" + escapedChars + "])";
        }
        addIgnoreWords();
      } else {
        // an empty path means no usable dictionary location could be determined
        hunspellDict = null;
      }
    }
    nonWordPattern = Pattern.compile(wordChars + NON_ALPHABETIC);
    needsInit = false;
  }
 /**
  * Checks the input with {@code langTool} and asserts the number of matches found.
  *
  * @param input text to check
  * @param expectedMatches number of matches the check has to return
  * @throws IOException declared because the check may throw it
  */
 private void assertMatches(String input, int expectedMatches) throws IOException {
   List<RuleMatch> found = langTool.check(input);
   String failureText = "Expected " + expectedMatches + " matches, got: " + found;
   int foundCount = found.size();
   assertEquals(failureText, expectedMatches, foundCount);
 }
// Example #18
// 0
 /**
  * Creates the dialog for an existing configuration. The passed-in object is kept in
  * {@code original}; {@code config} receives the result of calling {@code copy} on it.
  *
  * @param owner stored as the dialog's owner frame
  * @param insideOOo stored as-is in the field of the same name
  * @param config configuration to keep and to copy; must not be {@code null} because
  *     {@code copy} is invoked on it
  */
 public ConfigurationDialog(Frame owner, boolean insideOOo, Configuration config) {
   this.owner = owner;
   this.insideOOo = insideOOo;
   messages = JLanguageTool.getMessageBundle();
   this.original = config;
   this.config = config.copy(config);
 }
// Example #19
// 0
 /**
  * Loads the document of every hit in {@code topDocs}, checks its sentence field with
  * LanguageTool and collects the sentences that produce at least one rule match.
  *
  * @param indexSearcher searcher used to load the documents of the hits
  * @param topDocs search result whose {@code scoreDocs} are examined
  * @param languageTool instance used to check and to analyze each sentence
  * @return one entry per sentence with matches, in the order of the hits
  * @throws IOException declared because loading documents or checking may throw it
  */
 private List<MatchingSentence> findMatchingSentences(
     IndexSearcher indexSearcher, TopDocs topDocs, JLanguageTool languageTool) throws IOException {
   final List<MatchingSentence> result = new ArrayList<>();
   for (ScoreDoc hit : topDocs.scoreDocs) {
     final Document hitDoc = indexSearcher.doc(hit.doc);
     final String sentence = hitDoc.get(FIELD_NAME);
     final List<RuleMatch> ruleMatches = languageTool.check(sentence);
     if (ruleMatches.isEmpty()) {
       // sentences without any match are left out of the result
       continue;
     }
     final String source = hitDoc.get(SOURCE_FIELD_NAME);
     final AnalyzedSentence analyzed = languageTool.getAnalyzedSentence(sentence);
     result.add(new MatchingSentence(sentence, source, analyzed, ruleMatches));
   }
   return result;
 }
// Example #20
// 0
 /**
  * Creates the dialog with a newly constructed {@code Configuration} and no original
  * configuration ({@code original} is set to {@code null}).
  *
  * @param owner stored as the dialog's owner frame
  * @param insideOOo stored as-is in the field of the same name
  * @deprecated NOTE(review): presumably superseded by the constructor that also takes a
  *     {@code Configuration} — confirm before pointing callers to it
  */
 @Deprecated
 public ConfigurationDialog(Frame owner, boolean insideOOo) {
   this.owner = owner;
   this.insideOOo = insideOOo;
   messages = JLanguageTool.getMessageBundle();
   this.original = null;
   this.config = new Configuration();
 }
 /**
  * Asserts that {@code rule} finds exactly one match in the sentence and, if suggestions
  * are given, that the match offers exactly those suggestions in that order.
  *
  * @param s sentence to analyze and match
  * @param expectedSuggestions expected suggestions; when empty, suggestions are not checked
  * @throws IOException declared because analysis or matching may throw it
  */
 private void assertBad(String s, String... expectedSuggestions) throws IOException {
   RuleMatch[] found = rule.match(langTool.getAnalyzedSentence(s));
   assertEquals("Did not find one match in sentence '" + s + "'", 1, found.length);
   if (expectedSuggestions.length == 0) {
     return;
   }
   List<String> actual = found[0].getSuggestedReplacements();
   assertThat(actual, is(Arrays.asList(expectedSuggestions)));
 }
 /**
  * Creates a speller with the given maximum edit distance.
  *
  * @param filename path in classpath to morfologik dictionary
  * @param conversionLocale used when transforming the word to lowercase; if {@code null},
  *     {@link Locale#getDefault()} is used instead
  * @param maxEditDistance maximum edit distance handed to the speller, must be greater
  *     than 0
  * @throws IllegalArgumentException if {@code maxEditDistance} is not greater than 0
  * @throws IOException declared because reading the dictionary may throw it
  */
 public MorfologikSpeller(String filename, Locale conversionLocale, int maxEditDistance)
     throws IOException {
   if (maxEditDistance <= 0) {
     // IllegalArgumentException is a RuntimeException, so existing catch blocks still apply.
     throw new IllegalArgumentException("maxEditDistance must be > 0: " + maxEditDistance);
   }
   final URL url = JLanguageTool.getDataBroker().getFromResourceDirAsUrl(filename);
   dictionary = Dictionary.read(url);
   speller = new Speller(dictionary, maxEditDistance);
   this.conversionLocale = conversionLocale != null ? conversionLocale : Locale.getDefault();
 }
// Example #23
// 0
 /**
  * Adds the two LanguageTool names from {@code SpellingCheckRule} and every line of the
  * ignore file that does not start with {@code #} to {@code hunspellDict}.
  *
  * @throws IOException declared because reading the ignore file may throw it
  */
 private void addIgnoreWords() throws IOException {
   hunspellDict.addWord(SpellingCheckRule.LANGUAGETOOL);
   hunspellDict.addWord(SpellingCheckRule.LANGUAGETOOL_FX);
   URL ignoreFile = JLanguageTool.getDataBroker().getFromResourceDirAsUrl(getIgnoreFileName());
   for (String entry : Resources.readLines(ignoreFile, Charsets.UTF_8)) {
     if (entry.startsWith("#")) {
       // lines starting with '#' are skipped
       continue;
     }
     hunspellDict.addWord(entry);
   }
 }
// Example #24
// 0
 /**
  * Creates {@code manualTagger} from {@code USER_DICT_FILENAME} the first time it is
  * needed. The field is tested once without the lock and once more while holding the
  * lock on {@code this}, so the tagger is only constructed when it is still unset.
  *
  * <p>NOTE(review): whether the unlocked read is safe depends on how {@code manualTagger}
  * is declared (e.g. {@code volatile}), which is not visible here — confirm.
  *
  * @throws IOException declared because loading the tagger data may throw it
  */
 private void initializeIfRequired() throws IOException {
   if (manualTagger != null) {
     return;
   }
   synchronized (this) {
     if (manualTagger != null) {
       return;
     }
     manualTagger =
         new ManualTagger(
             JLanguageTool.getDataBroker().getFromResourceDirAsStream(USER_DICT_FILENAME));
   }
 }
 /**
  * Creates the morfologik-based speller for a German language variant.
  *
  * <p>The dictionary is looked up as {@code /de/hunspell/de_<first country>.dict}. If it
  * exists, the speller is built from it together with the expanded contents of
  * {@code /de/hunspell/spelling.txt}.
  *
  * @param language language whose short name must equal the German language code
  * @return the speller, or {@code null} if the dictionary resource does not exist
  * @throws RuntimeException if the language is not German, or wrapping the
  *     {@code IOException} raised while setting up the speller
  */
 @Nullable
 private static MorfologikMultiSpeller getSpeller(Language language) {
   final boolean isGerman = language.getShortName().equals(Locale.GERMAN.getLanguage());
   if (!isGerman) {
     throw new RuntimeException("Language is not a variant of German: " + language);
   }
   try {
     String morfoFile = "/de/hunspell/de_" + language.getCountries()[0] + ".dict";
     if (!JLanguageTool.getDataBroker().resourceExists(morfoFile)) {
       // spell data will not exist in LibreOffice/OpenOffice context
       return null;
     }
     try (InputStream spellingStream =
             JLanguageTool.getDataBroker()
                 .getFromResourceDirAsStream("/de/hunspell/spelling.txt");
         BufferedReader spellingReader =
             new BufferedReader(new InputStreamReader(spellingStream, "utf-8"))) {
       return new MorfologikMultiSpeller(
           morfoFile, new ExpandingReader(spellingReader), MAX_EDIT_DISTANCE);
     }
   } catch (IOException e) {
     throw new RuntimeException("Could not set up morfologik spell checker", e);
   }
 }
 /**
  * Asserts that {@code rule} finds exactly {@code n} matches in the sentence and, if
  * suggestions are given, that the examined match offers exactly those suggestions.
  *
  * @param s sentence to analyze and match
  * @param n expected number of matches
  * @param expectedSuggestions expected suggestions; when empty, suggestions are not checked
  * @throws IOException declared because analysis or matching may throw it
  */
 private void assertBad(String s, int n, String... expectedSuggestions) throws IOException {
   RuleMatch[] found = rule.match(langTool.getAnalyzedSentence(s));
   assertEquals("Did not find " + n + " match(es) in sentence '" + s + "'", n, found.length);
   if (expectedSuggestions.length == 0) {
     return;
   }
   // When the rule reports two errors, the first match might come without suggestions
   // while the second one carries them; in that case the second match is examined.
   final boolean useSecond = found.length > 1 && found[0].getSuggestedReplacements().isEmpty();
   RuleMatch examined = useSecond ? found[1] : found[0];
   List<String> actual = examined.getSuggestedReplacements();
   assertThat(actual, is(Arrays.asList(expectedSuggestions)));
 }
 /**
  * Loads the language's {@code grammar.xml} from the hard-coded checkout directory, calls
  * {@code simplify} for every active pattern rule that {@code isSimple} accepts, and
  * prints the resulting XML lines to standard output. Progress and the final counters go
  * to standard error.
  *
  * @param lang language whose grammar file and active rules are processed
  * @throws IOException if the grammar file cannot be read
  * @throws RuntimeException if the hard-coded base directory does not exist
  */
 private void run(Language lang) throws IOException {
   File basePath = new File("/lt/git/languagetool/languagetool-language-modules");
   if (!basePath.exists()) {
     throw new RuntimeException("basePath does not exist: " + basePath);
   }
   String langCode = lang.getShortName();
   File xml =
       new File(
           basePath,
           "/"
               + langCode
               + "/src/main/resources/org/languagetool/rules/"
               + langCode
               + "/grammar.xml");
   final List<String> xmlLines;
   // IOUtils.readLines(Reader) leaves the reader open, so it is closed here explicitly.
   // NOTE(review): FileReader decodes with the platform default charset — confirm that
   // this is intended for grammar.xml.
   try (FileReader xmlReader = new FileReader(xml)) {
     xmlLines = IOUtils.readLines(xmlReader);
   }
   JLanguageTool tool = new JLanguageTool(lang);
   int totalRules = 0;
   for (Rule rule : tool.getAllActiveRules()) {
     if (!(rule instanceof PatternRule)) {
       continue;
     }
     PatternRule patternRule = (PatternRule) rule;
     String id = patternRule.getFullId();
     if (isSimple(patternRule)) {
       System.err.println("Simplifying: " + id);
       simplify(patternRule, xmlLines);
     } else {
       System.err.println("Can't simplify: " + id);
     }
     // only pattern rules are counted
     totalRules++;
   }
   System.err.println("touchedRulesCount: " + touchedRulesCount + " out of " + totalRules);
   for (String xmlLine : xmlLines) {
     System.out.println(xmlLine);
   }
 }
// Example #28
// 0
  /**
   * Determines the dictionary path (without the {@code .dic} suffix) for the given
   * dictionary resource.
   *
   * <p>If the resource URL uses the {@code jar}, {@code vfs} or {@code bundle} protocol,
   * the {@code .dic} file and its {@code .aff} sibling are copied to the directory named
   * by {@code java.io.tmpdir}, registered via {@code JLanguageTool.addTemporaryFile}, and
   * the path inside that directory is returned. Otherwise the absolute file path of the
   * resource is returned with its last four characters removed.
   *
   * @param dicName base name used for the copied files, without extension
   * @param originalPath resource path of the {@code .dic} file
   * @return the dictionary path without extension, or {@code ""} if the resource URL
   *     cannot be converted to a URI
   * @throws IOException declared because resolving or copying the resources may throw it
   */
  private String getDictionaryPath(final String dicName, final String originalPath)
      throws IOException {

    final URL dictURL = JLanguageTool.getDataBroker().getFromResourceDirAsUrl(originalPath);
    String dictionaryPath;
    // in the webstart, java EE or OSGi bundle version, we need to copy the files outside the jar
    // to the local temporary directory
    if ("jar".equals(dictURL.getProtocol())
        || "vfs".equals(dictURL.getProtocol())
        || "bundle".equals(dictURL.getProtocol())) {
      final File tempDir = new File(System.getProperty("java.io.tmpdir"));
      File tempDicFile = new File(tempDir, dicName + ".dic");
      JLanguageTool.addTemporaryFile(tempDicFile);
      try (InputStream dicStream =
          JLanguageTool.getDataBroker().getFromResourceDirAsStream(originalPath)) {
        fileCopy(dicStream, tempDicFile);
      }
      File tempAffFile = new File(tempDir, dicName + ".aff");
      JLanguageTool.addTemporaryFile(tempAffFile);
      // The dot is escaped so that only a literal ".dic" suffix is replaced; unescaped it
      // would match any character followed by "dic".
      final String affPath = originalPath.replaceFirst("\\.dic$", ".aff");
      try (InputStream affStream =
          JLanguageTool.getDataBroker().getFromResourceDirAsStream(affPath)) {
        fileCopy(affStream, tempAffFile);
      }
      dictionaryPath = tempDir.getAbsolutePath() + "/" + dicName;
    } else {
      final int suffixLength = ".dic".length();
      try {
        dictionaryPath = new File(dictURL.toURI()).getAbsolutePath();
        // drop the trailing ".dic"
        dictionaryPath = dictionaryPath.substring(0, dictionaryPath.length() - suffixLength);
      } catch (URISyntaxException e) {
        return "";
      }
    }
    return dictionaryPath;
  }
 public AbstractCompoundRule(
     final ResourceBundle messages,
     final String fileName,
     final String withHyphenMessage,
     final String withoutHyphenMessage,
     final String withOrWithoutHyphenMessage)
     throws IOException {
   if (messages != null) {
     super.setCategory(new Category(messages.getString("category_misc")));
   }
   loadCompoundFile(JLanguageTool.getDataBroker().getFromResourceDirAsStream(fileName), "UTF-8");
   this.withHyphenMessage = withHyphenMessage;
   this.withoutHyphenMessage = withoutHyphenMessage;
   this.withOrWithoutHyphenMessage = withOrWithoutHyphenMessage;
   setLocQualityIssueType(ITSIssueType.Misspelling);
 }
  /**
   * Loads {@code /yy/confusion_sets.txt} with {@code ConfusionSetLoader} and checks the
   * number of map entries, the number of sets and their factors for several words, and
   * the textual content of selected sets.
   */
  @Test
  public void testLoadWithStrictLimits() throws IOException {
    try (InputStream confusionData =
        JLanguageTool.getDataBroker().getFromResourceDirAsStream("/yy/confusion_sets.txt")) {
      Map<String, List<ConfusionSet>> setsByWord =
          new ConfusionSetLoader().loadConfusionSet(confusionData);
      assertThat(setsByWord.size(), is(10));

      List<ConfusionSet> thereSets = setsByWord.get("there");
      assertThat(thereSets.size(), is(1));
      assertThat(thereSets.get(0).getFactor(), is(10L));

      List<ConfusionSet> theirSets = setsByWord.get("their");
      assertThat(theirSets.size(), is(1));
      assertThat(theirSets.get(0).getFactor(), is(10L));

      List<ConfusionSet> fooSets = setsByWord.get("foo");
      assertThat(fooSets.size(), is(2));
      assertThat(fooSets.get(0).getFactor(), is(5L));
      assertThat(fooSets.get(1).getFactor(), is(8L));

      List<ConfusionSet> gooSets = setsByWord.get("goo");
      assertThat(gooSets.size(), is(2));
      assertThat(gooSets.get(0).getFactor(), is(11L));
      assertThat(gooSets.get(1).getFactor(), is(12L));
      assertThat(setsByWord.get("lol").size(), is(1));
      assertThat(setsByWord.get("something").size(), is(1));

      List<ConfusionSet> barSets = setsByWord.get("bar");
      assertThat(barSets.size(), is(1));
      assertThat(barSets.get(0).getFactor(), is(5L));

      Set<ConfusionString> thereStrings = setsByWord.get("there").get(0).getSet();
      assertTrue(getAsString(thereStrings).contains("there - example 1"));
      assertTrue(getAsString(thereStrings).contains("their - example 2"));

      Set<ConfusionString> theirStrings = setsByWord.get("their").get(0).getSet();
      assertTrue(getAsString(theirStrings).contains("there - example 1"));
      assertTrue(getAsString(theirStrings).contains("their - example 2"));
      assertFalse(getAsString(theirStrings).contains("comment"));

      // "foo" and "bar" are both looked up in the first set stored under "foo",
      // "baz" in the second one.
      Set<ConfusionString> firstFooStrings = setsByWord.get("foo").get(0).getSet();
      assertTrue(getAsString(firstFooStrings).contains("foo"));
      Set<ConfusionString> firstFooStringsAgain = setsByWord.get("foo").get(0).getSet();
      assertTrue(getAsString(firstFooStringsAgain).contains("bar"));
      Set<ConfusionString> secondFooStrings = setsByWord.get("foo").get(1).getSet();
      assertTrue(getAsString(secondFooStrings).contains("baz"));
    }
  }