/**
  * Builds a {@link CombiningTagger} from the two test dictionaries {@code /xx/added1.txt} and
  * {@code /xx/added2.txt}.
  *
  * <p>Each resource stream is opened and closed by this method, so it is released even when a
  * {@code ManualTagger} constructor throws.
  *
  * @param overwrite passed through unchanged as the third {@code CombiningTagger} argument
  * @throws IOException if loading a dictionary or closing its stream fails
  */
 private CombiningTagger getCombiningTagger(boolean overwrite) throws IOException {
   // java.io.InputStream is written out in full so this method needs no additional import.
   final ManualTagger tagger1;
   try (java.io.InputStream added1 =
       JLanguageTool.getDataBroker().getFromResourceDirAsStream("/xx/added1.txt")) {
     tagger1 = new ManualTagger(added1);
   }
   final ManualTagger tagger2;
   try (java.io.InputStream added2 =
       JLanguageTool.getDataBroker().getFromResourceDirAsStream("/xx/added2.txt")) {
     tagger2 = new ManualTagger(added2);
   }
   return new CombiningTagger(tagger1, tagger2, overwrite);
 }
Пример #2
0
  /**
   * Sets up the Hunspell dictionary for this rule's language, when its {@code .dic} resource
   * exists, and compiles the non-word pattern.
   *
   * <p>If the dictionary path resolves to an empty string, {@code hunspellDict} is set to
   * {@code null}. In every case {@code needsInit} is cleared at the end.
   *
   * @throws IOException if resolving the dictionary path or adding the ignore words fails
   */
  @Override
  protected void init() throws IOException {
    super.init();
    // Dictionary base name: "<lang>_<country>" when a country is defined, otherwise "<lang>".
    final String langCountry =
        language.getCountries().length > 0
            ? language.getShortName() + "_" + language.getCountries()[0]
            : language.getShortName();
    final String shortDicPath =
        "/" + language.getShortName() + "/hunspell/" + langCountry + ".dic";
    String wordCharsLookahead = "";
    // The dictionary is only configured when its files are actually present.
    if (JLanguageTool.getDataBroker().resourceExists(shortDicPath)) {
      final String dictionaryPath = getDictionaryPath(langCountry, shortDicPath);
      if (!"".equals(dictionaryPath)) {
        hunspellDict = Hunspell.getInstance().getDictionary(dictionaryPath);
        final String dictWordChars = hunspellDict.getWordChars();
        if (!"".equals(dictWordChars)) {
          // Negative lookahead: a position that starts with one of the dictionary's word
          // characters is excluded from the pattern; '-' is escaped for the character class.
          wordCharsLookahead = "(?![" + dictWordChars.replace("-", "\\-") + "])";
        }
        addIgnoreWords();
      } else {
        hunspellDict = null;
      }
    }
    nonWordPattern = Pattern.compile(wordCharsLookahead + NON_ALPHABETIC);
    needsInit = false;
  }
 /**
  * Creates a speller with the given maximum edit distance.
  *
  * @param filename path in classpath to morfologik dictionary
  * @param conversionLocale used when transforming the word to lowercase; {@code null} selects
  *     {@link Locale#getDefault()}
  * @param maxEditDistance maximum edit distance handed to the underlying speller; must be
  *     greater than 0
  * @throws IllegalArgumentException if {@code maxEditDistance} is not greater than 0
  * @throws IOException if the dictionary cannot be read
  */
 public MorfologikSpeller(String filename, Locale conversionLocale, int maxEditDistance)
     throws IOException {
   if (maxEditDistance <= 0) {
     // IllegalArgumentException is a RuntimeException, so existing catch blocks keep working.
     throw new IllegalArgumentException("maxEditDistance must be > 0: " + maxEditDistance);
   }
   final URL url = JLanguageTool.getDataBroker().getFromResourceDirAsUrl(filename);
   dictionary = Dictionary.read(url);
   speller = new Speller(dictionary, maxEditDistance);
   this.conversionLocale = conversionLocale != null ? conversionLocale : Locale.getDefault();
 }
Пример #4
0
 /**
  * Adds {@code SpellingCheckRule.LANGUAGETOOL}, {@code SpellingCheckRule.LANGUAGETOOL_FX} and
  * every line of the ignore file that does not start with {@code #} to the Hunspell dictionary.
  *
  * @throws IOException if the ignore file cannot be read
  */
 private void addIgnoreWords() throws IOException {
   hunspellDict.addWord(SpellingCheckRule.LANGUAGETOOL);
   hunspellDict.addWord(SpellingCheckRule.LANGUAGETOOL_FX);
   final List<String> wordsToIgnore =
       Resources.readLines(
           JLanguageTool.getDataBroker().getFromResourceDirAsUrl(getIgnoreFileName()),
           Charsets.UTF_8);
   for (final String candidate : wordsToIgnore) {
     if (candidate.startsWith("#")) {
       // Lines beginning with '#' are skipped rather than added as words.
       continue;
     }
     hunspellDict.addWord(candidate);
   }
 }
Пример #5
0
 /**
  * Creates {@code manualTagger} from {@code USER_DICT_FILENAME} the first time it is needed.
  *
  * <p>The resource stream is closed by this method once the tagger has been constructed, or if
  * construction fails.
  *
  * @throws IOException if loading the user dictionary or closing its stream fails
  */
 private void initializeIfRequired() throws IOException {
   // Lazy initialize fields when needed and only once.
   // NOTE(review): the unsynchronized first check is only safe if manualTagger is declared
   // volatile - confirm at the field declaration.
   if (manualTagger == null) {
     synchronized (this) {
       if (manualTagger == null) {
         // java.io.InputStream is written out in full so this method needs no additional import.
         try (java.io.InputStream userDict =
             JLanguageTool.getDataBroker().getFromResourceDirAsStream(USER_DICT_FILENAME)) {
           manualTagger = new ManualTagger(userDict);
         }
       }
     }
   }
 }
Пример #6
0
 /**
  * Creates the Morfologik-based multi speller for a German language variant.
  *
  * @param language a variant of German that defines at least one country
  * @return the speller, or {@code null} if the variant's {@code /de/hunspell/de_XX.dict}
  *     resource does not exist
  * @throws RuntimeException if {@code language} is not German, defines no country, or the
  *     spelling data cannot be read
  */
 @Nullable
 private static MorfologikMultiSpeller getSpeller(Language language) {
   if (!language.getShortName().equals(Locale.GERMAN.getLanguage())) {
     throw new RuntimeException("Language is not a variant of German: " + language);
   }
   if (language.getCountries().length == 0) {
     // Without a country there is no "de_XX.dict" name to build; fail with a clear message
     // instead of an ArrayIndexOutOfBoundsException from getCountries()[0].
     throw new RuntimeException(
         "Language has no country, cannot locate its spelling dictionary: " + language);
   }
   try {
     String morfoFile = "/de/hunspell/de_" + language.getCountries()[0] + ".dict";
     if (JLanguageTool.getDataBroker().resourceExists(morfoFile)) {
       // spell data will not exist in LibreOffice/OpenOffice context
       try (InputStream stream =
               JLanguageTool.getDataBroker()
                   .getFromResourceDirAsStream("/de/hunspell/spelling.txt");
           BufferedReader br = new BufferedReader(new InputStreamReader(stream, "utf-8"))) {
         return new MorfologikMultiSpeller(morfoFile, new ExpandingReader(br), MAX_EDIT_DISTANCE);
       }
     } else {
       return null;
     }
   } catch (IOException e) {
     throw new RuntimeException("Could not set up morfologik spell checker", e);
   }
 }
Пример #7
0
  /**
   * Resolves the file-system path of a Hunspell dictionary, without its {@code .dic} suffix.
   *
   * <p>When the resource URL uses the {@code jar}, {@code vfs} or {@code bundle} protocol, the
   * {@code .dic} and {@code .aff} files are first copied into the {@code java.io.tmpdir}
   * directory under the name {@code dicName}.
   *
   * @param dicName base name used for the temporary {@code .dic}/{@code .aff} copies
   * @param originalPath resource path of the {@code .dic} file
   * @return the dictionary path without the {@code .dic} suffix, or an empty string if the
   *     resource URL cannot be converted to a URI
   * @throws IOException if copying the dictionary files fails
   */
  private String getDictionaryPath(final String dicName, final String originalPath)
      throws IOException {

    final URL dictURL = JLanguageTool.getDataBroker().getFromResourceDirAsUrl(originalPath);
    final String protocol = dictURL.getProtocol();
    // in the webstart, java EE or OSGi bundle version, we need to copy the files outside the jar
    // to the local temporary directory
    if ("jar".equals(protocol) || "vfs".equals(protocol) || "bundle".equals(protocol)) {
      final File tempDir = new File(System.getProperty("java.io.tmpdir"));
      final File tempDicFile = new File(tempDir, dicName + ".dic");
      JLanguageTool.addTemporaryFile(tempDicFile);
      try (InputStream dicStream =
          JLanguageTool.getDataBroker().getFromResourceDirAsStream(originalPath)) {
        fileCopy(dicStream, tempDicFile);
      }
      final File tempAffFile = new File(tempDir, dicName + ".aff");
      JLanguageTool.addTemporaryFile(tempAffFile);
      // The dot is escaped so that only a literal ".dic" suffix is rewritten to ".aff".
      try (InputStream affStream =
          JLanguageTool.getDataBroker()
              .getFromResourceDirAsStream(originalPath.replaceFirst("\\.dic$", ".aff"))) {
        fileCopy(affStream, tempAffFile);
      }
      return tempDir.getAbsolutePath() + "/" + dicName;
    }
    try {
      final String absolutePath = new File(dictURL.toURI()).getAbsolutePath();
      // Strip the ".dic" suffix from the absolute path.
      return absolutePath.substring(0, absolutePath.length() - ".dic".length());
    } catch (URISyntaxException e) {
      // An empty string signals that no usable dictionary path could be derived.
      return "";
    }
  }
 public AbstractCompoundRule(
     final ResourceBundle messages,
     final String fileName,
     final String withHyphenMessage,
     final String withoutHyphenMessage,
     final String withOrWithoutHyphenMessage)
     throws IOException {
   if (messages != null) {
     super.setCategory(new Category(messages.getString("category_misc")));
   }
   loadCompoundFile(JLanguageTool.getDataBroker().getFromResourceDirAsStream(fileName), "UTF-8");
   this.withHyphenMessage = withHyphenMessage;
   this.withoutHyphenMessage = withoutHyphenMessage;
   this.withOrWithoutHyphenMessage = withOrWithoutHyphenMessage;
   setLocQualityIssueType(ITSIssueType.Misspelling);
 }
  /**
   * Loads {@code /yy/confusion_sets.txt} and checks the number of entries, the per-word set
   * counts and factors, and the textual content of selected sets.
   */
  @Test
  public void testLoadWithStrictLimits() throws IOException {
    try (InputStream confusionData =
        JLanguageTool.getDataBroker().getFromResourceDirAsStream("/yy/confusion_sets.txt")) {
      final Map<String, List<ConfusionSet>> setsByWord =
          new ConfusionSetLoader().loadConfusionSet(confusionData);
      assertThat(setsByWord.size(), is(10));

      // Set counts and factors per word.
      final List<ConfusionSet> thereSets = setsByWord.get("there");
      assertThat(thereSets.size(), is(1));
      assertThat(thereSets.get(0).getFactor(), is(10L));

      final List<ConfusionSet> theirSets = setsByWord.get("their");
      assertThat(theirSets.size(), is(1));
      assertThat(theirSets.get(0).getFactor(), is(10L));

      final List<ConfusionSet> fooSets = setsByWord.get("foo");
      assertThat(fooSets.size(), is(2));
      assertThat(fooSets.get(0).getFactor(), is(5L));
      assertThat(fooSets.get(1).getFactor(), is(8L));

      final List<ConfusionSet> gooSets = setsByWord.get("goo");
      assertThat(gooSets.size(), is(2));
      assertThat(gooSets.get(0).getFactor(), is(11L));
      assertThat(gooSets.get(1).getFactor(), is(12L));
      assertThat(setsByWord.get("lol").size(), is(1));
      assertThat(setsByWord.get("something").size(), is(1));

      final List<ConfusionSet> barSets = setsByWord.get("bar");
      assertThat(barSets.size(), is(1));
      assertThat(barSets.get(0).getFactor(), is(5L));

      // Textual content of the sets.
      final Set<ConfusionString> thereMembers = thereSets.get(0).getSet();
      assertTrue(getAsString(thereMembers).contains("there - example 1"));
      assertTrue(getAsString(thereMembers).contains("their - example 2"));

      final Set<ConfusionString> theirMembers = theirSets.get(0).getSet();
      assertTrue(getAsString(theirMembers).contains("there - example 1"));
      assertTrue(getAsString(theirMembers).contains("their - example 2"));
      assertFalse(getAsString(theirMembers).contains("comment"));

      // The first "foo" set is checked for both "foo" and "bar"; the second for "baz".
      final Set<ConfusionString> firstFooMembers = fooSets.get(0).getSet();
      assertTrue(getAsString(firstFooMembers).contains("foo"));
      assertTrue(getAsString(firstFooMembers).contains("bar"));
      final Set<ConfusionString> secondFooMembers = fooSets.get(1).getSet();
      assertTrue(getAsString(secondFooMembers).contains("baz"));
    }
  }
 /**
  * Expects that constructing a {@code ManualTagger} from the resource
  * {@code /xx/added-invalid.txt} fails with an {@link IOException}.
  */
 @Test(expected = IOException.class)
 public void testInvalidFile() throws Exception {
   // The constructed tagger is deliberately discarded; only the thrown exception matters.
   new ManualTagger(
       JLanguageTool.getDataBroker().getFromResourceDirAsStream("/xx/added-invalid.txt"));
 }