/**
 * Builds a {@link CombiningTagger} from two {@link ManualTagger}s that are loaded
 * from the resources {@code /xx/added1.txt} and {@code /xx/added2.txt}.
 *
 * @param overwrite handed through unchanged as the third {@link CombiningTagger} constructor argument
 * @return a new tagger wrapping the {@code added1} tagger first and the {@code added2} tagger second
 * @throws IOException propagated from resource access or from constructing a {@link ManualTagger}
 */
private CombiningTagger getCombiningTagger(boolean overwrite) throws IOException {
  ManualTagger first = new ManualTagger(
      JLanguageTool.getDataBroker().getFromResourceDirAsStream("/xx/added1.txt"));
  ManualTagger second = new ManualTagger(
      JLanguageTool.getDataBroker().getFromResourceDirAsStream("/xx/added2.txt"));
  return new CombiningTagger(first, second, overwrite);
}
@Override protected void init() throws IOException { super.init(); final String langCountry; if (language.getCountries().length > 0) { langCountry = language.getShortName() + "_" + language.getCountries()[0]; } else { langCountry = language.getShortName(); } final String shortDicPath = "/" + language.getShortName() + "/hunspell/" + langCountry + ".dic"; String wordChars = ""; // set dictionary only if there are dictionary files: if (JLanguageTool.getDataBroker().resourceExists(shortDicPath)) { final String path = getDictionaryPath(langCountry, shortDicPath); if ("".equals(path)) { hunspellDict = null; } else { hunspellDict = Hunspell.getInstance().getDictionary(path); if (!"".equals(hunspellDict.getWordChars())) { wordChars = "(?![" + hunspellDict.getWordChars().replace("-", "\\-") + "])"; } addIgnoreWords(); } } nonWordPattern = Pattern.compile(wordChars + NON_ALPHABETIC); needsInit = false; }
/**
 * Creates a speller with the given maximum edit distance.
 *
 * @param filename path in classpath to morfologik dictionary
 * @param conversionLocale used when transforming the word to lowercase;
 *        if {@code null}, {@link Locale#getDefault()} is used instead
 * @param maxEditDistance maximum edit distance passed to the underlying {@code Speller};
 *        must be greater than 0
 * @throws IllegalArgumentException if {@code maxEditDistance} is not greater than 0
 * @throws IOException propagated from resolving or reading the dictionary
 */
public MorfologikSpeller(String filename, Locale conversionLocale, int maxEditDistance) throws IOException {
  // Validate before touching any resource so that a bad argument fails fast.
  if (maxEditDistance <= 0) {
    throw new IllegalArgumentException("maxEditDistance must be > 0: " + maxEditDistance);
  }
  final URL url = JLanguageTool.getDataBroker().getFromResourceDirAsUrl(filename);
  dictionary = Dictionary.read(url);
  speller = new Speller(dictionary, maxEditDistance);
  this.conversionLocale = conversionLocale != null ? conversionLocale : Locale.getDefault();
}
/**
 * Adds the words that must never be flagged to {@code hunspellDict}: the two LanguageTool
 * product names plus every line of the ignore file that does not start with {@code #}.
 *
 * <p>Only lines beginning with {@code #} are skipped; all other lines, including empty
 * ones, are passed to {@code addWord} as they are.
 *
 * @throws IOException propagated from resolving or reading the ignore file
 */
private void addIgnoreWords() throws IOException {
  hunspellDict.addWord(SpellingCheckRule.LANGUAGETOOL);
  hunspellDict.addWord(SpellingCheckRule.LANGUAGETOOL_FX);

  URL ignoreFile = JLanguageTool.getDataBroker().getFromResourceDirAsUrl(getIgnoreFileName());
  for (String line : Resources.readLines(ignoreFile, Charsets.UTF_8)) {
    if (line.startsWith("#")) {
      // comment line in the ignore file
      continue;
    }
    hunspellDict.addWord(line);
  }
}
private void initializeIfRequired() throws IOException { // Lazy initialize fields when needed and only once. if (manualTagger == null) { synchronized (this) { if (manualTagger == null) { manualTagger = new ManualTagger( JLanguageTool.getDataBroker().getFromResourceDirAsStream(USER_DICT_FILENAME)); } } } }
@Nullable private static MorfologikMultiSpeller getSpeller(Language language) { if (!language.getShortName().equals(Locale.GERMAN.getLanguage())) { throw new RuntimeException("Language is not a variant of German: " + language); } try { String morfoFile = "/de/hunspell/de_" + language.getCountries()[0] + ".dict"; if (JLanguageTool.getDataBroker().resourceExists(morfoFile)) { // spell data will not exist in LibreOffice/OpenOffice context try (InputStream stream = JLanguageTool.getDataBroker() .getFromResourceDirAsStream("/de/hunspell/spelling.txt"); BufferedReader br = new BufferedReader(new InputStreamReader(stream, "utf-8"))) { return new MorfologikMultiSpeller(morfoFile, new ExpandingReader(br), MAX_EDIT_DISTANCE); } } else { return null; } } catch (IOException e) { throw new RuntimeException("Could not set up morfologik spell checker", e); } }
private String getDictionaryPath(final String dicName, final String originalPath) throws IOException { final URL dictURL = JLanguageTool.getDataBroker().getFromResourceDirAsUrl(originalPath); String dictionaryPath; // in the webstart, java EE or OSGi bundle version, we need to copy the files outside the jar // to the local temporary directory if ("jar".equals(dictURL.getProtocol()) || "vfs".equals(dictURL.getProtocol()) || "bundle".equals(dictURL.getProtocol())) { final File tempDir = new File(System.getProperty("java.io.tmpdir")); File tempDicFile = new File(tempDir, dicName + ".dic"); JLanguageTool.addTemporaryFile(tempDicFile); try (InputStream dicStream = JLanguageTool.getDataBroker().getFromResourceDirAsStream(originalPath)) { fileCopy(dicStream, tempDicFile); } File tempAffFile = new File(tempDir, dicName + ".aff"); JLanguageTool.addTemporaryFile(tempAffFile); try (InputStream affStream = JLanguageTool.getDataBroker() .getFromResourceDirAsStream(originalPath.replaceFirst(".dic$", ".aff"))) { fileCopy(affStream, tempAffFile); } dictionaryPath = tempDir.getAbsolutePath() + "/" + dicName; } else { final int suffixLength = ".dic".length(); try { dictionaryPath = new File(dictURL.toURI()).getAbsolutePath(); dictionaryPath = dictionaryPath.substring(0, dictionaryPath.length() - suffixLength); } catch (URISyntaxException e) { return ""; } } return dictionaryPath; }
public AbstractCompoundRule( final ResourceBundle messages, final String fileName, final String withHyphenMessage, final String withoutHyphenMessage, final String withOrWithoutHyphenMessage) throws IOException { if (messages != null) { super.setCategory(new Category(messages.getString("category_misc"))); } loadCompoundFile(JLanguageTool.getDataBroker().getFromResourceDirAsStream(fileName), "UTF-8"); this.withHyphenMessage = withHyphenMessage; this.withoutHyphenMessage = withoutHyphenMessage; this.withOrWithoutHyphenMessage = withOrWithoutHyphenMessage; setLocQualityIssueType(ITSIssueType.Misspelling); }
/**
 * Loads {@code /yy/confusion_sets.txt} and checks the number of map entries, the number of
 * confusion sets per word, their factors and selected set contents.
 */
@Test
public void testLoadWithStrictLimits() throws IOException {
  try (InputStream confusionSetStream =
           JLanguageTool.getDataBroker().getFromResourceDirAsStream("/yy/confusion_sets.txt")) {
    ConfusionSetLoader confusionSetLoader = new ConfusionSetLoader();
    Map<String, List<ConfusionSet>> setsByWord = confusionSetLoader.loadConfusionSet(confusionSetStream);
    assertThat(setsByWord.size(), is(10));

    // --- sizes and factors ---
    List<ConfusionSet> thereSets = setsByWord.get("there");
    assertThat(thereSets.size(), is(1));
    assertThat(thereSets.get(0).getFactor(), is(10L));

    List<ConfusionSet> theirSets = setsByWord.get("their");
    assertThat(theirSets.size(), is(1));
    assertThat(theirSets.get(0).getFactor(), is(10L));

    List<ConfusionSet> fooSets = setsByWord.get("foo");
    assertThat(fooSets.size(), is(2));
    assertThat(fooSets.get(0).getFactor(), is(5L));
    assertThat(fooSets.get(1).getFactor(), is(8L));

    List<ConfusionSet> gooSets = setsByWord.get("goo");
    assertThat(gooSets.size(), is(2));
    assertThat(gooSets.get(0).getFactor(), is(11L));
    assertThat(gooSets.get(1).getFactor(), is(12L));

    assertThat(setsByWord.get("lol").size(), is(1));
    assertThat(setsByWord.get("something").size(), is(1));

    List<ConfusionSet> barSets = setsByWord.get("bar");
    assertThat(barSets.size(), is(1));
    assertThat(barSets.get(0).getFactor(), is(5L));

    // --- set contents ---
    Set<ConfusionString> thereStrings = thereSets.get(0).getSet();
    assertTrue(getAsString(thereStrings).contains("there - example 1"));
    assertTrue(getAsString(thereStrings).contains("their - example 2"));

    Set<ConfusionString> theirStrings = theirSets.get(0).getSet();
    assertTrue(getAsString(theirStrings).contains("there - example 1"));
    assertTrue(getAsString(theirStrings).contains("their - example 2"));
    assertFalse(getAsString(theirStrings).contains("comment"));

    // Both "foo" and "bar" are looked up in the first set registered under "foo".
    Set<ConfusionString> firstFooStrings = fooSets.get(0).getSet();
    assertTrue(getAsString(firstFooStrings).contains("foo"));
    assertTrue(getAsString(firstFooStrings).contains("bar"));

    Set<ConfusionString> secondFooStrings = fooSets.get(1).getSet();
    assertTrue(getAsString(secondFooStrings).contains("baz"));
  }
}
/**
 * Constructing a {@code ManualTagger} from the resource {@code /xx/added-invalid.txt}
 * is expected to fail with an {@code IOException}; the test passes only if that
 * exception is thrown.
 */
@Test(expected = IOException.class) public void testInvalidFile() throws Exception { new ManualTagger( JLanguageTool.getDataBroker().getFromResourceDirAsStream("/xx/added-invalid.txt")); }