/**
 * Exercises {@code rule.compareLists} with several index ranges over the non-whitespace tokens
 * of two analyzed sentences, covering both matching and non-matching cases.
 */
public void testCompareLists() throws IOException {
  final AnalyzedSentence shortSentence = langTool.getAnalyzedSentence("Hier ein Test");

  // The expected patterns that start at index 0 begin with an empty string.
  final String[] firstThree = {"", "Hier", "ein"};
  assertTrue(rule.compareLists(shortSentence.getTokensWithoutWhitespace(), 0, 2, firstThree));

  final String[] twoWords = {"Hier", "ein"};
  assertTrue(rule.compareLists(shortSentence.getTokensWithoutWhitespace(), 1, 2, twoWords));

  final String[] allTokens = {"", "Hier", "ein", "Test"};
  assertTrue(rule.compareLists(shortSentence.getTokensWithoutWhitespace(), 0, 3, allTokens));

  // Same pattern, but with an end index of 4: expected not to match.
  final String[] allTokensAgain = {"", "Hier", "ein", "Test"};
  assertFalse(rule.compareLists(shortSentence.getTokensWithoutWhitespace(), 0, 4, allTokensAgain));

  final AnalyzedSentence longSentence = langTool.getAnalyzedSentence("das Heilige Römische Reich");

  final String[] fullPhrase = {"", "das", "Heilige", "Römische", "Reich"};
  assertTrue(rule.compareLists(longSentence.getTokensWithoutWhitespace(), 0, 4, fullPhrase));

  // Index range 8..11 with the same pattern: expected not to match.
  final String[] fullPhraseAgain = {"", "das", "Heilige", "Römische", "Reich"};
  assertFalse(rule.compareLists(longSentence.getTokensWithoutWhitespace(), 8, 11, fullPhraseAgain));
}
/**
 * Asserts that the rule reports exactly one match for the given sentence and that the message
 * of that match contains the expected substring.
 *
 * <p>The sentence is analyzed and matched only once, so the count assertion and the message
 * check are guaranteed to look at the same result.
 *
 * @param s the sentence to check
 * @param expectedErrorSubstring text that must occur in the reported error message
 * @throws IOException if analyzing or matching the sentence fails
 */
private void assertBadWithMessage(String s, String expectedErrorSubstring) throws IOException {
  final RuleMatch[] matches = rule.match(langTool.getAnalyzedSentence(s));
  assertEquals(1, matches.length);
  final String errorMessage = matches[0].getMessage();
  assertTrue(
      "Got error '" + errorMessage + "', expected substring '" + expectedErrorSubstring + "'",
      errorMessage.contains(expectedErrorSubstring));
}
/**
 * Runs the rule against sentences that must be accepted, sentences that must be flagged, and
 * finally checks match counts and the order of suggested replacements for selected inputs.
 */
public void testRule() throws IOException {
  // Sentences that must not produce a match.
  final String[] accepted = {
    // disabled: "els etiquetadors sobre els etiquetats."
    "tot tenyit amb llum de nostàlgia",
    "Ho van fer per duplicat.",
    "Assecat el braç del riu",
    "el llibre empaquetat",
    "un resultat equilibrat",
    "el nostre equip era bastant equilibrat",
    "un llibre ben empaquetat",
    "l'informe filtrat pel ministre",
    "L'informe filtrat és terrible",
    "ha liderat la batalla",
    "Els tinc empaquetats",
    "amb tractament unitari i equilibrat",
    "Processat després de la mort de Carles II",
    "Processat diverses vegades",
    "moltes vegades empaquetat amb pressa",
    "és llavors embotellat i llançat al mercat",
    "la comercialització de vi embotellat amb les firmes comercials",
    "eixia al mercat el vi blanc embotellat amb la marca",
    "que arribi a un equilibrat matrimoni",
    "És un cafè amb molt de cos i molt equilibrat.",
    "i per tant etiquetat com a observat",
    "Molt equilibrat en les seves característiques",
    "filtrat per Wikileaks",
    "una vegada filtrat",
    "no equilibrat"
  };
  for (final String sentence : accepted) {
    assertCorrect(sentence);
  }

  // Sentences that must be reported as errors.
  final String[] rejected = {
    "Assecat del braç del riu",
    "Cal vigilar el filtrat del vi",
    "El procés d'empaquetat",
    "Els equilibrats de les rodes",
    // disabled: "Duplicat de claus"
    "El procés d'etiquetat de les ampolles",
    "El rentat de cotes"
  };
  for (final String sentence : rejected) {
    assertIncorrect(sentence);
  }

  // Two separate matches are expected in one sentence.
  final RuleMatch[] doubleHit =
      rule.match(langTool.getAnalyzedSentence("El repicat i el rejuntat."));
  assertEquals(2, doubleHit.length);

  // Singular form: one match, suggestions in this exact order.
  final RuleMatch[] singular =
      rule.match(langTool.getAnalyzedSentence("El procés de relligat dels llibres."));
  assertEquals(1, singular.length);
  final String[] singularSuggestions = {"relligadura", "relligament", "relligada"};
  for (int i = 0; i < singularSuggestions.length; i++) {
    assertEquals(singularSuggestions[i], singular[0].getSuggestedReplacements().get(i));
  }

  // Plural form: one match, suggestions in this exact order.
  final RuleMatch[] plural = rule.match(langTool.getAnalyzedSentence("Els rentats de cervell."));
  assertEquals(1, plural.length);
  final String[] pluralSuggestions = {"rentades", "rentatges", "rentaments"};
  for (int i = 0; i < pluralSuggestions.length; i++) {
    assertEquals(pluralSuggestions[i], plural[0].getSuggestedReplacements().get(i));
  }
}
@Test public void testRule() throws IOException { final MixedAlphabetsRule rule = new MixedAlphabetsRule(TestTools.getMessages("uk")); final JLanguageTool langTool = new JLanguageTool(new Ukrainian()); // correct sentences: assertEquals(0, rule.match(langTool.getAnalyzedSentence("сміття")).length); // incorrect sentences: RuleMatch[] matches = rule.match(langTool.getAnalyzedSentence("смi\u00ADття")); // check match positions: assertEquals(1, matches.length); assertEquals(Arrays.asList("сміття"), matches[0].getSuggestedReplacements()); }
/**
 * Splits the given text into sentences, analyzes each one and prints the result of
 * {@code getSentence} for it to standard output, one line per sentence.
 *
 * @param contents the text to process
 * @param lt the tool used for sentence splitting and analysis
 * @throws IOException if analyzing a sentence fails
 */
private static void tagText(final String contents, final JLanguageTool lt) throws IOException {
  for (final String sentence : lt.sentenceTokenize(contents)) {
    System.out.println(getSentence(lt.getAnalyzedSentence(sentence)));
  }
}
/**
 * Runs the chunker over one analyzed Ukrainian sentence and checks that the readings of the
 * tokens at index 1 and index 4 contain the opening and closing {@code adv} markers.
 */
public void testChunker() throws Exception {
  final JLanguageTool ukrainianTool = new JLanguageTool(new Ukrainian());
  final AnalyzedSentence input = ukrainianTool.getAnalyzedSentence("Для годиться.");
  final AnalyzedTokenReadings[] chunked = chunker.disambiguate(input).getTokens();

  // Opening marker is expected on the token at index 1.
  final String openingReadings = chunked[1].getReadings().toString();
  assertTrue(openingReadings.contains("<adv>"));

  // Closing marker is expected on the token at index 4.
  final String closingReadings = chunked[4].getReadings().toString();
  assertTrue(closingReadings.contains("</adv>"));
}
/**
 * Asserts that the rule reports exactly one match for the sentence and, when expected
 * suggestions are supplied, that the match offers exactly those suggestions in that order.
 *
 * @param s the sentence to check
 * @param expectedSuggestions the expected replacements; omit to skip the suggestion check
 * @throws IOException if analyzing or matching the sentence fails
 */
private void assertBad(String s, String... expectedSuggestions) throws IOException {
  final RuleMatch[] found = rule.match(langTool.getAnalyzedSentence(s));
  assertEquals("Did not find one match in sentence '" + s + "'", 1, found.length);
  if (expectedSuggestions.length == 0) {
    // Nothing more to verify when the caller gave no suggestions.
    return;
  }
  final List<String> actual = found[0].getSuggestedReplacements();
  assertThat(actual, is(Arrays.asList(expectedSuggestions)));
}
/**
 * Checks the start and end offsets of the first match that {@link AccentuationCheckRule}
 * reports for a Catalan sentence.
 */
public void testPositions() throws IOException {
  final AccentuationCheckRule accentRule =
      new AccentuationCheckRule(TestTools.getEnglishMessages());
  final JLanguageTool catalanTool = new JLanguageTool(new Catalan());

  final RuleMatch firstMatch =
      accentRule.match(catalanTool.getAnalyzedSentence("Són circumstancies extraordinàries."))[0];

  // The expected span is [4, 18).
  assertEquals(4, firstMatch.getFromPos());
  assertEquals(18, firstMatch.getToPos());
}
@Test public void testRule() throws IOException { DifferentPunctuationRule rule = new DifferentPunctuationRule(); RuleMatch[] matches; JLanguageTool srcLangTool = new JLanguageTool(TestTools.getDemoLanguage()); JLanguageTool trgLangTool = new JLanguageTool(new FakeLanguage()); rule.setSourceLanguage(TestTools.getDemoLanguage()); // correct sentences: matches = rule.match( srcLangTool.getAnalyzedSentence("This is a test sentence!"), trgLangTool.getAnalyzedSentence("C'est la vie!")); assertEquals(0, matches.length); matches = rule.match( srcLangTool.getAnalyzedSentence("one sentence"), trgLangTool.getAnalyzedSentence("jedno zdanie")); assertEquals(0, matches.length); // incorrect sentences: matches = rule.match( srcLangTool.getAnalyzedSentence("This this is a test sentence."), trgLangTool.getAnalyzedSentence("This this is a test sentence!")); assertEquals(1, matches.length); }
private void assertBad(String s, int n, String... expectedSuggestions) throws IOException { RuleMatch[] matches = rule.match(langTool.getAnalyzedSentence(s)); assertEquals("Did not find " + n + " match(es) in sentence '" + s + "'", n, matches.length); if (expectedSuggestions.length > 0) { RuleMatch match = matches[0]; // When two errors are reported by the rule (so TODO above), it might happen that the first // match does not have the suggestions, but the second one if (matches.length > 1 && match.getSuggestedReplacements().size() == 0) { match = matches[1]; } List<String> suggestions = match.getSuggestedReplacements(); assertThat(suggestions, is(Arrays.asList(expectedSuggestions))); } }
/**
 * Loads every document referenced by the search hits, checks its sentence with the given tool
 * and collects those sentences for which the check returns at least one rule match.
 *
 * @param indexSearcher the searcher used to load the hit documents
 * @param topDocs the search hits to examine
 * @param languageTool the tool used to check and analyze each sentence
 * @return one entry per sentence with at least one rule match, in hit order
 * @throws IOException if loading a document or checking a sentence fails
 */
private List<MatchingSentence> findMatchingSentences(
    IndexSearcher indexSearcher, TopDocs topDocs, JLanguageTool languageTool)
    throws IOException {
  final List<MatchingSentence> collected = new ArrayList<>();
  for (ScoreDoc hit : topDocs.scoreDocs) {
    final Document storedDoc = indexSearcher.doc(hit.doc);
    final String text = storedDoc.get(FIELD_NAME);
    final List<RuleMatch> problems = languageTool.check(text);
    if (problems.isEmpty()) {
      // Sentences without any rule match are not reported.
      continue;
    }
    collected.add(
        new MatchingSentence(
            text,
            storedDoc.get(SOURCE_FIELD_NAME),
            languageTool.getAnalyzedSentence(text),
            problems));
  }
  return collected;
}
/**
 * Asserts that the rule reports no match for the given sentence.
 *
 * @param s the sentence to check
 * @throws IOException if analyzing or matching the sentence fails
 */
private void assertGood(String s) throws IOException {
  final int matchCount = rule.match(langTool.getAnalyzedSentence(s)).length;
  assertEquals("Found unexpected match in sentence '" + s + "'", 0, matchCount);
}
/**
 * Asserts that the rule reports exactly one match for the given input.
 *
 * @param input the sentence to check
 * @throws IOException if analyzing or matching the sentence fails
 */
private void assertBad(String input) throws IOException {
  final int matchCount = rule.match(langTool.getAnalyzedSentence(input)).length;
  assertEquals("Did not find expected error in: '" + input + "'", 1, matchCount);
}
/**
 * Asserts that the rule reports no match for the given sentence.
 *
 * @param sentence the sentence to check
 * @throws IOException if analyzing or matching the sentence fails
 */
private void assertCorrect(String sentence) throws IOException {
  assertEquals(0, rule.match(langTool.getAnalyzedSentence(sentence)).length);
}
/**
 * Asserts that the rule reports exactly {@code n} matches for the given sentence.
 *
 * @param s the sentence to check
 * @param n the expected number of matches
 * @throws IOException if analyzing or matching the sentence fails
 */
private void assertBad(String s, int n) throws IOException {
  final RuleMatch[] found = rule.match(langTool.getAnalyzedSentence(s));
  assertEquals(n, found.length);
}
public void testRule() throws IOException { // correct sentences: assertCorrect("(l'execució a mans d'\"especialistes\")"); assertCorrect("\"Vine\", li va dir."); assertCorrect("(Una frase de prova)."); assertCorrect("Aquesta és la paraula 'prova'."); assertCorrect("This is a sentence with a smiley :-)"); assertCorrect("This is a sentence with a smiley ;-) and so on..."); assertCorrect("Aquesta és l'hora de les decisions."); assertCorrect("Aquesta és l’hora de les decisions."); assertCorrect("(fig. 20)"); assertCorrect("\"Sóc la teva filla. El corcó no et rosegarà més.\"\n\n"); assertCorrect("–\"Club dels llagoters\" –va repetir en Ron."); assertCorrect("—\"Club dels llagoters\" –va repetir en Ron."); assertCorrect("»Això em porta a demanar-t'ho."); assertCorrect("»Això em porta (sí) a demanar-t'ho."); assertCorrect("al capítol 12 \"Llavors i fruits oleaginosos\""); assertCorrect("\"Per què serveixen les forquilles?\" i aquest respon \"per menjar\"."); assertCorrect("És a 60º 50' 23\""); assertCorrect("És a 60º 50' 23'"); assertCorrect("60° 50' 23'"); assertCorrect("60° 50'"); // assertCorrect("el grau en 60 parts iguals, tenim el minut (1'):"); // assertCorrect("el minut en 60 parts iguals, tenim el segon (1\"):"); // assertCorrect("The screen is 20\" wide."); assertCorrect("This is a [test] sentence..."); assertCorrect( "The plight of Tamil refugees caused a surge of support from most of the Tamil political parties.[90]"); assertCorrect("This is what he said: \"We believe in freedom. 
This is what we do.\""); assertCorrect("(([20] [20] [20]))"); // test for a case that created a false alarm after disambiguation assertCorrect("This is a \"special test\", right?"); // numerical bullets assertCorrect("We discussed this in Chapter 1)."); assertCorrect("The jury recommended that: (1) Four additional deputies be employed."); assertCorrect("We discussed this in section 1a)."); assertCorrect("We discussed this in section iv)."); // inches exception shouldn't match " here: assertCorrect( "In addition, the government would pay a $1,000 \"cost of education\" grant to the schools."); // assertCorrect("Paradise lost to the alleged water needs of Texas' big cities Thursday."); assertCorrect("Porta'l cap ací."); assertCorrect("Porta-me'n cinquanta!"); // incorrect sentences: assertIncorrect("Ploraria.\""); assertIncorrect("Aquesta és l555’hora de les decisions."); assertIncorrect("Vine\", li va dir."); assertIncorrect("Aquesta és l‘hora de les decisions."); assertIncorrect("(This is a test sentence."); assertIncorrect("This is a test with an apostrophe &'."); assertIncorrect("&'"); assertIncorrect("!'"); assertIncorrect("What?'"); // this is currently considered incorrect... although people often use smileys this way: assertIncorrect("Some text (and some funny remark :-) with more text to follow"); RuleMatch[] matches; matches = rule.match(langTool.getAnalyzedSentence("(This is a test” sentence.")); assertEquals(2, matches.length); matches = rule.match(langTool.getAnalyzedSentence("This [is (a test} sentence.")); assertEquals(3, matches.length); }