Esempio n. 1
0
  /**
   * Checks {@code rule.compareLists} against the non-whitespace tokens of two German sentences.
   *
   * <p>Every call passes the token array, two integer positions and the expected token texts. The
   * leading {@code ""} in several expectations presumably corresponds to a sentence-start token,
   * and the two integers look like an inclusive index window (TODO confirm against
   * {@code compareLists}).
   *
   * @throws IOException propagated from the sentence analysis
   */
  public void testCompareLists() throws IOException {
    final AnalyzedSentence shortSentence = langTool.getAnalyzedSentence("Hier ein Test");

    final boolean leadingThreeAccepted =
        rule.compareLists(
            shortSentence.getTokensWithoutWhitespace(), 0, 2, new String[] {"", "Hier", "ein"});
    assertTrue(leadingThreeAccepted);

    final boolean middleTwoAccepted =
        rule.compareLists(
            shortSentence.getTokensWithoutWhitespace(), 1, 2, new String[] {"Hier", "ein"});
    assertTrue(middleTwoAccepted);

    final boolean wholeSentenceAccepted =
        rule.compareLists(
            shortSentence.getTokensWithoutWhitespace(),
            0,
            3,
            new String[] {"", "Hier", "ein", "Test"});
    assertTrue(wholeSentenceAccepted);

    // Same expectation as above, but with the second position moved from 3 to 4.
    final boolean overlongWindowAccepted =
        rule.compareLists(
            shortSentence.getTokensWithoutWhitespace(),
            0,
            4,
            new String[] {"", "Hier", "ein", "Test"});
    assertFalse(overlongWindowAccepted);

    final AnalyzedSentence longSentence =
        langTool.getAnalyzedSentence("das Heilige Römische Reich");

    final boolean fullPhraseAccepted =
        rule.compareLists(
            longSentence.getTokensWithoutWhitespace(),
            0,
            4,
            new String[] {"", "das", "Heilige", "Römische", "Reich"});
    assertTrue(fullPhraseAccepted);

    // Positions 8 and 11 are larger than the number of expected texts supplied.
    final boolean farWindowAccepted =
        rule.compareLists(
            longSentence.getTokensWithoutWhitespace(),
            8,
            11,
            new String[] {"", "das", "Heilige", "Römische", "Reich"});
    assertFalse(farWindowAccepted);
  }
 /**
  * Asserts that the rule reports exactly one match for {@code s} and that the message of that
  * match contains {@code expectedErrorSubstring}.
  *
  * <p>The sentence is analyzed and matched twice: once for the count check and once to read the
  * message.
  *
  * @param s sentence to analyze and run through the rule
  * @param expectedErrorSubstring text that must occur in the match message
  * @throws IOException propagated from analysis or matching
  */
 private void assertBadWithMessage(String s, String expectedErrorSubstring) throws IOException {
   final int matchCount = rule.match(langTool.getAnalyzedSentence(s)).length;
   assertEquals(1, matchCount);

   final String actualMessage = rule.match(langTool.getAnalyzedSentence(s))[0].getMessage();
   final String failureText =
       "Got error '" + actualMessage + "', expected substring '" + expectedErrorSubstring + "'";
   assertTrue(failureText, actualMessage.contains(expectedErrorSubstring));
 }
  /**
   * Runs the rule over Catalan sentences: first those expected to produce no match, then those
   * expected to be flagged, and finally three sentences whose match count and suggested
   * replacements are checked explicitly.
   *
   * @throws IOException propagated from the assertion helpers or from sentence analysis
   */
  public void testRule() throws IOException {

    // Sentences that must not be flagged.
    // Disabled case: assertCorrect("els etiquetadors sobre els etiquetats.");
    assertCorrect("tot tenyit amb llum de nostàlgia");
    assertCorrect("Ho van fer per duplicat.");
    assertCorrect("Assecat el braç del riu");
    assertCorrect("el llibre empaquetat");
    assertCorrect("un resultat equilibrat");
    assertCorrect("el nostre equip era bastant equilibrat");
    assertCorrect("un llibre ben empaquetat");
    assertCorrect("l'informe filtrat pel ministre");
    assertCorrect("L'informe filtrat és terrible");
    assertCorrect("ha liderat la batalla");
    assertCorrect("Els tinc empaquetats");
    assertCorrect("amb tractament unitari i equilibrat");
    assertCorrect("Processat després de la mort de Carles II");
    assertCorrect("Processat diverses vegades");
    assertCorrect("moltes vegades empaquetat amb pressa");
    assertCorrect("és llavors embotellat i llançat al mercat");
    assertCorrect("la comercialització de vi embotellat amb les firmes comercials");
    assertCorrect("eixia al mercat el vi blanc embotellat amb la marca");
    assertCorrect("que arribi a un equilibrat matrimoni");
    assertCorrect("És un cafè amb molt de cos i molt equilibrat.");
    assertCorrect("i per tant etiquetat com a observat");
    assertCorrect("Molt equilibrat en les seves característiques");
    assertCorrect("filtrat per Wikileaks");
    assertCorrect("una vegada filtrat");
    assertCorrect("no equilibrat");

    // Sentences that must be flagged.
    assertIncorrect("Assecat del braç del riu");
    assertIncorrect("Cal vigilar el filtrat del vi");
    assertIncorrect("El procés d'empaquetat");
    assertIncorrect("Els equilibrats de les rodes");
    // Disabled case: assertIncorrect("Duplicat de claus");
    assertIncorrect("El procés d'etiquetat de les ampolles");
    assertIncorrect("El rentat de cotes");

    // Two flagged words in a single sentence.
    final RuleMatch[] repicatMatches =
        rule.match(langTool.getAnalyzedSentence("El repicat i el rejuntat."));
    assertEquals(2, repicatMatches.length);

    // The first three suggestions must come back in exactly this order.
    final RuleMatch[] relligatMatches =
        rule.match(langTool.getAnalyzedSentence("El procés de relligat dels llibres."));
    assertEquals(1, relligatMatches.length);
    final String[] relligatSuggestions = {"relligadura", "relligament", "relligada"};
    for (int i = 0; i < relligatSuggestions.length; i++) {
      assertEquals(relligatSuggestions[i], relligatMatches[0].getSuggestedReplacements().get(i));
    }

    // Same ordering check for a plural form.
    final RuleMatch[] rentatsMatches =
        rule.match(langTool.getAnalyzedSentence("Els rentats de cervell."));
    assertEquals(1, rentatsMatches.length);
    final String[] rentatsSuggestions = {"rentades", "rentatges", "rentaments"};
    for (int i = 0; i < rentatsSuggestions.length; i++) {
      assertEquals(rentatsSuggestions[i], rentatsMatches[0].getSuggestedReplacements().get(i));
    }
  }
  /**
   * Verifies that {@code MixedAlphabetsRule} leaves the word "сміття" alone and reports exactly one
   * match, with "сміття" as the only suggestion, for a variant of it containing a soft hyphen
   * (U+00AD).
   *
   * @throws IOException propagated from sentence analysis or matching
   */
  @Test
  public void testRule() throws IOException {
    MixedAlphabetsRule mixedAlphabetsRule = new MixedAlphabetsRule(TestTools.getMessages("uk"));
    JLanguageTool ukrainianTool = new JLanguageTool(new Ukrainian());

    // Clean input: no match expected.
    RuleMatch[] cleanMatches = mixedAlphabetsRule.match(ukrainianTool.getAnalyzedSentence("сміття"));
    assertEquals(0, cleanMatches.length);

    // Input with an embedded soft hyphen; presumably the 'i' before it is a Latin letter
    // (TODO confirm). Exactly one match carrying the corrected word is expected.
    RuleMatch[] mixedMatches =
        mixedAlphabetsRule.match(ukrainianTool.getAnalyzedSentence("смi\u00ADття"));
    assertEquals(1, mixedMatches.length);
    assertEquals(Arrays.asList("сміття"), mixedMatches[0].getSuggestedReplacements());
  }
 /**
  * Splits {@code contents} into sentences with {@code lt}, analyzes each sentence and prints the
  * result of {@code getSentence} for it to standard output, one line per sentence.
  *
  * @param contents text to split and analyze
  * @param lt tool used for both sentence splitting and analysis
  * @throws IOException propagated from the analysis
  */
 private static void tagText(final String contents, final JLanguageTool lt) throws IOException {
   for (final String sentence : lt.sentenceTokenize(contents)) {
     final AnalyzedSentence analyzed = lt.getAnalyzedSentence(sentence);
     System.out.println(getSentence(analyzed));
   }
 }
  /**
   * Runs {@code chunker.disambiguate} on a short Ukrainian sentence and checks that the readings
   * of token 1 contain {@code <adv>} and the readings of token 4 contain {@code </adv>}.
   *
   * <p>NOTE(review): the input has two consecutive spaces between the words; the token indices
   * used below presumably rely on that (TODO confirm before normalizing the literal).
   *
   * @throws Exception propagated from analysis or disambiguation
   */
  public void testChunker() throws Exception {
    AnalyzedSentence input =
        new JLanguageTool(new Ukrainian()).getAnalyzedSentence("Для  годиться.");
    AnalyzedTokenReadings[] chunked = chunker.disambiguate(input).getTokens();

    String openingReadings = chunked[1].getReadings().toString();
    assertTrue(openingReadings.contains("<adv>"));

    String closingReadings = chunked[4].getReadings().toString();
    assertTrue(closingReadings.contains("</adv>"));
  }
 /**
  * Asserts that the rule reports exactly one match for {@code s}. When expected suggestions are
  * given, also asserts that the match's suggested replacements equal them, in order.
  *
  * @param s sentence to analyze and run through the rule
  * @param expectedSuggestions expected replacements; pass none to skip the suggestion check
  * @throws IOException propagated from analysis or matching
  */
 private void assertBad(String s, String... expectedSuggestions) throws IOException {
   RuleMatch[] found = rule.match(langTool.getAnalyzedSentence(s));
   assertEquals("Did not find one match in sentence '" + s + "'", 1, found.length);
   if (expectedSuggestions.length == 0) {
     // Caller only cares about the match count.
     return;
   }
   List<String> actual = found[0].getSuggestedReplacements();
   assertThat(actual, is(Arrays.asList(expectedSuggestions)));
 }
  /**
   * Checks the character span reported by {@code AccentuationCheckRule} for the first match in a
   * Catalan sentence: it must start at offset 4 and end at offset 18.
   *
   * <p>The match array is indexed without a prior length check, so an empty result surfaces as an
   * {@code ArrayIndexOutOfBoundsException} rather than an assertion failure.
   *
   * <p>NOTE(review): the rule is built with {@code TestTools.getEnglishMessages()} although the
   * analyzed language is Catalan; confirm this is intended.
   *
   * @throws IOException propagated from sentence analysis or matching
   */
  public void testPositions() throws IOException {
    final AccentuationCheckRule accentRule =
        new AccentuationCheckRule(TestTools.getEnglishMessages());
    final JLanguageTool catalanTool = new JLanguageTool(new Catalan());

    final RuleMatch[] found =
        accentRule.match(catalanTool.getAnalyzedSentence("Són circumstancies extraordinàries."));
    final RuleMatch first = found[0];
    assertEquals(4, first.getFromPos());
    assertEquals(18, first.getToPos());
  }
  /**
   * Exercises {@code DifferentPunctuationRule} on source/target sentence pairs: two pairs that
   * must yield no match and one pair, ending in "." versus "!", that must yield exactly one.
   *
   * @throws IOException propagated from sentence analysis or matching
   */
  @Test
  public void testRule() throws IOException {
    DifferentPunctuationRule punctuationRule = new DifferentPunctuationRule();
    JLanguageTool sourceTool = new JLanguageTool(TestTools.getDemoLanguage());
    JLanguageTool targetTool = new JLanguageTool(new FakeLanguage());
    punctuationRule.setSourceLanguage(TestTools.getDemoLanguage());

    // Both sentences end with "!": nothing to report.
    RuleMatch[] sameEndingMatches =
        punctuationRule.match(
            sourceTool.getAnalyzedSentence("This is a test sentence!"),
            targetTool.getAnalyzedSentence("C'est la vie!"));
    assertEquals(0, sameEndingMatches.length);

    // Neither sentence has final punctuation: nothing to report.
    RuleMatch[] noPunctuationMatches =
        punctuationRule.match(
            sourceTool.getAnalyzedSentence("one sentence"),
            targetTool.getAnalyzedSentence("jedno zdanie"));
    assertEquals(0, noPunctuationMatches.length);

    // Source ends with "." while the target ends with "!": one match expected.
    RuleMatch[] differentEndingMatches =
        punctuationRule.match(
            sourceTool.getAnalyzedSentence("This this is a test sentence."),
            targetTool.getAnalyzedSentence("This this is a test sentence!"));
    assertEquals(1, differentEndingMatches.length);
  }
 /**
  * Asserts that the rule reports exactly {@code n} matches for {@code s}. When expected
  * suggestions are given, also asserts that one match's suggested replacements equal them.
  *
  * <p>The first match is used for the comparison, unless there is more than one match and the
  * first one has no suggestions; in that case the second match is used.
  *
  * @param s sentence to analyze and run through the rule
  * @param n expected number of matches
  * @param expectedSuggestions expected replacements; pass none to skip the suggestion check
  * @throws IOException propagated from analysis or matching
  */
 private void assertBad(String s, int n, String... expectedSuggestions) throws IOException {
   RuleMatch[] found = rule.match(langTool.getAnalyzedSentence(s));
   assertEquals("Did not find " + n + " match(es) in sentence '" + s + "'", n, found.length);
   if (expectedSuggestions.length == 0) {
     return;
   }
   // With two reported errors the suggestions may sit on the second match instead of the first.
   boolean firstLacksSuggestions =
       found.length > 1 && found[0].getSuggestedReplacements().isEmpty();
   RuleMatch chosen = firstLacksSuggestions ? found[1] : found[0];
   List<String> actual = chosen.getSuggestedReplacements();
   assertThat(actual, is(Arrays.asList(expectedSuggestions)));
 }
Esempio n. 11
0
 /**
  * Re-checks every hit in {@code topDocs} with {@code languageTool} and collects the sentences
  * for which the check returns at least one rule match.
  *
  * <p>For each hit the stored document is loaded, its {@code FIELD_NAME} value is checked, and,
  * if anything matched, a {@code MatchingSentence} is built from the sentence text, the
  * {@code SOURCE_FIELD_NAME} value, the analyzed sentence and the rule matches.
  *
  * @param indexSearcher searcher used to load the document behind each hit
  * @param topDocs search result whose {@code scoreDocs} are iterated
  * @param languageTool tool used to check and analyze each sentence
  * @return the matching sentences, in the order of {@code topDocs.scoreDocs}
  * @throws IOException propagated from document loading, checking or analysis
  */
 private List<MatchingSentence> findMatchingSentences(
     IndexSearcher indexSearcher, TopDocs topDocs, JLanguageTool languageTool) throws IOException {
   final List<MatchingSentence> result = new ArrayList<>();
   for (ScoreDoc hit : topDocs.scoreDocs) {
     final Document document = indexSearcher.doc(hit.doc);
     final String text = document.get(FIELD_NAME);
     final List<RuleMatch> found = languageTool.check(text);
     if (found.isEmpty()) {
       // The full check found nothing for this hit, so it is dropped.
       continue;
     }
     result.add(
         new MatchingSentence(
             text,
             document.get(SOURCE_FIELD_NAME),
             languageTool.getAnalyzedSentence(text),
             found));
   }
   return result;
 }
Esempio n. 12
0
 /**
  * Asserts that the rule reports no match for {@code s}.
  *
  * @param s sentence to analyze and run through the rule
  * @throws IOException propagated from analysis or matching
  */
 private void assertGood(String s) throws IOException {
   final String failureText = "Found unexpected match in sentence '" + s + "'";
   final int matchCount = rule.match(langTool.getAnalyzedSentence(s)).length;
   assertEquals(failureText, 0, matchCount);
 }
Esempio n. 13
0
 /**
  * Asserts that the rule reports exactly one match for {@code input}.
  *
  * @param input sentence to analyze and run through the rule
  * @throws IOException propagated from analysis or matching
  */
 private void assertBad(String input) throws IOException {
   final String failureText = "Did not find expected error in: '" + input + "'";
   final int matchCount = rule.match(langTool.getAnalyzedSentence(input)).length;
   assertEquals(failureText, 1, matchCount);
 }
 /**
  * Asserts that the rule reports no match for {@code sentence}.
  *
  * <p>The failure message names the sentence, so a failure among many consecutive calls can be
  * attributed without consulting the stack trace line number.
  *
  * @param sentence sentence to analyze and run through the rule
  * @throws IOException propagated from analysis or matching
  */
 private void assertCorrect(String sentence) throws IOException {
   final RuleMatch[] matches = rule.match(langTool.getAnalyzedSentence(sentence));
   assertEquals("Found unexpected match in sentence '" + sentence + "'", 0, matches.length);
 }
 /**
  * Asserts that the rule reports exactly {@code n} matches for {@code s}.
  *
  * <p>The failure message names the sentence and the expected count, so a failing call can be
  * identified directly from the test output.
  *
  * @param s sentence to analyze and run through the rule
  * @param n expected number of matches
  * @throws IOException propagated from analysis or matching
  */
 private void assertBad(String s, int n) throws IOException {
   assertEquals(
       "Did not find " + n + " match(es) in sentence '" + s + "'",
       n,
       rule.match(langTool.getAnalyzedSentence(s)).length);
 }
  /**
   * Runs the rule over sentences containing quotes, brackets, apostrophes and similar paired
   * symbols: first inputs that must produce no match, then inputs that must be flagged, and
   * finally two inputs whose exact match count is checked.
   *
   * @throws IOException propagated from the assertion helpers or from sentence analysis
   */
  public void testRule() throws IOException {
    // --- inputs that must not be flagged ---
    assertCorrect("(l'execució a mans d'\"especialistes\")");
    assertCorrect("\"Vine\", li va dir.");
    assertCorrect("(Una frase de prova).");
    assertCorrect("Aquesta és la paraula 'prova'.");
    assertCorrect("This is a sentence with a smiley :-)");
    assertCorrect("This is a sentence with a smiley ;-) and so on...");
    assertCorrect("Aquesta és l'hora de les decisions.");
    assertCorrect("Aquesta és l’hora de les decisions.");
    assertCorrect("(fig. 20)");
    assertCorrect("\"Sóc la teva filla. El corcó no et rosegarà més.\"\n\n");
    assertCorrect("–\"Club dels llagoters\" –va repetir en Ron.");
    assertCorrect("—\"Club dels llagoters\" –va repetir en Ron.");
    assertCorrect("»Això em porta a demanar-t'ho.");
    assertCorrect("»Això em porta (sí) a demanar-t'ho.");
    assertCorrect("al capítol 12 \"Llavors i fruits oleaginosos\"");
    assertCorrect("\"Per què serveixen les forquilles?\" i aquest respon \"per menjar\".");
    assertCorrect("És a 60º 50' 23\"");
    assertCorrect("És a 60º 50' 23'");
    assertCorrect("60° 50' 23'");
    assertCorrect("60° 50'");
    // Disabled case: assertCorrect("el grau en 60 parts iguals, tenim el minut (1'):");
    // Disabled case: assertCorrect("el minut en 60 parts iguals, tenim el segon (1\"):");

    // Disabled case: assertCorrect("The screen is 20\" wide.");
    assertCorrect("This is a [test] sentence...");
    assertCorrect(
        "The plight of Tamil refugees caused a surge of support from most of the Tamil political parties.[90]");
    assertCorrect("This is what he said: \"We believe in freedom. This is what we do.\"");
    assertCorrect("(([20] [20] [20]))");
    // Regression case: this once produced a false alarm after disambiguation.
    assertCorrect("This is a \"special test\", right?");
    // Numbered or lettered bullets that end in a closing parenthesis.
    assertCorrect("We discussed this in Chapter 1).");
    assertCorrect("The jury recommended that: (1) Four additional deputies be employed.");
    assertCorrect("We discussed this in section 1a).");
    assertCorrect("We discussed this in section iv).");
    // The inches exception must not swallow the opening quote here.
    assertCorrect(
        "In addition, the government would pay a $1,000 \"cost of education\" grant to the schools.");
    // Disabled case:
    // assertCorrect("Paradise lost to the alleged water needs of Texas' big cities Thursday.");
    assertCorrect("Porta'l cap ací.");
    assertCorrect("Porta-me'n cinquanta!");

    // --- inputs that must be flagged ---
    assertIncorrect("Ploraria.\"");
    assertIncorrect("Aquesta és l555’hora de les decisions.");
    assertIncorrect("Vine\", li va dir.");
    assertIncorrect("Aquesta és l‘hora de les decisions.");
    assertIncorrect("(This is a test sentence.");
    assertIncorrect("This is a test with an apostrophe &'.");
    assertIncorrect("&'");
    assertIncorrect("!'");
    assertIncorrect("What?'");

    // Currently treated as an error, even though smileys are commonly written this way.
    assertIncorrect("Some text (and some funny remark :-) with more text to follow");

    // --- inputs with an exact expected match count ---
    final RuleMatch[] mismatchedPair =
        rule.match(langTool.getAnalyzedSentence("(This is a test” sentence."));
    assertEquals(2, mismatchedPair.length);

    final RuleMatch[] threeUnpaired =
        rule.match(langTool.getAnalyzedSentence("This [is (a test} sentence."));
    assertEquals(3, threeUnpaired.length);
  }