/**
 * Test of detectLanguage method, of class LanguageDetector with vi pages.
 *
 * <p>Runs the detector on the text of the Vietnamese fixture page as extracted, lower-cased and
 * upper-cased, and expects "vi" each time. A fixture that cannot be read makes the test fail
 * instead of being logged and ignored.
 */
 public void testDetectLanguageVi() {
   LOGGER.debug("detectLanguage vi");
   LanguageDetector instance = LanguageDetector.getInstance();
   String fixture = "vi.wikipedia.org-wiki_20140701.html";
   String text;
   try {
     // Extract the text once; the three assertions below work on the same content.
     text = Jsoup.parse(new File(PATH + fixture), UTF_8).text();
   } catch (IOException ex) {
     LOGGER.error(ex);
     // Without the fixture nothing is verified, so the test must not pass silently.
     AssertionError failure = new AssertionError("unable to read test fixture " + fixture);
     failure.initCause(ex);
     throw failure;
   }
   LOGGER.debug("start detection");
   assertEquals("vi", instance.detectLanguage(text).getDetectedLanguage());
   // Locale.ROOT keeps the case conversion independent of the machine's default locale.
   assertEquals(
       "vi",
       instance.detectLanguage(text.toLowerCase(java.util.Locale.ROOT)).getDetectedLanguage());
   assertEquals(
       "vi",
       instance.detectLanguage(text.toUpperCase(java.util.Locale.ROOT)).getDetectedLanguage());
   LOGGER.debug("detection ended");
 }
 /**
  * Test of detectLanguage method, of class LanguageDetector with ru pages.
  *
  * <p>Each Russian fixture page is parsed and its text is expected to be detected as "ru". A
  * fixture that cannot be read makes the test fail instead of being logged and ignored.
  */
 public void testDetectLanguageRu() {
   LOGGER.debug("detectLanguage ru");
   LanguageDetector instance = LanguageDetector.getInstance();
   String[] fixtures = {
     "timeliner.ru_20140701.html",
     "atrainings.ru_20140701.html",
     "alpidos.ru-home_20140701.html"
   };
   for (String fixture : fixtures) {
     Document doc;
     try {
       doc = Jsoup.parse(new File(PATH + fixture), UTF_8);
     } catch (IOException ex) {
       LOGGER.error(ex);
       // Without the fixture nothing is verified, so the test must not pass silently.
       AssertionError failure = new AssertionError("unable to read test fixture " + fixture);
       failure.initCause(ex);
       throw failure;
     }
     LOGGER.debug("start detection");
     assertEquals("ru", instance.detectLanguage(doc.text()).getDetectedLanguage());
     LOGGER.debug("detection ended");
   }
 }
  /**
   * Applies the configured line filters to {@code string}.
   *
   * <p>The line is rejected when its numerical density is above the configured threshold, or,
   * when language detection is enabled, when no language is configured or the line is not
   * recognised as the configured language with the configured certainty. A surviving line has
   * its non-ASCII characters removed if unicode stripping is configured.
   *
   * @param string the line to filter
   * @return the line, possibly with non-ASCII characters removed, or {@code null} if rejected
   */
  private static String filterLine(String string) {
    final boolean tooNumeric = numericalDensity(string) > Config.getNumericalDensity();
    if (tooNumeric) {
      return null;
    }

    if (LanguageDetector.isEnabled()) {
      final String expected = Config.getLanguage();
      // Short-circuits left to right: the detector only runs when a language is configured.
      final boolean accepted =
          expected != null
              && !expected.isEmpty()
              && LanguageDetector.isLanguage(string, expected, Config.getLanguageCertainty());
      if (!accepted) {
        return null;
      }
    }

    // Characters outside the 0x00-0x7F range are dropped outright, not transliterated.
    return Config.stripUnicode() ? string.replaceAll("[^\\x00-\\x7F]", "") : string;
  }
  /**
   * Test of detectLanguage method, of class LanguageDetector with upper-case text.
   *
   * <p>Feeds a series of upper-case French product labels, then one upper-case Spanish
   * quotation, to the detector and checks the detected language of each.
   */
  public void testDetectLanguageWithCaseSensitiveText() {
    LOGGER.debug("detectLanguage With case sensitive text");
    final LanguageDetector detector = LanguageDetector.getInstance();
    LOGGER.debug("start detection");

    // Checked in order; every sample is expected to be detected as French.
    final String[] frenchSamples = {
      "34808 : CABLE DE COMMANDE DE BOITE A VITESSE RENAULT KANGOO 32.11 € TTC 34808 : CABLE DE COMMANDE DE BOITE A VITESSE RENAULT KANGOO",
      "CABLE DE COMMANDE DE BOITE A VITESSE RENAULT KANGOO TTC CABLE DE COMMANDE DE BOITE A VITESSE RENAULT KANGOO",
      "CABLE DE COMMANDE DE BOITE A VITESSE RENAULT KANGOO TTC",
      "CABLE DE COMMANDE DE BOITE A VITESSE RENAULT TTC",
      // "AULT" is suspected of skewing detection (RENAULT removed)
      "CABLE DE COMMANDE DE BOITE A VITESSE TTC",
      // "ESPACE" is suspected of skewing detection (ESPACE removed step by step)
      "MAITRE CYLINDRE DE FREIN RENAULT 21 ESPACE 50.06 € TTC",
      "MAITRE CYLINDRE DE FREIN RENAULT ESPACE  TTC",
      "MAITRE CYLINDRE DE FREIN RENAULT TTC",
      // "ESPACE" is suspected of skewing detection (both ESPACE and RENAULT removed)
      "MAITRE CYLINDRE DE FREIN TTC"
    };
    for (String sample : frenchSamples) {
      assertEquals("fr", detector.detectLanguage(sample).getDetectedLanguage());
    }

    final String spanishSample =
        "NO PODEÍS PREPARAR A VUESTROS ALUMNOS PARA QUE CONSTRUYAN MAÑANA EL MUNDO DE SUS SUEÑOS SI VOSOTROS YA NO CREÉIS EN ESOS SUEÑOS NO PODEÍS PREPARARLOS PARA LA VIDA SINO CREÉIS EN ELLA NO PODRÉIS MOSTRAR EL CAMINO SI OS HABEÍS SENTADO CANSADOS Y DESALENTADOS EN LA ENCRUCIJADA CELESTIN FREINET FRANCIA";
    assertEquals("es", detector.detectLanguage(spanishSample).getDetectedLanguage());
    LOGGER.debug("detection ended");
  }