/* * Guess the language of r and returns and array of Language Probability * sorted in decreasing order of probability * * If r is empty, returns all languages with probability of 0.0 * * Maximum maxGrams of r are processed */ public LanguageProbability[] guessLanguages(Reader r, int maxGrams) throws IOException { if (r == null) throw new IllegalArgumentException("Reader r must not be null"); if (maxGrams < 1) throw new IllegalArgumentException("maxGrams must be greater or equal to 1"); SortedSet ss = new TreeSet(); tg.setReader(r); test.clear(); tg.start(maxGrams); Set refset = trigramsmap.keySet(); Iterator it = refset.iterator(); while (it.hasNext()) { Trigrams reference = (Trigrams) it.next(); long distance = test.distance(reference); ss.add(new LanguageProbability((String) trigramsmap.get(reference), distance)); } // Transfer to array LanguageProbability[] lp = new LanguageProbability[ss.size()]; it = ss.iterator(); int i = 0; while (it.hasNext()) { lp[i++] = (LanguageProbability) it.next(); } float minprob = lp[0].probability; // In case of an empty reader, all languages are returned // with a probability of 0.0 if (minprob > 0.0f) { for (i = 0; i < lp.length; i++) { lp[i].probability = minprob / lp[i].probability; } } return lp; }
/**
 * Guesses the language of the text read from {@code r} and returns the value
 * stored in {@code trigramsmap} for the closest reference (per the original
 * comment, an ISO-639 language code).
 *
 * <p>Every key of {@code trigramsmap} is compared against the freshly built
 * test trigrams; the reference with the strictly smallest distance wins, and
 * on ties the one encountered first is kept. If no reference is selected the
 * map is queried with a {@code null} key, so the outcome then depends on the
 * map implementation.
 *
 * @param r        source of the text to analyse; must not be {@code null}
 * @param maxGrams upper bound handed to {@code tg.start}; must be at least 1
 * @return the map value associated with the best matching reference
 * @throws IllegalArgumentException if {@code r} is {@code null} or
 *                                  {@code maxGrams} is less than 1
 * @throws IOException              declared for the reader-driven trigram
 *                                  extraction
 */
public String guessLanguage(Reader r, int maxGrams) throws IOException {
    if (r == null) {
        throw new IllegalArgumentException("Reader r must not be null");
    }
    if (maxGrams < 1) {
        throw new IllegalArgumentException("maxGrams must be greater or equal to 1");
    }

    // Rebuild the test trigram statistics from the supplied reader.
    tg.setReader(r);
    test.clear();
    tg.start(maxGrams);

    // Linear scan for the reference with the smallest distance.
    Trigrams closest = null;
    long smallest = Long.MAX_VALUE;
    for (Iterator refs = trigramsmap.keySet().iterator(); refs.hasNext();) {
        Trigrams candidate = (Trigrams) refs.next();
        long d = test.distance(candidate);
        if (d < smallest) {
            smallest = d;
            closest = candidate;
        }
    }
    return (String) trigramsmap.get(closest);
}