コード例 #1
0
  /**
   * Guesses the language of the text supplied by {@code r} and returns one
   * {@code LanguageProbability} per reference language known to
   * {@code trigramsmap}.
   *
   * <p>The distance between the test trigrams and every reference profile is
   * computed, the candidates are sorted by their natural ordering, and each
   * value is then rescaled to {@code smallestDistance / distance}. The method
   * relies on that ordering placing the smallest distance first, so after
   * rescaling the first element is 1.0 (the original documentation describes
   * the result as sorted in decreasing order of probability).
   *
   * <p>If the smallest distance is not greater than zero no rescaling is
   * done and the values are returned as computed; per the original
   * documentation this is the empty-reader case, in which every language is
   * expected to carry 0.0.
   *
   * <p>If no reference languages are registered, an empty array is returned.
   *
   * @param r        source of the text to classify; must not be {@code null}
   * @param maxGrams upper bound handed to the trigram generator; must be at
   *                 least 1
   * @return one entry per reference language, in sorted order; never
   *         {@code null}
   * @throws IllegalArgumentException if {@code r} is {@code null} or
   *                                  {@code maxGrams} is smaller than 1
   * @throws IOException              declared for failures while reading
   *                                  from {@code r}
   */
  public LanguageProbability[] guessLanguages(Reader r, int maxGrams) throws IOException {

    if (r == null) {
      throw new IllegalArgumentException("Reader r must not be null");
    }
    if (maxGrams < 1) {
      throw new IllegalArgumentException("maxGrams must be greater or equal to 1");
    }

    // Reset the shared test profile and run the generator over the new input.
    // NOTE(review): presumably tg fills `test` while running - confirm.
    tg.setReader(r);
    test.clear();
    tg.start(maxGrams);

    // Score every reference profile. The candidates go straight into an
    // array: a sorted set would discard any candidate its ordering reports as
    // equal to one already present, so languages with tied distances could
    // be lost.
    Set refset = trigramsmap.keySet();
    LanguageProbability[] lp = new LanguageProbability[refset.size()];
    int i = 0;
    for (Iterator it = refset.iterator(); it.hasNext();) {
      Trigrams reference = (Trigrams) it.next();
      long distance = test.distance(reference);
      lp[i++] = new LanguageProbability((String) trigramsmap.get(reference), distance);
    }

    // Nothing to rank (and no lp[0] to read) without reference languages.
    if (lp.length == 0) {
      return lp;
    }

    // Natural ordering of LanguageProbability, the same ordering a
    // comparator-less TreeSet would apply; the sort keeps tied elements.
    java.util.Arrays.sort(lp);

    float minprob = lp[0].probability;

    // Rescale only when the best distance is positive; otherwise the values
    // are left untouched (documented as the empty-reader case).
    if (minprob > 0.0f) {
      for (i = 0; i < lp.length; i++) {
        lp[i].probability = minprob / lp[i].probability;
      }
    }

    return lp;
  }
コード例 #2
0
  /**
   * Guesses the language of the text supplied by {@code r} and returns the
   * value that {@code trigramsmap} stores for the closest reference profile
   * (an ISO-639 code, according to the original documentation).
   *
   * <p>Every reference profile is compared with the test trigrams and the
   * first one with the strictly smallest distance wins. If no profile yields
   * a distance below {@code Long.MAX_VALUE} (for example when there are no
   * profiles at all), the final lookup is performed with a {@code null} key.
   *
   * @param r        source of the text to classify; must not be {@code null}
   * @param maxGrams upper bound handed to the trigram generator; must be at
   *                 least 1
   * @return the map value associated with the closest reference profile
   * @throws IllegalArgumentException if {@code r} is {@code null} or
   *                                  {@code maxGrams} is smaller than 1
   * @throws IOException              declared for failures while reading
   *                                  from {@code r}
   */
  public String guessLanguage(Reader r, int maxGrams) throws IOException {
    if (r == null) {
      throw new IllegalArgumentException("Reader r must not be null");
    }
    if (maxGrams < 1) {
      throw new IllegalArgumentException("maxGrams must be greater or equal to 1");
    }

    // Reset the shared test profile and run the generator over the new input.
    tg.setReader(r);
    test.clear();
    tg.start(maxGrams);

    // Linear scan for the reference profile with the smallest distance.
    Trigrams closest = null;
    long smallest = Long.MAX_VALUE;
    for (Iterator refs = trigramsmap.keySet().iterator(); refs.hasNext();) {
      Trigrams candidate = (Trigrams) refs.next();
      long d = test.distance(candidate);
      if (d < smallest) {
        smallest = d;
        closest = candidate;
      }
    }

    return (String) trigramsmap.get(closest);
  }