Beispiel #1
0
  /**
   * Records one occurrence of a word at the given position of the given file.
   *
   * <p>Words are filtered before indexing: very short words, words the dictionary reports
   * with frequency 0, and words whose reported frequency exceeds 90000 are skipped.
   *
   * @param str the word to index
   * @param fileid identifier of the file the word occurs in
   * @param pos position of the occurrence within that file
   * @return {@code true} if the occurrence was added to the index, {@code false} if the word
   *     was filtered out
   */
  public boolean addWord(String str, long fileid, long pos) {
    /*
     * Some strings are deliberately left out of the index: words that are very
     * rare (e.g. spelling mistakes) and extremely common "stop words".
     * http://en.wikipedia.org/wiki/Stop_words
     *
     * The definition of a stop word used here is completely arbitrary. It has
     * more to do with how much computation fits on a single appengine node.
     *
     * Ideally no word would be ignored, but that would require sharding the
     * word index across a number of serving machines.
     */
    if (str.length() < 3) {
      return false;
    }
    final int frequency = dict.getFrequency(str);
    if (frequency == 0 || frequency > 90000) {
      return false;
    }

    // Fetch the per-word list of file entries, creating it on first sight of the word.
    ArrayList<FilePositions> entries = idx.get(str);
    if (entries == null) {
      entries = new ArrayList<FilePositions>();
      idx.put(str, entries);
    }

    // Only the most recently added entry is compared against fileid; a different
    // (or missing) last entry starts a new one at the end of the list.
    final FilePositions target;
    if (!entries.isEmpty() && entries.get(entries.size() - 1).fileid == fileid) {
      target = entries.get(entries.size() - 1);
    } else {
      target = new FilePositions();
      target.fileid = fileid;
      target.poslist = new ArrayList<Long>();
      entries.add(target);
    }
    target.poslist.add(pos);
    return true;
  }
 /**
  * Returns the highest frequency that any of the given dictionaries reports for a word.
  *
  * @param dictionaries the dictionaries to query; must not be {@code null}
  * @param word the word to look up
  * @return {@code Dictionary.NOT_A_PROBABILITY} when {@code TextUtils.isEmpty(word)} is true;
  *     otherwise the largest value returned by {@code getFrequency(word)} across all
  *     dictionaries, with a floor of {@code -1} (which is also the result for an empty map)
  */
 public static int getMaxFrequency(
     final ConcurrentHashMap<String, Dictionary> dictionaries, CharSequence word) {
   if (TextUtils.isEmpty(word)) {
     return Dictionary.NOT_A_PROBABILITY;
   }
   int maxFreq = -1;
   // Iterate the values directly instead of keySet() + get(key): this avoids a second
   // lookup per entry and the window in which a key could be removed between the two
   // calls. ConcurrentHashMap never holds null values, so no null check is needed here.
   for (final Dictionary dictionary : dictionaries.values()) {
     maxFreq = Math.max(maxFreq, dictionary.getFrequency(word));
   }
   return maxFreq;
 }