コード例 #1
0
ファイル: HighFreqTerms.java プロジェクト: hutea/luke
  /**
   * @param reader
   * @param numTerms
   * @param field
   * @return TermStats[] ordered by terms with highest docFreq first.
   * @throws Exception
   */
  public static TermStats[] getHighFreqTerms(IndexReader reader, int numTerms, String[] fieldNames)
      throws Exception {
    TermStatsQueue tiq = null;
    TermsEnum te = null;

    if (fieldNames != null) {
      Fields fields = MultiFields.getFields(reader);
      if (fields == null) {
        LOG.info("Index with no fields - probably empty or corrupted");
        return EMPTY_STATS;
      }
      tiq = new TermStatsQueue(numTerms);
      for (String field : fieldNames) {
        Terms terms = fields.terms(field);
        if (terms != null) {
          te = terms.iterator(te);
          fillQueue(te, tiq, field);
        }
      }
    } else {
      Fields fields = MultiFields.getFields(reader);
      if (fields == null) {
        LOG.info("Index with no fields - probably empty or corrupted");
        return EMPTY_STATS;
      }
      tiq = new TermStatsQueue(numTerms);
      Iterator<String> fieldIterator = fields.iterator();
      while (fieldIterator.hasNext()) {
        String field = fieldIterator.next();
        Terms terms = fields.terms(field);
        if (terms != null) {
          te = terms.iterator(te);
          fillQueue(te, tiq, field);
        }
      }
    }

    TermStats[] result = new TermStats[tiq.size()];
    // we want highest first so we read the queue and populate the array
    // starting at the end and work backwards
    int count = tiq.size() - 1;
    while (tiq.size() != 0) {
      result[count] = tiq.pop();
      count--;
    }
    return result;
  }
コード例 #2
0
ファイル: HighFreqTerms.java プロジェクト: hutea/luke
 public static void fillQueue(TermsEnum termsEnum, TermStatsQueue tiq, String field)
     throws Exception {
   BytesRef term;
   while ((term = termsEnum.next()) != null) {
     BytesRef r = new BytesRef();
     r.copyBytes(term);
     tiq.insertWithOverflow(new TermStats(field, r, termsEnum.docFreq()));
   }
 }