/**
 * Loads the document-frequency table from {@code dfStatsPath} into {@code dfs}.
 *
 * <p>Does nothing if {@code dfs} is already set. The file is read as one int
 * (the entry count) followed by that many VInt-encoded values. The entry count
 * must equal {@code prefixSet.size()}.
 *
 * <p>The input stream is always closed, even when reading fails, and
 * {@code dfs} is only assigned once every entry has been read, so a failed
 * load never leaves a partially filled table behind.
 *
 * @param dfStatsPath location of the df statistics file
 * @throws IOException if the file cannot be opened or read
 * @throws RuntimeException if the stored entry count differs from {@code prefixSet.size()}
 */
private void loadDfs(Path dfStatsPath) throws IOException {
   if (dfs != null) return;
   FSDataInputStream dfStatsInput = fileSys.open(dfStatsPath);
   try {
     int l = dfStatsInput.readInt();
     if (l != prefixSet.size()) {
       throw new RuntimeException("df length mismatch: " + l + "\t" + prefixSet.size());
     }
     int[] loaded = new int[l];
     for (int i = 0; i < l; i++) {
       loaded[i] = WritableUtils.readVInt(dfStatsInput);
     }
     // Publish only after a complete read; otherwise the early-return guard
     // above would treat a half-read table as fully loaded.
     dfs = loaded;
   } finally {
     dfStatsInput.close();
   }
 }
  /**
   * Loads the collection-frequency table from {@code cfStatsPath} into {@code cfs}.
   *
   * <p>Does nothing if {@code cfs} is already set. The file is read as one int
   * (the entry count) followed by that many VLong-encoded values. The entry
   * count must equal {@code prefixSet.size()}.
   *
   * <p>The input stream is always closed, even when reading fails, and
   * {@code cfs} is only assigned once every entry has been read, so a failed
   * load never leaves a partially filled table behind.
   *
   * @param cfStatsPath location of the cf statistics file
   * @throws IOException if the file cannot be opened or read
   * @throws RuntimeException if the stored entry count differs from {@code prefixSet.size()}
   */
  public void loadCfs(Path cfStatsPath) throws IOException {
    if (cfs != null) return;
    FSDataInputStream cfStatsInput = fileSys.open(cfStatsPath);
    try {
      int l = cfStatsInput.readInt();
      if (l != prefixSet.size()) {
        throw new RuntimeException("cf length mismatch: " + l + "\t" + prefixSet.size());
      }
      long[] loaded = new long[l];
      for (int i = 0; i < l; i++) {
        loaded[i] = WritableUtils.readVLong(cfStatsInput);
      }
      // Publish only after a complete read; otherwise the early-return guard
      // above would treat a half-read table as fully loaded.
      cfs = loaded;
    } finally {
      cfStatsInput.close();
    }
  }
  /**
   * Creates the statistics object and reads the prefix-encoded term set from
   * {@code prefixSetPath}.
   *
   * <p>The given file system is kept in {@code fileSys} for later loads. The
   * input stream is closed even if deserializing the term set fails.
   *
   * @param prefixSetPath location of the serialized prefix set
   * @param fs file system used to open {@code prefixSetPath}
   * @throws IOException if the file cannot be opened or read
   */
  public PrefixEncodedGlobalStatsWithIndex(Path prefixSetPath, FileSystem fs) throws IOException {
    fileSys = fs;
    FSDataInputStream termsInput = fileSys.open(prefixSetPath);
    try {
      prefixSet.readFields(termsInput);
    } finally {
      termsInput.close();
    }
  }
 /**
  * Looks up the (df, cf) pair for {@code term}.
  *
  * <p>The frequent-term caches are consulted first. The cached pair is
  * returned only when both caches are present and both contain the term;
  * in every other case the lookup falls back to the full {@code dfs} and
  * {@code cfs} arrays, indexed by the term's id in {@code prefixSet}.
  *
  * @param term the term to look up
  * @return the term's df and cf, or {@code null} if {@code prefixSet} reports
  *     a negative id for the term
  */
 public PairOfIntLong getStats(String term) {
   PairOfIntLong stats = new PairOfIntLong();

   if (frequentTermsDfs != null) {
     try {
       int cachedDf = frequentTermsDfs.get(term);
       LOGGER.info("[cached] df of " + term + ": " + cachedDf);
       if (frequentTermsCfs != null) {
         long cachedCf = frequentTermsCfs.get(term);
         LOGGER.info("[cached] cf of " + term + ": " + cachedCf);
         stats.set(cachedDf, cachedCf);
         return stats;
       }
     } catch (NoSuchElementException e) {
       // Term is missing from one of the caches; fall through to the
       // array-based lookup below.
     }
   }

   int termId = prefixSet.getId(term);
   LOGGER.info("index of " + term + ": " + termId);
   if (termId >= 0) {
     stats.set(dfs[termId], cfs[termId]);
     return stats;
   }
   return null;
 }
 /**
  * Builds the term-to-cf cache {@code frequentTermsCfs} from the first
  * {@code n} entries of {@code idToTerm} (capped at {@code cfs.length}).
  * Does nothing if the cache already exists.
  *
  * @param n maximum number of terms to cache
  */
 private void loadFrequentCfMap(int n) {
   if (frequentTermsCfs != null) {
     return;
   }
   frequentTermsCfs = new HMapKL<String>();

   final int limit = Math.min(n, cfs.length);
   for (int rank = 0; rank < limit; rank++) {
     final int termId = idToTerm[rank];
     frequentTermsCfs.put(prefixSet.getTerm(termId), cfs[termId]);
   }
 }
 /**
  * Builds the term-to-df cache {@code frequentTermsDfs} from the first
  * {@code n} entries of {@code idToTerm} (capped at {@code dfs.length}).
  * Does nothing if the cache already exists.
  *
  * @param n maximum number of terms to cache
  */
 private void loadFrequentDfMap(int n) {
   if (frequentTermsDfs != null) {
     return;
   }
   frequentTermsDfs = new HMapKI<String>();

   final int limit = Math.min(n, dfs.length);
   for (int rank = 0; rank < limit; rank++) {
     final int termId = idToTerm[rank];
     frequentTermsDfs.put(prefixSet.getTerm(termId), dfs[termId]);
   }
 }
 /**
  * Prints a short dump to standard output: the prefix set's window size, the
  * number of terms, and then one tab-separated line per term for at most the
  * first 100 terms. Each line holds the term's index and text, followed by
  * its df and cf when the corresponding table is loaded.
  */
 public void printKeys() {
   final int maxRows = 100;

   System.out.println("Window: " + this.prefixSet.getWindowSize());
   System.out.println("Length: " + this.length());

   int row = 0;
   while (row < length() && row < maxRows) {
     System.out.print(row + "\t" + prefixSet.getTerm(row));
     if (dfs != null) {
       System.out.print("\t" + dfs[row]);
     }
     if (cfs != null) {
       System.out.print("\t" + cfs[row]);
     }
     System.out.println();
     row++;
   }
 }
  /**
   * Returns the collection frequency of {@code term}.
   *
   * <p>The frequent-term cf cache is consulted first when it exists. If the
   * term is not cached, the value is read from the {@code cfs} array at the
   * term's id in {@code prefixSet}.
   *
   * <p>NOTE(review): the fallback indexes {@code cfs} directly, so it fails
   * with a {@code NullPointerException} if {@code cfs} is still null at that
   * point — confirm callers populate it before uncached lookups.
   *
   * @param term the term to look up
   * @return the term's cf, or {@code -1} if {@code prefixSet} reports a
   *     negative id for the term
   */
  public long getCF(String term) {
    // Guard on the cache that is actually dereferenced. The previous check
    // tested frequentTermsDfs, which threw a NullPointerException when only
    // the df cache was built and skipped the cf cache when only it was built.
    if (frequentTermsCfs != null) {
      try {
        long cf = frequentTermsCfs.get(term);
        LOGGER.info("[cached] cf of " + term + ": " + cf);
        return cf;
      } catch (NoSuchElementException e) {
        // Term is not in the cache; fall through to the array lookup.
      }
    }
    int index = prefixSet.getId(term);
    LOGGER.info("index of " + term + ": " + index);
    if (index < 0) return -1;
    return cfs[index];
  }
 /**
  * Returns the number of terms, as reported by {@code prefixSet.size()}.
  *
  * @return the size of the prefix set
  */
 public int length() {
   final int termCount = prefixSet.size();
   return termCount;
 }