Esempio n. 1
0
  /**
   * Applies Good-Turing style smoothing to the low-count entries of {@code lm}, using a
   * least-squares fit in log space. This follows the procedure in Jurafsky and Martin
   * sect. 4.5.3.
   *
   * <p>The method:
   * <ol>
   *   <li>builds a count-of-counts histogram, truncating each token count to an {@code int};</li>
   *   <li>fits regression coefficients with {@code runLogSpaceRegression};</li>
   *   <li>sets {@code UNK_PROB} to the histogram entry for count 1 divided by the total count
   *       of {@code lm};</li>
   *   <li>adds every token whose count is at most {@code unkCutoff} to {@code unkTokens};</li>
   *   <li>replaces the count of every token whose count is at most {@code kCutoff} with the
   *       value returned by {@code katzEstimate}.</li>
   * </ol>
   *
   * <p>No explicit normalization step is performed here: a call to
   * {@code Counters.normalize(lm)} was disabled by the original author, whose note claims the
   * backing counter is always kept normalized -- TODO confirm against the counter implementation.
   */
  public void smoothAndNormalize() {
    // Histogram: for each (truncated) count value, how many tokens have that count.
    Counter<Integer> countOfCounts = new Counter<Integer>();
    for (K token : lm.keySet()) {
      countOfCounts.incrementCount((int) lm.getCount(token));
    }

    final double[] regression = runLogSpaceRegression(countOfCounts);

    // Computed from the unsmoothed total, before any count below is overwritten.
    UNK_PROB = countOfCounts.getCount(1) / lm.totalCount();

    for (K token : lm.keySet()) {
      final double rawCount = lm.getCount(token);
      if (rawCount <= unkCutoff) {
        // Rare enough to be treated as an unknown token.
        unkTokens.add(token);
      }
      if (rawCount <= kCutoff) {
        // Only counts at or below the cutoff are replaced by their smoothed estimate.
        lm.setCount(token, katzEstimate(countOfCounts, rawCount, regression));
      }
    }
  }
Esempio n. 2
0
 /**
  * Builds a new counter in which every key of {@code counter} is mapped to its count divided
  * by the counter's total count. The argument is only queried through {@code totalCount},
  * {@code keySet} and {@code getCount}; the result is a separate {@code Counter} instance.
  *
  * <p>The total is not checked, so a total of zero yields the result of a floating-point
  * division by zero for each key.
  *
  * @param <E> key type of the counter
  * @param counter the counter whose counts are to be rescaled
  * @return a new counter holding {@code count / total} for each key
  */
 public static <E> Counter<E> normalize(Counter<E> counter) {
   final double denominator = counter.totalCount();
   final Counter<E> result = new Counter<E>();
   for (E entry : counter.keySet()) {
     final double share = counter.getCount(entry) / denominator;
     result.setCount(entry, share);
   }
   return result;
 }
Esempio n. 3
0
 /**
  * Computes the Jensen-Shannon divergence between the two distributions obtained by dividing
  * each counter's counts by that counter's total.
  *
  * <p>With {@code M = (X + Y) / 2}, the result is
  * {@code 0.5 * KL(X || M) + 0.5 * KL(Y || M)}, using natural logarithms. Terms whose
  * probability is exactly zero are skipped (the {@code 0 * log 0 = 0} convention), so a key
  * that is present with a zero count no longer turns the result into NaN.
  *
  * <p>Totals are not validated: a zero total makes the corresponding proportions NaN, and that
  * NaN propagates to the returned value.
  *
  * @param <E> key type shared by both counters
  * @param x the first counter
  * @param y the second counter
  * @return the Jensen-Shannon divergence of the two normalized counters, in nats
  */
 public static <E> double jensenShannonDivergence(Counter<E> x, Counter<E> y) {
   double sum = 0.0;
   double xTotal = x.totalCount();
   double yTotal = y.totalCount();
   for (E key : x.keySet()) {
     // Contribution of this key to KL(X || M).
     double xVal = x.getCount(key) / xTotal;
     if (xVal == 0.0) {
       continue; // 0 * log(0) is taken to be 0; NaN is deliberately not skipped
     }
     // presumably getCount returns 0 for keys absent from y -- verify against Counter
     double yVal = y.getCount(key) / yTotal;
     double avg = 0.5 * (xVal + yVal);
     sum += xVal * Math.log(xVal / avg);
   }
   for (E key : y.keySet()) {
     // Contribution of this key to KL(Y || M).
     double yVal = y.getCount(key) / yTotal;
     if (yVal == 0.0) {
       continue; // 0 * log(0) is taken to be 0; NaN is deliberately not skipped
     }
     double xVal = x.getCount(key) / xTotal;
     double avg = 0.5 * (xVal + yVal);
     sum += yVal * Math.log(yVal / avg);
   }
   // The divergence is the average of the two KL terms; the earlier "sum / 0.5" doubled the
   // sum instead of halving it.
   return 0.5 * sum;
 }
Esempio n. 4
0
 /**
  * Draws one key from {@code counter} with probability proportional to its count.
  *
  * <p>A single value is taken from {@code random.nextDouble()} (presumably uniform in
  * {@code [0, 1)} -- confirm the type of {@code random}) and the keys are scanned in
  * {@code keySet()} order, accumulating {@code count / total}, until the running sum exceeds
  * the drawn value.
  *
  * <p>Because the accumulated sum is subject to floating-point round-off, it can end slightly
  * below 1.0; a draw falling in that gap is resolved by returning the last key seen with a
  * positive count instead of failing.
  *
  * @param <E> key type of the counter
  * @param counter the counter to sample from
  * @return the sampled key
  * @throws RuntimeException if the counter's total is not positive, if a negative count is
  *     encountered during the scan, or if no key with a positive count is found
  */
 public static <E> E sample(Counter<E> counter) {
   double total = counter.totalCount();
   // Drawn before validation, matching the original order of random-number consumption.
   double rand = random.nextDouble();
   if (total <= 0.0) {
     throw new RuntimeException("Non-positive counter total: " + total);
   }
   double sum = 0.0;
   E lastPositiveKey = null;
   // Separate flag so that a legitimate null key can still serve as the fallback.
   boolean sawPositive = false;
   for (E key : counter.keySet()) {
     double count = counter.getCount(key);
     if (count < 0.0) {
       throw new RuntimeException("Negative count in counter: " + key + " => " + count);
     }
     if (count > 0.0) {
       lastPositiveKey = key;
       sawPositive = true;
     }
     sum += count / total;
     if (rand < sum) {
       return key;
     }
   }
   if (sawPositive) {
     // Round-off left the cumulative sum just below the drawn value.
     return lastPositiveKey;
   }
   throw new RuntimeException("Shouldn't Reach Here");
 }
Esempio n. 5
0
 /**
  * Reports the total count currently held by {@code lm}.
  *
  * @return the value of {@code lm.totalCount()}
  */
 public double totalMass() {
   final double mass = lm.totalCount();
   return mass;
 }