Example #1
  /**
   * Compute P(O|Theta), the probability of the observation sequence given the model, by forward
   * recursion with scaling.
   *
   * @param O an observation sequence
   * @return P(O|Theta)
   */
  public double evaluate(int[] O) {

    // Forward Recursion with Scaling
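    // Scaling: after each step, alpha_hat_t is rescaled by c[t] = 1 / sum_i alpha_t(i)
    // so that its entries sum to 1. The scale factors satisfy
    // P(O|Theta) = 1 / (c[0] * c[1] * ... * c[T-1]), hence
    // log P(O|Theta) = -sum_t log(c[t]), which is accumulated below.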

    int T = O.length;
    double[] c = allocateVector(T);
    double[] alpha_hat_t = allocateVector(N);
    double[] alpha_hat_t_plus_1 = allocateVector(N);
    double[] temp_alpha = null;
    double log_likelihood = 0;

    for (int t = 0; t < T; t++) {
      if (t == 0) {
        for (int i = 0; i < N; i++) {
          alpha_hat_t[i] = pi[i] * B[i][O[0]];
        }
      } else {
        clearVector(alpha_hat_t_plus_1);
        for (int j = 0; j < N; j++) {
          for (int i = 0; i < N; i++) {
            alpha_hat_t_plus_1[j] += alpha_hat_t[i] * A[i][j] * B[j][O[t]];
          }
        }
        temp_alpha = alpha_hat_t;
        alpha_hat_t = alpha_hat_t_plus_1;
        alpha_hat_t_plus_1 = temp_alpha;
      }
      c[t] = 1.0 / sum(alpha_hat_t);
      timesAssign(alpha_hat_t, c[t]);
      log_likelihood -= Math.log(c[t]);
    }

    return Math.exp(log_likelihood);
  }
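Since evaluate returns Math.exp(log_likelihood), the result underflows to 0.0 for long
observation sequences even though the scaled recursion itself is numerically stable. A
minimal sketch of a log-space variant (an addition, not part of the original class; it
assumes the same fields N, pi, A, B and the helpers allocateVector, sum, and timesAssign
used above):

  /**
   * Compute log P(O|Theta) directly, avoiding the final exponentiation.
   * Sketch only; assumes the same fields and helpers as evaluate(int[]).
   */
  public double evaluateLog(int[] O) {
    int T = O.length;
    double[] alpha_hat = allocateVector(N);
    double log_likelihood = 0;
    for (int t = 0; t < T; t++) {
      double[] next = allocateVector(N);
      for (int j = 0; j < N; j++) {
        if (t == 0) {
          next[j] = pi[j] * B[j][O[0]];
        } else {
          for (int i = 0; i < N; i++) {
            next[j] += alpha_hat[i] * A[i][j] * B[j][O[t]];
          }
        }
      }
      double s = sum(next);          // unscaled probability mass; c[t] = 1 / s
      timesAssign(next, 1.0 / s);
      log_likelihood += Math.log(s); // equivalent to -log(c[t])
      alpha_hat = next;
    }
    return log_likelihood;
  }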
Example #2
  /**
   * Train the basic HMM with scaling (Baum-Welch, i.e. EM). Memory complexity is
   * O(TN) + O(N^2) + O(NM), and computation complexity is O(tDTN^2), where t is the
   * number of outer (EM) iterations, D the number of observation sequences, T the
   * (maximum) sequence length, N the number of states, and M the number of output
   * symbols.
   */
  public void train() {

    int D = Os.length;
    int T_n = 0;
    double log_likelihood = 0;
    double log_likelihood_new = 0;
    double epsilon = this.epsilon;
    int maxIter = this.maxIter;

    // Initialization
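    // Two initialization paths: with labeled state sequences Qs, the parameters are
    // estimated from empirical counts (supervised ML); otherwise pi, A, and B are
    // initialized via initializePi/initializeA/initializeB.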

    clearVector(pi);
    clearMatrix(A);
    clearMatrix(B);

    double[] a = allocateVector(N);
    double[] b = allocateVector(N);

    int[] Q_n = null;
    int[] O_n = null;

    if (Qs == null) {

      pi = initializePi();
      A = initializeA();
      B = initializeB();

    } else {

      for (int n = 0; n < D; n++) {
        Q_n = Qs[n];
        O_n = Os[n];
        T_n = Os[n].length;
        for (int t = 0; t < T_n; t++) {
          if (t == 0) {
            // Count the initial state outside the transition branch so that
            // length-1 sequences still contribute to pi.
            pi[Q_n[0]] += 1;
          }
          if (t < T_n - 1) {
            A[Q_n[t]][Q_n[t + 1]] += 1;
            a[Q_n[t]] += 1;
          }
          B[Q_n[t]][O_n[t]] += 1;
          b[Q_n[t]] += 1;
        }
      }
      divideAssign(pi, D);
      for (int i = 0; i < N; i++) {
        divideAssign(A[i], a[i]);
        divideAssign(B[i], b[i]);
      }
    }

    int s = 0;
    double[] pi_new = allocateVector(N);
    double[][] A_new = allocateMatrix(N, N);
    double[][] B_new = allocateMatrix(N, M);
    double[] temp_pi = null;
    double[][] temp_A = null;
    double[][] temp_B = null;
    double[][] alpha_hat = null;
    double[][] beta_hat = null;
    double[] c_n = null;
    double[][] xi = allocateMatrix(N, N);
    double[] gamma = allocateVector(N);
    do {

      // Clearance
      clearVector(pi_new);
      clearMatrix(A_new);
      clearMatrix(B_new);
      clearVector(a);
      clearVector(b);
      // xi and gamma are fully overwritten for each t below, so they need no clearing here.
      log_likelihood_new = 0;

      for (int n = 0; n < D; n++) {

        O_n = Os[n];
        T_n = Os[n].length;
        c_n = allocateVector(T_n);
        alpha_hat = allocateMatrix(T_n, N);
        beta_hat = allocateMatrix(T_n, N);

        // Forward Recursion with Scaling
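        // Same scaled forward pass as in evaluate(int[]); the scale factors are kept
        // in c_n so the backward pass and the likelihood can reuse them.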

        for (int t = 0; t < T_n; t++) {
          if (t == 0) {
            for (int i = 0; i < N; i++) {
              alpha_hat[0][i] = pi[i] * B[i][O_n[0]];
            }
          } else {
            for (int j = 0; j < N; j++) {
              for (int i = 0; i < N; i++) {
                alpha_hat[t][j] += alpha_hat[t - 1][i] * A[i][j] * B[j][O_n[t]];
              }
            }
          }
          c_n[t] = 1.0 / sum(alpha_hat[t]);
          timesAssign(alpha_hat[t], c_n[t]);
        }

        // Backward Recursion with Scaling
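        // The backward variables are rescaled with the same factors c_n[t] computed in
        // the forward pass, so that the xi and gamma terms below come out correctly
        // normalized without dividing by P(O_n|Theta).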

        for (int t = T_n - 1; t >= 0; t--) {
          if (t == T_n - 1) {
            for (int i = 0; i < N; i++) {
              beta_hat[t][i] = 1;
            }
          } else {
            for (int i = 0; i < N; i++) {
              for (int j = 0; j < N; j++) {
                beta_hat[t][i] += A[i][j] * B[j][O_n[t + 1]] * beta_hat[t + 1][j];
              }
            }
          }
          timesAssign(beta_hat[t], c_n[t]);
        }

        // Expectation Variables and Updating Model Parameters
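        // With this scaling, xi[i][j] = P(q_t = i, q_{t+1} = j | O_n, Theta) and
        // gamma[i] = sum_j xi[i][j] = P(q_t = i | O_n, Theta); at the last time step
        // gamma reduces to alpha_hat[T_n - 1] itself.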

        for (int t = 0; t < T_n; t++) {
          if (t < T_n - 1) {
            for (int i = 0; i < N; i++) {
              for (int j = 0; j < N; j++) {
                xi[i][j] = alpha_hat[t][i] * A[i][j] * B[j][O_n[t + 1]] * beta_hat[t + 1][j];
              }
              plusAssign(A_new[i], xi[i]);
              gamma[i] = sum(xi[i]);
            }
            if (t == 0) {
              plusAssign(pi_new, gamma);
            }
            plusAssign(a, gamma);
          } else {
            assignVector(gamma, alpha_hat[t]);
          }
          for (int j = 0; j < N; j++) {
            B_new[j][O_n[t]] += gamma[j];
          }
          plusAssign(b, gamma);
          log_likelihood_new -= Math.log(c_n[t]);
        }
      }

      // Normalization (Sum to One)
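      // pi_new is normalized into a distribution; row i of A_new is divided by the
      // expected number of transitions out of state i (accumulated in a), and row j of
      // B_new by the expected number of visits to state j (accumulated in b).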

      sum2one(pi_new);

      for (int i = 0; i < N; i++) {
        divideAssign(A_new[i], a[i]);
      }

      for (int j = 0; j < N; j++) {
        divideAssign(B_new[j], b[j]);
      }

      temp_pi = pi;
      pi = pi_new;
      pi_new = temp_pi;

      temp_A = A;
      A = A_new;
      A_new = temp_A;

      temp_B = B;
      B = B_new;
      B_new = temp_B;

      s = s + 1;

      if (s > 1) {
        if (Math.abs((log_likelihood_new - log_likelihood) / log_likelihood) < epsilon) {
          fprintf("log[P(O|Theta)] does not increase.\n\n");
          break;
        }
      }

      log_likelihood = log_likelihood_new;
      fprintf("Iter: %d, log[P(O|Theta)]: %f\n", s, log_likelihood);

    } while (s < maxIter);
  }
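Both examples rely on small array utilities whose definitions are not shown. Their
semantics can be inferred from how they are used; one plausible set of definitions
(an assumption, not the original code) is:

  // Inferred helper implementations; element-wise semantics assumed from usage.
  static double[] allocateVector(int n) { return new double[n]; }            // zero-filled
  static double[][] allocateMatrix(int r, int c) { return new double[r][c]; }
  static void clearVector(double[] v) { java.util.Arrays.fill(v, 0.0); }
  static void clearMatrix(double[][] m) { for (double[] row : m) java.util.Arrays.fill(row, 0.0); }
  static double sum(double[] v) { double s = 0; for (double x : v) s += x; return s; }
  static void timesAssign(double[] v, double s) { for (int i = 0; i < v.length; i++) v[i] *= s; }
  static void divideAssign(double[] v, double s) { for (int i = 0; i < v.length; i++) v[i] /= s; }
  static void plusAssign(double[] v, double[] w) { for (int i = 0; i < v.length; i++) v[i] += w[i]; }
  static void assignVector(double[] v, double[] w) { System.arraycopy(w, 0, v, 0, v.length); }
  static void sum2one(double[] v) { timesAssign(v, 1.0 / sum(v)); }          // normalize to sum 1
  static void fprintf(String fmt, Object... args) { System.out.printf(fmt, args); }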