/** * Compute P(O|Theta), the probability of the observation sequence given the model, by forward * recursion with scaling. * * @param O an observation sequence * @return P(O|Theta) */ public double evaluate(int[] O) { // Forward Recursion with Scaling int T = O.length; double[] c = allocateVector(T); double[] alpha_hat_t = allocateVector(N); double[] alpha_hat_t_plus_1 = allocateVector(N); double[] temp_alpha = null; double log_likelihood = 0; for (int t = 0; t < T; t++) { if (t == 0) { for (int i = 0; i < N; i++) { alpha_hat_t[i] = pi[i] * B[i][O[0]]; } } else { clearVector(alpha_hat_t_plus_1); for (int j = 0; j < N; j++) { for (int i = 0; i < N; i++) { alpha_hat_t_plus_1[j] += alpha_hat_t[i] * A[i][j] * B[j][O[t]]; } } temp_alpha = alpha_hat_t; alpha_hat_t = alpha_hat_t_plus_1; alpha_hat_t_plus_1 = temp_alpha; } c[t] = 1.0 / sum(alpha_hat_t); timesAssign(alpha_hat_t, c[t]); log_likelihood -= Math.log(c[t]); } return Math.exp(log_likelihood); }
/**
 * Trains the basic HMM by the Baum-Welch (EM) algorithm with scaling.
 *
 * If labeled state paths (Qs) are available, the model is initialized by
 * supervised counting; otherwise pi, A and B are initialized by the
 * initialize* helpers. Memory complexity is O(TN) + O(N^2) + O(NM), and
 * computation complexity is O(tDTN^2), where t is the number of outer
 * iterations, D the number of sequences, T the sequence length, N the
 * number of states and M the number of observation symbols.
 */
public void train() {
    int D = Os.length;          // number of training sequences
    int T_n = 0;                // length of the current sequence
    double log_likelihood = 0;
    double log_likelihood_new = 0;
    double epsilon = this.epsilon;  // relative-improvement convergence threshold
    int maxIter = this.maxIter;

    // Initialization
    clearVector(pi);
    clearMatrix(A);
    clearMatrix(B);
    // Row-sum accumulators used to normalize A and B (a[i]: transitions out
    // of state i; b[i]: emissions from state i).
    double[] a = allocateVector(N);
    double[] b = allocateVector(N);
    int[] Q_n = null;
    int[] O_n = null;
    if (Qs == null) {
        // No labeled state paths: fall back to the initialization helpers.
        pi = initializePi();
        A = initializeA();
        B = initializeB();
    } else {
        // Supervised initialization: estimate pi, A, B by counting events
        // along the labeled state sequences, then normalize the counts.
        for (int n = 0; n < D; n++) {
            Q_n = Qs[n];
            O_n = Os[n];
            T_n = Os[n].length;
            for (int t = 0; t < T_n; t++) {
                if (t < T_n - 1) {
                    // Transition Q_n[t] -> Q_n[t+1] observed.
                    A[Q_n[t]][Q_n[t + 1]] += 1;
                    a[Q_n[t]] += 1;
                    if (t == 0) {
                        // Initial state of this sequence.
                        pi[Q_n[0]] += 1;
                    }
                }
                // Emission of symbol O_n[t] from state Q_n[t] observed.
                B[Q_n[t]][O_n[t]] += 1;
                b[Q_n[t]] += 1;
            }
        }
        // Convert counts to probabilities.
        divideAssign(pi, D);
        for (int i = 0; i < N; i++) {
            divideAssign(A[i], a[i]);
            divideAssign(B[i], b[i]);
        }
    }

    int s = 0;                  // outer EM iteration counter
    // Re-estimated parameters accumulated over all sequences each iteration;
    // swapped with pi/A/B at the end of the iteration to avoid reallocation.
    double[] pi_new = allocateVector(N);
    double[][] A_new = allocateMatrix(N, N);
    double[][] B_new = allocateMatrix(N, M);
    double[] temp_pi = null;
    double[][] temp_A = null;
    double[][] temp_B = null;
    double[][] alpha_hat = null;    // scaled forward variables, T_n x N
    double[][] beta_hat = null;     // scaled backward variables, T_n x N
    double[] c_n = null;            // per-step scaling factors for sequence n
    double[][] xi = allocateMatrix(N, N);   // xi_t(i,j): joint state posterior
    double[] gamma = allocateVector(N);     // gamma_t(i): state posterior
    do {
        // Clearance: reset all accumulators for this EM iteration.
        // xi and gamma are fully overwritten each step, so clearing them
        // is unnecessary (hence the commented-out calls).
        clearVector(pi_new);
        clearMatrix(A_new);
        clearMatrix(B_new);
        clearVector(a);
        clearVector(b);
        /*clearMatrix(xi);
        clearVector(gamma);*/
        log_likelihood_new = 0;
        for (int n = 0; n < D; n++) {
            // Q_n = Qs[n];
            O_n = Os[n];
            T_n = Os[n].length;
            c_n = allocateVector(T_n);
            alpha_hat = allocateMatrix(T_n, N);
            beta_hat = allocateMatrix(T_n, N);

            // Forward Recursion with Scaling: after each step alpha_hat[t]
            // is normalized to sum to one and the scaling factor recorded.
            for (int t = 0; t <= T_n - 1; t++) {
                if (t == 0) {
                    // alpha_1(i) = pi_i * b_i(O_1)
                    for (int i = 0; i < N; i++) {
                        alpha_hat[0][i] = pi[i] * B[i][O_n[0]];
                    }
                } else {
                    // alpha_{t+1}(j) = sum_i alpha_t(i) * a_ij * b_j(O_{t+1})
                    for (int j = 0; j < N; j++) {
                        for (int i = 0; i < N; i++) {
                            alpha_hat[t][j] += alpha_hat[t - 1][i] * A[i][j] * B[j][O_n[t]];
                        }
                    }
                }
                c_n[t] = 1.0 / sum(alpha_hat[t]);
                timesAssign(alpha_hat[t], c_n[t]);
            }

            // Backward Recursion with Scaling. Note the shifted index: loop
            // variable t runs T_n+1 .. 2 while beta_hat rows t-2 (= T_n-1 .. 0)
            // are filled, reusing the forward scaling factors c_n.
            for (int t = T_n + 1; t >= 2; t--) {
                if (t == T_n + 1) {
                    // Termination: beta_{T_n}(i) = 1
                    for (int i = 0; i < N; i++) {
                        beta_hat[t - 2][i] = 1;
                    }
                }
                if (t <= T_n) {
                    // beta_{t-1}(i) = sum_j a_ij * b_j(O_t) * beta_t(j)
                    for (int i = 0; i < N; i++) {
                        for (int j = 0; j < N; j++) {
                            beta_hat[t - 2][i] += A[i][j] * B[j][O_n[t - 1]] * beta_hat[t - 1][j];
                        }
                    }
                }
                // Scale with the same factor used for alpha at this step.
                timesAssign(beta_hat[t - 2], c_n[t - 2]);
            }

            // Expectation Variables and Updating Model Parameters:
            // accumulate expected transition counts (A_new, a), expected
            // emission counts (B_new, b) and initial-state posteriors (pi_new).
            for (int t = 0; t <= T_n - 1; t++) {
                if (t < T_n - 1) {
                    for (int i = 0; i < N; i++) {
                        for (int j = 0; j < N; j++) {
                            // xi_t(i,j) proportional to alpha_hat_t(i) a_ij b_j(O_{t+1}) beta_hat_{t+1}(j)
                            xi[i][j] = alpha_hat[t][i] * A[i][j] * B[j][O_n[t + 1]] * beta_hat[t + 1][j];
                            // A_new[i][j] += xi[i][j];
                        }
                        plusAssign(A_new[i], xi[i]);
                        // gamma_t(i) = sum_j xi_t(i,j)
                        gamma[i] = sum(xi[i]);
                    }
                    if (t == 0) {
                        plusAssign(pi_new, gamma);
                    }
                    plusAssign(a, gamma);
                } else {
                    // Last step: gamma_{T_n} equals the scaled forward variable
                    // (beta_hat at T_n is the all-ones termination vector).
                    assignVector(gamma, alpha_hat[t]);
                }
                for (int j = 0; j < N; j++) {
                    B_new[j][O_n[t]] += gamma[j];
                }
                plusAssign(b, gamma);
                // log P(O_n|Theta) = -sum_t log c_n[t], accumulated over sequences.
                log_likelihood_new += -Math.log(c_n[t]);
            }
        }

        // Normalization (Sum to One): turn accumulated expected counts into
        // probability distributions.
        sum2one(pi_new);
        for (int i = 0; i < N; i++) {
            divideAssign(A_new[i], a[i]);
        }
        for (int j = 0; j < N; j++) {
            divideAssign(B_new[j], b[j]);
        }

        // Swap the new parameters in; the old buffers are reused next iteration.
        temp_pi = pi;
        pi = pi_new;
        pi_new = temp_pi;
        temp_A = A;
        A = A_new;
        A_new = temp_A;
        temp_B = B;
        B = B_new;
        B_new = temp_B;
        // display(B);

        s = s + 1;
        if (s > 1) {
            // Converged when the relative log-likelihood improvement is
            // below epsilon (checked only from the second iteration on).
            if (Math.abs((log_likelihood_new - log_likelihood) / log_likelihood) < epsilon) {
                fprintf("log[P(O|Theta)] does not increase.\n\n");
                break;
            }
        }
        log_likelihood = log_likelihood_new;
        fprintf("Iter: %d, log[P(O|Theta)]: %f\n", s, log_likelihood);
    } while (s < maxIter);
}