protected double computeFunctionGradientLL(double lambda[], double grad[]) { double logli = 0; try { for (int f = 0; f < lambda.length; f++) { grad[f] = -1 * lambda[f] * params.invSigmaSquare; logli -= ((lambda[f] * lambda[f]) * params.invSigmaSquare) / 2; } diter.startScan(); if (featureGenCache != null) featureGenCache.startDataScan(); for (int numRecord = 0; diter.hasNext(); numRecord++) { DataSequence dataSeq = (DataSequence) diter.next(); if (featureGenCache != null) featureGenCache.nextDataIndex(); if (params.debugLvl > 1) { Util.printDbg("Read next seq: " + numRecord + " logli " + logli); } alpha_Y.assign(0); for (int f = 0; f < lambda.length; f++) ExpF[f] = RobustMath.LOG0; if ((beta_Y == null) || (beta_Y.length < dataSeq.length())) { beta_Y = new DenseDoubleMatrix1D[2 * dataSeq.length()]; for (int i = 0; i < beta_Y.length; i++) beta_Y[i] = new DenseDoubleMatrix1D(numY); } // compute beta values in a backward scan. // also scale beta-values to 1 to avoid numerical problems. beta_Y[dataSeq.length() - 1].assign(0); for (int i = dataSeq.length() - 1; i > 0; i--) { if (params.debugLvl > 2) { /* Util.printDbg("Features fired"); featureGenerator.startScanFeaturesAt(dataSeq, i); while (featureGenerator.hasNext()) { Feature feature = featureGenerator.next(); Util.printDbg(feature.toString()); } */ } // compute the Mi matrix initMDone = computeLogMi( featureGenerator, lambda, dataSeq, i, Mi_YY, Ri_Y, false, reuseM, initMDone); tmp_Y.assign(beta_Y[i]); tmp_Y.assign(Ri_Y, sumFunc); RobustMath.logMult(Mi_YY, tmp_Y, beta_Y[i - 1], 1, 0, false, edgeGen); } double thisSeqLogli = 0; for (int i = 0; i < dataSeq.length(); i++) { // compute the Mi matrix initMDone = computeLogMi( featureGenerator, lambda, dataSeq, i, Mi_YY, Ri_Y, false, reuseM, initMDone); // find features that fire at this position.. featureGenerator.startScanFeaturesAt(dataSeq, i); if (i > 0) { tmp_Y.assign(alpha_Y); RobustMath.logMult(Mi_YY, tmp_Y, newAlpha_Y, 1, 0, true, edgeGen); newAlpha_Y.assign(Ri_Y, sumFunc); } else { newAlpha_Y.assign(Ri_Y); } while (featureGenerator.hasNext()) { Feature feature = featureGenerator.next(); int f = feature.index(); int yp = feature.y(); int yprev = feature.yprev(); float val = feature.value(); if ((dataSeq.y(i) == yp) && (((i - 1 >= 0) && (yprev == dataSeq.y(i - 1))) || (yprev < 0))) { grad[f] += val; thisSeqLogli += val * lambda[f]; if (params.debugLvl > 2) { System.out.println("Feature fired " + f + " " + feature); } } if (yprev < 0) { ExpF[f] = RobustMath.logSumExp( ExpF[f], newAlpha_Y.get(yp) + RobustMath.log(val) + beta_Y[i].get(yp)); } else { ExpF[f] = RobustMath.logSumExp( ExpF[f], alpha_Y.get(yprev) + Ri_Y.get(yp) + Mi_YY.get(yprev, yp) + RobustMath.log(val) + beta_Y[i].get(yp)); } } alpha_Y.assign(newAlpha_Y); if (params.debugLvl > 2) { System.out.println("Alpha-i " + alpha_Y.toString()); System.out.println("Ri " + Ri_Y.toString()); System.out.println("Mi " + Mi_YY.toString()); System.out.println("Beta-i " + beta_Y[i].toString()); } } double lZx = RobustMath.logSumExp(alpha_Y); thisSeqLogli -= lZx; logli += thisSeqLogli; // update grad. for (int f = 0; f < grad.length; f++) { grad[f] -= RobustMath.exp(ExpF[f] - lZx); } if (params.debugLvl > 1) { System.out.println( "Sequence " + thisSeqLogli + " logli " + logli + " log(Zx) " + lZx + " Zx " + Math.exp(lZx)); } } if (params.debugLvl > 2) { for (int f = 0; f < lambda.length; f++) System.out.print(lambda[f] + " "); System.out.println(" :x"); for (int f = 0; f < lambda.length; f++) System.out.print(grad[f] + " "); System.out.println(" :g"); } if (params.debugLvl > 0) Util.printDbg( "Iteration " + icall + " log-likelihood " + logli + " norm(grad logli) " + norm(grad) + " norm(x) " + norm(lambda)); } catch (Exception e) { System.out.println("Alpha-i " + alpha_Y.toString()); System.out.println("Ri " + Ri_Y.toString()); System.out.println("Mi " + Mi_YY.toString()); e.printStackTrace(); System.exit(0); } return logli; }
/** Creates a new instance of testmatrix */ public GLSsolver(double[][] p_MatrixgleichNull) throws IllegalArgumentException { // -------------------------------------- // Kontrolle, ob Eingabematrix rechteckig // -------------------------------------- int nplus1 = p_MatrixgleichNull[0].length; for (int i = 1; i < p_MatrixgleichNull.length; i++) { // Zeilen i if (p_MatrixgleichNull[i].length != nplus1) { System.err.println( "Programmfehler: Matrix des GLS ist nicht rechteckig! (im solver entdeckt)"); throw new IllegalArgumentException(); } } if (nplus1 <= 1) throw new IllegalArgumentException("keine Unbekannte"); // keine Unbekannte!!! // Umgeht einen Fehler in der colt-Bibliothek // TODO wenn behoben, Workaround entfernen // ------ int anzGl = p_MatrixgleichNull.length; if (anzGl < nplus1 - 1) { // anzGleichungen < anz Unbekannte if (debug) System.out.println("WorkAround fuer Fehler in colt: 0 = 0 Gleichungen anhaengen"); anzGl = nplus1 - 1; // = Anzahl Unbek, 0 0 0 ... 0 = 0 Zeile angehängt } // ------------------------- // Daten in A und b einlesen // ------------------------- // so dass A*x = b A = new DenseDoubleMatrix2D(anzGl, (nplus1 - 1)); DenseDoubleMatrix2D b = new DenseDoubleMatrix2D(anzGl, 1); for (int i = 0; i < p_MatrixgleichNull.length; i++) { // Zeilen i for (int j = 0; j < nplus1 - 1; j++) { // Spalten A.set(i, j, p_MatrixgleichNull[i][j]); } b.set(i, 0, -p_MatrixgleichNull[i][nplus1 - 1]); } if (debug) { System.out.println(" A = " + A.toString()); System.out.println(" b = " + b.toString()); System.out.println(""); } // -------------- // LR - Zerlegung // -------------- LUDecomposition ALU = new LUDecomposition(A); if (debug) System.out.println(ALU.toString()); DoubleMatrix2D L = ALU.getL(); R = ALU.getU(); int[] piv = ALU.getPivot(); Algebra alg = new Algebra(); // if (debug) System.out.println("L = " + L.toString()); // if (debug) System.out.println("Kontrolle L*R = " + alg.mult(L,R).toString()); // if (debug) System.out.println("Kontrolle P*b = " + alg.permute(b, piv, null) ); // // if (debug) System.out.println("Rx = c: R = " + R.toString()); // if (debug) System.out.println("alg.permute(b, piv, null) = " + alg.permute(b, piv, // null).toString()); c = alg.solve(L, alg.permute(b, piv, null)); // TODO: kann zu Problemen führen, // wenn weniger Gleichungen als Unbek --> s.Workaround oben if (debug) System.out.println("Lc = Pb: c = " + c.toString()); if (debug) { System.out.println("Rang A: " + alg.rank(A)); System.out.println("Rang R: " + alg.rank(R)); } assert (alg.rank(A) == alg.rank(R)) : "Rang von A ungleich Rang von R --> Programmfehler"; anzUnbestParam = A.columns() - alg.rank(A); if (debug) System.out.println("Anz unbest Parameter: " + anzUnbestParam); }
protected double computeFunctionGradient(double lambda[], double grad[]) { initMDone = false; if (params.trainerType.equals("ll")) return computeFunctionGradientLL(lambda, grad); double logli = 0; try { for (int f = 0; f < lambda.length; f++) { grad[f] = -1 * lambda[f] * params.invSigmaSquare; logli -= ((lambda[f] * lambda[f]) * params.invSigmaSquare) / 2; } boolean doScaling = params.doScaling; diter.startScan(); if (featureGenCache != null) featureGenCache.startDataScan(); int numRecord = 0; for (numRecord = 0; diter.hasNext(); numRecord++) { DataSequence dataSeq = (DataSequence) diter.next(); if (featureGenCache != null) featureGenCache.nextDataIndex(); if (params.debugLvl > 1) { Util.printDbg("Read next seq: " + numRecord + " logli " + logli); } alpha_Y.assign(1); for (int f = 0; f < lambda.length; f++) ExpF[f] = 0; if ((beta_Y == null) || (beta_Y.length < dataSeq.length())) { beta_Y = new DenseDoubleMatrix1D[2 * dataSeq.length()]; for (int i = 0; i < beta_Y.length; i++) beta_Y[i] = new DenseDoubleMatrix1D(numY); scale = new double[2 * dataSeq.length()]; } // compute beta values in a backward scan. // also scale beta-values to 1 to avoid numerical problems. scale[dataSeq.length() - 1] = (doScaling) ? numY : 1; beta_Y[dataSeq.length() - 1].assign(1.0 / scale[dataSeq.length() - 1]); for (int i = dataSeq.length() - 1; i > 0; i--) { if (params.debugLvl > 2) { Util.printDbg("Features fired"); // featureGenerator.startScanFeaturesAt(dataSeq, i); // while (featureGenerator.hasNext()) { // Feature feature = featureGenerator.next(); // Util.printDbg(feature.toString()); // } } // compute the Mi matrix initMDone = computeLogMi( featureGenerator, lambda, dataSeq, i, Mi_YY, Ri_Y, true, reuseM, initMDone); tmp_Y.assign(beta_Y[i]); tmp_Y.assign(Ri_Y, multFunc); RobustMath.Mult(Mi_YY, tmp_Y, beta_Y[i - 1], 1, 0, false, edgeGen); // Mi_YY.zMult(tmp_Y, beta_Y[i-1]); // need to scale the beta-s to avoid overflow scale[i - 1] = doScaling ? beta_Y[i - 1].zSum() : 1; if ((scale[i - 1] < 1) && (scale[i - 1] > -1)) scale[i - 1] = 1; constMultiplier.multiplicator = 1.0 / scale[i - 1]; beta_Y[i - 1].assign(constMultiplier); } double thisSeqLogli = 0; for (int i = 0; i < dataSeq.length(); i++) { // compute the Mi matrix initMDone = computeLogMi( featureGenerator, lambda, dataSeq, i, Mi_YY, Ri_Y, true, reuseM, initMDone); // find features that fire at this position.. featureGenerator.startScanFeaturesAt(dataSeq, i); if (i > 0) { tmp_Y.assign(alpha_Y); RobustMath.Mult(Mi_YY, tmp_Y, newAlpha_Y, 1, 0, true, edgeGen); // Mi_YY.zMult(tmp_Y, newAlpha_Y,1,0,true); newAlpha_Y.assign(Ri_Y, multFunc); } else { newAlpha_Y.assign(Ri_Y); } while (featureGenerator.hasNext()) { Feature feature = featureGenerator.next(); int f = feature.index(); int yp = feature.y(); int yprev = feature.yprev(); float val = feature.value(); if ((dataSeq.y(i) == yp) && (((i - 1 >= 0) && (yprev == dataSeq.y(i - 1))) || (yprev < 0))) { grad[f] += val; thisSeqLogli += val * lambda[f]; } if (yprev < 0) { ExpF[f] += newAlpha_Y.get(yp) * val * beta_Y[i].get(yp); } else { ExpF[f] += alpha_Y.get(yprev) * Ri_Y.get(yp) * Mi_YY.get(yprev, yp) * val * beta_Y[i].get(yp); } } alpha_Y.assign(newAlpha_Y); // now scale the alpha-s to avoid overflow problems. constMultiplier.multiplicator = 1.0 / scale[i]; alpha_Y.assign(constMultiplier); if (params.debugLvl > 2) { System.out.println("Alpha-i " + alpha_Y.toString()); System.out.println("Ri " + Ri_Y.toString()); System.out.println("Mi " + Mi_YY.toString()); System.out.println("Beta-i " + beta_Y[i].toString()); } } double Zx = alpha_Y.zSum(); thisSeqLogli -= log(Zx); // correct for the fact that alpha-s were scaled. for (int i = 0; i < dataSeq.length(); i++) { thisSeqLogli -= log(scale[i]); } logli += thisSeqLogli; // update grad. for (int f = 0; f < grad.length; f++) grad[f] -= ExpF[f] / Zx; if (params.debugLvl > 1) { System.out.println( "Sequence " + thisSeqLogli + " logli " + logli + " log(Zx) " + Math.log(Zx) + " Zx " + Zx); } } if (params.debugLvl > 2) { for (int f = 0; f < lambda.length; f++) System.out.print(lambda[f] + " "); System.out.println(" :x"); for (int f = 0; f < lambda.length; f++) System.out.println(featureGenerator.featureName(f) + " " + grad[f] + " "); System.out.println(" :g"); } if (params.debugLvl > 0) Util.printDbg( "Iter " + icall + " log likelihood " + logli + " norm(grad logli) " + norm(grad) + " norm(x) " + norm(lambda)); if (icall == 0) { System.out.println("Number of training records" + numRecord); } } catch (Exception e) { System.out.println("Alpha-i " + alpha_Y.toString()); System.out.println("Ri " + Ri_Y.toString()); System.out.println("Mi " + Mi_YY.toString()); e.printStackTrace(); System.exit(0); } return logli; }