/**
 * Pushes an input vector through the LSTM and gets an output. Cell state and recurrent result
 * value are updated.
 *
 * @param input The input vector to push through
 * @return The result vector
 */
public AVector step(AVector input) {
  // Concatenate the result vector onto the end of the input vector along dimension zero,
  // as it is a 1-dimensional AVector.
  input = input.join(result);
  if (input.length() != weights[0].getShape(1)) {
    throw new RuntimeException(
        "Input was the wrong shape! Input (+ last output) was "
            + input.length()
            + " but weights were "
            + weights[0].getShape(1));
  }

  /*
   * There are 4 layers in the LSTM, in order: sig(0) sig(1) sig(2) tanh(3)
   */
  // For each sigmoid layer, multiply its weight matrix by the input vector, add its bias vector,
  // and apply the sigmoid function to the resulting vector. The "columns" of the 3D weights
  // array hold the two-dimensional matrices for each of the activations.
  for (int i = 0; i < 3; i++) {
    sigmoidMult1[i] = weights[i].innerProduct(input);
    sigmoidMult1[i].add(biases[i]);
    sigmoidLayers[i] = Operations.Sigmoid.operate(sigmoidMult1[i]);
  }

  // Calculate the tanh layer in the same fashion as the sigmoid layers, but with the tanh
  // activation function
  tanhLayer = weights[3].innerProduct(input);
  tanhLayer.add(biases[3]);
  tanhLayer = Operations.Tanh.operate(tanhLayer);

  // First element-wise multiplication: the forget gate (sigmoid layer 0) and the current cell state
  sigmoidLayers[0].multiply(cellState);

  // Second element-wise multiplication: the input gate (sigmoid layer 1) and the tanh layer
  sigmoidLayers[1].multiply(tanhLayer);

  // The new cell state is the sum of the two products
  sigmoidLayers[0].add(sigmoidLayers[1]);
  cellState = sigmoidLayers[0];
  sigmoidLayers[0] = null;
  sigmoidLayers[1] = null;

  // The output gate (sigmoid layer 2) times tanh of the new cell state gives the result
  AVector tanhOp = Operations.Tanh.operate(cellState.copy());
  sigmoidLayers[2].multiply(tanhOp);
  result = sigmoidLayers[2];
  sigmoidLayers[2] = null;

  return result.copy();
}
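// Illustrative usage sketch (not from the original source): driving step() over a
// sequence of input vectors. The layer instance "lstm" (an object of the class
// containing step() above, with weights already loaded) and the collection
// "inputSequence" are assumptions made for illustration only; step()'s contract of
// one input vector per call and one output vector returned comes from the method above.
AVector output = null;
for (AVector x : inputSequence) {
  // Each call joins the previous output onto x internally, updates the cell state,
  // and returns a copy of the new recurrent output vector.
  output = lstm.step(x);
}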
@Override
public void setRow(int i, AVector row) {
  int cc = checkColumnCount(row.length());
  for (int j = 0; j < cc; j++) {
    data[index(i, j)] = row.unsafeGet(j);
  }
}
@Test
public void testNonZeroCount() {
  AVector v = Vectorz.createUniformRandomVector(5);
  v.add(1);
  assertEquals(v.length(), v.nonZeroCount());

  v.scale(0.0);
  assertEquals(0, v.nonZeroCount());
}
@Override
public boolean load(INDArray data, String loadPath) {
  boolean found = true;
  switch (loadPath) {
    case "activate_b":
      activationBiases = (AVector) data;
      break;
    case "activate_w":
      activationWeights = (AMatrix) data;
      break;
    case "forget_b":
      forgetBiases = (AVector) data;
      break;
    case "forget_w":
      forgetWeights = (AMatrix) data;
      break;
    case "input_b":
      inputBiases = (AVector) data;
      break;
    case "input_w":
      inputWeights = (AMatrix) data;
      break;
    case "out_b":
      outputBiases = (AVector) data;
      break;
    case "out_w":
      outputWeights = (AMatrix) data;
      break;
    case "initialstate":
      // The saved initial state holds the cell state in its first half and the recurrent
      // result in its second half.
      AVector dataCast = (AVector) data;
      cellState = dataCast.subVector(0, dataCast.length() / 2).dense();
      result = dataCast.subVector(dataCast.length() / 2, dataCast.length() / 2).dense();
      break;
    default:
      found = false;
      break;
  }
  initWeights();
  initBiases();
  return found;
}
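// Illustrative sketch (assumed, not from the original source): restoring every
// parameter handled by load() above. The layer instance "lstm" and the lookup
// table "saved" (a java.util.Map<String, INDArray>) are hypothetical; the key
// strings are exactly the cases handled by the switch above.
String[] keys = {
  "activate_b", "activate_w", "forget_b", "forget_w",
  "input_b", "input_w", "out_b", "out_w", "initialstate"
};
for (String key : keys) {
  if (!lstm.load(saved.get(key), key)) {
    throw new IllegalStateException("Unrecognised parameter key: " + key);
  }
}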
@Override
public void transformInPlace(AVector v) {
  if (v instanceof AArrayVector) {
    transformInPlace((AArrayVector) v);
    return;
  }
  if (v.length() != dimensions)
    throw new IllegalArgumentException(ErrorMessages.incompatibleShapes(this, v));
  for (int i = 0; i < dimensions; i++) {
    v.unsafeSet(i, v.unsafeGet(i) * unsafeGetDiagonalValue(i));
  }
}
/** The train function of OrdRec. All parameters are obtained after the learning process. */
@SuppressWarnings("ConstantConditions")
private void train(SparseVector ratings, MutableSparseVector scores) {
  Vector dbeta = Vector.createLength(beta.length());
  double dt1;
  // Repeat the update pass iterationCount times over all ratings.
  for (int j = 0; j < iterationCount; j++) {
    for (VectorEntry rating : ratings.fast()) {
      long iid = rating.getKey();
      double score = scores.get(iid);
      int r = quantizer.index(rating.getValue());

      double probEqualR = getProbEQ(score, r);
      double probLessR = getProbLE(score, r);
      double probLessR_1 = getProbLE(score, r - 1);

      // Gradient step for the first threshold t1
      dt1 =
          learningRate
              / probEqualR
              * (probLessR * (1 - probLessR) * derivateOfBeta(r, 0, t1)
                  - probLessR_1 * (1 - probLessR_1) * derivateOfBeta(r - 1, 0, t1)
                  - regTerm * t1);

      // Gradient step for each beta_k
      double dbetaK;
      for (int k = 0; k < beta.length(); k++) {
        dbetaK =
            learningRate
                / probEqualR
                * (probLessR * (1 - probLessR) * derivateOfBeta(r, k + 1, beta.get(k))
                    - probLessR_1 * (1 - probLessR_1) * derivateOfBeta(r - 1, k + 1, beta.get(k))
                    - regTerm * beta.get(k));
        dbeta.set(k, dbetaK);
      }
      t1 = t1 + dt1;
      beta.add(dbeta);
    }
  }
}
/**
 * The constructor of OrdRecParameter. It uses the quantized rating values to initialize t1 and
 * beta. Each threshold is initialized as the mean of two contiguous rating values. Since the
 * quantizer indices are successive non-negative integers beginning at 0, t1 is initialized to
 * 0.5 and the interval between two thresholds is 1.
 *
 * @param qtz The quantizer for ratings
 */
private OrdRecModel(Quantizer qtz) {
  qtzValues = qtz.getValues();
  levelCount = qtzValues.length();
  t1 = (qtzValues.get(0) + qtzValues.get(1)) / 2;
  beta = Vector.createLength(levelCount - 2);

  double tr = t1;
  for (int i = 1; i <= beta.length(); i++) {
    double trnext = (qtzValues.get(i) + qtzValues.get(i + 1)) * 0.5;
    beta.set(i - 1, Math.log(trnext - tr));
    tr = trnext;
  }
}
/**
 * Get the rth threshold.
 *
 * @param thresholdIndex The index of the threshold
 * @return the rth threshold.
 */
public double getThreshold(int thresholdIndex) {
  double tr = t1;
  if (thresholdIndex < 0) {
    return Double.NEGATIVE_INFINITY;
  } else if (thresholdIndex == 0) {
    return tr;
  } else if (thresholdIndex > beta.length()) {
    return Double.POSITIVE_INFINITY;
  } else {
    for (int k = 0; k < thresholdIndex; k++) {
      tr += Math.exp(beta.get(k));
    }
    return tr;
  }
}
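// Worked example of the threshold parameterisation shared by the constructor and
// getThreshold() above. The concrete level values (ratings quantized to indices
// 0..4, i.e. 5 levels) are assumptions chosen to match the constructor's Javadoc:
// t1 = (0 + 1) / 2 = 0.5 and every beta entry is log(1.0) = 0, so
//   getThreshold(0) = 0.5, getThreshold(1) = 1.5,
//   getThreshold(2) = 2.5, getThreshold(3) = 3.5,
// while getThreshold(-1) = -Infinity and getThreshold(4) = +Infinity.
double t1 = 0.5;
double[] beta = {0.0, 0.0, 0.0};
double tr = t1;
for (int k = 0; k < beta.length; k++) {
  // Same accumulation as getThreshold: t_r = t_1 + sum of exp(beta_k) for k < r
  tr += Math.exp(beta[k]);
  System.out.println("t_" + (k + 1) + " = " + tr); // prints 1.5, 2.5, 3.5
}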
/** Sees if the pair of eigenvalue and eigenvector was found in the decomposition. */
public void testForEigenpair(
    SymmetricQRAlgorithmDecomposition alg, double valueReal, double valueImg, double... vector) {
  int N = alg.getNumberOfEigenvalues();

  int numMatched = 0;
  for (int i = 0; i < N; i++) {
    Vector2 c = alg.getEigenvalue(i);

    if (Math.abs(c.x - valueReal) < 1e-4 && Math.abs(c.y - valueImg) < 1e-4) {
      if (Math.abs(c.y) < 1e-8) {
        // Real eigenvalue: compare the eigenvector (up to sign) if one was supplied
        if (vector.length > 0) {
          AVector v = alg.getEigenVector(i);
          AMatrix e = Matrix.createFromRows(vector);
          e = e.getTranspose();

          Matrix t = Matrix.create(v.length(), 1);
          t.setColumn(0, v);

          double error = diffNormF(e, t);
          // Flip the sign and check again, since eigenvectors are only defined up to sign
          e.multiply(-1);
          double error2 = diffNormF(e, t);

          if (error < 1e-3 || error2 < 1e-3) numMatched++;
        } else {
          numMatched++;
        }
      } else if (Math.abs(c.y) > 1e-8) {
        // Complex eigenvalue: matching the value alone is enough
        numMatched++;
      }
    }
  }

  assertEquals(1, numMatched);
}
@Override
public void setColumn(int j, AVector col) {
  int rc = checkRowCount(col.length());
  col.getElements(data, j * rc);
}
@Override
public double dotProduct(AVector v) {
  if (v.length() != length) throw new IllegalArgumentException("Different vector lengths");
  return 0.0;
}
@Override
public int columnCount() {
  return vector.length();
}
/**
 * Checks that if an eigenvalue is complex, the eigenvector is null. If it is real, it then
 * checks that the equation A*v = lambda*v holds true.
 */
public void testPairsConsistent(SymmetricQRAlgorithmDecomposition alg, Matrix A) {
  int N = alg.getNumberOfEigenvalues();

  for (int i = 0; i < N; i++) {
    Vector2 c = alg.getEigenvalue(i);
    AVector v = alg.getEigenVector(i);

    if (Double.isInfinite(c.x) || Double.isNaN(c.x) || Double.isInfinite(c.y) || Double.isNaN(c.y))
      fail("Uncountable eigenvalue");

    if (Math.abs(c.y) > 1e-20) {
      // Complex eigenvalue: no eigenvector should be reported
      assertTrue(v == null);
    } else {
      // Real eigenvalue: the eigenvector must exist, be finite, and satisfy A*v = lambda*v
      assertTrue(v != null);
      assertFalse(v.hasUncountable());

      // tempA = A * v
      Matrix ta = Matrix.create(A.rowCount(), 1);
      ta.setColumn(0, v);
      AMatrix tempA = Multiplications.multiply(A, ta);

      // tempB = lambda * v
      Matrix tb = Matrix.create(v.length(), 1);
      tb.setColumn(0, v);
      AMatrix tempB = tb.multiplyCopy(c.x);

      double max = normPInf(A);
      if (max == 0) max = 1;

      double error = diffNormF(tempA, tempB) / max;

      if (error > 1e-12) {
        fail("Error was too large");
      }
      assertTrue(error <= 1e-12);
    }
  }
}