/**
 * Applies sparsity to the given hbias gradient.
 *
 * @param hBiasGradient the hbias gradient to apply sparsity to
 * @param learningRate the learning rate used
 */
protected void applySparsity(DoubleMatrix hBiasGradient, double learningRate) {
    if (useAdaGrad) {
        DoubleMatrix change = this.hBiasAdaGrad
                .getLearningRates(hBias)
                .neg()
                .mul(sparsity)
                .mul(hBiasGradient.mul(sparsity));
        hBiasGradient.addi(change);
    } else {
        DoubleMatrix change = hBiasGradient.mul(sparsity).mul(-learningRate * sparsity);
        hBiasGradient.addi(change);
    }
}
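For reference, a minimal standalone sketch of what the non-AdaGrad branch computes on a toy gradient; the sparsity and learning-rate values below are hypothetical and only illustrate the arithmetic:

import org.jblas.DoubleMatrix;

public class SparsityPenaltyDemo {
    public static void main(String[] args) {
        double sparsity = 0.01;    // hypothetical sparsity target
        double learningRate = 0.1; // hypothetical learning rate
        DoubleMatrix hBiasGradient = new DoubleMatrix(new double[] {0.5, -0.2, 0.3});

        // non-AdaGrad branch: change = gradient * sparsity * (-learningRate * sparsity)
        DoubleMatrix change = hBiasGradient.mul(sparsity).mul(-learningRate * sparsity);
        hBiasGradient.addi(change);

        // each entry shrinks by a factor of (1 - learningRate * sparsity^2)
        System.out.println(hBiasGradient);
    }
}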
@Override
public void reduce(Text key, Iterable<jBLASArrayWritable> inputs, WeightContributions.Context context)
        throws IOException, InterruptedException {
    DoubleMatrix w_cont = new DoubleMatrix(),
            hb_cont = new DoubleMatrix(),
            vb_cont = new DoubleMatrix(),
            weights = null,
            hbias = null,
            vbias = null;
    ArrayList<DoubleMatrix> chainList = new ArrayList<DoubleMatrix>();
    ArrayList<DoubleMatrix> output_array = new ArrayList<DoubleMatrix>();
    int count = 0;
    for (jBLASArrayWritable input : inputs) {
        ArrayList<DoubleMatrix> data = input.getData();
        w_cont.copy(data.get(0));
        hb_cont.copy(data.get(1));
        vb_cont.copy(data.get(3));

        // save list of all hidden chains for updates to batch files in phase 3
        chainList.add(new DoubleMatrix(data.get(2).toArray2()));

        if (weights == null) {
            weights = DoubleMatrix.zeros(w_cont.rows, w_cont.columns);
            hbias = DoubleMatrix.zeros(hb_cont.rows, hb_cont.columns);
            vbias = DoubleMatrix.zeros(vb_cont.rows, vb_cont.columns);
        }

        // sum weight contributions
        weights.addi(w_cont);
        hbias.addi(hb_cont);
        vbias.addi(vb_cont);
        count++;
    }
    output_array.add(weights.div(count));
    output_array.add(hbias.div(count));
    output_array.add(vbias.div(count));
    output_array.addAll(chainList);

    jBLASArrayWritable outputmatrix = new jBLASArrayWritable(output_array);
    context.write(key, outputmatrix);
}
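Outside of Hadoop, the averaging this reducer performs is just an element-wise mean of the per-mapper contributions. A minimal sketch with hypothetical matrix values:

import java.util.Arrays;
import java.util.List;
import org.jblas.DoubleMatrix;

public class ContributionAverageDemo {
    public static void main(String[] args) {
        // hypothetical weight contributions from three mappers
        List<DoubleMatrix> contributions = Arrays.asList(
                DoubleMatrix.ones(2, 2),
                DoubleMatrix.ones(2, 2).mul(2.0),
                DoubleMatrix.ones(2, 2).mul(3.0));

        // sum the contributions, then divide by how many were received
        DoubleMatrix sum = DoubleMatrix.zeros(2, 2);
        for (DoubleMatrix c : contributions) {
            sum.addi(c);
        }
        DoubleMatrix mean = sum.div(contributions.size());
        System.out.println(mean); // every entry is 2.0
    }
}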
public static DoubleMatrix conv2d(DoubleMatrix input, DoubleMatrix kernel, Type type) {
    DoubleMatrix xShape = new DoubleMatrix(1, 2);
    xShape.put(0, input.rows);
    xShape.put(1, input.columns);

    DoubleMatrix yShape = new DoubleMatrix(1, 2);
    yShape.put(0, kernel.rows);
    yShape.put(1, kernel.columns);

    // use non-mutating add/sub so xShape keeps the input dimensions for the VALID case below
    DoubleMatrix zShape = xShape.add(yShape).sub(1);
    int retRows = (int) zShape.get(0);
    int retCols = (int) zShape.get(1);

    ComplexDoubleMatrix fftInput = complexDisceteFourierTransform(input, retRows, retCols);
    ComplexDoubleMatrix fftKernel = complexDisceteFourierTransform(kernel, retRows, retCols);
    ComplexDoubleMatrix mul = fftKernel.muli(fftInput);
    ComplexDoubleMatrix retComplex = complexInverseDisceteFourierTransform(mul);

    DoubleMatrix ret = retComplex.getReal();

    if (type == Type.VALID) {
        DoubleMatrix validShape = xShape.sub(yShape).add(1);
        DoubleMatrix start = zShape.sub(validShape).div(2);
        DoubleMatrix end = start.add(validShape);
        if (start.get(0) < 1 || start.get(1) < 1)
            throw new IllegalStateException("Illegal start index " + start);
        if (end.get(0) < 1 || end.get(1) < 1)
            throw new IllegalStateException("Illegal end index " + end);

        ret = ret.get(
                RangeUtils.interval((int) start.get(0), (int) end.get(0)),
                RangeUtils.interval((int) start.get(1), (int) end.get(1)));
    }

    return ret;
}
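The shape arithmetic above is the standard one for FFT-based convolution: a FULL result has size input + kernel - 1 per dimension, and the VALID result is the input - kernel + 1 crop taken out of its center. A small sketch that only illustrates those sizes (the 4x4 input and 2x2 kernel are hypothetical and conv2d itself is not called here):

import org.jblas.DoubleMatrix;

public class Conv2dShapeDemo {
    public static void main(String[] args) {
        DoubleMatrix input = DoubleMatrix.rand(4, 4);
        DoubleMatrix kernel = DoubleMatrix.rand(2, 2);

        // FULL output size: input + kernel - 1 in each dimension
        int fullRows = input.rows + kernel.rows - 1;        // 5
        int fullCols = input.columns + kernel.columns - 1;  // 5

        // VALID output size: input - kernel + 1 in each dimension
        int validRows = input.rows - kernel.rows + 1;       // 3
        int validCols = input.columns - kernel.columns + 1; // 3

        System.out.println("FULL:  " + fullRows + " x " + fullCols);
        System.out.println("VALID: " + validRows + " x " + validCols);
    }
}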
private void costantiniUnwrap() throws LPException {
    final int ny = wrappedPhase.rows - 1;    // start from Zero!
    final int nx = wrappedPhase.columns - 1; // start from Zero!

    if (wrappedPhase.isVector()) throw new IllegalArgumentException("Input must be 2D array");
    if (wrappedPhase.rows < 2 || wrappedPhase.columns < 2)
        throw new IllegalArgumentException("Size of input must be larger than 2");

    // Default weight
    DoubleMatrix w1 = DoubleMatrix.ones(ny + 1, 1);
    w1.put(0, 0.5);
    w1.put(w1.length - 1, 0.5);
    DoubleMatrix w2 = DoubleMatrix.ones(1, nx + 1);
    w2.put(0, 0.5);
    w2.put(w2.length - 1, 0.5);
    DoubleMatrix weight = w1.mmul(w2);

    DoubleMatrix i, j, I_J, IP1_J, I_JP1;
    DoubleMatrix Psi1, Psi2;
    DoubleMatrix[] ROWS;

    // Compute partial derivative Psi1, eqt (1,3)
    i = intRangeDoubleMatrix(0, ny - 1);
    j = intRangeDoubleMatrix(0, nx);
    ROWS = grid2D(i, j);
    I_J = JblasUtils.sub2ind(wrappedPhase.rows, ROWS[0], ROWS[1]);
    IP1_J = JblasUtils.sub2ind(wrappedPhase.rows, ROWS[0].add(1), ROWS[1]);
    Psi1 = JblasUtils.getMatrixFromIdx(wrappedPhase, IP1_J)
            .sub(JblasUtils.getMatrixFromIdx(wrappedPhase, I_J));
    Psi1 = UnwrapUtils.wrapDoubleMatrix(Psi1);

    // Compute partial derivative Psi2, eqt (2,4)
    i = intRangeDoubleMatrix(0, ny);
    j = intRangeDoubleMatrix(0, nx - 1);
    ROWS = grid2D(i, j);
    I_J = JblasUtils.sub2ind(wrappedPhase.rows, ROWS[0], ROWS[1]);
    I_JP1 = JblasUtils.sub2ind(wrappedPhase.rows, ROWS[0], ROWS[1].add(1));
    Psi2 = JblasUtils.getMatrixFromIdx(wrappedPhase, I_JP1)
            .sub(JblasUtils.getMatrixFromIdx(wrappedPhase, I_J));
    Psi2 = UnwrapUtils.wrapDoubleMatrix(Psi2);

    // Compute beq
    DoubleMatrix beq = DoubleMatrix.zeros(ny, nx);
    i = intRangeDoubleMatrix(0, ny - 1);
    j = intRangeDoubleMatrix(0, nx - 1);
    ROWS = grid2D(i, j);
    I_J = JblasUtils.sub2ind(Psi1.rows, ROWS[0], ROWS[1]);
    I_JP1 = JblasUtils.sub2ind(Psi1.rows, ROWS[0], ROWS[1].add(1));
    beq.addi(JblasUtils.getMatrixFromIdx(Psi1, I_JP1).sub(JblasUtils.getMatrixFromIdx(Psi1, I_J)));
    I_J = JblasUtils.sub2ind(Psi2.rows, ROWS[0], ROWS[1]);
    I_JP1 = JblasUtils.sub2ind(Psi2.rows, ROWS[0].add(1), ROWS[1]);
    beq.subi(JblasUtils.getMatrixFromIdx(Psi2, I_JP1).sub(JblasUtils.getMatrixFromIdx(Psi2, I_J)));
    beq.muli(-1 / (2 * Constants._PI));
    for (int k = 0; k < beq.length; k++) {
        beq.put(k, Math.round(beq.get(k)));
    }
    beq.reshape(beq.length, 1);

    logger.debug("Constraint matrix");
    i = intRangeDoubleMatrix(0, ny - 1);
    j = intRangeDoubleMatrix(0, nx - 1);
    ROWS = grid2D(i, j);
    DoubleMatrix ROW_I_J = JblasUtils.sub2ind(i.length, ROWS[0], ROWS[1]);
    int nS0 = nx * ny;

    // Used by S1p, S1m
    DoubleMatrix[] COLS;
    COLS = grid2D(i, j);
    DoubleMatrix COL_IJ_1 = JblasUtils.sub2ind(i.length, COLS[0], COLS[1]);
    COLS = grid2D(i, j.add(1));
    DoubleMatrix COL_I_JP1 = JblasUtils.sub2ind(i.length, COLS[0], COLS[1]);
    int nS1 = (nx + 1) * (ny);

    // Used by S2p, S2m
    COLS = grid2D(i, j);
    DoubleMatrix COL_IJ_2 = JblasUtils.sub2ind(i.length + 1, COLS[0], COLS[1]);
    COLS = grid2D(i.add(1), j);
    DoubleMatrix COL_IP1_J = JblasUtils.sub2ind(i.length + 1, COLS[0], COLS[1]);
    int nS2 = nx * (ny + 1);

    // Equality constraint matrix (Aeq)
    /*
        S1p = + sparse(ROW_I_J, COL_I_JP1,1,nS0,nS1) ...
              - sparse(ROW_I_J, COL_IJ_1,1,nS0,nS1);
        S1m = -S1p;
        S2p = - sparse(ROW_I_J, COL_IP1_J,1,nS0,nS2) ...
              + sparse(ROW_I_J, COL_IJ_2,1,nS0,nS2);
        S2m = -S2p;
    */
    // TODO: Aeq matrix should be sparse from its initialization, look into Jblas matrix factory for how to
    // ...otherwise even a data set of e.g. 40x40 pixels will exhaust heap:
    // ... dimension of Aeq (equality constraints) matrix for 30x30 input is 1521x6240
    // ... dimension of Aeq (equality constraints) matrix for 50x50 input is 2401x9800
    // ... dimension of Aeq (equality constraints) matrix for 512x512 input is 261121x1046528
    DoubleMatrix S1p = JblasUtils.setUpMatrixFromIdx(nS0, nS1, ROW_I_J, COL_I_JP1)
            .sub(JblasUtils.setUpMatrixFromIdx(nS0, nS1, ROW_I_J, COL_IJ_1));
    DoubleMatrix S1m = S1p.neg();
    DoubleMatrix S2p = JblasUtils.setUpMatrixFromIdx(nS0, nS2, ROW_I_J, COL_IP1_J)
            .neg()
            .add(JblasUtils.setUpMatrixFromIdx(nS0, nS2, ROW_I_J, COL_IJ_2));
    DoubleMatrix S2m = S2p.neg();
    DoubleMatrix Aeq = concatHorizontally(concatHorizontally(S1p, S1m), concatHorizontally(S2p, S2m));

    final int nObs = Aeq.columns;
    final int nUnkn = Aeq.rows;

    DoubleMatrix c1 = JblasUtils.getMatrixFromRange(0, ny, 0, weight.columns, weight);
    DoubleMatrix c2 = JblasUtils.getMatrixFromRange(0, weight.rows, 0, nx, weight);
    c1.reshape(c1.length, 1);
    c2.reshape(c2.length, 1);

    DoubleMatrix cost = DoubleMatrix.concatVertically(
            DoubleMatrix.concatVertically(c1, c1), DoubleMatrix.concatVertically(c2, c2));

    logger.debug("Minimum network flow resolution");

    StopWatch clockLP = new StopWatch();
    LinearProgram lp = new LinearProgram(cost.data);
    lp.setMinProblem(true);

    boolean[] integerBool = new boolean[nObs];
    double[] lowerBound = new double[nObs];
    double[] upperBound = new double[nObs];

    for (int k = 0; k < nUnkn; k++) {
        lp.addConstraint(new LinearEqualsConstraint(Aeq.getRow(k).toArray(), beq.get(k), "cost"));
    }

    for (int k = 0; k < nObs; k++) {
        integerBool[k] = true;
        lowerBound[k] = 0;
        upperBound[k] = 99999;
    }

    // setup bounds and integer nature
    lp.setIsinteger(integerBool);
    lp.setUpperbound(upperBound);
    lp.setLowerbound(lowerBound);

    LinearProgramSolver solver = SolverFactory.newDefault();

    // double[] solution;
    // solution = solver.solve(lp);
    DoubleMatrix solution = new DoubleMatrix(solver.solve(lp));

    clockLP.stop();
    logger.debug("Total GLPK time: {} [sec]", (double) (clockLP.getElapsedTime()) / 1000);

    // Dispatch the LP solution
    int offset;

    int[] idx1p = JblasUtils.intRangeIntArray(0, nS1 - 1);
    DoubleMatrix x1p = solution.get(idx1p);
    x1p.reshape(ny, nx + 1);
    offset = idx1p[nS1 - 1] + 1;

    int[] idx1m = JblasUtils.intRangeIntArray(offset, offset + nS1 - 1);
    DoubleMatrix x1m = solution.get(idx1m);
    x1m.reshape(ny, nx + 1);
    offset = idx1m[idx1m.length - 1] + 1;

    int[] idx2p = JblasUtils.intRangeIntArray(offset, offset + nS2 - 1);
    DoubleMatrix x2p = solution.get(idx2p);
    x2p.reshape(ny + 1, nx);
    offset = idx2p[idx2p.length - 1] + 1;

    int[] idx2m = JblasUtils.intRangeIntArray(offset, offset + nS2 - 1);
    DoubleMatrix x2m = solution.get(idx2m);
    x2m.reshape(ny + 1, nx);

    // Compute the derivative jumps, eqt (20,21)
    DoubleMatrix k1 = x1p.sub(x1m);
    DoubleMatrix k2 = x2p.sub(x2m);

    // (?) Round to integer solution
    if (roundK) {
        for (int idx = 0; idx < k1.length; idx++) {
            k1.put(idx, FastMath.floor(k1.get(idx)));
        }
        for (int idx = 0; idx < k2.length; idx++) {
            k2.put(idx, FastMath.floor(k2.get(idx)));
        }
    }

    // Sum the jumps with the wrapped partial derivatives, eqt (10,11)
    k1.reshape(ny, nx + 1);
    k2.reshape(ny + 1, nx);
    k1.addi(Psi1.div(Constants._TWO_PI));
    k2.addi(Psi2.div(Constants._TWO_PI));

    // Integrate the partial derivatives, eqt (6)
    // cumsum() method in JblasTester -> see cumsum_demo() in JblasTester.cumsum_demo()
    DoubleMatrix k2_temp = DoubleMatrix.concatHorizontally(DoubleMatrix.zeros(1), k2.getRow(0));
    k2_temp = JblasUtils.cumsum(k2_temp, 1);
    DoubleMatrix k = DoubleMatrix.concatVertically(k2_temp, k1);
    k = JblasUtils.cumsum(k, 1);

    // Unwrap - final solution
    unwrappedPhase = k.mul(Constants._TWO_PI);
}
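The partial derivatives Psi1 and Psi2 are wrapped into the principal interval before the network-flow problem is set up. UnwrapUtils.wrapDoubleMatrix is not shown here; a minimal sketch of the standard wrapping it is assumed to perform (folding each value into [-pi, pi)) could look like this:

import org.jblas.DoubleMatrix;

public class WrapDemo {
    // assumed behaviour: fold each entry into the principal interval [-pi, pi)
    static DoubleMatrix wrap(DoubleMatrix phase) {
        DoubleMatrix out = phase.dup();
        for (int idx = 0; idx < out.length; idx++) {
            double v = out.get(idx);
            out.put(idx, v - 2.0 * Math.PI * Math.round(v / (2.0 * Math.PI)));
        }
        return out;
    }

    public static void main(String[] args) {
        DoubleMatrix psi = new DoubleMatrix(new double[] {0.1, 3.5, -4.0});
        System.out.println(wrap(psi)); // 3.5 and -4.0 are folded back into [-pi, pi)
    }
}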
public double train(DataSet train_data) {
    _n_inputs = train_data._n_cols;
    int n_class = train_data._n_classes;
    _n_outputs = Math.min(_n_inputs, n_class - 1);
    build();
    // train_data.print_dat_format();

    DoubleMatrix Sw = DoubleMatrix.zeros(_n_inputs, _n_inputs);
    DoubleMatrix Sb = DoubleMatrix.zeros(_n_inputs, _n_inputs);

    int[] n_classes = new int[n_class];
    double[][] mean_for_class = new double[n_class][_n_inputs];
    double[] mean = new double[_n_inputs];

    int n_examples = train_data._n_rows, i, j;
    for (i = 0; i < n_examples; i++) {
        double[] X = train_data.get_X(i);
        int label = train_data.get_label(i);
        for (j = 0; j < _n_inputs; j++) {
            mean_for_class[label][j] += X[j];
            mean[j] += X[j];
        }
        n_classes[label]++;
    }

    double[][] diff_class_mean = new double[n_class][_n_inputs];
    for (j = 0; j < _n_inputs; j++) {
        mean[j] /= 1.0 * n_examples; // compute the total mean value
        for (i = 0; i < n_class; i++) {
            if (n_classes[i] == 0) {
                mean_for_class[i][j] = 0;
            } else {
                mean_for_class[i][j] /= 1.0 * n_classes[i];
            }
            diff_class_mean[i][j] = mean_for_class[i][j] - mean[j];
        }
    }

    for (i = 0; i < n_class; i++) {
        double[] diff = diff_class_mean[i];
        // Sb = Sb + c_i * (v_i - mean)(v_i - mean)'
        Sb.addi(new DoubleMatrix(Vec.outer_dot(diff, diff)).muli(n_classes[i]));
    }

    for (i = 0; i < n_examples; i++) {
        double[] diff = Vec.minus(train_data.get_X(i), mean_for_class[train_data.get_label(i)]);
        // Sw = Sw + diff * diff'
        Sw.addi(new DoubleMatrix(Vec.outer_dot(diff, diff)));
    }

    DoubleMatrix[] eig_sw = Eigen.symmetricEigenvectors(Sw);

    // whiten the within-class scatter: scale each eigenvector column by 1/sqrt(eigenvalue)
    double s;
    for (j = 0; j < _n_inputs; j++) {
        s = eig_sw[1].get(j, j);
        if (s > General.SMALL_CONST) {
            s = 1.0 / Math.sqrt(s);
        } else {
            s = 0;
        }
        for (i = 0; i < _n_inputs; i++) {
            eig_sw[0].put(i, j, eig_sw[0].get(i, j) * s);
        }
    }

    Sb = eig_sw[0].transpose().mmul(Sb).mmul(eig_sw[0]);

    DoubleMatrix[] eig_sb = Eigen.symmetricEigenvectors(Sb);

    double sum = 0;
    for (i = 0; i < _n_inputs; i++) {
        sum += eig_sb[1].get(i, i);
    }
    OutFile.printf("eig value: \n");
    for (i = 0; i < _n_inputs; i++) {
        OutFile.printf("%f ", eig_sb[1].get(i, i) / sum);
    }
    OutFile.printf("\n");

    eig_sw[0].mmuli(eig_sb[0]);

    // keep the projection vectors associated with the largest eigenvalues
    for (i = 0; i < _n_outputs; i++) {
        for (j = 0; j < _n_inputs; j++) {
            _eig_mat[j][i] = eig_sw[0].get(j, _n_inputs - 1 - i);
        }
    }
    return 0.0;
}
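The between-class scatter update accumulates a weighted outer product of each class-mean difference. A small sketch of that single update, assuming Vec.outer_dot computes the outer product, expressed here with plain jBLAS calls and hypothetical values:

import org.jblas.DoubleMatrix;

public class ScatterUpdateDemo {
    public static void main(String[] args) {
        // hypothetical class-mean difference vector (v_i - mean)
        DoubleMatrix diff = new DoubleMatrix(new double[] {1.0, -2.0, 0.5});
        int classCount = 10; // hypothetical number of examples in the class

        // outer product diff * diff^T, weighted by the class count
        DoubleMatrix contribution = diff.mmul(diff.transpose()).muli(classCount);

        DoubleMatrix Sb = DoubleMatrix.zeros(3, 3);
        Sb.addi(contribution);
        System.out.println(Sb);
    }
}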
@Override
public void merge(NeuralNetwork network, int batchSize) {
    // move each parameter toward the other network's parameters by 1/batchSize of the difference;
    // use non-mutating sub() throughout so the other network's parameters are left untouched
    W.addi(network.getW().sub(W).div(batchSize));
    hBias.addi(network.gethBias().sub(hBias).divi(batchSize));
    vBias.addi(network.getvBias().sub(vBias).divi(batchSize));
}
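A minimal sketch of the same per-parameter update on toy matrices (the values and averaging factor are hypothetical):

import org.jblas.DoubleMatrix;

public class MergeDemo {
    public static void main(String[] args) {
        int batchSize = 4; // hypothetical averaging factor
        DoubleMatrix local = DoubleMatrix.ones(2, 2);            // this network's weights
        DoubleMatrix remote = DoubleMatrix.ones(2, 2).mul(3.0);  // another worker's weights

        // local += (remote - local) / batchSize, the same update merge() applies per parameter
        local.addi(remote.sub(local).div(batchSize));
        System.out.println(local); // every entry becomes 1.5
    }
}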
/**
 * Update the gradient according to the configuration such as adagrad, momentum, and sparsity.
 *
 * @param gradient the gradient to modify
 * @param iteration the current iteration
 * @param learningRate the learning rate for the current iteration
 */
protected void updateGradientAccordingToParams(
        NeuralNetworkGradient gradient, int iteration, double learningRate) {
    DoubleMatrix wGradient = gradient.getwGradient();
    DoubleMatrix hBiasGradient = gradient.gethBiasGradient();
    DoubleMatrix vBiasGradient = gradient.getvBiasGradient();

    // reset adagrad history
    if (iteration != 0 && resetAdaGradIterations > 0 && iteration % resetAdaGradIterations == 0) {
        // create the AdaGrad instances first so the resets below cannot hit a null reference
        if (this.W != null && this.wAdaGrad == null)
            this.wAdaGrad = new AdaGrad(this.W.rows, this.W.columns);
        if (this.vBias != null && this.vBiasAdaGrad == null)
            this.vBiasAdaGrad = new AdaGrad(this.vBias.rows, this.vBias.columns);
        if (this.hBias != null && this.hBiasAdaGrad == null)
            this.hBiasAdaGrad = new AdaGrad(this.hBias.rows, this.hBias.columns);

        wAdaGrad.historicalGradient = null;
        hBiasAdaGrad.historicalGradient = null;
        vBiasAdaGrad.historicalGradient = null;

        log.info("Resetting adagrad");
    }

    DoubleMatrix wLearningRates = wAdaGrad.getLearningRates(wGradient);

    // change up momentum after so many iterations if specified
    double momentum = this.momentum;
    if (momentumAfter != null && !momentumAfter.isEmpty()) {
        int key = momentumAfter.keySet().iterator().next();
        if (iteration >= key) {
            momentum = momentumAfter.get(key);
        }
    }

    if (useAdaGrad) wGradient.muli(wLearningRates);
    else wGradient.muli(learningRate);

    if (useAdaGrad) hBiasGradient = hBiasGradient.mul(hBiasAdaGrad.getLearningRates(hBiasGradient));
    else hBiasGradient = hBiasGradient.mul(learningRate);

    if (useAdaGrad) vBiasGradient = vBiasGradient.mul(vBiasAdaGrad.getLearningRates(vBiasGradient));
    else vBiasGradient = vBiasGradient.mul(learningRate);

    // only do this with binary hidden layers
    if (applySparsity && this.hBiasGradient != null) applySparsity(hBiasGradient, learningRate);

    // blend the previous gradients into the current ones when momentum is enabled
    if (momentum != 0 && this.wGradient != null)
        wGradient.addi(this.wGradient.mul(momentum).add(wGradient.mul(1 - momentum)));

    if (momentum != 0 && this.vBiasGradient != null)
        vBiasGradient.addi(this.vBiasGradient.mul(momentum).add(vBiasGradient.mul(1 - momentum)));

    if (momentum != 0 && this.hBiasGradient != null)
        hBiasGradient.addi(this.hBiasGradient.mul(momentum).add(hBiasGradient.mul(1 - momentum)));

    if (normalizeByInputRows) {
        wGradient.divi(lastMiniBatchSize);
        vBiasGradient.divi(lastMiniBatchSize);
        hBiasGradient.divi(lastMiniBatchSize);
    }

    // L2 regularization: subtract the weight-decay term so the applied update shrinks the weights
    if (useRegularization && l2 > 0) {
        if (useAdaGrad) wGradient.subi(W.mul(l2).mul(wLearningRates));
        else wGradient.subi(W.mul(l2 * learningRate));
    }

    if (constrainGradientToUnitNorm) {
        wGradient.divi(wGradient.norm2());
        vBiasGradient.divi(vBiasGradient.norm2());
        hBiasGradient.divi(hBiasGradient.norm2());
    }

    this.wGradient = wGradient;
    this.vBiasGradient = vBiasGradient;
    this.hBiasGradient = hBiasGradient;
}
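As a rough illustration of the momentum step above, here is a standalone sketch of blending a previous gradient with the current one (the values and momentum coefficient are hypothetical; note that the method above accumulates this blend onto the current gradient rather than replacing it):

import org.jblas.DoubleMatrix;

public class MomentumBlendDemo {
    public static void main(String[] args) {
        double momentum = 0.9; // hypothetical momentum coefficient
        DoubleMatrix previousGradient = new DoubleMatrix(new double[] {0.2, -0.1});
        DoubleMatrix currentGradient = new DoubleMatrix(new double[] {0.5, 0.3});

        // blend = previous * momentum + current * (1 - momentum)
        DoubleMatrix blend = previousGradient.mul(momentum).add(currentGradient.mul(1 - momentum));

        // the method above then adds this blend into the current gradient in place
        currentGradient.addi(blend);
        System.out.println(currentGradient);
    }
}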