/**
 * Fills {@code matrix} in place with independent Gaussian draws and returns it.
 *
 * @param matrix the matrix to initialize (mutated in place)
 * @param sigma  the standard deviation of the zero-mean normal distribution
 *               (presumably a small value for weight initialization — confirm at call site)
 * @return the same {@code matrix} instance, fully initialized
 */
private static SparseRealMatrix initializeMatrix(SparseRealMatrix matrix, double sigma) {
    // NOTE(review): writing a nonzero sample into every cell makes a sparse
    // matrix effectively dense; verify SparseRealMatrix is the intended type.
    NormalDistribution gaussian = new NormalDistribution(0.0, sigma);
    final int rows = matrix.getRowDimension();
    final int cols = matrix.getColumnDimension();
    for (int row = 0; row < rows; row++) {
        for (int col = 0; col < cols; col++) {
            matrix.setEntry(row, col, gaussian.sample());
        }
    }
    return matrix;
}
/**
 * Performs one stochastic-gradient update for a single training example
 * (a center word plus its set of positive context words), using negative
 * sampling: for each positive context, the output vector of that context
 * and of k sampled negative contexts is updated, and the input vector of
 * the center word is updated after each output-vector update.
 *
 * @param wordPlusContexts pair of (center word index, set of positive context word indices)
 * @param s                step counter fed to learningRateDecay to obtain the current learning rate
 */
private void stochasticUpdateStep(Pair<Integer, Set<Integer>> wordPlusContexts, int s) {
    // eta: learning rate for this step, decayed by step counter s.
    double eta = learningRateDecay(s);
    int wordIndex = wordPlusContexts.getFirst(); // actual center word
    // Set h vector equal to the kth row of weight matrix W1. h = x' * W = W[k,:] = v(input)
    RealVector h = W1.getRowVector(wordIndex); // 1xN row vector
    for (int contextWordIndex : wordPlusContexts.getSecond()) {
        // Draw k negative contexts for this (center, context) pair; the sampler
        // choice is a class-level configuration flag.
        Set<Integer> negativeContexts;
        if (sampleUnigram) {
            negativeContexts = negativeSampleContexts(wordIndex, noiseSampler);
        } else {
            negativeContexts = negativeSampleContexts(wordIndex);
        }
        // wordIndex is the input word
        // negativeContexts is the k negative contexts
        // contextWordIndex is 1 positive context

        // First update the output vectors for 1 positive context
        RealVector vPrime_j = W2.getColumnVector(contextWordIndex); // Nx1 column vector
        double u = h.dotProduct(vPrime_j); // u_j = vPrime(output) * v(input)
        double t_j = 1.0; // t_j := 1{j == contextWordIndex}
        // Gradient scale for the output vector: (sigmoid(u) - t_j) * eta.
        double scale = sigmoid(u) - t_j;
        scale = eta * scale;
        RealVector gradientOut2Hidden = h.mapMultiply(scale);
        vPrime_j = vPrime_j.subtract(gradientOut2Hidden);
        W2.setColumnVector(contextWordIndex, vPrime_j);

        // Next backpropagate the error to the hidden layer and update the input vectors
        // NOTE(review): this recomputes u with the ALREADY-UPDATED output vector
        // vPrime_j, and rewrites W1's row once per positive/negative context.
        // Textbook word2vec accumulates the input-vector gradient (EH) across all
        // contexts using the PRE-update output vectors and applies it once —
        // confirm this per-context variant is intentional.
        RealVector v_I = h;
        u = h.dotProduct(vPrime_j);
        scale = sigmoid(u) - t_j;
        scale = eta * scale;
        RealVector gradientHidden2In = vPrime_j.mapMultiply(scale);
        v_I = v_I.subtract(gradientHidden2In);
        // h is refreshed so subsequent context updates see the new input vector.
        h = v_I;
        W1.setRowVector(wordIndex, v_I);

        // Repeat update process for k negative contexts
        t_j = 0.0; // t_j := 1{j == negContext} is 0 for negative samples
        for (int negContext : negativeContexts) {
            // Output-vector update for the negative sample (target t_j = 0).
            vPrime_j = W2.getColumnVector(negContext);
            u = h.dotProduct(vPrime_j);
            scale = sigmoid(u) - t_j;
            scale = eta * scale;
            gradientOut2Hidden = h.mapMultiply(scale);
            vPrime_j = vPrime_j.subtract(gradientOut2Hidden);
            W2.setColumnVector(negContext, vPrime_j);

            // Backpropagate the error to the hidden layer and update the input vectors
            // (same per-context, post-update pattern as above — see NOTE(review)).
            v_I = h;
            u = h.dotProduct(vPrime_j);
            scale = sigmoid(u) - t_j;
            scale = eta * scale;
            gradientHidden2In = vPrime_j.mapMultiply(scale);
            v_I = v_I.subtract(gradientHidden2In);
            h = v_I;
            W1.setRowVector(wordIndex, v_I);
        }
    }
}
private HashMap<String, float[]> convertEmbeddings(Set<String> targetVocab) { // For every string in vocabulary // Get corresponding column of output matrix W2 // Map String to array of floats HashMap<String, float[]> embeddingMatrix = new HashMap<String, float[]>(); for (String word : targetVocab) { int wordIndex = encodedVocab.get(word); double[] wordEmbedding = W2.getColumn(wordIndex); float[] wordEmbeddingFloat = new float[wordEmbedding.length]; for (int i = 0; i < wordEmbedding.length; i++) { wordEmbeddingFloat[i] = (float) wordEmbedding[i]; } embeddingMatrix.put(word, wordEmbeddingFloat); } return embeddingMatrix; }