private void stochasticUpdateStep(Pair<Integer, Set<Integer>> wordPlusContexts, int s) { double eta = learningRateDecay(s); int wordIndex = wordPlusContexts.getFirst(); // actual center word // Set h vector equal to the kth row of weight matrix W1. h = x' * W = W[k,:] = v(input) RealVector h = W1.getRowVector(wordIndex); // 1xN row vector for (int contextWordIndex : wordPlusContexts.getSecond()) { Set<Integer> negativeContexts; if (sampleUnigram) { negativeContexts = negativeSampleContexts(wordIndex, noiseSampler); } else { negativeContexts = negativeSampleContexts(wordIndex); } // wordIndex is the input word // negativeContexts is the k negative contexts // contextWordIndex is 1 positive context // First update the output vectors for 1 positive context RealVector vPrime_j = W2.getColumnVector(contextWordIndex); // Nx1 column vector double u = h.dotProduct(vPrime_j); // u_j = vPrime(output) * v(input) double t_j = 1.0; // t_j := 1{j == contextWordIndex} double scale = sigmoid(u) - t_j; scale = eta * scale; RealVector gradientOut2Hidden = h.mapMultiply(scale); vPrime_j = vPrime_j.subtract(gradientOut2Hidden); W2.setColumnVector(contextWordIndex, vPrime_j); // Next backpropagate the error to the hidden layer and update the input vectors RealVector v_I = h; u = h.dotProduct(vPrime_j); scale = sigmoid(u) - t_j; scale = eta * scale; RealVector gradientHidden2In = vPrime_j.mapMultiply(scale); v_I = v_I.subtract(gradientHidden2In); h = v_I; W1.setRowVector(wordIndex, v_I); // Repeat update process for k negative contexts t_j = 0.0; // t_j := 1{j == contextWordIndex} for (int negContext : negativeContexts) { vPrime_j = W2.getColumnVector(negContext); u = h.dotProduct(vPrime_j); scale = sigmoid(u) - t_j; scale = eta * scale; gradientOut2Hidden = h.mapMultiply(scale); vPrime_j = vPrime_j.subtract(gradientOut2Hidden); W2.setColumnVector(negContext, vPrime_j); // Backpropagate the error to the hidden layer and update the input vectors v_I = h; u = h.dotProduct(vPrime_j); scale 
= sigmoid(u) - t_j; scale = eta * scale; gradientHidden2In = vPrime_j.mapMultiply(scale); v_I = v_I.subtract(gradientHidden2In); h = v_I; W1.setRowVector(wordIndex, v_I); } } }
/**
 * Logistic sigmoid of the inner product of two vectors: sigmoid(x . y).
 *
 * @param x first vector
 * @param y second vector (same dimension as x)
 * @return sigmoid of the dot product of x and y
 */
private static double sigmoid(RealVector x, RealVector y) {
    return sigmoid(x.dotProduct(y));
}