// the the MSE of the H loss public double GetLossH() { double numInstances = 0; double errorSum = 0; for (int i = 0; i < numTotalInstances; i++) for (int l = 0; l < numPatterns; l++) { if (H.get(i, l) != GlobalValues.MISSING_VALUE) { double err = H.get(i, l) - MatrixUtilities.getRowByColumnProduct(S, i, P, l); errorSum += err * err; numInstances += 1.0; } } return errorSum / numInstances; }
// predict the label of the i-th instance public double PredictLabel(int i) { double label = 0; double maxConfidence = 0; for (int l = 0; l < numLabels; l++) { double confidence = Sigmoid.Calculate(MatrixUtilities.getRowByColumnProduct(S, i, W, l)); // + biasW[l]); if (confidence > maxConfidence) { maxConfidence = confidence; label = (double) l; } } return label; }
// get the log loss of the target prediction public double GetLossY(int startIndex, int endIndex) { double YTrainLoss = 0; int numObservedCells = 0; for (int i = startIndex; i < endIndex; i++) for (int l = 0; l < numLabels; l++) if (YExtended.get(i, l) != GlobalValues.MISSING_VALUE) { double y_hat_i = Sigmoid.Calculate( MatrixUtilities.getRowByColumnProduct(S, i, W, l)); // + biasW[l]); double y_i = YExtended.get(i, l); YTrainLoss += -y_i * Math.log(y_hat_i) - (1 - y_i) * Math.log(1 - y_hat_i); numObservedCells++; } return YTrainLoss / (double) numObservedCells; }
public double Optimize() { // initialize the data structures Initialize(); Random rand = new Random(); double prevLossH = Double.MAX_VALUE; int YUpdatefrequency = HObserved.size() / numTotalInstances, i, l, idxY = 0; for (int epoch = 0; epoch < maxEpochs; epoch++) { // update H loss if (alphaH > 0) { double err_il; for (int idx = 0; idx < HObserved.size(); idx++) { i = HObserved.get(idx).row; l = HObserved.get(idx).col; err_il = H.cells[i][l] - MatrixUtilities.getRowByColumnProduct(S, i, P, l) - biasP[l]; for (int k = 0; k < D; k++) { S.cells[i][k] -= eta * (-2 * alphaH * err_il * P.cells[k][l] + lambdaS * S.cells[i][k]); P.cells[k][l] -= eta * (-2 * alphaH * err_il * S.cells[i][k] + lambdaP * P.cells[k][l]); } biasP[l] -= eta * (-2 * alphaH * err_il); if (idx % YUpdatefrequency == 0) { if (alphaY > 0) { i = YObserved.get(idxY).row; l = YObserved.get(idxY).col; double err_i = YExtended.cells[i][l] - Sigmoid.Calculate( MatrixUtilities.getRowByColumnProduct(S, i, W, l)); // + biasW[l]); for (int k = 0; k < D; k++) { S.cells[i][k] -= eta * (alphaY * -err_i * W.cells[k][l] + lambdaS * S.cells[i][k]); W.cells[k][l] -= eta * (alphaY * -err_i * S.cells[i][k] + lambdaW * W.cells[k][l]); } biasW[l] -= eta * (-alphaY * err_i); idxY = (idxY + 1) % YObserved.size(); } } } } double lossH = GetLossH(); if (epoch % 3 == 0) { // compute the losses of each relation and print the result double lossYTrain = GetLossY(0, numTrainInstances), lossYTest = GetLossY(numTrainInstances, numTotalInstances), mcrTrain = GetErrorRate(0, numTrainInstances), mcrTest = GetErrorRate(numTrainInstances, numTotalInstances), mcrNN = GetTestErrorNN(); Logging.println( "Epoch=" + df.format(epoch) + ", Eta=" + df.format(eta) + ", LH=" + df.format(lossH) + ", LY=" + df.format(lossYTrain) + "/" + df.format(lossYTest) + ", MCR=" + df.format(mcrTrain) + "/" + df.format(mcrTest) + "/" + df.format(mcrNN), LogLevel.DEBUGGING_LOG); // Logging.println("LX="+lossX+", LH="+lossH + ", LY="+lossY+", MCR=["+ // mcrTrain+","+mcrTest+"]", LogLevel.DEBUGGING_LOG); } if (lossH < prevLossH) { // eta *= 1.01; prevLossH = lossH; } else { // eta *= 0.7; } } // return the ultimate MCR return GetErrorRate(numTrainInstances, numTotalInstances); // return GetTestErrorNN(); }