// compute the MCR on the test set private double GetMCRTestSet() { int numErrors = 0; for (int i = NTrain; i < NTrain + NTest; i++) { double label_i = Sigmoid.Calculate(Predict(i)); if ((Y.get(i) == 1 && label_i < 0.5) || (Y.get(i) == 0 && label_i >= 0.5)) numErrors++; } return (double) numErrors / (double) NTest; }
// the the MSE of the H loss public double GetLossH() { double numInstances = 0; double errorSum = 0; for (int i = 0; i < numTotalInstances; i++) for (int l = 0; l < numPatterns; l++) { if (H.get(i, l) != GlobalValues.MISSING_VALUE) { double err = H.get(i, l) - MatrixUtilities.getRowByColumnProduct(S, i, P, l); errorSum += err * err; numInstances += 1.0; } } return errorSum / numInstances; }
// initialize the data structures public void Initialize() { Logging.println("NTrain=" + NTrain + ", NTest=" + NTest + ", M_i=" + M, LogLevel.DEBUGGING_LOG); Logging.println("K=" + K + ", L=" + L, LogLevel.DEBUGGING_LOG); Logging.println("eta=" + eta + ", maxIter=" + maxIter, LogLevel.DEBUGGING_LOG); Logging.println( "lambdaD=" + lambdaD + ", lambdaF=" + lambdaP + ", lamdaW=" + lambdaW + ", beta=" + beta, LogLevel.DEBUGGING_LOG); // avoid zero/negative sliding window increments, // or increments greater than the window size if (deltaT < 1) deltaT = 1; else if (deltaT > L) deltaT = L; NSegments = (M - L) / deltaT; Logging.println("deltaT=" + deltaT + ", NSegments=" + NSegments, LogLevel.DEBUGGING_LOG); cR = beta / (NSegments * L); cA = (1 - beta); SegmentTimeSeriesDataset(); InitializePatternsProbabilityDistance(); InitializeHardMembershipsToClosestPattern(); // set the labels to be binary 0 and 1, needed for the logistic loss for (int i = 0; i < NTrain + NTest; i++) if (Y.get(i) != 1.0) Y.set(i, 0, 0.0); InitializeWeights(); Logging.println("Initializations Completed!", LogLevel.DEBUGGING_LOG); }
// initialize the matrices public void Initialize() { // compute the histogram matrix ComputeHistogram(); // create the extended Y YExtended = new Matrix(numTotalInstances, numLabels); // set all the cells to zero initially for (int i = 0; i < numTrainInstances; i++) for (int l = 0; l < numLabels; l++) YExtended.set(i, l, 0.0); // set to 1 only the column corresponding to the label for (int i = 0; i < numTotalInstances; i++) YExtended.set(i, (int) Y.get(i), 1.0); // randomly initialize the latent matrices S = new Matrix(numTotalInstances, D); S.RandomlyInitializeCells(0, 1); P = new Matrix(D, numPatterns); P.RandomlyInitializeCells(0, 1); biasP = new double[numPatterns]; for (int l = 0; l < numPatterns; l++) biasP[l] = H.GetColumnMean(l); W = new Matrix(D, numLabels); W.RandomlyInitializeCells(0, 1); biasW = new double[numLabels]; for (int l = 0; l < numLabels; l++) biasW[l] = YExtended.GetColumnMean(l); // record the observed histogram values HObserved = new ArrayList<Tripple>(); for (int i = 0; i < H.getDimRows(); i++) for (int j = 0; j < H.getDimColumns(); j++) if (H.get(i, j) != GlobalValues.MISSING_VALUE) HObserved.add(new Tripple(i, j)); Collections.shuffle(HObserved); // record the observed label values YObserved = new ArrayList<Tripple>(); for (int i = 0; i < numTrainInstances; i++) for (int l = 0; l < YExtended.getDimColumns(); l++) if (YExtended.get(i, l) != GlobalValues.MISSING_VALUE) YObserved.add(new Tripple(i, l)); Collections.shuffle(YObserved); }
// get the log loss of the target prediction public double GetErrorRate(int startIndex, int endIndex) { int numIncorrectClassifications = 0; int numInstances = 0; for (int i = startIndex; i < endIndex; i++) { if (Y.get(i) != GlobalValues.MISSING_VALUE) { double y = Y.get(i); double y_predicted = PredictLabel(i); if (y != y_predicted) numIncorrectClassifications++; numInstances++; } } return (double) numIncorrectClassifications / (double) numInstances; }
// get the log loss of the target prediction public double GetLossY(int startIndex, int endIndex) { double YTrainLoss = 0; int numObservedCells = 0; for (int i = startIndex; i < endIndex; i++) for (int l = 0; l < numLabels; l++) if (YExtended.get(i, l) != GlobalValues.MISSING_VALUE) { double y_hat_i = Sigmoid.Calculate( MatrixUtilities.getRowByColumnProduct(S, i, W, l)); // + biasW[l]); double y_i = YExtended.get(i, l); YTrainLoss += -y_i * Math.log(y_hat_i) - (1 - y_i) * Math.log(1 - y_hat_i); numObservedCells++; } return YTrainLoss / (double) numObservedCells; }
// partition the time series into segments public void SegmentTimeSeriesDataset() { S = new double[NTrain + NTest][NSegments][L]; for (int i = 0; i < NTrain + NTest; i++) { for (int j = 0; j < NSegments; j++) { for (int l = 0; l < L; l++) S[i][j][l] = T.get(i, (j * deltaT) + l); // normalize the segment double[] normalizedSegment = StatisticalUtilities.Normalize(S[i][j]); for (int l = 0; l < L; l++) S[i][j][l] = normalizedSegment[l]; } } Logging.println("Partion to Normalized Segments Completed!", LogLevel.DEBUGGING_LOG); }
/**
 * Performs one pass of update steps over the training instances for the
 * accuracy part of the objective.
 *
 * For every training instance i the error e_i = Y(i) - sigmoid(Predict(i))
 * is computed once; then, for every k, the entries D[i][j][k] of all
 * segments j are updated (unless updateOnlyW is set) and W[k] is updated
 * using the sum of D[i][j][k] over the segments.
 *
 * The method returns immediately when beta == 1, because the error term
 * is weighted by (1 - beta).
 *
 * @param updateOnlyW when true only the weights W are updated and D is
 *                    left untouched; when false both D and W are updated
 */
public void LearnLA(boolean updateOnlyW) {
    if (beta == 1) {
        return;
    }

    double regWConst = (2 * lambdaW) / NTrain;

    for (int i = 0; i < NTrain; i++) {
        double e_i = Y.get(i) - Sigmoid.Calculate(Predict(i));

        // common factor of the error term in both update rules
        double errorGradient = (1 - beta) * -2 * e_i;

        for (int k = 0; k < K; k++) {
            double F_ik = 0;

            for (int j = 0; j < NSegments; j++) {
                // honor the flag: the parameter used to be ignored, so D was
                // modified even when the caller asked for W-only updates
                if (!updateOnlyW) {
                    D[i][j][k] -= eta * (errorGradient * W[k] + lambdaD * D[i][j][k]);
                }

                // F_ik is accumulated from the (possibly just updated) D values
                F_ik += D[i][j][k];
            }

            W[k] -= eta * (errorGradient * F_ik + regWConst * W[k]);
        }
    }
}
// compute the histogram matrix public void ComputeHistogram() { BagOfPatterns bop = new BagOfPatterns(); bop.representationType = RepresentationType.Polynomial; bop.slidingWindowSize = slidingWindowSize; bop.representationType = RepresentationType.Polynomial; bop.innerDimension = innerDimension; bop.alphabetSize = alphabetSize; bop.polyDegree = degree; H = bop.CreateWordFrequenciesMatrix(X); numPatterns = H.getDimColumns(); for (int i = 0; i < H.getDimRows(); i++) for (int j = 0; j < H.getDimColumns(); j++) if (H.get(i, j) == 0) { // H.set(i, j, GlobalValues.MISSING_VALUE); } Logging.println("Histogram Sparsity: " + H.GetSparsityRatio(), LogLevel.DEBUGGING_LOG); }
// compute the accuracy loss of instance i according to the // smooth hinge loss public double AccuracyLoss(int i) { double Y_hat_i = Predict(i); double sig_y_i = Sigmoid.Calculate(Y_hat_i); return -Y.get(i) * Math.log(sig_y_i) - (1 - Y.get(i)) * Math.log(1 - sig_y_i); }