// compute the misclassification rate (MCR) on the test set
  private double GetMCRTestSet() {
    int numErrors = 0;

    for (int i = NTrain; i < NTrain + NTest; i++) {
      double label_i = Sigmoid.Calculate(Predict(i));

      if ((Y.get(i) == 1 && label_i < 0.5) || (Y.get(i) == 0 && label_i >= 0.5)) numErrors++;
    }

    return (double) numErrors / (double) NTest;
  }
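
// For reference, a minimal sketch of the logistic sigmoid assumed to back
// Sigmoid.Calculate above (the standard 1 / (1 + e^-x)); the project's
// actual utility class may add numerical safeguards.
public final class Sigmoid {
  public static double Calculate(double x) {
    return 1.0 / (1.0 + Math.exp(-x));
  }
}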
Example #2
  // compute the mean squared error (MSE) of the reconstruction loss over the observed cells of H
  public double GetLossH() {
    double numInstances = 0;
    double errorSum = 0;

    for (int i = 0; i < numTotalInstances; i++)
      for (int l = 0; l < numPatterns; l++) {
        if (H.get(i, l) != GlobalValues.MISSING_VALUE) {
          double err = H.get(i, l) - MatrixUtilities.getRowByColumnProduct(S, i, P, l);
          errorSum += err * err;
          numInstances += 1.0;
        }
      }

    return errorSum / numInstances;
  }
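
  // A minimal sketch of the inner-product helper used above: here
  // MatrixUtilities.getRowByColumnProduct(S, i, P, l) is assumed to be the
  // dot product of row i of S with column l of P (an assumption inferred
  // from the call sites, not confirmed by the original source).
  public static double getRowByColumnProduct(Matrix A, int row, Matrix B, int col) {
    double sum = 0;
    for (int k = 0; k < A.getDimColumns(); k++) sum += A.get(row, k) * B.get(k, col);
    return sum;
  }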
  // initialize the data structures
  public void Initialize() {
    Logging.println("NTrain=" + NTrain + ", NTest=" + NTest + ", M_i=" + M, LogLevel.DEBUGGING_LOG);
    Logging.println("K=" + K + ", L=" + L, LogLevel.DEBUGGING_LOG);
    Logging.println("eta=" + eta + ", maxIter=" + maxIter, LogLevel.DEBUGGING_LOG);
    Logging.println(
        "lambdaD=" + lambdaD + ", lambdaF=" + lambdaP + ", lamdaW=" + lambdaW + ", beta=" + beta,
        LogLevel.DEBUGGING_LOG);

    // avoid zero/negative sliding window increments,
    // or increments greater than the window size
    if (deltaT < 1) deltaT = 1;
    else if (deltaT > L) deltaT = L;

    NSegments = (M - L) / deltaT;

    Logging.println("deltaT=" + deltaT + ", NSegments=" + NSegments, LogLevel.DEBUGGING_LOG);

    cR = beta / (NSegments * L);
    cA = (1 - beta);

    SegmentTimeSeriesDataset();
    InitializePatternsProbabilityDistance();
    InitializeHardMembershipsToClosestPattern();

    // set the labels to be binary 0 and 1, needed for the logistic loss
    for (int i = 0; i < NTrain + NTest; i++) if (Y.get(i) != 1.0) Y.set(i, 0, 0.0);

    InitializeWeights();

    Logging.println("Initializations Completed!", LogLevel.DEBUGGING_LOG);
  }
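
// Worked example of the segmentation arithmetic above (illustrative values
// only): with M=100, L=20 and deltaT=5 the clamp leaves deltaT unchanged,
// NSegments = (100 - 20) / 5 = 16, and the reconstruction term is weighted
// by cR = beta / (16 * 20).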
Example #4
  // initialize the matrices
  public void Initialize() {
    // compute the histogram matrix
    ComputeHistogram();

    // create the extended Y
    YExtended = new Matrix(numTotalInstances, numLabels);

    // set all the cells to zero initially
    for (int i = 0; i < numTotalInstances; i++)
      for (int l = 0; l < numLabels; l++) YExtended.set(i, l, 0.0);

    // set to 1 only the column corresponding to the label
    for (int i = 0; i < numTotalInstances; i++) YExtended.set(i, (int) Y.get(i), 1.0);

    // randomly initialize the latent matrices
    S = new Matrix(numTotalInstances, D);
    S.RandomlyInitializeCells(0, 1);

    P = new Matrix(D, numPatterns);
    P.RandomlyInitializeCells(0, 1);

    biasP = new double[numPatterns];
    for (int l = 0; l < numPatterns; l++) biasP[l] = H.GetColumnMean(l);

    W = new Matrix(D, numLabels);
    W.RandomlyInitializeCells(0, 1);

    biasW = new double[numLabels];
    for (int l = 0; l < numLabels; l++) biasW[l] = YExtended.GetColumnMean(l);

    // record the observed histogram values
    HObserved = new ArrayList<Tripple>();
    for (int i = 0; i < H.getDimRows(); i++)
      for (int j = 0; j < H.getDimColumns(); j++)
        if (H.get(i, j) != GlobalValues.MISSING_VALUE) HObserved.add(new Tripple(i, j));

    Collections.shuffle(HObserved);

    // record the observed label values
    YObserved = new ArrayList<Tripple>();
    for (int i = 0; i < numTrainInstances; i++)
      for (int l = 0; l < YExtended.getDimColumns(); l++)
        if (YExtended.get(i, l) != GlobalValues.MISSING_VALUE) YObserved.add(new Tripple(i, l));

    Collections.shuffle(YObserved);
  }
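
  // A minimal sketch of the Tripple pair used above to index observed
  // (row, column) cells; the actual class may carry extra fields.
  public static class Tripple {
    public int row, col;

    public Tripple(int row, int col) {
      this.row = row;
      this.col = col;
    }
  }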
Example #5
  // compute the classification error rate on the instances in [startIndex, endIndex)
  public double GetErrorRate(int startIndex, int endIndex) {
    int numIncorrectClassifications = 0;
    int numInstances = 0;

    for (int i = startIndex; i < endIndex; i++) {
      if (Y.get(i) != GlobalValues.MISSING_VALUE) {
        double y = Y.get(i);
        double y_predicted = PredictLabel(i);

        if (y != y_predicted) numIncorrectClassifications++;

        numInstances++;
      }
    }

    return (double) numIncorrectClassifications / (double) numInstances;
  }
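
// Usage sketch (assuming training instances precede test instances, as the
// loops elsewhere suggest): GetErrorRate(0, numTrainInstances) yields the
// training error and GetErrorRate(numTrainInstances, numTotalInstances)
// the test error.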
Example #6
  // get the log loss of the target prediction
  public double GetLossY(int startIndex, int endIndex) {
    double YTrainLoss = 0;
    int numObservedCells = 0;

    for (int i = startIndex; i < endIndex; i++)
      for (int l = 0; l < numLabels; l++)
        if (YExtended.get(i, l) != GlobalValues.MISSING_VALUE) {
          double y_hat_i =
              Sigmoid.Calculate(
                  MatrixUtilities.getRowByColumnProduct(S, i, W, l)); // + biasW[l]);
          double y_i = YExtended.get(i, l);

          YTrainLoss += -y_i * Math.log(y_hat_i) - (1 - y_i) * Math.log(1 - y_hat_i);

          numObservedCells++;
        }

    return YTrainLoss / (double) numObservedCells;
  }
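
  // The log terms above are undefined if y_hat_i reaches exactly 0 or 1; a
  // common guard, sketched here as a hypothetical helper rather than part
  // of the original code, clamps the prediction into the open interval (0, 1):
  private static double clampProbability(double p) {
    final double eps = 1e-12;
    return Math.max(eps, Math.min(1.0 - eps, p));
  }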
  // partition the time series into segments
  public void SegmentTimeSeriesDataset() {
    S = new double[NTrain + NTest][NSegments][L];

    for (int i = 0; i < NTrain + NTest; i++) {
      for (int j = 0; j < NSegments; j++) {
        for (int l = 0; l < L; l++) S[i][j][l] = T.get(i, (j * deltaT) + l);

        // normalize the segment
        double[] normalizedSegment = StatisticalUtilities.Normalize(S[i][j]);
        for (int l = 0; l < L; l++) S[i][j][l] = normalizedSegment[l];
      }
    }

    Logging.println("Partion to Normalized Segments Completed!", LogLevel.DEBUGGING_LOG);
  }
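
  // A minimal sketch of the segment normalization assumed above, taking
  // StatisticalUtilities.Normalize to be the usual z-normalization
  // (subtract the mean, divide by the standard deviation):
  public static double[] Normalize(double[] segment) {
    double mean = 0;
    for (double v : segment) mean += v;
    mean /= segment.length;

    double variance = 0;
    for (double v : segment) variance += (v - mean) * (v - mean);
    double std = Math.sqrt(variance / segment.length);

    double[] normalized = new double[segment.length];
    for (int i = 0; i < segment.length; i++)
      normalized[i] = std > 0 ? (segment[i] - mean) / std : 0.0;

    return normalized;
  }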
  public void LearnLA(boolean updateOnlyW) {
    if (beta == 1) return;

    double e_i = 0, F_ik = 0;
    double regWConst = (2 * lambdaW) / NTrain;

    for (int i = 0; i < NTrain; i++) {
      e_i = Y.get(i) - Sigmoid.Calculate(Predict(i));

      for (int k = 0; k < K; k++) {
        F_ik = 0;

        for (int j = 0; j < NSegments; j++) {
          D[i][j][k] -= eta * ((1 - beta) * -2 * e_i * W[k] + lambdaD * D[i][j][k]);

          F_ik += D[i][j][k];
        }

        W[k] -= eta * ((1 - beta) * -2 * e_i * F_ik + regWConst * W[k]);
      }
    }
  }
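
// Each update in LearnLA follows the usual stochastic gradient descent form
//   parameter -= eta * (loss gradient + regularization gradient),
// with the accuracy term weighted by (1 - beta); this matches the early
// return when beta == 1, where the accuracy loss carries zero weight.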
Example #9
  // compute the histogram matrix
  public void ComputeHistogram() {
    BagOfPatterns bop = new BagOfPatterns();

    bop.representationType = RepresentationType.Polynomial;
    bop.slidingWindowSize = slidingWindowSize;
    bop.innerDimension = innerDimension;
    bop.alphabetSize = alphabetSize;
    bop.polyDegree = degree;

    H = bop.CreateWordFrequenciesMatrix(X);
    numPatterns = H.getDimColumns();

    // optionally mark zero frequencies as missing values (currently disabled)
    // for (int i = 0; i < H.getDimRows(); i++)
    //   for (int j = 0; j < H.getDimColumns(); j++)
    //     if (H.get(i, j) == 0) H.set(i, j, GlobalValues.MISSING_VALUE);

    Logging.println("Histogram Sparsity: " + H.GetSparsityRatio(), LogLevel.DEBUGGING_LOG);
  }
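
  // Usage sketch (hypothetical driver values): the histogram parameters are
  // expected to be set before this runs, e.g.
  //   model.slidingWindowSize = 50;
  //   model.alphabetSize = 4;
  //   model.degree = 3;
  //   model.innerDimension = 10;
  // ComputeHistogram() then builds the instance-by-pattern frequency matrix H
  // from the raw series X and sets numPatterns to the vocabulary size.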
  // compute the accuracy loss of instance i according to the
  // logistic loss
  public double AccuracyLoss(int i) {
    double Y_hat_i = Predict(i);
    double sig_y_i = Sigmoid.Calculate(Y_hat_i);

    return -Y.get(i) * Math.log(sig_y_i) - (1 - Y.get(i)) * Math.log(1 - sig_y_i);
  }
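
// Worked example (illustrative values): for a positive instance
// (Y.get(i) == 1) with raw prediction Y_hat_i = 2.0, sig_y_i is about
// 0.881, so the loss is -log(0.881), roughly 0.127; a confident correct
// prediction costs little, while a confident wrong one is penalized heavily.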