/**
   * Randomly divides the ratings of {@code rateMatrix} into a training and a test subset.
   *
   * <p>Every rating is assigned on its own: one {@code Math.random()} value is drawn per rating;
   * if it is below {@code ratio} the rating stays in the training matrix, otherwise it stays in
   * the test matrix.
   *
   * @param ratio the ratio of training data over all the ratings; expected to lie strictly
   *     between 0 and 1 (verified only through an {@code assert})
   * @return a two-element array: {training matrix, test matrix}
   */
  public SparseMatrix[] getRatioByRating(double ratio) {

    assert (ratio > 0 && ratio < 1);

    // Both parts are constructed from rateMatrix; each rating is then zeroed in exactly one.
    SparseMatrix trainPart = new SparseMatrix(rateMatrix);
    SparseMatrix testPart = new SparseMatrix(rateMatrix);

    int userCount = rateMatrix.numRows();
    for (int user = 0; user < userCount; user++) {
      SparseVector ratings = rateMatrix.row(user);

      for (int item : ratings.getIndex()) {
        boolean keepForTraining = Math.random() < ratio;

        // the part that does NOT keep this rating gets it overwritten with zero
        SparseMatrix partLosingRating = keepForTraining ? testPart : trainPart;
        partLosingRating.set(user, item, 0.0);
      }
    }

    // remove zero entries
    SparseMatrix.reshape(trainPart);
    SparseMatrix.reshape(testPart);

    // -1 is passed where getKthFold passes its fold index
    debugInfo(trainPart, testPart, -1);

    return new SparseMatrix[] {trainPart, testPart};
  }
  /**
   * Builds the train/test pair for the k-th fold: ratings whose entry in {@code assignMatrix}
   * equals {@code k} form the test set, all remaining ratings form the training set.
   *
   * @param k The index for desired fold; valid values are 1 to {@code numFold} inclusive.
   * @return Rating matrices {k-th train data, k-th test data}, or {@code null} when {@code k}
   *     is outside the valid range
   */
  public SparseMatrix[] getKthFold(int k) {
    boolean foldExists = k >= 1 && k <= numFold;
    if (!foldExists) {
      return null;
    }

    // Both parts are constructed from rateMatrix; each rating is then zeroed in exactly one.
    SparseMatrix trainPart = new SparseMatrix(rateMatrix);
    SparseMatrix testPart = new SparseMatrix(rateMatrix);

    int userCount = rateMatrix.numRows();
    for (int user = 0; user < userCount; user++) {
      SparseVector ratedItems = rateMatrix.row(user);

      for (int item : ratedItems.getIndex()) {
        boolean belongsToFoldK = assignMatrix.get(user, item) == k;

        // fold-k ratings are test data, so they are erased from the training part;
        // every other rating is training data, so it is erased from the test part
        SparseMatrix partLosingRating = belongsToFoldK ? trainPart : testPart;
        partLosingRating.set(user, item, 0.0);
      }
    }

    // remove zero entries
    SparseMatrix.reshape(trainPart);
    SparseMatrix.reshape(testPart);

    debugInfo(trainPart, testPart, k);

    return new SparseMatrix[] {trainPart, testPart};
  }
Beispiel #3
0
  /**
   * Trains the factor matrices {@code P} and {@code Q} from randomly sampled triples.
   *
   * <p>Each iteration draws {@code numUsers * 100} triples (user, item the user has in
   * {@code userCache}, item the user does not have), applies one gradient step per latent factor
   * for each triple, and accumulates {@code loss} / {@code errs}. Training stops early as soon as
   * {@code isConverged(iter)} reports true.
   *
   * @throws Exception if one of the helpers used here (e.g. the {@code userCache} lookup) throws
   */
  @Override
  protected void buildModel() throws Exception {

    for (int iter = 1; iter <= numIters; iter++) {

      loss = 0;
      errs = 0;

      final int sampleCount = numUsers * 100;
      for (int sample = 0; sample < sampleCount; sample++) {

        // draw a user that has at least one entry in the cache
        int user;
        SparseVector userItems;
        do {
          user = Randoms.uniform(numUsers);
          userItems = userCache.get(user);
        } while (userItems.getCount() == 0);

        // one item taken from the user's entries ...
        int[] knownItems = userItems.getIndex();
        int posItem = knownItems[Randoms.uniform(knownItems.length)];

        // ... and one item that is not among them (redrawn until it is not contained)
        int negItem;
        do {
          negItem = Randoms.uniform(numItems);
        } while (userItems.contains(negItem));

        // difference between the two predicted scores
        double margin = predict(user, posItem) - predict(user, negItem);

        // g(...) is defined elsewhere; presumably the logistic function -- TODO confirm
        double pairLoss = -Math.log(g(margin));
        loss += pairLoss;
        errs += pairLoss;

        double cmg = g(-margin);

        // per-factor update; all three values are read before any of them is modified
        for (int f = 0; f < numFactors; f++) {
          double userF = P.get(user, f);
          double posF = Q.get(posItem, f);
          double negF = Q.get(negItem, f);

          P.add(user, f, lRate * (cmg * (posF - negF) - regU * userF));
          Q.add(posItem, f, lRate * (cmg * userF - regI * posF));
          Q.add(negItem, f, lRate * (cmg * (-userF) - regI * negF));

          // regularisation terms are added to the loss using the pre-update values
          loss += regU * userF * userF + regI * posF * posF + regI * negF * negF;
        }
      }

      if (isConverged(iter)) {
        break;
      }
    }
  }
Beispiel #4
0
  /**
   * Predicts a score for user {@code u} and item {@code j} from neighbouring items.
   *
   * <p>Neighbours are the items in the user's row of {@code trainMatrix} that have an entry in
   * the cached correlation map of {@code j}. When {@code knn} is positive and smaller than the
   * number of neighbours, only the first {@code knn} entries of the sorted neighbour list are
   * kept.
   *
   * @param u user index
   * @param j item index
   * @return for ranking: the sum of the neighbour similarities (0 without neighbours); for
   *     rating prediction: the mean of item {@code j} plus the similarity-weighted average
   *     deviation of the neighbours' ratings from their own means, or {@code globalMean} when
   *     there are no neighbours or the absolute similarities sum to zero
   */
  @Override
  protected double predict(int u, int j) {

    // collect the neighbours: items of user u that are correlated with j
    Map<Integer, Double> neighbours = new HashMap<>();
    SparseVector userRow = trainMatrix.row(u);
    for (int ratedItem : userRow.getIndex()) {
      Map<Integer, Double> corrsOfJ = itemCorrsmap.get(j);
      if (corrsOfJ.containsKey(ratedItem)) {
        neighbours.put(ratedItem, corrsOfJ.get(ratedItem));
      }
    }

    // keep only the leading knn entries of the sorted neighbour list
    // (ordering is decided by Lists.sortMap(map, true); presumably descending -- TODO confirm)
    boolean mustTruncate = knn > 0 && knn < neighbours.size();
    if (mustTruncate) {
      List<KeyValPair<Integer>> ranked = Lists.sortMap(neighbours, true);
      List<KeyValPair<Integer>> leading = ranked.subList(0, knn);
      neighbours.clear();
      for (KeyValPair<Integer> pair : leading) {
        neighbours.put(pair.getKey(), pair.getValue());
      }
    }

    if (neighbours.isEmpty()) {
      return isRankingPred ? 0 : globalMean;
    }

    // for recommendation task: item ranking
    if (isRankingPred) {
      return Stats.sum(neighbours.values());
    }

    // for recommendation task: rating prediction
    double weightedDeviation = 0;
    double weightTotal = 0;
    for (Entry<Integer, Double> neighbour : neighbours.entrySet()) {
      int item = neighbour.getKey();
      double similarity = neighbour.getValue();
      double rating = trainMatrix.get(u, item);

      weightedDeviation += similarity * (rating - itemMeans.get(item));
      weightTotal += Math.abs(similarity);
    }

    return weightTotal > 0 ? itemMeans.get(j) + weightedDeviation / weightTotal : globalMean;
  }
Beispiel #5
0
 /**
  * Prepares the structures read during prediction: the item correlation matrix, the per-item
  * mean ratings, and a map view of every correlation row stored in {@code itemCorrsmap}.
  *
  * @throws Exception if one of the helpers used here throws
  */
 @Override
 protected void initModel() throws Exception {
   itemCorrs = buildCorrs(false);

   // mean rating per item; an item without training ratings gets the global mean instead
   itemMeans = new DenseVector(numItems);
   for (int item = 0; item < numItems; item++) {
     SparseVector ratingsOfItem = trainMatrix.column(item);
     double mean = ratingsOfItem.getCount() > 0 ? ratingsOfItem.mean() : globalMean;
     itemMeans.set(item, mean);
   }

   // copy each correlation row into an index -> value map for keyed lookup
   for (int item = 0; item < numItems; item++) {
     Map<Integer, Double> rowAsMap = new HashMap<>();
     for (VectorEntry cell : itemCorrs.row(item)) {
       rowAsMap.put(cell.index(), cell.get());
     }
     itemCorrsmap.put(item, rowAsMap);
   }
 }
Beispiel #6
0
  /**
   * Learns the factor matrices {@code W} and {@code H} by alternating multiplicative updates.
   *
   * <p>Per iteration: every row of {@code W} is rescaled with {@code H} held fixed, then every
   * column of {@code H} is rescaled with {@code W} held fixed, and finally {@code errs} and
   * {@code loss} are set to half the sum of squared prediction errors over the positive entries
   * of {@code V}. The loop ends early when {@code isConverged(iter)} returns true.
   *
   * @throws Exception if one of the helpers used here throws
   */
  @Override
  protected void buildModel() throws Exception {
    for (int iter = 1; iter <= numIters; iter++) {

      // ---- update W by fixing H ----
      for (int row = 0; row < W.numRows(); row++) {
        SparseVector observedRow = V.row(row);
        if (observedRow.getCount() <= 0) {
          continue; // no entries in this row of V
        }

        // predictions at the observed positions, computed once before W's row is modified
        SparseVector predictedRow = new SparseVector(V.numColumns());
        for (int col : observedRow.getIndex()) {
          predictedRow.set(col, predict(row, col));
        }

        for (int f = 0; f < W.numColumns(); f++) {
          DenseVector hRow = H.row(f, false);
          double observedDot = hRow.inner(observedRow);
          double predictedDot = hRow.inner(predictedRow) + 1e-9; // small offset on the divisor
          double factor = observedDot / predictedDot;

          W.set(row, f, W.get(row, f) * factor);
        }
      }

      // ---- update H by fixing W ----
      DenseMatrix wTransposed = W.transpose();
      for (int col = 0; col < H.numColumns(); col++) {
        SparseVector observedCol = V.column(col);
        if (observedCol.getCount() <= 0) {
          continue; // no entries in this column of V
        }

        // predictions at the observed positions, computed once before H's column is modified
        SparseVector predictedCol = new SparseVector(V.numRows());
        for (int row : observedCol.getIndex()) {
          predictedCol.set(row, predict(row, col));
        }

        for (int f = 0; f < H.numRows(); f++) {
          DenseVector wColumn = wTransposed.row(f, false);
          double observedDot = wColumn.inner(observedCol);
          double predictedDot = wColumn.inner(predictedCol) + 1e-9; // small offset on the divisor
          double factor = observedDot / predictedDot;

          H.set(f, col, H.get(f, col) * factor);
        }
      }

      // ---- compute errors over the positive entries of V ----
      loss = 0;
      errs = 0;
      for (MatrixEntry cell : V) {
        int row = cell.row();
        int col = cell.column();
        double observed = cell.get();

        if (observed > 0) {
          double diff = predict(row, col) - observed;
          double squared = diff * diff;

          errs += squared;
          loss += squared;
        }
      }

      errs *= 0.5;
      loss *= 0.5;

      if (isConverged(iter)) {
        break;
      }
    }
  }
Beispiel #7
0
  /**
   * Fits the user factors {@code P} and item factors {@code Q} by repeated sampling.
   *
   * <p>Each outer iteration performs four steps:
   *
   * <ol>
   *   <li>re-sample the user hyper-parameters ({@code alpha_u}, {@code mu_u}) from the current
   *       {@code P};
   *   <li>re-sample the item hyper-parameters ({@code alpha_m}, {@code mu_m}) from the current
   *       {@code Q};
   *   <li>run two Gibbs sweeps, each redrawing every row of {@code P} and then every row of
   *       {@code Q} (users/items without training ratings are skipped);
   *   <li>set {@code loss} to half the sum of squared errors over {@code trainMatrix} and stop
   *       early if {@code isConverged(iter)} returns true.
   * </ol>
   *
   * <p>Whenever {@code wishart(...)} or {@code cholesky()} returns {@code null}, the affected
   * quantity simply keeps its previous value.
   *
   * @throws Exception if one of the matrix or sampling helpers used here throws
   */
  @Override
  protected void buildModel() throws Exception {

    // Initialize hierarchical priors
    int beta = 2; // observation noise (precision)
    DenseVector mu_u = new DenseVector(numFactors); // current hyper-mean for user factors
    DenseVector mu_m = new DenseVector(numFactors); // current hyper-mean for item factors

    // parameters of the Inverse-Wishart distribution (user side)
    DenseMatrix WI_u = DenseMatrix.eye(numFactors);
    int b0_u = 2;
    int df_u = numFactors;
    DenseVector mu0_u = new DenseVector(numFactors);

    // same parameters for the item side
    DenseMatrix WI_m = DenseMatrix.eye(numFactors);
    int b0_m = 2;
    int df_m = numFactors;
    DenseVector mu0_m = new DenseVector(numFactors);

    // initializing Bayesian PMF using MAP solution found by PMF
    // NOTE(review): the code below creates fresh matrices and calls init(0, 1) on them; no PMF
    // solution is loaded here -- confirm whether the comment above is still accurate.
    P = new DenseMatrix(numUsers, numFactors);
    Q = new DenseMatrix(numItems, numFactors);

    P.init(0, 1);
    Q.init(0, 1);

    // start the hyper-means at the column means of the initial factors
    for (int f = 0; f < numFactors; f++) {
      mu_u.set(f, P.columnMean(f));
      mu_m.set(f, Q.columnMean(f));
    }

    // start alpha_u / alpha_m at the inverse covariance of the initial factors
    DenseMatrix alpha_u = P.cov().inv();
    DenseMatrix alpha_m = Q.cov().inv();

    // Iteration:
    DenseVector x_bar = new DenseVector(numFactors); // column means of P or Q
    DenseVector normalRdn = new DenseVector(numFactors); // reusable buffer of N(0, 1) draws

    DenseMatrix S_bar, WI_post, lam;
    DenseVector mu_temp;
    double df_upost, df_mpost;

    int M = numUsers, N = numItems;

    for (int iter = 1; iter <= numIters; iter++) {

      // Sample from user hyper parameters:
      for (int f = 0; f < numFactors; f++) x_bar.set(f, P.columnMean(f));
      S_bar = P.cov();

      // WI_post = inv( inv(WI_u) + M * S_bar + (M * b0_u / (b0_u + M)) * d d^T ), d = mu0_u - x_bar
      DenseVector mu0_u_x_bar = mu0_u.minus(x_bar);
      DenseMatrix e1e2 = mu0_u_x_bar.outer(mu0_u_x_bar).scale(M * b0_u / (b0_u + M + 0.0));
      WI_post = WI_u.inv().add(S_bar.scale(M)).add(e1e2);
      WI_post = WI_post.inv();
      WI_post = WI_post.add(WI_post.transpose()).scale(0.5); // symmetrise: (A + A^T) / 2

      df_upost = df_u + M;
      DenseMatrix wishrnd_u = wishart(WI_post, df_upost);
      if (wishrnd_u != null) alpha_u = wishrnd_u; // keep the previous alpha_u on a null sample
      mu_temp = mu0_u.scale(b0_u).add(x_bar.scale(M)).scale(1 / (b0_u + M + 0.0));
      lam = alpha_u.scale(b0_u + M).inv().cholesky();

      // mu_u = mu_temp + lam^T * z with z ~ N(0, I); skipped when cholesky() returned null
      if (lam != null) {
        lam = lam.transpose();

        for (int f = 0; f < numFactors; f++) normalRdn.set(f, Randoms.gaussian(0, 1));

        mu_u = lam.mult(normalRdn).add(mu_temp);
      }

      // Sample from item hyper parameters (mirrors the user block, using Q and N):
      for (int f = 0; f < numFactors; f++) x_bar.set(f, Q.columnMean(f));
      S_bar = Q.cov();

      DenseVector mu0_m_x_bar = mu0_m.minus(x_bar);
      DenseMatrix e3e4 = mu0_m_x_bar.outer(mu0_m_x_bar).scale(N * b0_m / (b0_m + N + 0.0));
      WI_post = WI_m.inv().add(S_bar.scale(N)).add(e3e4);
      WI_post = WI_post.inv();
      WI_post = WI_post.add(WI_post.transpose()).scale(0.5); // symmetrise: (A + A^T) / 2

      df_mpost = df_m + N;
      DenseMatrix wishrnd_m = wishart(WI_post, df_mpost);
      if (wishrnd_m != null) alpha_m = wishrnd_m; // keep the previous alpha_m on a null sample
      mu_temp = mu0_m.scale(b0_m).add(x_bar.scale(N)).scale(1 / (b0_m + N + 0.0));
      lam = alpha_m.scale(b0_m + N).inv().cholesky();

      // mu_m = mu_temp + lam^T * z with z ~ N(0, I); skipped when cholesky() returned null
      if (lam != null) {
        lam = lam.transpose();

        for (int f = 0; f < numFactors; f++) normalRdn.set(f, Randoms.gaussian(0, 1));

        mu_m = lam.mult(normalRdn).add(mu_temp);
      }

      // Gibbs updates over user and item feature vectors given hyper parameters:
      // NOTE: in PREA, only 1 iter for gibbs where in the original Matlab code, 2 iters are used.
      // This loop performs 2 sweeps.
      for (int gibbs = 0; gibbs < 2; gibbs++) {
        // Infer posterior distribution over all user feature vectors
        for (int u = 0; u < numUsers; u++) {
          // list of items rated by user u:
          SparseVector rv = trainMatrix.row(u);
          int count = rv.getCount();

          if (count == 0) continue; // no training ratings: this row of P is left unchanged

          // features of items rated by user u (MM) and the ratings minus globalMean (rr):
          DenseMatrix MM = new DenseMatrix(count, numFactors);
          DenseVector rr = new DenseVector(count);
          int idx = 0;
          for (int j : rv.getIndex()) {
            rr.set(idx, rv.get(j) - globalMean);
            for (int f = 0; f < numFactors; f++) MM.set(idx, f, Q.get(j, f));

            idx++;
          }

          // covar  = inv(alpha_u + beta * MM^T MM)
          // mean_u = covar * (beta * MM^T rr + alpha_u * mu_u)
          DenseMatrix covar = alpha_u.add((MM.transpose().mult(MM)).scale(beta)).inv();
          DenseVector a = MM.transpose().mult(rr).scale(beta);
          DenseVector b = alpha_u.mult(mu_u);
          DenseVector mean_u = covar.mult(a.add(b));
          lam = covar.cholesky();

          // new row of P = mean_u + lam^T * z with z ~ N(0, I); skipped when cholesky() is null
          if (lam != null) {
            lam = lam.transpose();
            for (int f = 0; f < numFactors; f++) normalRdn.set(f, Randoms.gaussian(0, 1));

            DenseVector w1_P1_u = lam.mult(normalRdn).add(mean_u);

            for (int f = 0; f < numFactors; f++) P.set(u, f, w1_P1_u.get(f));
          }
        }

        // Infer posterior distribution over all item feature vectors
        for (int j = 0; j < numItems; j++) {
          // list of users who rated item j:
          SparseVector jv = trainMatrix.column(j);
          int count = jv.getCount();
          if (count == 0) continue; // no training ratings: this row of Q is left unchanged

          // features of users who rated item j (MM) and the ratings minus globalMean (rr):
          DenseMatrix MM = new DenseMatrix(count, numFactors);
          DenseVector rr = new DenseVector(count);
          int idx = 0;
          for (int u : jv.getIndex()) {
            rr.set(idx, jv.get(u) - globalMean);
            for (int f = 0; f < numFactors; f++) MM.set(idx, f, P.get(u, f));

            idx++;
          }

          // covar  = inv(alpha_m + beta * MM^T MM)
          // mean_m = covar * (beta * MM^T rr + alpha_m * mu_m)
          DenseMatrix covar = alpha_m.add((MM.transpose().mult(MM)).scale(beta)).inv();
          DenseVector a = MM.transpose().mult(rr).scale(beta);
          DenseVector b = alpha_m.mult(mu_m);
          DenseVector mean_m = covar.mult(a.add(b));
          lam = covar.cholesky();

          // new row of Q = mean_m + lam^T * z with z ~ N(0, I); skipped when cholesky() is null
          if (lam != null) {
            lam = lam.transpose();
            for (int f = 0; f < numFactors; f++) normalRdn.set(f, Randoms.gaussian(0, 1));

            DenseVector w1_M1_j = lam.mult(normalRdn).add(mean_m);

            for (int f = 0; f < numFactors; f++) Q.set(j, f, w1_M1_j.get(f));
          }
        }
      } // end of gibbs

      // loss = 0.5 * sum of squared errors (rating - prediction) over the training entries
      loss = 0;
      for (MatrixEntry me : trainMatrix) {
        int u = me.row();
        int j = me.column();
        double ruj = me.get();
        double pred = predict(u, j);
        double euj = ruj - pred;

        loss += euj * euj;
      }
      loss *= 0.5;

      if (isConverged(iter)) break;
    }
  }
Beispiel #8
0
  /**
   * Draws one random matrix from a Wishart distribution.
   *
   * <p>The sample is assembled as {@code A^T * B * A}, where {@code A = scale.cholesky()} and
   * {@code B} is a symmetric p-by-p matrix built from a p-by-p table of standard normal draws
   * and one gamma draw per diagonal position.
   *
   * @param scale scale parameter for Wishart Distribution.
   * @param df degree of freedom for Wishart Distribution.
   * @return the sample randomly drawn from the given distribution, or {@code null} when
   *     {@code scale.cholesky()} returns {@code null}
   */
  protected DenseMatrix wishart(DenseMatrix scale, double df) {
    DenseMatrix chol = scale.cholesky();
    if (chol == null) {
      return null;
    }

    int dim = scale.numRows();

    // dim x dim table of N(0, 1) draws, filled row by row
    DenseMatrix normals = new DenseMatrix(dim, dim);
    for (int r = 0; r < dim; r++) {
      for (int c = 0; c < dim; c++) {
        normals.set(r, c, Randoms.gaussian(0, 1));
      }
    }

    // one gamma draw per diagonal position d, first argument (df - (d + 1)) / 2, second 2
    SparseVector gammas = new SparseVector(dim);
    for (int d = 0; d < dim; d++) {
      double halfDf = (df - (d + 1)) / 2;
      gammas.set(d, Randoms.gamma(halfDf, 2));
    }

    DenseMatrix sym = new DenseMatrix(dim, dim);
    sym.set(0, 0, gammas.get(0));

    // Fill the remaining cells column by column; each cell depends only on the draws above,
    // so the order in which the cells are written does not matter.
    for (int col = 1; col < dim; col++) {

      // diagonal cell: gamma draw plus squared normals above the diagonal in this column
      SparseVector above = new SparseVector(col);
      for (int k = 0; k < col; k++) {
        above.set(k, normals.get(k, col));
      }
      sym.set(col, col, gammas.get(col) + above.inner(above));

      // first row and, mirrored, first column
      sym.set(0, col, normals.get(0, col) * Math.sqrt(gammas.get(0)));
      sym.set(col, 0, sym.get(0, col));

      // interior cells strictly above the diagonal, mirrored below it
      for (int row = 1; row < col; row++) {
        SparseVector headOfRowColumn = new SparseVector(row);
        SparseVector headOfColColumn = new SparseVector(row);
        for (int k = 0; k < row; k++) {
          headOfRowColumn.set(k, normals.get(k, row));
          headOfColColumn.set(k, normals.get(k, col));
        }

        double cross = headOfRowColumn.inner(headOfColColumn);
        sym.set(row, col, normals.get(row, col) * Math.sqrt(gammas.get(row)) + cross);
        sym.set(col, row, sym.get(row, col));
      }
    }

    return chol.transpose().mult(sym).mult(chol);
  }