예제 #1
0
  /**
   * Calculate similarity between two items, based on the global base SVD.
   *
   * <p>The {@code itemSimilarity} matrix is used as a cache and is always addressed with the
   * smaller index first. A cached value of exactly {@code 0.0} is treated as "not computed yet";
   * in that case the similarity {@code 1 - (2 / PI) * acos(cosine)} of the two item columns of
   * {@code baseline.getV()} is computed and written back to the cache.
   *
   * <p>The cosine is clamped to {@code [-1, 1]} before {@code acos} is applied: rounding in the
   * inner product and the norms can push it marginally outside that range (typically when an item
   * is compared with itself), and {@code acos} would then return NaN, which used to be reported
   * as a similarity of {@code 0.0} instead of roughly {@code 1.0}.
   *
   * @param idx1 The first item's ID.
   * @param idx2 The second item's ID.
   * @return The similarity value between two items idx1 and idx2, or {@code 0.0} when the
   *     computation yields NaN (e.g. the product of the norms is zero).
   */
  private static double getItemSimilarity(int idx1, int idx2) {
    // Only one triangle of the symmetric cache is used: (smaller index, larger index).
    int lo = Math.min(idx1, idx2);
    int hi = Math.max(idx1, idx2);

    double cached = itemSimilarity.getValue(lo, hi);
    if (cached != 0.0) {
      return cached;
    }

    SparseVector i_vec = baseline.getV().getColRef(idx1);
    SparseVector j_vec = baseline.getV().getColRef(idx2);

    double cosine = i_vec.innerProduct(j_vec) / (i_vec.norm() * j_vec.norm());

    // Guard against rounding overshoot; a NaN cosine (0 / 0) fails both comparisons and is
    // handled by the NaN check below.
    if (cosine > 1.0) {
      cosine = 1.0;
    } else if (cosine < -1.0) {
      cosine = -1.0;
    }

    double sim = 1 - 2.0 / Math.PI * Math.acos(cosine);
    if (Double.isNaN(sim)) {
      sim = 0.0;
    }

    // NOTE: a result of exactly 0.0 is indistinguishable from "not cached" on the next lookup,
    // so such pairs are recomputed on every call.
    itemSimilarity.setValue(lo, hi, sim);

    return sim;
  }
예제 #2
0
  /**
   * Calculate similarity between two users, based on the global base SVD.
   *
   * <p>The {@code userSimilarity} matrix is used as a cache and is always addressed with the
   * smaller index first. A cached value of exactly {@code 0.0} is treated as "not computed yet";
   * in that case the similarity {@code 1 - (2 / PI) * acos(cosine)} of the two user rows of
   * {@code baseline.getU()} is computed and written back to the cache.
   *
   * <p>The cosine is clamped to {@code [-1, 1]} before {@code acos} is applied: rounding in the
   * inner product and the norms can push it marginally outside that range (typically when a user
   * is compared with itself), and {@code acos} would then return NaN, which used to be reported
   * as a similarity of {@code 0.0} instead of roughly {@code 1.0}.
   *
   * @param idx1 The first user's ID.
   * @param idx2 The second user's ID.
   * @return The similarity value between two users idx1 and idx2, or {@code 0.0} when the
   *     computation yields NaN (e.g. the product of the norms is zero).
   */
  private static double getUserSimilarity(int idx1, int idx2) {
    // Only one triangle of the symmetric cache is used: (smaller index, larger index).
    int lo = Math.min(idx1, idx2);
    int hi = Math.max(idx1, idx2);

    double cached = userSimilarity.getValue(lo, hi);
    if (cached != 0.0) {
      return cached;
    }

    SparseVector u_vec = baseline.getU().getRowRef(idx1);
    SparseVector v_vec = baseline.getU().getRowRef(idx2);

    double cosine = u_vec.innerProduct(v_vec) / (u_vec.norm() * v_vec.norm());

    // Guard against rounding overshoot; a NaN cosine (0 / 0) fails both comparisons and is
    // handled by the NaN check below.
    if (cosine > 1.0) {
      cosine = 1.0;
    } else if (cosine < -1.0) {
      cosine = -1.0;
    }

    double sim = 1 - 2.0 / Math.PI * Math.acos(cosine);
    if (Double.isNaN(sim)) {
      sim = 0.0;
    }

    // NOTE: a result of exactly 0.0 is indistinguishable from "not cached" on the next lookup,
    // so such pairs are recomputed on every call.
    userSimilarity.setValue(lo, hi, sim);

    return sim;
  }
예제 #3
0
  /**
   * Items which will be used for validation purpose are moved from rateMatrix to validationMatrix.
   *
   * <p>A fresh {@code validationMatrix} is allocated, then random (user, item) entries are copied
   * into it and set to {@code 0.0} in {@code rateMatrix}. Only users that currently have more than
   * five entries are sampled from.
   *
   * <p>The number of entries to move is capped at what such rows can actually give up. Without the
   * cap the sampling loop never terminates once every row has shrunk to five entries or fewer
   * (small data sets or a large {@code validationRatio}).
   *
   * @param rateMatrix The rating matrix to take validation entries from; it is modified in place.
   * @param validationRatio Proportion of dataset, using for validation purpose.
   */
  private void makeValidationSet(SparseMatrix rateMatrix, double validationRatio) {
    // A row is only sampled while it holds more than this many entries.
    final int minRowSize = 5;

    validationMatrix = new SparseMatrix(userCount + 1, itemCount + 1);

    // Count how many entries can be moved before every row is down to minRowSize.
    // NOTE(review): assumes setValue(..., 0.0) drops the entry from the row's indexList(),
    // which is what the sampling loop below relies on as well - confirm against SparseMatrix.
    int movable = 0;
    for (int u = 1; u <= userCount; u++) {
      int[] rated = rateMatrix.getRowRef(u).indexList();
      if (rated != null && rated.length > minRowSize) {
        movable += rated.length - minRowSize;
      }
    }

    int requested = (int) (rateMatrix.itemCount() * validationRatio);
    int validationCount = Math.min(requested, movable);

    while (validationCount > 0) {
      // User indices are 1-based.
      int index = (int) (Math.random() * userCount) + 1;
      SparseVector row = rateMatrix.getRowRef(index);
      int[] itemList = row.indexList();

      if (itemList != null && itemList.length > minRowSize) {
        int item = itemList[(int) (Math.random() * itemList.length)];

        validationMatrix.setValue(index, item, rateMatrix.getValue(index, item));
        rateMatrix.setValue(index, item, 0.0);

        validationCount--;
      }
    }
  }
예제 #4
0
  /**
   * Writes every entry held in validationMatrix back into the given rating matrix.
   *
   * @param rateMatrix The rating matrix that receives the validation entries.
   */
  private void restoreValidationSet(SparseMatrix rateMatrix) {
    for (int user = 1; user <= userCount; user++) {
      int[] heldOut = validationMatrix.getRowRef(user).indexList();

      // Users without any validation entry have nothing to restore.
      if (heldOut == null) {
        continue;
      }

      for (int k = 0; k < heldOut.length; k++) {
        int item = heldOut[k];
        rateMatrix.setValue(user, item, validationMatrix.getValue(user, item));
      }
    }
  }
예제 #5
0
  /**
   * Evaluate the designated algorithm with the given test data.
   *
   * <p>For every (user, item) entry present in {@code testMatrix} the prediction is the sum over
   * all local models of the model's feature inner product, weighted by the kernelized similarity
   * of the user and of the item to that model's anchor point and divided by
   * {@code weightSum[u][i]}. A prediction that is NaN or exactly {@code 0.0} is replaced by the
   * midpoint of the rating range; every prediction is then clipped to
   * {@code [minValue, maxValue]}.
   *
   * <p>The anchor-user kernel weight does not depend on the item, so it is computed once per user
   * and model instead of once per (item, model) pair. The order of the floating-point operations
   * is unchanged, so the predictions are identical.
   *
   * @param testMatrix The rating matrix with test data.
   * @return The result of evaluation, such as MAE, RMSE, and rank-score.
   */
  @Override
  public EvaluationMetrics evaluate(SparseMatrix testMatrix) {
    SparseMatrix predicted = new SparseMatrix(userCount + 1, itemCount + 1);

    // Reused across users; fully overwritten before each use.
    double[] userKernel = new double[modelMax];

    for (int u = 1; u <= userCount; u++) {
      int[] itemIndexList = testMatrix.getRowRef(u).indexList();
      if (itemIndexList == null) {
        continue;
      }

      for (int l = 0; l < modelMax; l++) {
        userKernel[l] =
            KernelSmoothing.kernelize(getUserSimilarity(anchorUser[l], u), kernelWidth, kernelType);
      }

      for (int i : itemIndexList) {
        double prediction = 0.0;
        for (int l = 0; l < modelMax; l++) {
          prediction +=
              localUserFeatures[l].getRow(u).innerProduct(localItemFeatures[l].getCol(i))
                  * userKernel[l]
                  * KernelSmoothing.kernelize(
                      getItemSimilarity(anchorItem[l], i), kernelWidth, kernelType)
                  / weightSum[u][i];
        }

        // No usable estimate: fall back to the middle of the rating scale.
        if (Double.isNaN(prediction) || prediction == 0.0) {
          prediction = (maxValue + minValue) / 2;
        }

        // Clip to the valid rating range.
        if (prediction < minValue) {
          prediction = minValue;
        } else if (prediction > maxValue) {
          prediction = maxValue;
        }

        predicted.setValue(u, i, prediction);
      }
    }

    return new EvaluationMetrics(testMatrix, predicted, maxValue, minValue);
  }
예제 #6
0
  /**
   * Build a model with given training set.
   *
   * <p>Steps:
   *
   * <ol>
   *   <li>Move a share of the ratings ({@code validationRatio}) into {@code validationMatrix}.
   *   <li>Pick {@code modelMax} random anchor (user, item) points among the remaining ratings.
   *   <li>Tabulate the kernelized similarity of every user and item to every anchor and
   *       accumulate their products into {@code weightSum}.
   *   <li>Initialize the local user and item feature matrices with random values.
   *   <li>Run gradient-descent passes until the validation RMSE changes by at most
   *       {@code 0.001} between two rounds or {@code maxIter} rounds have been executed.
   *   <li>Write the validation ratings back into {@code rateMatrix}.
   * </ol>
   *
   * <p>The validation ratings are restored in a {@code finally} block, so the caller's matrix is
   * put back together even if training fails with an exception.
   *
   * @param rateMatrix The rating matrix with train data. It is modified while training runs and
   *     restored before this method returns.
   */
  @Override
  public void buildModel(SparseMatrix rateMatrix) {
    makeValidationSet(rateMatrix, validationRatio);

    try {
      // Preparing data structures:
      localUserFeatures = new SparseMatrix[modelMax];
      localItemFeatures = new SparseMatrix[modelMax];

      chooseAnchorPoints(rateMatrix);

      // Pre-calculating similarity:
      userSimilarity = new SparseMatrix(userCount + 1, userCount + 1);
      itemSimilarity = new SparseMatrix(itemCount + 1, itemCount + 1);

      // The kernel weights depend only on (model, user) and (model, item), so they are tabulated
      // once instead of being re-derived through the similarity cache for every rating.
      // NOTE(review): assumes KernelSmoothing.kernelize is a pure function of its arguments.
      double[][] userKernel = tabulateAnchorUserKernel();
      double[][] itemKernel = tabulateAnchorItemKernel();

      weightSum = new double[userCount + 1][itemCount + 1];
      for (int u = 1; u <= userCount; u++) {
        for (int i = 1; i <= itemCount; i++) {
          for (int l = 0; l < modelMax; l++) {
            weightSum[u][i] += userKernel[l][u] * itemKernel[l][i];
          }
        }
      }

      // Initialize local models:
      initializeLocalModels();

      // Learn by gradient descent:
      int round = 0;
      double prevErr = 99999;
      double currErr = 9999;

      while (Math.abs(prevErr - currErr) > 0.001 && round < maxIter) {
        runGradientDescentPass(rateMatrix, userKernel, itemKernel);

        // Intermediate evaluation for trend graphing; evaluate() applies exactly the prediction
        // rule that training uses, here on the held-out validation ratings.
        EvaluationMetrics e = evaluate(validationMatrix);

        prevErr = currErr;
        currErr = e.getRMSE();

        round++;

        // Show progress:
        if (showProgress) {
          System.out.println(round + "\t" + e.printOneLine());
        }
      }
    } finally {
      restoreValidationSet(rateMatrix);
    }
  }

  /** Draws one random rated (user, item) pair per local model as that model's anchor point. */
  private void chooseAnchorPoints(SparseMatrix rateMatrix) {
    anchorUser = new int[modelMax];
    anchorItem = new int[modelMax];

    for (int l = 0; l < modelMax; l++) {
      int u_t = 0;
      int[] itemList = null;

      // Redraw until a user with at least one remaining rating is hit.
      while (itemList == null) {
        u_t = (int) Math.floor(Math.random() * userCount) + 1;
        itemList = rateMatrix.getRow(u_t).indexList();
      }

      int idx = (int) Math.floor(Math.random() * itemList.length);

      anchorUser[l] = u_t;
      anchorItem[l] = itemList[idx];
    }
  }

  /** Returns the kernelized similarity of every user to every anchor user, as [model][user]. */
  private double[][] tabulateAnchorUserKernel() {
    double[][] kernel = new double[modelMax][userCount + 1];
    for (int l = 0; l < modelMax; l++) {
      for (int u = 1; u <= userCount; u++) {
        kernel[l][u] =
            KernelSmoothing.kernelize(getUserSimilarity(anchorUser[l], u), kernelWidth, kernelType);
      }
    }
    return kernel;
  }

  /** Returns the kernelized similarity of every item to every anchor item, as [model][item]. */
  private double[][] tabulateAnchorItemKernel() {
    double[][] kernel = new double[modelMax][itemCount + 1];
    for (int l = 0; l < modelMax; l++) {
      for (int i = 1; i <= itemCount; i++) {
        kernel[l][i] =
            KernelSmoothing.kernelize(getItemSimilarity(anchorItem[l], i), kernelWidth, kernelType);
      }
    }
    return kernel;
  }

  /** Allocates the feature matrices of every local model and fills them with random values. */
  private void initializeLocalModels() {
    for (int l = 0; l < modelMax; l++) {
      localUserFeatures[l] = new SparseMatrix(userCount + 1, featureCount);
      localItemFeatures[l] = new SparseMatrix(featureCount, itemCount + 1);

      for (int u = 1; u <= userCount; u++) {
        for (int r = 0; r < featureCount; r++) {
          localUserFeatures[l].setValue(u, r, Math.random());
        }
      }
      for (int i = 1; i <= itemCount; i++) {
        for (int r = 0; r < featureCount; r++) {
          localItemFeatures[l].setValue(r, i, Math.random());
        }
      }
    }
  }

  /** Performs one gradient-descent sweep over all ratings currently present in rateMatrix. */
  private void runGradientDescentPass(
      SparseMatrix rateMatrix, double[][] userKernel, double[][] itemKernel) {
    for (int u = 1; u <= userCount; u++) {
      int[] itemIndexList = rateMatrix.getRowRef(u).indexList();
      if (itemIndexList == null) {
        continue;
      }

      for (int i : itemIndexList) {
        // current estimation:
        double ruiEst = 0.0;
        for (int l = 0; l < modelMax; l++) {
          ruiEst +=
              localUserFeatures[l].getRow(u).innerProduct(localItemFeatures[l].getCol(i))
                  * userKernel[l][u]
                  * itemKernel[l][i]
                  / weightSum[u][i];
        }
        double ruiReal = rateMatrix.getValue(u, i);
        double err = ruiReal - ruiEst;

        // parameter update:
        for (int l = 0; l < modelMax; l++) {
          double weight = userKernel[l][u] * itemKernel[l][i] / weightSum[u][i];

          for (int r = 0; r < featureCount; r++) {
            // Both updates use the values read before either of them is written.
            double fus = localUserFeatures[l].getValue(u, r);
            double gis = localItemFeatures[l].getValue(r, i);

            localUserFeatures[l].setValue(
                u, r, fus + learningRate * (err * gis * weight - regularizer * fus));
            localItemFeatures[l].setValue(
                r, i, gis + learningRate * (err * fus * weight - regularizer * gis));
          }
        }
      }
    }
  }