예제 #1
0
  /**
   * Writes every entry held in validationMatrix back into the given rating matrix.
   *
   * <p>Users are visited from 1 to userCount; a user whose validation row reports no index list is
   * skipped. The validationMatrix itself is left untouched.
   *
   * @param rateMatrix The rating matrix that receives the validation entries.
   */
  private void restoreValidationSet(SparseMatrix rateMatrix) {
    for (int user = 1; user <= userCount; user++) {
      SparseVector heldOutRow = validationMatrix.getRowRef(user);
      int[] heldOutItems = heldOutRow.indexList();

      // Nothing was held out for this user.
      if (heldOutItems == null) {
        continue;
      }

      for (int k = 0; k < heldOutItems.length; k++) {
        int item = heldOutItems[k];
        double rating = validationMatrix.getValue(user, item);
        rateMatrix.setValue(user, item, rating);
      }
    }
  }
예제 #2
0
  /**
   * Items which will be used for validation purpose are moved from rateMatrix to validationMatrix.
   *
   * <p>Users are drawn at random, and a drawn user gives up one randomly chosen rating only while
   * it holds more than five of them. The number of items to move is capped at the number of ratings
   * that can be given up under that rule, so the sampling loop is never left waiting for an
   * eligible user that does not exist (previously it would loop without end in that situation).
   *
   * @param rateMatrix The rating matrix with train data; each sampled entry is copied into
   *     validationMatrix and then set to 0.0 in this matrix.
   * @param validationRatio Proportion of dataset, using for validation purpose.
   */
  private void makeValidationSet(SparseMatrix rateMatrix, double validationRatio) {
    // Only a row holding more than this many ratings may give one up.
    final int minRatingsKept = 5;

    validationMatrix = new SparseMatrix(userCount + 1, itemCount + 1);

    // Count how many ratings can be moved at most: each row can give up everything beyond
    // minRatingsKept.
    // NOTE(review): assumes setValue(..., 0.0) drops the entry from the row's index list, so a
    // row shrinks as it donates — confirm against SparseMatrix.
    int movableCount = 0;
    for (int u = 1; u <= userCount; u++) {
      int[] candidates = rateMatrix.getRowRef(u).indexList();
      if (candidates != null && candidates.length > minRatingsKept) {
        movableCount += candidates.length - minRatingsKept;
      }
    }

    int requestedCount = (int) (rateMatrix.itemCount() * validationRatio);
    // Never ask for more than the data can supply; otherwise the loop below cannot finish.
    int validationCount = Math.min(requestedCount, movableCount);

    while (validationCount > 0) {
      // Users are indexed from 1 to userCount.
      int index = (int) (Math.random() * userCount) + 1;
      SparseVector row = rateMatrix.getRowRef(index);
      int[] itemList = row.indexList();

      if (itemList != null && itemList.length > minRatingsKept) {
        int chosenItem = itemList[(int) (Math.random() * itemList.length)];

        // Copy the rating to the validation set first, then clear it from the training data.
        validationMatrix.setValue(index, chosenItem, rateMatrix.getValue(index, chosenItem));
        rateMatrix.setValue(index, chosenItem, 0.0);

        validationCount--;
      }
    }
  }
예제 #3
0
  /**
   * Evaluate the designated algorithm with the given test data.
   *
   * <p>For every (user, item) entry present in the test matrix, the prediction is the sum over all
   * local models of the model's feature inner product, multiplied by the kernelized user and item
   * similarities to that model's anchor point and divided by weightSum[user][item]. A prediction
   * that is NaN or exactly 0.0 is replaced by the midpoint of the rating range, and the result is
   * then limited to [minValue, maxValue].
   *
   * @param testMatrix The rating matrix with test data.
   * @return The result of evaluation, such as MAE, RMSE, and rank-score.
   */
  @Override
  public EvaluationMetrics evaluate(SparseMatrix testMatrix) {
    SparseMatrix predicted = new SparseMatrix(userCount + 1, itemCount + 1);

    for (int user = 1; user <= userCount; user++) {
      int[] testedItems = testMatrix.getRowRef(user).indexList();

      // No test entries for this user.
      if (testedItems == null) {
        continue;
      }

      for (int item : testedItems) {
        double estimate = 0.0;

        for (int model = 0; model < modelMax; model++) {
          double featureDot =
              localUserFeatures[model]
                  .getRow(user)
                  .innerProduct(localItemFeatures[model].getCol(item));
          double userKernel =
              KernelSmoothing.kernelize(
                  getUserSimilarity(anchorUser[model], user), kernelWidth, kernelType);
          double itemKernel =
              KernelSmoothing.kernelize(
                  getItemSimilarity(anchorItem[model], item), kernelWidth, kernelType);

          estimate += featureDot * userKernel * itemKernel / weightSum[user][item];
        }

        // Fall back to the middle of the rating range when no usable estimate was produced.
        boolean unusable = Double.isNaN(estimate) || estimate == 0.0;
        if (unusable) {
          estimate = (maxValue + minValue) / 2;
        }

        // Keep the estimate inside the valid rating range.
        estimate =
            estimate < minValue ? minValue : (estimate > maxValue ? maxValue : estimate);

        predicted.setValue(user, item, estimate);
      }
    }

    return new EvaluationMetrics(testMatrix, predicted, maxValue, minValue);
  }
예제 #4
0
  /**
   * Build a model with given training set.
   *
   * <p>Steps, in order: a validation set is split off from rateMatrix; one random anchor
   * (user, item) pair is chosen per local model; the per-cell kernel weight sums are computed; the
   * local feature matrices are filled with random values; and the features are then trained by
   * gradient descent until the validation RMSE changes by at most 0.001 between rounds or maxIter
   * rounds have run.
   *
   * <p>The held-out validation entries are written back into rateMatrix when this method exits,
   * including when training ends with an exception, so the caller's matrix is not left without
   * them.
   *
   * @param rateMatrix The rating matrix with train data.
   */
  @Override
  public void buildModel(SparseMatrix rateMatrix) {
    makeValidationSet(rateMatrix, validationRatio);

    try {
      // Preparing data structures:
      localUserFeatures = new SparseMatrix[modelMax];
      localItemFeatures = new SparseMatrix[modelMax];

      selectAnchorPoints(rateMatrix);

      // Pre-calculating similarity:
      userSimilarity = new SparseMatrix(userCount + 1, userCount + 1);
      itemSimilarity = new SparseMatrix(itemCount + 1, itemCount + 1);

      accumulateKernelWeightSums();

      // Initialize local models:
      randomizeLocalFeatures();

      // Learn by gradient descent:
      int round = 0;
      double prevErr = 99999;
      double currErr = 9999;

      while (Math.abs(prevErr - currErr) > 0.001 && round < maxIter) {
        runGradientDescentPass(rateMatrix);

        // Intermediate evaluation for trend graphing. evaluate() applies exactly the prediction,
        // fallback and clamping steps that used to be repeated inline here.
        EvaluationMetrics e = evaluate(validationMatrix);

        prevErr = currErr;
        currErr = e.getRMSE();

        round++;

        // Show progress:
        if (showProgress) {
          System.out.println(round + "\t" + e.printOneLine());
        }
      }
    } finally {
      // Always hand the validation ratings back, even if training failed part-way.
      restoreValidationSet(rateMatrix);
    }
  }

  /** Picks one random (user, item) anchor per local model from the entries of rateMatrix. */
  private void selectAnchorPoints(SparseMatrix rateMatrix) {
    anchorUser = new int[modelMax];
    anchorItem = new int[modelMax];

    for (int l = 0; l < modelMax; l++) {
      // Redraw until a user with an index list is found.
      // NOTE(review): this never finishes if no user row in rateMatrix has an index list.
      boolean done = false;
      while (!done) {
        int u_t = (int) Math.floor(Math.random() * userCount) + 1;
        int[] itemList = rateMatrix.getRow(u_t).indexList();

        if (itemList != null) {
          int idx = (int) Math.floor(Math.random() * itemList.length);

          anchorUser[l] = u_t;
          anchorItem[l] = itemList[idx];

          done = true;
        }
      }
    }
  }

  /** Fills weightSum[u][i] with the sum over all local models of user kernel times item kernel. */
  private void accumulateKernelWeightSums() {
    weightSum = new double[userCount + 1][itemCount + 1];

    for (int u = 1; u <= userCount; u++) {
      for (int i = 1; i <= itemCount; i++) {
        for (int l = 0; l < modelMax; l++) {
          weightSum[u][i] +=
              KernelSmoothing.kernelize(
                      getUserSimilarity(anchorUser[l], u), kernelWidth, kernelType)
                  * KernelSmoothing.kernelize(
                      getItemSimilarity(anchorItem[l], i), kernelWidth, kernelType);
        }
      }
    }
  }

  /** Creates the user and item feature matrices of every local model with random entries. */
  private void randomizeLocalFeatures() {
    for (int l = 0; l < modelMax; l++) {
      localUserFeatures[l] = new SparseMatrix(userCount + 1, featureCount);
      localItemFeatures[l] = new SparseMatrix(featureCount, itemCount + 1);

      for (int u = 1; u <= userCount; u++) {
        for (int r = 0; r < featureCount; r++) {
          localUserFeatures[l].setValue(u, r, Math.random());
        }
      }
      for (int i = 1; i <= itemCount; i++) {
        for (int r = 0; r < featureCount; r++) {
          localItemFeatures[l].setValue(r, i, Math.random());
        }
      }
    }
  }

  /** Runs one gradient-descent sweep over every entry of rateMatrix, updating all local models. */
  private void runGradientDescentPass(SparseMatrix rateMatrix) {
    for (int u = 1; u <= userCount; u++) {
      int[] itemIndexList = rateMatrix.getRowRef(u).indexList();
      if (itemIndexList == null) {
        continue;
      }

      for (int i : itemIndexList) {
        // current estimation:
        double estimate = 0.0;
        for (int l = 0; l < modelMax; l++) {
          estimate +=
              localUserFeatures[l].getRow(u).innerProduct(localItemFeatures[l].getCol(i))
                  * KernelSmoothing.kernelize(
                      getUserSimilarity(anchorUser[l], u), kernelWidth, kernelType)
                  * KernelSmoothing.kernelize(
                      getItemSimilarity(anchorItem[l], i), kernelWidth, kernelType)
                  / weightSum[u][i];
        }
        double err = rateMatrix.getValue(u, i) - estimate;

        // parameter update:
        for (int l = 0; l < modelMax; l++) {
          double weight =
              KernelSmoothing.kernelize(
                      getUserSimilarity(anchorUser[l], u), kernelWidth, kernelType)
                  * KernelSmoothing.kernelize(
                      getItemSimilarity(anchorItem[l], i), kernelWidth, kernelType)
                  / weightSum[u][i];

          for (int r = 0; r < featureCount; r++) {
            // Both updates use the values read before either one is written.
            double Fus = localUserFeatures[l].getValue(u, r);
            double Gis = localItemFeatures[l].getValue(r, i);

            localUserFeatures[l].setValue(
                u, r, Fus + learningRate * (err * Gis * weight - regularizer * Fus));
            localItemFeatures[l].setValue(
                r, i, Gis + learningRate * (err * Fus * weight - regularizer * Gis));
          }
        }
      }
    }
  }