/**
 * Copies every rating stored in validationMatrix back into the given rating matrix.
 *
 * <p>Users 1..userCount are visited; users whose validation row reports no index list are
 * skipped. validationMatrix itself is left untouched.
 *
 * @param rateMatrix The rating matrix that receives the held-out ratings.
 */
private void restoreValidationSet(SparseMatrix rateMatrix) {
  for (int user = 1; user <= userCount; user++) {
    int[] heldOutItems = validationMatrix.getRowRef(user).indexList();
    if (heldOutItems == null) {
      continue;
    }
    for (int k = 0; k < heldOutItems.length; k++) {
      int item = heldOutItems[k];
      rateMatrix.setValue(user, item, validationMatrix.getValue(user, item));
    }
  }
}
/**
 * Items which will be used for validation purpose are moved from rateMatrix to validationMatrix.
 *
 * <p>A fresh validationMatrix is allocated, then ratings are drawn one at a time: a user is
 * picked uniformly among the users that currently have more than five rated items, one of that
 * user's items is picked uniformly, its rating is copied into validationMatrix and the entry in
 * rateMatrix is overwritten with 0.0.
 *
 * <p>Sampling stops when the requested number of ratings has been moved or, unlike a plain
 * rejection loop, as soon as no user has more than five rated items left. The validation set
 * may therefore be smaller than requested on very sparse data, but the method always returns.
 *
 * @param rateMatrix The rating matrix to draw validation ratings from; modified in place.
 * @param validationRatio Proportion of dataset, using for validation purpose.
 */
private void makeValidationSet(SparseMatrix rateMatrix, double validationRatio) {
  // A user must have strictly more than this many ratings to donate one to validation.
  final int minRatingsKept = 5;

  validationMatrix = new SparseMatrix(userCount + 1, itemCount + 1);
  int validationCount = (int) (rateMatrix.itemCount() * validationRatio);

  // Collect the users that can currently donate a rating. Drawing uniformly from this set
  // is equivalent to drawing from all users and rejecting the ineligible ones, but lets us
  // detect when nobody is eligible any more instead of spinning forever.
  int[] eligibleUsers = new int[userCount];
  int eligibleCount = 0;
  for (int u = 1; u <= userCount; u++) {
    int[] items = rateMatrix.getRowRef(u).indexList();
    if (items != null && items.length > minRatingsKept) {
      eligibleUsers[eligibleCount++] = u;
    }
  }

  while (validationCount > 0 && eligibleCount > 0) {
    int slot = (int) (Math.random() * eligibleCount);
    int user = eligibleUsers[slot];

    // Re-read the row on every draw: earlier iterations may have shrunk it.
    // NOTE(review): presumably setValue(..., 0.0) drops the entry from indexList(); if it
    // does not, rows never shrink and this check simply never fires.
    int[] itemList = rateMatrix.getRowRef(user).indexList();
    if (itemList == null || itemList.length <= minRatingsKept) {
      // Swap-remove the exhausted user and draw again.
      eligibleUsers[slot] = eligibleUsers[--eligibleCount];
      continue;
    }

    int item = itemList[(int) (Math.random() * itemList.length)];
    validationMatrix.setValue(user, item, rateMatrix.getValue(user, item));
    rateMatrix.setValue(user, item, 0.0);
    validationCount--;
  }
}
/**
 * Evaluate the designated algorithm with the given test data.
 *
 * <p>For every (user, item) entry present in the test matrix, the prediction is the sum over
 * all local models of the model's user-row / item-column inner product, scaled by the kernelized
 * user and item similarities to that model's anchor and divided by weightSum[user][item].
 * A NaN or exactly-zero prediction is replaced by the midpoint of the rating range, and the
 * result is clamped to [minValue, maxValue].
 *
 * @param testMatrix The rating matrix with test data.
 * @return An EvaluationMetrics built from the test matrix, the predictions, and the rating
 *     bounds.
 */
@Override
public EvaluationMetrics evaluate(SparseMatrix testMatrix) {
  SparseMatrix predicted = new SparseMatrix(userCount + 1, itemCount + 1);

  for (int user = 1; user <= userCount; user++) {
    int[] testItems = testMatrix.getRowRef(user).indexList();
    if (testItems == null) {
      continue;
    }

    for (int item : testItems) {
      double estimate = 0.0;
      for (int model = 0; model < modelMax; model++) {
        double dot =
            localUserFeatures[model]
                .getRow(user)
                .innerProduct(localItemFeatures[model].getCol(item));
        double userKernel =
            KernelSmoothing.kernelize(
                getUserSimilarity(anchorUser[model], user), kernelWidth, kernelType);
        double itemKernel =
            KernelSmoothing.kernelize(
                getItemSimilarity(anchorItem[model], item), kernelWidth, kernelType);
        estimate += dot * userKernel * itemKernel / weightSum[user][item];
      }

      // Fall back to the middle of the rating scale when no usable estimate was produced.
      boolean unusable = Double.isNaN(estimate) || estimate == 0.0;
      if (unusable) {
        estimate = (maxValue + minValue) / 2;
      }

      // Clamp into the valid rating range (lower bound is checked first).
      estimate = estimate < minValue ? minValue : (estimate > maxValue ? maxValue : estimate);

      predicted.setValue(user, item, estimate);
    }
  }

  return new EvaluationMetrics(testMatrix, predicted, maxValue, minValue);
}
/** * Build a model with given training set. * * @param rateMatrix The rating matrix with train data. */ @Override public void buildModel(SparseMatrix rateMatrix) { makeValidationSet(rateMatrix, validationRatio); // Preparing data structures: localUserFeatures = new SparseMatrix[modelMax]; localItemFeatures = new SparseMatrix[modelMax]; anchorUser = new int[modelMax]; anchorItem = new int[modelMax]; for (int l = 0; l < modelMax; l++) { boolean done = false; while (!done) { int u_t = (int) Math.floor(Math.random() * userCount) + 1; int[] itemList = rateMatrix.getRow(u_t).indexList(); if (itemList != null) { int idx = (int) Math.floor(Math.random() * itemList.length); int i_t = itemList[idx]; anchorUser[l] = u_t; anchorItem[l] = i_t; done = true; } } } // Pre-calculating similarity: userSimilarity = new SparseMatrix(userCount + 1, userCount + 1); itemSimilarity = new SparseMatrix(itemCount + 1, itemCount + 1); weightSum = new double[userCount + 1][itemCount + 1]; for (int u = 1; u <= userCount; u++) { for (int i = 1; i <= itemCount; i++) { for (int l = 0; l < modelMax; l++) { weightSum[u][i] += KernelSmoothing.kernelize( getUserSimilarity(anchorUser[l], u), kernelWidth, kernelType) * KernelSmoothing.kernelize( getItemSimilarity(anchorItem[l], i), kernelWidth, kernelType); } } } // Initialize local models: for (int l = 0; l < modelMax; l++) { localUserFeatures[l] = new SparseMatrix(userCount + 1, featureCount); localItemFeatures[l] = new SparseMatrix(featureCount, itemCount + 1); for (int u = 1; u <= userCount; u++) { for (int r = 0; r < featureCount; r++) { double rdm = Math.random(); localUserFeatures[l].setValue(u, r, rdm); } } for (int i = 1; i <= itemCount; i++) { for (int r = 0; r < featureCount; r++) { double rdm = Math.random(); localItemFeatures[l].setValue(r, i, rdm); } } } // Learn by gradient descent: int round = 0; double prevErr = 99999; double currErr = 9999; while (Math.abs(prevErr - currErr) > 0.001 && round < maxIter) { for (int u = 1; u <= 
userCount; u++) { SparseVector items = rateMatrix.getRowRef(u); int[] itemIndexList = items.indexList(); if (itemIndexList != null) { for (int i : itemIndexList) { // current estimation: double RuiEst = 0.0; for (int l = 0; l < modelMax; l++) { RuiEst += localUserFeatures[l].getRow(u).innerProduct(localItemFeatures[l].getCol(i)) * KernelSmoothing.kernelize( getUserSimilarity(anchorUser[l], u), kernelWidth, kernelType) * KernelSmoothing.kernelize( getItemSimilarity(anchorItem[l], i), kernelWidth, kernelType) / weightSum[u][i]; } double RuiReal = rateMatrix.getValue(u, i); double err = RuiReal - RuiEst; // parameter update: for (int l = 0; l < modelMax; l++) { double weight = KernelSmoothing.kernelize( getUserSimilarity(anchorUser[l], u), kernelWidth, kernelType) * KernelSmoothing.kernelize( getItemSimilarity(anchorItem[l], i), kernelWidth, kernelType) / weightSum[u][i]; for (int r = 0; r < featureCount; r++) { double Fus = localUserFeatures[l].getValue(u, r); double Gis = localItemFeatures[l].getValue(r, i); localUserFeatures[l].setValue( u, r, Fus + learningRate * (err * Gis * weight - regularizer * Fus)); if (Double.isNaN(Fus + learningRate * (err * Gis * weight - regularizer * Fus))) { // System.out.println("a"); } localItemFeatures[l].setValue( r, i, Gis + learningRate * (err * Fus * weight - regularizer * Gis)); if (Double.isNaN(Gis + learningRate * (err * Fus * weight - regularizer * Gis))) { // System.out.println("b"); } } } } } } // Intermediate evaluation for trend graphing: SparseMatrix predicted = new SparseMatrix(userCount + 1, itemCount + 1); for (int u = 1; u <= userCount; u++) { SparseVector items = validationMatrix.getRowRef(u); int[] itemIndexList = items.indexList(); if (itemIndexList != null) { for (int i : itemIndexList) { double prediction = 0.0; for (int l = 0; l < modelMax; l++) { prediction += localUserFeatures[l].getRow(u).innerProduct(localItemFeatures[l].getCol(i)) * KernelSmoothing.kernelize( getUserSimilarity(anchorUser[l], u), 
kernelWidth, kernelType) * KernelSmoothing.kernelize( getItemSimilarity(anchorItem[l], i), kernelWidth, kernelType) / weightSum[u][i]; } if (Double.isNaN(prediction) || prediction == 0.0) { prediction = (maxValue + minValue) / 2; } if (prediction < minValue) { prediction = minValue; } else if (prediction > maxValue) { prediction = maxValue; } predicted.setValue(u, i, prediction); } } } EvaluationMetrics e = new EvaluationMetrics(validationMatrix, predicted, maxValue, minValue); prevErr = currErr; currErr = e.getRMSE(); round++; // Show progress: if (showProgress) { System.out.println(round + "\t" + e.printOneLine()); } } restoreValidationSet(rateMatrix); }