@Override public void predict(long uid, @Nonnull MutableSparseVector predictions) { logger.debug("predicting {} items for {}", predictions.keyDomain().size(), uid); OrdRecModel params = new OrdRecModel(quantizer); SparseVector ratings = makeUserVector(uid, userEventDao); LongSet keySet = LongUtils.setUnion(ratings.keySet(), predictions.keyDomain()); MutableSparseVector scores = MutableSparseVector.create(keySet); itemScorer.score(uid, scores); params.train(ratings, scores); logger.debug("trained parameters for {}: {}", uid, params); Vector probabilities = Vector.createLength(params.getLevelCount()); Long2ObjectMap<IVector> distChannel = null; if (reportDistribution) { distChannel = predictions.addChannel(RATING_PROBABILITY_CHANNEL); } for (VectorEntry e : predictions.fast(VectorEntry.State.EITHER)) { long iid = e.getKey(); double score = scores.get(iid); params.getProbDistribution(score, probabilities); int mlIdx = probabilities.maxElementIndex(); predictions.set(e, quantizer.getIndexValue(mlIdx)); if (distChannel != null) { distChannel.put(e.getKey(), probabilities.immutable()); } } }
/** * The constructor of OrdRecParameter. It use the quantized values of rating to initialize t1 * and beta. Each threshold is initialized as the mean of two contiguous rating values. Since * the index of quantizer is always an successive non-negative integer begin from 0, so t1 will * initialize as 0.5, and the interval between two thresholds will be 1. * * @param qtz The quantizer for ratings */ private OrdRecModel(Quantizer qtz) { qtzValues = qtz.getValues(); levelCount = qtzValues.length(); t1 = (qtzValues.get(0) + qtzValues.get(1)) / 2; beta = Vector.createLength(levelCount - 2); double tr = t1; for (int i = 1; i <= beta.length(); i++) { double trnext = (qtzValues.get(i) + qtzValues.get(i + 1)) * 0.5; beta.set(i - 1, Math.log(trnext - tr)); tr = trnext; } }
/** The train function of OrdRec. Get all parameters after learning process. */ @SuppressWarnings("ConstantConditions") private void train(SparseVector ratings, MutableSparseVector scores) { Vector dbeta = Vector.createLength(beta.length()); double dt1; // n is the number of iteration; for (int j = 0; j < iterationCount; j++) { for (VectorEntry rating : ratings.fast()) { long iid = rating.getKey(); double score = scores.get(iid); int r = quantizer.index(rating.getValue()); double probEqualR = getProbEQ(score, r); double probLessR = getProbLE(score, r); double probLessR_1 = getProbLE(score, r - 1); dt1 = learningRate / probEqualR * (probLessR * (1 - probLessR) * derivateOfBeta(r, 0, t1) - probLessR_1 * (1 - probLessR_1) * derivateOfBeta(r - 1, 0, t1) - regTerm * t1); double dbetaK; for (int k = 0; k < beta.length(); k++) { dbetaK = learningRate / probEqualR * (probLessR * (1 - probLessR) * derivateOfBeta(r, k + 1, beta.get(k)) - probLessR_1 * (1 - probLessR_1) * derivateOfBeta(r - 1, k + 1, beta.get(k)) - regTerm * beta.get(k)); dbeta.set(k, dbetaK); } t1 = t1 + dt1; beta.add(dbeta); } } }