/**
 * Builds L2-regularized regressors for a sequence of regularization parameter lambdas on sparse
 * inputs. Each row of the input represents a feature (instead of a data point), i.e., in
 * column-oriented format. This procedure does not assume the data is normalized or centered.
 *
 * @param attrs the attribute list.
 * @param indices the indices.
 * @param values the values.
 * @param y the targets.
 * @param maxNumIters the maximum number of iterations.
 * @param lambdas the lambdas array.
 * @return L2-regularized regressors.
 */
public GLM[] buildGaussianRegressors(
        int[] attrs, int[][] indices, double[][] values, double[] y, int maxNumIters,
        double[] lambdas) {
    double[] w = new double[attrs.length];
    double intercept = 0;

    GLM[] glms = new GLM[lambdas.length];
    Arrays.sort(lambdas);

    // Backup targets
    double[] rTrain = new double[y.length];
    for (int i = 0; i < rTrain.length; i++) {
        rTrain[i] = y[i];
    }

    // Calculate sum of squares
    double[] sq = new double[attrs.length];
    for (int i = 0; i < values.length; i++) {
        sq[i] = StatUtils.sumSq(values[i]);
    }

    // Compute the regularization path
    for (int g = 0; g < glms.length; g++) {
        double lambda = lambdas[g];

        // Coordinate descent
        final double tl2 = lambda * y.length;
        for (int iter = 0; iter < maxNumIters; iter++) {
            double prevLoss = GLMOptimUtils.computeRidgeLoss(rTrain, w, lambda);

            if (fitIntercept) {
                intercept += OptimUtils.fitIntercept(rTrain);
            }

            doOnePassGaussian(indices, values, sq, tl2, w, rTrain);

            double currLoss = GLMOptimUtils.computeRidgeLoss(rTrain, w, lambda);
            if (verbose) {
                System.out.println("Iteration " + iter + ": " + currLoss);
            }

            if (OptimUtils.isConverged(prevLoss, currLoss, epsilon)) {
                break;
            }
        }

        glms[g] = GLMOptimUtils.getGLM(attrs, w, intercept, LinkFunction.IDENTITY);
    }

    return glms;
}
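/*
 * The coordinate descent pass itself is not shown in this section. The method below is a
 * minimal sketch of what doOnePassGaussian might compute, under the following assumptions
 * (none confirmed here): rTrain holds the current residuals y - X * w, indices[j]/values[j]
 * hold the nonzeros of feature j in column-oriented format, and sq[j] is the sum of squares
 * of column j. Each coordinate gets its closed-form ridge update, and the residuals are kept
 * in sync so the next coordinate sees the effect of the update.
 */
protected void doOnePassGaussian(
        int[][] indices, double[][] values, double[] sq, final double tl2, double[] w,
        double[] rTrain) {
    for (int j = 0; j < w.length; j++) {
        int[] index = indices[j];
        double[] value = values[j];

        // dot(x_j, r): the (negative) gradient contribution of feature j (sketch)
        double eta = 0;
        for (int i = 0; i < index.length; i++) {
            eta += value[i] * rTrain[index[i]];
        }

        // Closed-form minimizer in coordinate j of
        // 0.5 * ||r + (w[j] - wj) * x_j||^2 + 0.5 * tl2 * wj^2
        double newW = (eta + sq[j] * w[j]) / (sq[j] + tl2);
        double delta = newW - w[j];
        w[j] = newW;

        // Keep residuals consistent with the updated coordinate
        for (int i = 0; i < index.length; i++) {
            rTrain[index[i]] -= delta * value[i];
        }
    }
}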
/**
 * Builds L2-regularized binary classifiers for a sequence of regularization parameter lambdas
 * on sparse inputs. Each row of the input represents a feature (instead of a data point), i.e.,
 * in column-oriented format. This procedure does not assume the data is normalized or centered.
 *
 * @param attrs the attribute list.
 * @param indices the indices.
 * @param values the values.
 * @param y the targets.
 * @param maxNumIters the maximum number of iterations.
 * @param lambdas the lambdas array.
 * @return L2-regularized classifiers.
 */
public GLM[] buildBinaryClassifiers(
        int[] attrs, int[][] indices, double[][] values, double[] y, int maxNumIters,
        double[] lambdas) {
    double[] w = new double[attrs.length];
    double intercept = 0;

    double[] pTrain = new double[y.length];
    double[] rTrain = new double[y.length];
    OptimUtils.computePseudoResidual(pTrain, y, rTrain);

    // Calculate thetas
    double[] theta = new double[values.length];
    for (int i = 0; i < values.length; i++) {
        theta[i] = StatUtils.sumSq(values[i]) / 4;
    }

    GLM[] glms = new GLM[lambdas.length];
    Arrays.sort(lambdas);

    for (int g = 0; g < glms.length; g++) {
        double lambda = lambdas[g];

        // Coordinate gradient descent
        final double tl2 = lambda * y.length;
        for (int iter = 0; iter < maxNumIters; iter++) {
            double prevLoss = GLMOptimUtils.computeRidgeLoss(pTrain, y, w, lambda);

            if (fitIntercept) {
                intercept += OptimUtils.fitIntercept(pTrain, rTrain, y);
            }

            doOnePassBinomial(indices, values, theta, y, tl2, w, pTrain, rTrain);

            double currLoss = GLMOptimUtils.computeRidgeLoss(pTrain, y, w, lambda);
            if (verbose) {
                System.out.println("Iteration " + iter + ": " + currLoss);
            }

            if (OptimUtils.isConverged(prevLoss, currLoss, epsilon)) {
                break;
            }
        }

        glms[g] = GLMOptimUtils.getGLM(attrs, w, intercept, LinkFunction.LOGIT);
    }

    return glms;
}
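/*
 * Likewise, a minimal sketch of what doOnePassBinomial might compute. Assumptions (none
 * confirmed here): y takes values in {0, 1}, pTrain holds the current linear scores, rTrain
 * holds the pseudo-residuals y - sigmoid(pTrain), and theta[j] = sumSq(values[j]) / 4
 * upper-bounds the coordinate-wise curvature of the logistic loss, since sigmoid'(z) <= 1/4.
 * Dividing by this bound instead of the exact Hessian makes each step a safe
 * (majorize-minimize) Newton-style update.
 */
protected void doOnePassBinomial(
        int[][] indices, double[][] values, double[] theta, double[] y, final double tl2,
        double[] w, double[] pTrain, double[] rTrain) {
    for (int j = 0; j < w.length; j++) {
        int[] index = indices[j];
        double[] value = values[j];

        // dot(x_j, r): gradient of the log-likelihood w.r.t. w[j] (sketch)
        double eta = 0;
        for (int i = 0; i < index.length; i++) {
            eta += value[i] * rTrain[index[i]];
        }

        // Bounded Newton step on the L2-penalized logistic objective
        double delta = (eta - tl2 * w[j]) / (theta[j] + tl2);
        w[j] += delta;

        // Refresh scores and pseudo-residuals for the touched instances
        for (int i = 0; i < index.length; i++) {
            int idx = index[i];
            pTrain[idx] += delta * value[i];
            rTrain[idx] = y[idx] - 1.0 / (1.0 + Math.exp(-pTrain[idx]));
        }
    }
}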
/**
 * Builds an L2-regularized regressor on sparse inputs. Each row of the input represents a feature
 * (instead of a data point), i.e., in column-oriented format. This procedure does not assume the
 * data is normalized or centered.
 *
 * @param attrs the attribute list.
 * @param indices the indices.
 * @param values the values.
 * @param y the targets.
 * @param maxNumIters the maximum number of iterations.
 * @param lambda the lambda.
 * @return an L2-regularized regressor.
 */
public GLM buildGaussianRegressor(
        int[] attrs, int[][] indices, double[][] values, double[] y, int maxNumIters,
        double lambda) {
    double[] w = new double[attrs.length];
    double intercept = 0;

    // Initialize residuals
    double[] rTrain = new double[y.length];
    for (int i = 0; i < rTrain.length; i++) {
        rTrain[i] = y[i];
    }

    // Calculate sum of squares
    double[] sq = new double[attrs.length];
    for (int i = 0; i < values.length; i++) {
        sq[i] = StatUtils.sumSq(values[i]);
    }

    // Coordinate descent
    final double tl2 = lambda * y.length;
    for (int iter = 0; iter < maxNumIters; iter++) {
        double prevLoss = GLMOptimUtils.computeRidgeLoss(rTrain, w, lambda);

        if (fitIntercept) {
            intercept += OptimUtils.fitIntercept(rTrain);
        }

        doOnePassGaussian(indices, values, sq, tl2, w, rTrain);

        double currLoss = GLMOptimUtils.computeRidgeLoss(rTrain, w, lambda);
        if (verbose) {
            System.out.println("Iteration " + iter + ": " + currLoss);
        }

        if (OptimUtils.isConverged(prevLoss, currLoss, epsilon)) {
            break;
        }
    }

    return GLMOptimUtils.getGLM(attrs, w, intercept, LinkFunction.IDENTITY);
}
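/*
 * A small usage sketch (illustrative only, not part of the original API). It shows the
 * column-oriented sparse format the builders expect: indices[j] lists the rows where feature j
 * is nonzero and values[j] the corresponding values. The dataset and hyperparameters below are
 * made up for illustration.
 */
protected GLM exampleGaussianRegressorUsage() {
    int[] attrs = {0, 1};                            // two features
    int[][] indices = { {0, 2}, {1, 2} };            // rows where each feature is nonzero
    double[][] values = { {1.5, -0.5}, {2.0, 1.0} }; // matching nonzero values
    double[] y = {1.0, 0.0, 1.0};                    // one target per data point
    // 100 iterations and lambda = 0.1 are arbitrary choices for this sketch
    return buildGaussianRegressor(attrs, indices, values, y, 100, 0.1);
}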
/**
 * Builds L2-regularized classifiers for a sequence of regularization parameter lambdas.
 *
 * @param trainSet the training set.
 * @param isSparse <code>true</code> if the training set is treated as sparse.
 * @param maxNumIters the maximum number of iterations.
 * @param lambdas the lambdas array.
 * @return L2-regularized classifiers.
 */
public GLM[] buildClassifiers(
        Instances trainSet, boolean isSparse, int maxNumIters, double[] lambdas) {
    Attribute classAttribute = trainSet.getTargetAttribute();
    if (classAttribute.getType() != Attribute.Type.NOMINAL) {
        throw new IllegalArgumentException("Class attribute must be nominal.");
    }
    NominalAttribute clazz = (NominalAttribute) classAttribute;
    int numClasses = clazz.getStates().length;

    if (isSparse) {
        SparseDataset sd = getSparseDataset(trainSet, true);
        int[] attrs = sd.attrs;
        int[][] indices = sd.indices;
        double[][] values = sd.values;
        double[] y = new double[sd.y.length];
        double[] cList = sd.cList;

        if (numClasses == 2) {
            for (int i = 0; i < y.length; i++) {
                int label = (int) sd.y[i];
                y[i] = label == 0 ? 1 : 0;
            }

            GLM[] glms = buildBinaryClassifiers(attrs, indices, values, y, maxNumIters, lambdas);
            for (GLM glm : glms) {
                double[] w = glm.w[0];
                for (int j = 0; j < cList.length; j++) {
                    int attIndex = attrs[j];
                    w[attIndex] *= cList[j];
                }
            }
            return glms;
        } else {
            int p = attrs.length == 0 ? 0 : (StatUtils.max(attrs) + 1);
            GLM[] glms = new GLM[lambdas.length];
            for (int i = 0; i < glms.length; i++) {
                glms[i] = new GLM(numClasses, p);
            }

            for (int k = 0; k < numClasses; k++) {
                // One-vs-the-rest
                for (int i = 0; i < y.length; i++) {
                    int label = (int) sd.y[i];
                    y[i] = label == k ? 1 : 0;
                }

                GLM[] binaryClassifiers =
                        buildBinaryClassifiers(attrs, indices, values, y, maxNumIters, lambdas);
                for (int l = 0; l < glms.length; l++) {
                    GLM binaryClassifier = binaryClassifiers[l];
                    GLM glm = glms[l];
                    double[] w = binaryClassifier.w[0];
                    for (int j = 0; j < cList.length; j++) {
                        int attIndex = attrs[j];
                        glm.w[k][attIndex] = w[attIndex] * cList[j];
                    }
                    glm.intercept[k] = binaryClassifier.intercept[0];
                }
            }
            return glms;
        }
    } else {
        DenseDataset dd = getDenseDataset(trainSet, true);
        int[] attrs = dd.attrs;
        double[][] x = dd.x;
        double[] y = new double[dd.y.length];
        double[] cList = dd.cList;

        if (numClasses == 2) {
            for (int i = 0; i < y.length; i++) {
                int label = (int) dd.y[i];
                y[i] = label == 0 ? 1 : 0;
            }

            GLM[] glms = buildBinaryClassifiers(attrs, x, y, maxNumIters, lambdas);
            for (GLM glm : glms) {
                double[] w = glm.w[0];
                for (int j = 0; j < cList.length; j++) {
                    int attIndex = attrs[j];
                    w[attIndex] *= cList[j];
                }
            }
            return glms;
        } else {
            int p = attrs.length == 0 ? 0 : attrs[attrs.length - 1] + 1;
            GLM[] glms = new GLM[lambdas.length];
            for (int i = 0; i < glms.length; i++) {
                glms[i] = new GLM(numClasses, p);
            }

            for (int k = 0; k < numClasses; k++) {
                // One-vs-the-rest
                for (int i = 0; i < y.length; i++) {
                    int label = (int) dd.y[i];
                    y[i] = label == k ? 1 : 0;
                }

                GLM[] binaryClassifiers = buildBinaryClassifiers(attrs, x, y, maxNumIters, lambdas);
                for (int l = 0; l < glms.length; l++) {
                    GLM binaryClassifier = binaryClassifiers[l];
                    GLM glm = glms[l];
                    double[] w = binaryClassifier.w[0];
                    for (int j = 0; j < cList.length; j++) {
                        int attIndex = attrs[j];
                        glm.w[k][attIndex] = w[attIndex] * cList[j];
                    }
                    glm.intercept[k] = binaryClassifier.intercept[0];
                }
            }
            return glms;
        }
    }
}
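/*
 * An illustrative helper (not in the original API) that isolates the one-vs-the-rest encoding
 * used above: class k becomes the positive class (1) and every other class the negative class
 * (0), so the multiclass problem reduces to numClasses binary problems whose weights and
 * intercepts are then copied into row k of the multiclass GLM.
 */
protected static double[] encodeOneVsRest(double[] labels, int k) {
    double[] y = new double[labels.length];
    for (int i = 0; i < labels.length; i++) {
        y[i] = ((int) labels[i]) == k ? 1 : 0;
    }
    return y;
}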