Example #1
  /**
   * Builds L2-regularized regressors for a sequence of regularization parameter lambdas on sparse
   * inputs. Each row of the input represents a feature (instead of a data point), i.e., in
   * column-oriented format. This procedure does not assume the data is normalized or centered.
   *
   * @param attrs the attribute list.
   * @param indices the data point indices of the nonzero entries for each feature.
   * @param values the nonzero values for each feature.
   * @param y the targets.
   * @param maxNumIters the maximum number of iterations.
   * @param lambdas the array of regularization parameters.
   * @return L2-regularized regressors.
   */
  public GLM[] buildGaussianRegressors(
      int[] attrs,
      int[][] indices,
      double[][] values,
      double[] y,
      int maxNumIters,
      double[] lambdas) {
    double[] w = new double[attrs.length];
    double intercept = 0;

    GLM[] glms = new GLM[lambdas.length];
    // Fit the path in ascending lambda order; w and the intercept carry over
    // between fits, warm-starting each subsequent solution.
    Arrays.sort(lambdas);

    // Initialize residuals to the targets (the residual equals y while w = 0)
    double[] rTrain = new double[y.length];
    for (int i = 0; i < rTrain.length; i++) {
      rTrain[i] = y[i];
    }

    // Calculate per-feature sums of squares (denominators of the coordinate updates)
    double[] sq = new double[attrs.length];
    for (int i = 0; i < values.length; i++) {
      sq[i] = StatUtils.sumSq(values[i]);
    }

    // Compute the regularization path
    for (int g = 0; g < glms.length; g++) {
      double lambda = lambdas[g];

      // Coordinate descent
      // Total L2 penalty weight: lambda scaled by the number of training points.
      final double tl2 = lambda * y.length;
      for (int iter = 0; iter < maxNumIters; iter++) {
        double prevLoss = GLMOptimUtils.computeRidgeLoss(rTrain, w, lambda);

        if (fitIntercept) {
          intercept += OptimUtils.fitIntercept(rTrain);
        }

        doOnePassGaussian(indices, values, sq, tl2, w, rTrain);

        double currLoss = GLMOptimUtils.computeRidgeLoss(rTrain, w, lambda);

        if (verbose) {
          System.out.println("Iteration " + iter + ": " + currLoss);
        }

        if (OptimUtils.isConverged(prevLoss, currLoss, epsilon)) {
          break;
        }
      }

      glms[g] = GLMOptimUtils.getGLM(attrs, w, intercept, LinkFunction.IDENTITY);
    }

    return glms;
  }
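
A minimal usage sketch (hypothetical data; learner stands for an instance of the enclosing learner class, a name assumed here). The input is column-oriented: indices[j] lists the data points at which feature j is nonzero, and values[j] holds the corresponding values.

  // Two features over three data points, stored column-wise (hypothetical data).
  int[] attrs = {0, 1};
  int[][] indices = {{0, 2}, {1}};          // feature 0 is nonzero at points 0 and 2
  double[][] values = {{1.5, -0.5}, {2.0}}; // feature 1 is nonzero at point 1
  double[] y = {1.0, 0.0, -1.0};
  double[] lambdas = {10.0, 1.0, 0.1};

  GLM[] path = learner.buildGaussianRegressors(attrs, indices, values, y, 100, lambdas);
  // lambdas is sorted in place, so path[i] corresponds to the i-th smallest lambda.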
Example #2
  /**
   * Builds L2-regularized binary classifiers for a sequence of regularization parameter lambdas on
   * sparse inputs. Each row of the input represents a feature (instead of a data point), i.e., in
   * column-oriented format. This procedure does not assume the data is normalized or centered.
   *
   * @param attrs the attribute list.
   * @param indices the data point indices of the nonzero entries for each feature.
   * @param values the nonzero values for each feature.
   * @param y the binary (0/1) targets.
   * @param maxNumIters the maximum number of iterations.
   * @param lambdas the array of regularization parameters.
   * @return L2-regularized classifiers.
   */
  public GLM[] buildBinaryClassifiers(
      int[] attrs,
      int[][] indices,
      double[][] values,
      double[] y,
      int maxNumIters,
      double[] lambdas) {
    double[] w = new double[attrs.length];
    double intercept = 0;

    // pTrain holds the current predictions; rTrain the pseudo-residuals computed
    // from the predictions and the targets.
    double[] pTrain = new double[y.length];
    double[] rTrain = new double[y.length];
    OptimUtils.computePseudoResidual(pTrain, y, rTrain);

    // Calculate theta's: per-feature curvature bounds for the logistic loss
    // (sum of squares / 4, since p * (1 - p) <= 1/4)
    double[] theta = new double[values.length];
    for (int i = 0; i < values.length; i++) {
      theta[i] = StatUtils.sumSq(values[i]) / 4;
    }

    GLM[] glms = new GLM[lambdas.length];
    Arrays.sort(lambdas);

    for (int g = 0; g < glms.length; g++) {
      double lambda = lambdas[g];

      // Coordinate gradient descent
      final double tl2 = lambda * y.length;
      for (int iter = 0; iter < maxNumIters; iter++) {
        double prevLoss = GLMOptimUtils.computeRidgeLoss(pTrain, y, w, lambda);

        if (fitIntercept) {
          intercept += OptimUtils.fitIntercept(pTrain, rTrain, y);
        }

        doOnePassBinomial(indices, values, theta, y, tl2, w, pTrain, rTrain);

        double currLoss = GLMOptimUtils.computeRidgeLoss(pTrain, y, w, lambda);

        if (verbose) {
          System.out.println("Iteration " + iter + ": " + currLoss);
        }

        if (OptimUtils.isConverged(prevLoss, currLoss, epsilon)) {
          break;
        }
      }

      glms[g] = GLMOptimUtils.getGLM(attrs, w, intercept, LinkFunction.LOGIT);
    }

    return glms;
  }
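
A minimal usage sketch, reusing the hypothetical column-oriented arrays from the sketch under Example #1 (learner is again an assumed instance name):

  double[] yBinary = {1, 0, 1}; // 0/1 targets
  double[] lambdas01 = {1.0, 0.1};

  GLM[] classifiers =
      learner.buildBinaryClassifiers(attrs, indices, values, yBinary, 100, lambdas01);
  // Each returned GLM is constructed with LinkFunction.LOGIT, so it models log-odds.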
Example #3
  /**
   * Builds an L2-regularized regressor on sparse inputs. Each row of the input represents a feature
   * (instead of a data point), i.e., in column-oriented format. This procedure does not assume the
   * data is normalized or centered.
   *
   * @param attrs the attribute list.
   * @param indices the data point indices of the nonzero entries for each feature.
   * @param values the nonzero values for each feature.
   * @param y the targets.
   * @param maxNumIters the maximum number of iterations.
   * @param lambda the regularization parameter.
   * @return an L2-regularized regressor.
   */
  public GLM buildGaussianRegressor(
      int[] attrs, int[][] indices, double[][] values, double[] y, int maxNumIters, double lambda) {
    double[] w = new double[attrs.length];
    double intercept = 0;

    // Initialize residuals
    double[] rTrain = new double[y.length];
    for (int i = 0; i < rTrain.length; i++) {
      rTrain[i] = y[i];
    }

    // Calculate sum of squares
    double[] sq = new double[attrs.length];
    for (int i = 0; i < values.length; i++) {
      sq[i] = StatUtils.sumSq(values[i]);
    }

    // Coordinate descent
    final double tl2 = lambda * y.length;
    for (int iter = 0; iter < maxNumIters; iter++) {
      double prevLoss = GLMOptimUtils.computeRidgeLoss(rTrain, w, lambda);

      if (fitIntercept) {
        intercept += OptimUtils.fitIntercept(rTrain);
      }

      doOnePassGaussian(indices, values, sq, tl2, w, rTrain);

      double currLoss = GLMOptimUtils.computeRidgeLoss(rTrain, w, lambda);

      if (verbose) {
        System.out.println("Iteration " + iter + ": " + currLoss);
      }

      if (OptimUtils.isConverged(prevLoss, currLoss, epsilon)) {
        break;
      }
    }

    return GLMOptimUtils.getGLM(attrs, w, intercept, LinkFunction.IDENTITY);
  }
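
With a single lambda this should behave like a one-element path; a sketch of the relationship, read off the code above rather than a documented guarantee:

  GLM glm = learner.buildGaussianRegressor(attrs, indices, values, y, 100, 0.5);
  // Effectively the same fit as:
  // learner.buildGaussianRegressors(attrs, indices, values, y, 100, new double[] {0.5})[0]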
Example #4
  /**
   * Builds L2-regularized classifiers for a sequence of regularization parameter lambdas.
   *
   * @param trainSet the training set.
   * @param isSparse <code>true</code> if the training set is treated as sparse.
   * @param maxNumIters the maximum number of iterations.
   * @param lambdas the array of regularization parameters.
   * @return L2-regularized binary classifiers.
   */
  public GLM[] buildClassifiers(
      Instances trainSet, boolean isSparse, int maxNumIters, double[] lambdas) {
    Attribute classAttribute = trainSet.getTargetAttribute();
    if (classAttribute.getType() != Attribute.Type.NOMINAL) {
      throw new IllegalArgumentException("Class attribute must be nominal.");
    }
    NominalAttribute clazz = (NominalAttribute) classAttribute;
    int numClasses = clazz.getStates().length;

    if (isSparse) {
      SparseDataset sd = getSparseDataset(trainSet, true);
      int[] attrs = sd.attrs;
      int[][] indices = sd.indices;
      double[][] values = sd.values;
      double[] y = new double[sd.y.length];
      double[] cList = sd.cList;

      if (numClasses == 2) {
        // Treat class 0 as the positive class.
        for (int i = 0; i < y.length; i++) {
          int label = (int) sd.y[i];
          y[i] = label == 0 ? 1 : 0;
        }

        GLM[] glms = buildBinaryClassifiers(attrs, indices, values, y, maxNumIters, lambdas);

        // Rescale the learned weights back to the original feature scale; cList
        // appears to hold the per-column scaling constants from getSparseDataset.
        for (GLM glm : glms) {
          double[] w = glm.w[0];
          for (int j = 0; j < cList.length; j++) {
            int attIndex = attrs[j];
            w[attIndex] *= cList[j];
          }
        }

        return glms;
      } else {
        int p = attrs.length == 0 ? 0 : (StatUtils.max(attrs) + 1);
        GLM[] glms = new GLM[lambdas.length];
        for (int i = 0; i < glms.length; i++) {
          glms[i] = new GLM(numClasses, p);
        }

        for (int k = 0; k < numClasses; k++) {
          // One-vs-the-rest
          for (int i = 0; i < y.length; i++) {
            int label = (int) sd.y[i];
            y[i] = label == k ? 1 : 0;
          }

          GLM[] binaryClassifiers =
              buildBinaryClassifiers(attrs, indices, values, y, maxNumIters, lambdas);

          for (int l = 0; l < glms.length; l++) {
            GLM binaryClassifier = binaryClassifiers[l];
            GLM glm = glms[l];
            double[] w = binaryClassifier.w[0];
            for (int j = 0; j < cList.length; j++) {
              int attIndex = attrs[j];
              glm.w[k][attIndex] = w[attIndex] * cList[j];
            }
            glm.intercept[k] = binaryClassifier.intercept[0];
          }
        }

        return glms;
      }
    } else {
      DenseDataset dd = getDenseDataset(trainSet, true);
      int[] attrs = dd.attrs;
      double[][] x = dd.x;
      double[] y = new double[dd.y.length];
      double[] cList = dd.cList;

      if (numClasses == 2) {
        for (int i = 0; i < y.length; i++) {
          int label = (int) dd.y[i];
          y[i] = label == 0 ? 1 : 0;
        }

        GLM[] glms = buildBinaryClassifiers(attrs, x, y, maxNumIters, lambdas);

        for (GLM glm : glms) {
          double[] w = glm.w[0];
          for (int j = 0; j < cList.length; j++) {
            int attIndex = attrs[j];
            w[attIndex] *= cList[j];
          }
        }

        return glms;
      } else {
        int p = attrs.length == 0 ? 0 : attrs[attrs.length - 1] + 1;
        GLM[] glms = new GLM[lambdas.length];
        for (int i = 0; i < glms.length; i++) {
          glms[i] = new GLM(numClasses, p);
        }

        for (int k = 0; k < numClasses; k++) {
          // One-vs-the-rest
          for (int i = 0; i < y.length; i++) {
            int label = (int) dd.y[i];
            y[i] = label == k ? 1 : 0;
          }

          GLM[] binaryClassifiers = buildBinaryClassifiers(attrs, x, y, maxNumIters, lambdas);

          for (int l = 0; l < glms.length; l++) {
            GLM binaryClassifier = binaryClassifiers[l];
            GLM glm = glms[l];
            double[] w = binaryClassifier.w[0];
            for (int j = 0; j < cList.length; j++) {
              int attIndex = attrs[j];
              glm.w[k][attIndex] = w[attIndex] * cList[j];
            }
            glm.intercept[k] = binaryClassifier.intercept[0];
          }
        }

        return glms;
      }
    }
  }
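
The multiclass branch reduces a K-class problem to K one-vs-the-rest binary problems and copies each binary fit into row k of the shared GLM. A minimal illustration of the label binarization used above (hypothetical labels):

  // Binarize labels against class k = 2: {0, 2, 1, 2} becomes {0, 1, 0, 1}.
  double[] rawLabels = {0, 2, 1, 2};
  int k = 2;
  double[] yk = new double[rawLabels.length];
  for (int i = 0; i < rawLabels.length; i++) {
    yk[i] = ((int) rawLabels[i]) == k ? 1 : 0;
  }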