public static class PCAParameters extends Model.Parameters {
  public DataInfo.TransformType _transform = DataInfo.TransformType.NONE; // Data transformation
  public Method _pca_method = Method.GramSVD; // Method for computing PCA
  public int _k = 1;                          // Number of principal components
  public int _max_iterations = 1000;          // Max iterations
  public long _seed = System.nanoTime();      // RNG seed
  // public Key<Frame> _loading_key;
  public String _loading_name;                // Loading only generated if pca_method = Power
  public boolean _keep_loading = true;
  public boolean _use_all_factor_levels = false; // When expanding categoricals, should first level be kept or dropped?
  public boolean _compute_metrics = true;        // Should a second pass be made through data to compute metrics?

  public enum Method {
    GramSVD, Power, GLRM
  }
}
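// Usage sketch (hypothetical, for illustration only): a minimal PCA configuration,
// assuming the usual h2o-3 pattern of filling a Parameters object before handing it
// to the algorithm's builder; `trainFrame` is an assumed, pre-parsed Frame.
//
//   PCAParameters parms = new PCAParameters();
//   parms._train = trainFrame._key;
//   parms._k = 5;                                        // keep 5 principal components
//   parms._transform = DataInfo.TransformType.STANDARDIZE;
//   parms._pca_method = PCAParameters.Method.GramSVD;    // exact SVD of the Gram matrix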
public static class GLRMParameters extends Model.Parameters {
  public String algoName() { return "GLRM"; }
  public String fullName() { return "Generalized Low Rank Modeling"; }
  public String javaName() { return GLRMModel.class.getName(); }

  public DataInfo.TransformType _transform = DataInfo.TransformType.NONE; // Data transformation (demean to compare with PCA)
  public int _k = 1;                                               // Rank of resulting XY matrix
  public GLRM.Initialization _init = GLRM.Initialization.PlusPlus; // Initialization of Y matrix
  public SVDParameters.Method _svd_method = SVDParameters.Method.Randomized; // SVD initialization method (for _init = SVD)
  public Key<Frame> _user_y;            // User-specified Y matrix (for _init = User)
  public Key<Frame> _user_x;            // User-specified X matrix (for _init = User)
  public boolean _expand_user_y = true; // Should categorical columns in _user_y be expanded via one-hot encoding? (for _init = User)

  // Loss functions
  public Loss _loss = Loss.Quadratic;         // Default loss function for numeric cols
  public Loss _multi_loss = Loss.Categorical; // Default loss function for categorical cols
  public int _period = 1;                     // Length of the period when _loss = Periodic
  public Loss[] _loss_by_col;                 // Override default loss function for specific columns
  public int[] _loss_by_col_idx;

  // Regularization functions
  public Regularizer _regularization_x = Regularizer.None; // Regularization function for X matrix
  public Regularizer _regularization_y = Regularizer.None; // Regularization function for Y matrix
  public double _gamma_x = 0;                 // Regularization weight on X matrix
  public double _gamma_y = 0;                 // Regularization weight on Y matrix

  // Optional parameters
  public int _max_iterations = 1000;             // Max iterations
  public int _max_updates = 2 * _max_iterations; // Max number of updates (X or Y)
  public double _init_step_size = 1.0;           // Initial step size (decrease until we hit min_step_size)
  public double _min_step_size = 1e-4;           // Min step size
  public long _seed = System.nanoTime();         // RNG seed

  @Override
  protected long nFoldSeed() { return _seed; }

  // public Key<Frame> _representation_key; // Key to save X matrix
  public String _representation_name;
  public boolean _recover_svd = false;     // Recover singular values and eigenvectors of XY at the end?
  public boolean _impute_original = false; // Reconstruct original training data by reversing _transform?
  public boolean _verbose = true;          // Log when objective increases each iteration?
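  // Usage sketch (hypothetical, for illustration only): per the mapping documented
  // on the Regularizer enum below, quadratic loss with non-negative regularization
  // on both factors turns GLRM into non-negative matrix factorization (NNMF).
  // `trainFrame` is an assumed, pre-parsed Frame.
  //
  //   GLRMParameters parms = new GLRMParameters();
  //   parms._train = trainFrame._key;
  //   parms._k = 10;
  //   parms._loss = Loss.Quadratic;
  //   parms._regularization_x = Regularizer.NonNegative;
  //   parms._regularization_y = Regularizer.NonNegative;
  //   parms._gamma_x = parms._gamma_y = 0.1;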
  // Quadratic -> Gaussian distribution ~ exp(-(a-u)^2)
  // Absolute  -> Laplace distribution  ~ exp(-|a-u|)
  public enum Loss {
    Quadratic(true), Absolute(true), Huber(true), Poisson(true), Periodic(true), // One-dimensional loss (numeric)
    Logistic(true, true), Hinge(true, true), // Boolean loss (categorical)
    Categorical(false), Ordinal(false);      // Multi-dimensional loss (categorical)

    private boolean forNumeric;
    private boolean forBinary;

    Loss(boolean forNumeric) { this(forNumeric, false); }
    Loss(boolean forNumeric, boolean forBinary) {
      this.forNumeric = forNumeric;
      this.forBinary = forBinary;
    }

    public boolean isForNumeric() { return forNumeric; }
    public boolean isForCategorical() { return !forNumeric; }
    public boolean isForBinary() { return forBinary; }
  }

  // Non-negative matrix factorization (NNMF): r_x = r_y = NonNegative
  // Orthogonal NNMF: r_x = OneSparse, r_y = NonNegative
  // K-means clustering: r_x = UnitOneSparse, r_y = 0 (\gamma_y = 0)
  // Quadratic mixture: r_x = Simplex, r_y = 0 (\gamma_y = 0)
  public enum Regularizer {
    None, Quadratic, L2, L1, NonNegative, OneSparse, UnitOneSparse, Simplex
  }

  // Check if all elements of _loss_by_col are equal to a specific loss function
  private boolean allLossEquals(Loss loss) {
    if (null == _loss_by_col) return false;
    for (Loss l : _loss_by_col) {
      if (l != loss) return false;
    }
    return true;
  }

  // Closed form solution only if quadratic loss, no regularization or quadratic
  // regularization (same for X and Y), and no missing values
  public final boolean hasClosedForm() {
    long na_cnt = 0;
    Frame train = _train.get();
    for (int i = 0; i < train.numCols(); i++)
      na_cnt += train.vec(i).naCnt();
    return hasClosedForm(na_cnt);
  }

  public final boolean hasClosedForm(long na_cnt) {
    boolean loss_quad = (null == _loss_by_col && _loss == Loss.Quadratic)
        || (null != _loss_by_col && allLossEquals(Loss.Quadratic)
            && (_loss_by_col.length == _train.get().numCols() || _loss == Loss.Quadratic));
    return na_cnt == 0
        && loss_quad
        && (_gamma_x == 0 || _regularization_x == Regularizer.None || _regularization_x == Regularizer.Quadratic)
        && (_gamma_y == 0 || _regularization_y == Regularizer.None || _regularization_y == Regularizer.Quadratic);
  }

  // L(u,a): Loss function
  public final double loss(double u, double a) { return loss(u, a, _loss); }

  public final double loss(double u, double a, Loss loss) {
    assert loss.isForNumeric() : "Loss function " + loss + " not applicable to numerics";
    switch (loss) {
      case Quadratic:
        return (u - a) * (u - a);
      case Absolute:
        return Math.abs(u - a);
      case Huber:
        return Math.abs(u - a) <= 1 ? 0.5 * (u - a) * (u - a) : Math.abs(u - a) - 0.5;
      case Poisson:
        assert a >= 0 : "Poisson loss L(u,a) requires variable a >= 0";
        return Math.exp(u) + (a == 0 ? 0 : -a * u + a * Math.log(a) - a); // Since \lim_{a->0} a*log(a) = 0
      case Hinge:
        // return Math.max(1 - a * u, 0);
        return Math.max(1 - (a == 0 ? -u : u), 0); // Booleans are coded {0,1} instead of {-1,1}
      case Logistic:
        // return Math.log(1 + Math.exp(-a * u));
        return Math.log(1 + Math.exp(a == 0 ? u : -u)); // Booleans are coded {0,1} instead of {-1,1}
      case Periodic:
        return 1 - Math.cos((a - u) * (2 * Math.PI) / _period);
      default:
        throw new RuntimeException("Unknown loss function " + loss);
    }
  }
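  // Worked examples (derived directly from the cases above, for illustration):
  //   loss(2.0, 1.0, Quadratic) = (2-1)^2                  = 1.0
  //   loss(2.0, 1.0, Huber)     = |2-1| <= 1, so 0.5*(2-1)^2 = 0.5
  //   loss(3.0, 1.0, Huber)     = |3-1| > 1,  so |3-1| - 0.5  = 1.5
  //   loss(0.5, 1.0, Hinge)     = max(1 - 0.5, 0)          = 0.5  (a=1 keeps u's sign)
  //   loss(0.5, 0.0, Hinge)     = max(1 + 0.5, 0)          = 1.5  (a=0 flips u's sign)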
  // \grad_u L(u,a): Gradient of loss function with respect to u
  public final double lgrad(double u, double a) { return lgrad(u, a, _loss); }

  public final double lgrad(double u, double a, Loss loss) {
    assert loss.isForNumeric() : "Loss function " + loss + " not applicable to numerics";
    switch (loss) {
      case Quadratic:
        return 2 * (u - a);
      case Absolute:
        return Math.signum(u - a);
      case Huber:
        return Math.abs(u - a) <= 1 ? u - a : Math.signum(u - a);
      case Poisson:
        assert a >= 0 : "Poisson loss L(u,a) requires variable a >= 0";
        return Math.exp(u) - a;
      case Hinge:
        // return a * u <= 1 ? -a : 0;
        return a == 0 ? (-u <= 1 ? 1 : 0) : (u <= 1 ? -1 : 0); // Booleans are coded as {0,1} instead of {-1,1}
      case Logistic:
        // return -a / (1 + Math.exp(a * u));
        return a == 0 ? 1 / (1 + Math.exp(-u)) : -1 / (1 + Math.exp(u)); // Booleans are coded as {0,1} instead of {-1,1}
      case Periodic:
        return ((2 * Math.PI) / _period) * Math.sin((a - u) * (2 * Math.PI) / _period);
      default:
        throw new RuntimeException("Unknown loss function " + loss);
    }
  }

  // L(u,a): Multidimensional loss function
  public final double mloss(double[] u, int a) { return mloss(u, a, _multi_loss); }

  public static double mloss(double[] u, int a, Loss multi_loss) {
    assert multi_loss.isForCategorical() : "Loss function " + multi_loss + " not applicable to categoricals";
    if (a < 0 || a > u.length - 1)
      throw new IllegalArgumentException("Index must be between 0 and " + (u.length - 1));
    double sum = 0;
    switch (multi_loss) {
      case Categorical:
        for (int i = 0; i < u.length; i++) sum += Math.max(1 + u[i], 0);
        sum += Math.max(1 - u[a], 0) - Math.max(1 + u[a], 0);
        return sum;
      case Ordinal:
        for (int i = 0; i < u.length - 1; i++) sum += Math.max(a > i ? 1 - u[i] : 1, 0);
        return sum;
      default:
        throw new RuntimeException("Unknown multidimensional loss function " + multi_loss);
    }
  }

  // \grad_u L(u,a): Gradient of multidimensional loss function with respect to u
  public final double[] mlgrad(double[] u, int a) { return mlgrad(u, a, _multi_loss); }

  public static double[] mlgrad(double[] u, int a, Loss multi_loss) {
    assert multi_loss.isForCategorical() : "Loss function " + multi_loss + " not applicable to categoricals";
    if (a < 0 || a > u.length - 1)
      throw new IllegalArgumentException("Index must be between 0 and " + (u.length - 1));
    double[] grad = new double[u.length];
    switch (multi_loss) {
      case Categorical:
        for (int i = 0; i < u.length; i++) grad[i] = (1 + u[i] > 0) ? 1 : 0;
        grad[a] = (1 - u[a] > 0) ? -1 : 0;
        return grad;
      case Ordinal:
        for (int i = 0; i < u.length - 1; i++) grad[i] = (a > i && 1 - u[i] > 0) ? -1 : 0;
        return grad;
      default:
        throw new RuntimeException("Unknown multidimensional loss function " + multi_loss);
    }
  }
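  // Worked example (derived from the Categorical case above, for illustration):
  //   mloss({0.5, -0.2, 0.1}, 1, Categorical)
  //     = [max(1.5,0) + max(0.8,0) + max(1.1,0)]     // hinge on every class
  //       + max(1 - (-0.2), 0) - max(1 + (-0.2), 0)  // swap in 1-u[a] for the true class
  //     = 3.4 + 1.2 - 0.8 = 3.8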
  // r_i(x_i), r_j(y_j): Regularization function for single row x_i or column y_j
  public final double regularize_x(double[] u) { return regularize(u, _regularization_x); }
  public final double regularize_y(double[] u) { return regularize(u, _regularization_y); }

  public final double regularize(double[] u, Regularizer regularization) {
    if (u == null) return 0;
    double ureg = 0;
    switch (regularization) {
      case None:
        return 0;
      case Quadratic:
        for (int i = 0; i < u.length; i++) ureg += u[i] * u[i];
        return ureg;
      case L2:
        for (int i = 0; i < u.length; i++) ureg += u[i] * u[i];
        return Math.sqrt(ureg);
      case L1:
        for (int i = 0; i < u.length; i++) ureg += Math.abs(u[i]);
        return ureg;
      case NonNegative:
        for (int i = 0; i < u.length; i++) {
          if (u[i] < 0) return Double.POSITIVE_INFINITY;
        }
        return 0;
      case OneSparse:
        int card = 0;
        for (int i = 0; i < u.length; i++) {
          if (u[i] < 0) return Double.POSITIVE_INFINITY;
          else if (u[i] > 0) card++;
        }
        return card == 1 ? 0 : Double.POSITIVE_INFINITY;
      case UnitOneSparse:
        int ones = 0, zeros = 0;
        for (int i = 0; i < u.length; i++) {
          if (u[i] == 1) ones++;
          else if (u[i] == 0) zeros++;
          else return Double.POSITIVE_INFINITY;
        }
        return ones == 1 && zeros == u.length - 1 ? 0 : Double.POSITIVE_INFINITY;
      case Simplex:
        double sum = 0, absum = 0;
        for (int i = 0; i < u.length; i++) {
          if (u[i] < 0) return Double.POSITIVE_INFINITY;
          sum += u[i];
          absum += Math.abs(u[i]);
        }
        return MathUtils.equalsWithinRecSumErr(sum, 1.0, u.length, absum) ? 0 : Double.POSITIVE_INFINITY;
      default:
        throw new RuntimeException("Unknown regularization function " + regularization);
    }
  }

  // \sum_i r_i(x_i): Sum of regularization function for all entries of X
  public final double regularize_x(double[][] u) { return regularize(u, _regularization_x); }
  public final double regularize_y(double[][] u) { return regularize(u, _regularization_y); }

  public final double regularize(double[][] u, Regularizer regularization) {
    if (u == null || regularization == Regularizer.None) return 0;
    double ureg = 0;
    for (int i = 0; i < u.length; i++) {
      ureg += regularize(u[i], regularization);
      if (Double.isInfinite(ureg)) return ureg;
    }
    return ureg;
  }
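  // Worked examples (derived from the cases above, for illustration):
  //   regularize({1, -2, 3},  Regularizer.L1)            = 1 + 2 + 3 = 6
  //   regularize({3, 4},      Regularizer.L2)            = sqrt(9 + 16) = 5
  //   regularize({0, 1, 0},   Regularizer.UnitOneSparse) = 0          (valid indicator vector)
  //   regularize({0, 0.5, 0}, Regularizer.UnitOneSparse) = +Infinity  (0.5 is neither 0 nor 1)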
  // \prox_{\alpha_k*r}(u): Proximal gradient of (step size) * (regularization function) evaluated at vector u
  public final double[] rproxgrad_x(double[] u, double alpha, Random rand) {
    return rproxgrad(u, alpha, _gamma_x, _regularization_x, rand);
  }
  public final double[] rproxgrad_y(double[] u, double alpha, Random rand) {
    return rproxgrad(u, alpha, _gamma_y, _regularization_y, rand);
  }
  // public final double[] rproxgrad_x(double[] u, double alpha) { return rproxgrad(u, alpha, _gamma_x, _regularization_x, RandomUtils.getRNG(_seed)); }
  // public final double[] rproxgrad_y(double[] u, double alpha) { return rproxgrad(u, alpha, _gamma_y, _regularization_y, RandomUtils.getRNG(_seed)); }

  static double[] rproxgrad(double[] u, double alpha, double gamma, Regularizer regularization, Random rand) {
    if (u == null || alpha == 0 || gamma == 0) return u;
    double[] v = new double[u.length];
    switch (regularization) {
      case None:
        return u;
      case Quadratic:
        for (int i = 0; i < u.length; i++) v[i] = u[i] / (1 + 2 * alpha * gamma);
        return v;
      case L2:
        // Proof uses Moreau decomposition; see section 6.5.1 of Parikh and Boyd
        // https://web.stanford.edu/~boyd/papers/pdf/prox_algs.pdf
        double weight = 1 - alpha * gamma / ArrayUtils.l2norm(u);
        if (weight < 0) return v; // Zero vector
        for (int i = 0; i < u.length; i++) v[i] = weight * u[i];
        return v;
      case L1:
        for (int i = 0; i < u.length; i++)
          v[i] = Math.max(u[i] - alpha * gamma, 0) + Math.min(u[i] + alpha * gamma, 0);
        return v;
      case NonNegative:
        for (int i = 0; i < u.length; i++) v[i] = Math.max(u[i], 0);
        return v;
      case OneSparse:
        int idx = ArrayUtils.maxIndex(u, rand);
        v[idx] = u[idx] > 0 ? u[idx] : 1e-6;
        return v;
      case UnitOneSparse:
        idx = ArrayUtils.maxIndex(u, rand);
        v[idx] = 1;
        return v;
      case Simplex:
        // Proximal gradient algorithm by Chen and Ye in http://arxiv.org/pdf/1101.6081v2.pdf
        // 1) Sort input vector u in ascending order: u[1] <= ... <= u[n]
        int n = u.length;
        int[] idxs = new int[n];
        for (int i = 0; i < n; i++) idxs[i] = i;
        ArrayUtils.sort(idxs, u);

        // 2) Calculate cumulative sum of u in descending order
        //    cumsum(u) = (..., u[n-2]+u[n-1]+u[n], u[n-1]+u[n], u[n])
        double[] ucsum = new double[n];
        ucsum[n - 1] = u[idxs[n - 1]];
        for (int i = n - 2; i >= 0; i--) ucsum[i] = ucsum[i + 1] + u[idxs[i]];

        // 3) Let t_i = (\sum_{j=i+1}^n u[j] - 1)/(n - i)
        //    For i = n-1,...,1, set optimal t* to first t_i >= u[i]
        double t = (ucsum[0] - 1) / n; // Default t* = (\sum_{j=1}^n u[j] - 1)/n
        for (int i = n - 1; i >= 1; i--) {
          double tmp = (ucsum[i] - 1) / (n - i);
          if (tmp >= u[idxs[i - 1]]) {
            t = tmp;
            break;
          }
        }

        // 4) Return max(u - t*, 0) as projection of u onto simplex
        double[] x = new double[u.length];
        for (int i = 0; i < u.length; i++) x[i] = Math.max(u[i] - t, 0);
        return x;
      default:
        throw new RuntimeException("Unknown regularization function " + regularization);
    }
  }

  // Project X,Y matrices into appropriate subspace so regularizer is finite. Used during initialization.
  public final double[] project_x(double[] u, Random rand) { return project(u, _regularization_x, rand); }
  public final double[] project_y(double[] u, Random rand) { return project(u, _regularization_y, rand); }

  public final double[] project(double[] u, Regularizer regularization, Random rand) {
    if (u == null) return u;
    switch (regularization) {
      // Domain is all real numbers
      case None:
      case Quadratic:
      case L2:
      case L1:
        return u;
      // Proximal operator of indicator function for a set C is (Euclidean) projection onto C
      case NonNegative:
      case OneSparse:
      case UnitOneSparse:
        return rproxgrad(u, 1, 1, regularization, rand);
      case Simplex:
        // Check whether u is already inside the simplex before projecting, since the projection algo is complicated
        double reg = regularize(u, regularization);
        if (reg == 0) return u;
        return rproxgrad(u, 1, 1, regularization, rand);
      default:
        throw new RuntimeException("Unknown regularization function " + regularization);
    }
  }

  // \hat A_{i,j} = \argmin_a L_{i,j}(x_i y_j, a): Data imputation for real numeric values
  public final double impute(double u) { return impute(u, _loss); }

  public static double impute(double u, Loss loss) {
    assert loss.isForNumeric() : "Loss function " + loss + " not applicable to numerics";
    switch (loss) {
      case Quadratic:
      case Absolute:
      case Huber:
      case Periodic:
        return u;
      case Poisson:
        return Math.exp(u) - 1;
      case Hinge:
      case Logistic:
        return u > 0 ? 1 : 0; // Booleans are coded as {0,1} instead of {-1,1}
      default:
        throw new RuntimeException("Unknown loss function " + loss);
    }
  }
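  // Worked examples (derived from the cases above, for illustration):
  //   impute(0.3, Logistic)   = 1    (0.3 > 0, so the more likely boolean level)
  //   impute(-1.2, Quadratic) = -1.2 (quadratic loss is minimized at a = u)
  // And for the L1 proximal step in rproxgrad above, with alpha*gamma = 0.5,
  // each entry is soft-thresholded toward zero:
  //   {2.0, 0.3, -1.0}  ->  {1.5, 0.0, -0.5}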
  // \hat A_{i,j} = \argmin_a L_{i,j}(x_i y_j, a): Data imputation for categorical values {0,1,2,...}
  // TODO: Is there a faster way to find the loss minimizer?
  public final int mimpute(double[] u) { return mimpute(u, _multi_loss); }

  public static int mimpute(double[] u, Loss multi_loss) {
    assert multi_loss.isForCategorical() : "Loss function " + multi_loss + " not applicable to categoricals";
    switch (multi_loss) {
      case Categorical:
      case Ordinal:
        double[] cand = new double[u.length];
        for (int a = 0; a < cand.length; a++) cand[a] = mloss(u, a, multi_loss);
        return ArrayUtils.minIndex(cand);
      default:
        throw new RuntimeException("Unknown multidimensional loss function " + multi_loss);
    }
  }
}
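// Worked example (derived from mimpute above, for illustration): mimpute scans every
// level and keeps the one with the smallest multidimensional loss, so for
//   u = {0.5, -0.2, 0.1}
// it evaluates mloss(u, a, Categorical) for a = 0, 1, 2 (giving 2.4, 3.8, 3.2) and
// returns the argmin a = 0, the level with the largest u[a].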