// Makes and scales the matrices V, D, and VT (to avoid ugly decimals) private void makeVDVT(EigenDecomp ed) { V = ed.getV(); D = ed.getD(); VT = ed.getVT(); double ref = 0; for (int i = 0; i < V.getRowDimension(); i++) { ref = 0; for (int j = 0; j < V.getColumnDimension(); j++) { if (V.getEntry(j, i) != 0 && ref == 0) { ref = V.getEntry(j, i); } if (ref != 0) { V.setEntry(j, i, V.getEntry(j, i) / Math.abs(ref)); } } } for (int i = 0; i < VT.getRowDimension(); i++) { ref = 0; for (int j = 0; j < VT.getColumnDimension(); j++) { if (VT.getEntry(j, i) != 0 && ref == 0) { ref = VT.getEntry(j, i); } if (ref != 0) { VT.setEntry(j, i, VT.getEntry(j, i) / Math.abs(ref)); } } } }
/** * Computes the Kendall's Tau rank correlation matrix for the columns of the input matrix. * * @param matrix matrix with columns representing variables to correlate * @return correlation matrix */ public RealMatrix computeCorrelationMatrix(final RealMatrix matrix) { int nVars = matrix.getColumnDimension(); RealMatrix outMatrix = new BlockRealMatrix(nVars, nVars); for (int i = 0; i < nVars; i++) { for (int j = 0; j < i; j++) { double corr = correlation(matrix.getColumn(i), matrix.getColumn(j)); outMatrix.setEntry(i, j, corr); outMatrix.setEntry(j, i, corr); } outMatrix.setEntry(i, i, 1d); } return outMatrix; }
/** * Compute a covariance matrix from a matrix whose columns represent covariates. * * @param matrix input matrix (must have at least one column and two rows) * @param biasCorrected determines whether or not covariance estimates are bias-corrected * @return covariance matrix * @throws MathIllegalArgumentException if the matrix does not contain sufficient data */ protected RealMatrix computeCovarianceMatrix(RealMatrix matrix, boolean biasCorrected) throws MathIllegalArgumentException { int dimension = matrix.getColumnDimension(); Variance variance = new Variance(biasCorrected); RealMatrix outMatrix = new BlockRealMatrix(dimension, dimension); for (int i = 0; i < dimension; i++) { for (int j = 0; j < i; j++) { double cov = covariance(matrix.getColumn(i), matrix.getColumn(j), biasCorrected); outMatrix.setEntry(i, j, cov); outMatrix.setEntry(j, i, cov); } outMatrix.setEntry(i, i, variance.evaluate(matrix.getColumn(i))); } return outMatrix; }
/** * Derives a correlation matrix from a covariance matrix. * * <p>Uses the formula <br> * <code>r(X,Y) = cov(X,Y)/s(X)s(Y)</code> where <code>r(·,·)</code> is the * correlation coefficient and <code>s(·)</code> means standard deviation. * * @param covarianceMatrix the covariance matrix * @return correlation matrix */ public RealMatrix covarianceToCorrelation(RealMatrix covarianceMatrix) { int nVars = covarianceMatrix.getColumnDimension(); RealMatrix outMatrix = new BlockRealMatrix(nVars, nVars); for (int i = 0; i < nVars; i++) { double sigma = FastMath.sqrt(covarianceMatrix.getEntry(i, i)); outMatrix.setEntry(i, i, 1d); for (int j = 0; j < i; j++) { double entry = covarianceMatrix.getEntry(i, j) / (sigma * FastMath.sqrt(covarianceMatrix.getEntry(j, j))); outMatrix.setEntry(i, j, entry); outMatrix.setEntry(j, i, entry); } } return outMatrix; }
/** * Build a rating matrix from the rating data. Each user's ratings are first normalized by * subtracting a baseline score (usually a mean). * * @param userMapping The index mapping of user IDs to column numbers. * @param itemMapping The index mapping of item IDs to row numbers. * @return A matrix storing the <i>normalized</i> user ratings. */ private RealMatrix createRatingMatrix(IdIndexMapping userMapping, IdIndexMapping itemMapping) { final int nusers = userMapping.size(); final int nitems = itemMapping.size(); // Create a matrix with users on rows and items on columns logger.info("creating {} by {} rating matrix", nusers, nitems); RealMatrix matrix = MatrixUtils.createRealMatrix(nusers, nitems); // populate it with data Cursor<UserHistory<Event>> users = userEventDAO.streamEventsByUser(); try { for (UserHistory<Event> user : users) { // Get the row number for this user int u = userMapping.getIndex(user.getUserId()); MutableSparseVector ratings = Ratings.userRatingVector(user.filter(Rating.class)); MutableSparseVector baselines = MutableSparseVector.create(ratings.keySet()); baselineScorer.score(user.getUserId(), baselines); // TODO Populate this user's row with their ratings, minus the baseline scores for (VectorEntry entry : ratings.fast(State.SET)) { long itemid = entry.getKey(); int i = itemMapping.getIndex(itemid); double rating = entry.getValue(); double baseline = baselines.get(itemid); matrix.setEntry(u, i, rating - baseline); } } } finally { users.close(); } return matrix; }
/** * Calculates {@code P(D_n < d)} using method described in [1] and doubles (see above). * * @param d statistic * @return the two-sided probability of {@code P(D_n < d)} * @throws MathArithmeticException if algorithm fails to convert {@code h} to a {@link * org.apache.commons.math3.fraction.BigFraction} in expressing {@code d} as {@code (k - h) / * m} for integer {@code k, m} and {@code 0 <= h < 1}. */ private double roundedK(double d) throws MathArithmeticException { final int k = (int) FastMath.ceil(n * d); final FieldMatrix<BigFraction> HBigFraction = this.createH(d); final int m = HBigFraction.getRowDimension(); /* * Here the rounding part comes into play: use * RealMatrix instead of FieldMatrix<BigFraction> */ final RealMatrix H = new Array2DRowRealMatrix(m, m); for (int i = 0; i < m; ++i) { for (int j = 0; j < m; ++j) { H.setEntry(i, j, HBigFraction.getEntry(i, j).doubleValue()); } } final RealMatrix Hpower = H.power(n); double pFrac = Hpower.getEntry(k - 1, k - 1); for (int i = 1; i <= n; ++i) { pFrac *= (double) i / (double) n; } return pFrac; }
public static RealMatrix getRealMatrixFromJamaMatrix(Matrix m) { final int rDim = m.getRowDimension(); final int cDim = m.getColumnDimension(); RealMatrix rm = new Array2DRowRealMatrix(rDim, cDim); for (int i = 0; i < rDim; i++) { for (int j = 0; j < cDim; j++) { rm.setEntry(i, j, m.get(i, j)); } } return rm; }
@Override public void train(List<Instance> instances) { // ------------------------ initialize rows and columns --------------------- int rows = instances.size(); int columns = 0; // get max columns for (Instance i : instances) { int localColumns = Collections.max(i.getFeatureVector().getFeatureMap().keySet()); if (localColumns > columns) columns = localColumns; } // ------------------------ initialize alpha vector ----------------------- alpha = new ArrayRealVector(rows, 0); // ------------------------ initialize base X and Y for use -------------------------- double[][] X = new double[rows][columns]; double[] Y = new double[rows]; for (int i = 0; i < rows; i++) { Y[i] = ((ClassificationLabel) instances.get(i).getLabel()).getLabelValue(); for (int j = 0; j < columns; j++) { X[i][j] = instances.get(i).getFeatureVector().get(j + 1); } } // ---------------------- gram matrix ------------------- matrixX = new Array2DRowRealMatrix(X); RealMatrix gram = new Array2DRowRealMatrix(rows, rows); for (int i = 0; i < rows; i++) { for (int j = 0; j < rows; j++) { gram.setEntry(i, j, kernelFunction(matrixX.getRowVector(i), matrixX.getRowVector(j))); } } // ---------------------- gradient ascent -------------------------- Sigmoid g = new Sigmoid(); // helper function System.out.println("Training start..."); System.out.println( "Learning rate: " + _learning_rate + " Training times: " + _training_iterations); for (int idx = 0; idx < _training_iterations; idx++) { System.out.println("Training iteration: " + (idx + 1)); for (int k = 0; k < rows; k++) { double gradient_ascent = 0.0; RealVector alpha_gram = gram.operate(alpha); for (int i = 0; i < rows; i++) { double lambda = alpha_gram.getEntry(i); double kernel = gram.getEntry(i, k); gradient_ascent = gradient_ascent + Y[i] * g.value(-lambda) * kernel + (1 - Y[i]) * g.value(lambda) * (-kernel); } alpha.setEntry(k, alpha.getEntry(k) + _learning_rate * gradient_ascent); } } System.out.println("Training done!"); }
private RealMatrix normalizeData(RealMatrix matrix, UserProfileEigenModel model) { RealMatrix normalizedData = new Array2DRowRealMatrix(matrix.getRowDimension(), matrix.getColumnDimension()); if (LOG.isDebugEnabled()) LOG.debug("model statistics size: " + model.statistics().length); for (int i = 0; i < matrix.getRowDimension(); i++) { for (int j = 0; j < matrix.getColumnDimension(); j++) { double value = (matrix.getEntry(i, j) - model.statistics()[j].getMean()) / model.statistics()[j].getStddev(); normalizedData.setEntry(i, j, value); } } return normalizedData; }
private RealMatrix removeZeroColumns(RealMatrix base, List<Integer> zeroColumns) { int adjustedDim = base.getRowDimension() - zeroColumns.size(); if (adjustedDim == 0) return base; RealMatrix adjusted = new Array2DRowRealMatrix(adjustedDim, adjustedDim); int i = 0, j = 0; for (int basei = 0; basei < base.getRowDimension(); basei++) { if (zeroColumns.contains(basei)) continue; for (int basej = 0; basej < base.getColumnDimension(); basej++) { if (zeroColumns.contains(basej)) continue; adjusted.setEntry(i, j++, base.getEntry(basei, basej)); } i++; j = 0; } return adjusted; }
@Override public RealMatrix computeIOCorrelationMatrix(SampleIterator it) { it.reset(); Sample sample = it.next(); int inputDim = sample.getEncodedInput().getDimension(); int outputDim = sample.getEncodedOutput().getDimension(); RealMatrix M = new Array2DRowRealMatrix(inputDim, outputDim); for (int i = 0; i < inputDim; i++) { for (int j = 0; j < outputDim; j++) { it.reset(); M.setEntry(i, j, correlationRatio(it, i, j)); } } return M; }
public double valueAt(double[] param) { double[] sdInv = new double[nVariables]; for (int i = 0; i < nVariables; i++) { R.setEntry(i, i, 1.0 - param[i]); sdInv[i] = 1.0 / Sinv.getEntry(i, i); } DiagonalMatrix diagSdInv = new DiagonalMatrix(sdInv); EigenDecomposition eigen = new EigenDecomposition(R); RealMatrix eigenVectors = eigen.getV().getSubMatrix(0, nVariables - 1, 0, nFactors - 1); double[] ev = new double[nFactors]; for (int i = 0; i < nFactors; i++) { ev[i] = Math.sqrt(eigen.getRealEigenvalue(i)); } DiagonalMatrix evMatrix = new DiagonalMatrix( ev); // USE Apache version of Diagonal matrix when upgrade to version 3.2 RealMatrix LAMBDA = eigenVectors.multiply(evMatrix); RealMatrix SIGMA = (LAMBDA.multiply(LAMBDA.transpose())); double value = 0.0; RealMatrix DIF = R.subtract(SIGMA); for (int i = 0; i < DIF.getRowDimension(); i++) { for (int j = 0; j < DIF.getColumnDimension(); j++) { value = DIF.getEntry(i, j); DIF.setEntry(i, j, Math.pow(value, 2)); } } RealMatrix RESID = diagSdInv.multiply(DIF).multiply(diagSdInv); double sum = 0.0; for (int i = 0; i < RESID.getRowDimension(); i++) { for (int j = 0; j < RESID.getColumnDimension(); j++) { sum += RESID.getEntry(i, j); } } return sum; }
@Test public void testSubtractMatrixNorm() { RealMatrix m, mp; m = MatrixUtils.createRealMatrix(3, 5); mp = MatrixUtils.createRealMatrix(3, 5); m.setEntry(1, 2, 0.1613351); mp.setEntry(1, 2, 0.486433333333); Assert.assertEquals(0.0, Graphulo.nmfError_Client(m, mp), 0.000001); m.setEntry(2, 2, 0.76); mp.setEntry(2, 2, 0.25); Assert.assertEquals(0.0, Graphulo.nmfError_Client(m, mp), 0.000001); m.setEntry(2, 1, 0.4); mp.setEntry(2, 0, 0.9); Assert.assertEquals(2.0 / 3.0, Graphulo.nmfError_Client(m, mp), 0.000001); }
@Override public List<MLCallbackResult> detect( final String user, final String algorithm, UserActivityAggModel userActivity, UserProfileEigenModel aModel) { RealMatrix inputData = userActivity.matrix(); LOG.warn( "EigenBasedAnomalyDetection predictAnomaly called with dimension: " + inputData.getRowDimension() + "x" + inputData.getColumnDimension()); if (aModel == null) { LOG.warn( "nothing to do as the input model does not have required values, returning from evaluating this algorithm.."); return null; } List<MLCallbackResult> mlCallbackResults = new ArrayList<MLCallbackResult>(); RealMatrix normalizedMat = normalizeData(inputData, aModel); UserCommandStatistics[] listStats = aModel.statistics(); int colWithHighVariant = 0; for (int j = 0; j < normalizedMat.getColumnDimension(); j++) { if (listStats[j].isLowVariant() == false) { colWithHighVariant++; } } final Map<String, String> context = new HashMap<String, String>() { { put(UserProfileConstants.USER_TAG, user); put(UserProfileConstants.ALGORITHM_TAG, algorithm); } }; Map<Integer, String> lineNoWithVariantBasedAnomalyDetection = new HashMap<Integer, String>(); for (int i = 0; i < normalizedMat.getRowDimension(); i++) { MLCallbackResult aResult = new MLCallbackResult(); aResult.setContext(context); for (int j = 0; j < normalizedMat.getColumnDimension(); j++) { // LOG.info("mean for j=" + j + " is:" + listStats[j].getMean()); // LOG.info("stddev for j=" + j + " is:" + listStats[j].getStddev()); if (listStats[j].isLowVariant() == true) { // LOG.info(listOfCmds[j] + " is low variant"); if (normalizedMat.getEntry(i, j) > listStats[j].getMean()) { lineNoWithVariantBasedAnomalyDetection.put(i, "lowVariantAnomaly"); aResult.setAnomaly(true); aResult.setTimestamp(userActivity.timestamp()); aResult.setFeature(listStats[j].getCommandName()); aResult.setAlgorithm(UserProfileConstants.EIGEN_DECOMPOSITION_ALGORITHM); List<String> datapoints = new ArrayList<String>(); double[] rowVals = inputData.getRow(i); for (double rowVal : rowVals) datapoints.add(rowVal + ""); aResult.setDatapoints(datapoints); aResult.setId(user); mlCallbackResults.add(aResult); } else { aResult.setAnomaly(false); aResult.setTimestamp(userActivity.timestamp()); mlCallbackResults.add(aResult); } } } // return results; } // LOG.info("results size here: " + results.length); // LOG.info("col with high variant: " + colWithHighVariant); RealMatrix finalMatWithoutLowVariantFeatures = new Array2DRowRealMatrix(normalizedMat.getRowDimension(), colWithHighVariant); // LOG.info("size of final test data: " + finalMatWithoutLowVariantFeatures.getRowDimension() // +"x"+ finalMatWithoutLowVariantFeatures.getColumnDimension()); int finalMatrixRow = 0; int finalMatrixCol = 0; for (int i = 0; i < normalizedMat.getRowDimension(); i++) { for (int j = 0; j < normalizedMat.getColumnDimension(); j++) { if (listStats[j].isLowVariant() == false) { finalMatWithoutLowVariantFeatures.setEntry( finalMatrixRow, finalMatrixCol, normalizedMat.getEntry(i, j)); finalMatrixCol++; } } finalMatrixCol = 0; finalMatrixRow++; } RealVector[] pcs = aModel.principalComponents(); // LOG.info("pc size: " + pcs.getRowDimension() +"x" + pcs.getColumnDimension()); RealMatrix finalInputMatTranspose = finalMatWithoutLowVariantFeatures.transpose(); for (int i = 0; i < finalMatWithoutLowVariantFeatures.getRowDimension(); i++) { if (lineNoWithVariantBasedAnomalyDetection.get(i) == null) { MLCallbackResult result = new MLCallbackResult(); result.setContext(context); for (int sz = 0; sz < pcs.length; sz++) { double[] pc1 = pcs[sz].toArray(); RealMatrix pc1Mat = new Array2DRowRealMatrix(pc1); RealMatrix transposePC1Mat = pc1Mat.transpose(); RealMatrix testData = pc1Mat.multiply(transposePC1Mat).multiply(finalInputMatTranspose.getColumnMatrix(i)); // LOG.info("testData size: " + testData.getRowDimension() + "x" + // testData.getColumnDimension()); RealMatrix testDataTranspose = testData.transpose(); // LOG.info("testData transpose size: " + testDataTranspose.getRowDimension() + "x" + // testDataTranspose.getColumnDimension()); RealVector iRowVector = testDataTranspose.getRowVector(0); // RealVector pc1Vector = transposePC1Mat.getRowVector(sz); RealVector pc1Vector = transposePC1Mat.getRowVector(0); double distanceiRowAndPC1 = iRowVector.getDistance(pc1Vector); // LOG.info("distance from pc sz: " + sz + " " + distanceiRowAndPC1 + " " + // model.getMaxL2Norm().getEntry(sz)); // LOG.info("model.getMaxL2Norm().getEntry(sz):" + model.getMaxL2Norm().getEntry(sz)); if (distanceiRowAndPC1 > aModel.maximumL2Norm().getEntry(sz)) { // LOG.info("distance from pc sz: " + sz + " " + distanceiRowAndPC1 + " " + // model.getMaxL2Norm().getEntry(sz)); result.setAnomaly(true); result.setFeature(aModel.statistics()[sz].getCommandName()); result.setTimestamp(System.currentTimeMillis()); result.setAlgorithm(UserProfileConstants.EIGEN_DECOMPOSITION_ALGORITHM); List<String> datapoints = new ArrayList<String>(); double[] rowVals = inputData.getRow(i); for (double rowVal : rowVals) datapoints.add(rowVal + ""); result.setDatapoints(datapoints); result.setId(user); } } mlCallbackResults.add(result); } } return mlCallbackResults; }