@Test
public void testInverse2x2() {
  double tol = 0.001;
  Map<Key, Value> input = new TreeMap<>(TestUtil.COMPARE_KEY_TO_COLQ);
  input.put(new Key("1", "", "1"), new Value("4".getBytes()));
  input.put(new Key("1", "", "2"), new Value("3".getBytes()));
  input.put(new Key("2", "", "1"), new Value("1".getBytes()));
  input.put(new Key("2", "", "2"), new Value("1".getBytes()));
  Map<Key, Value> expect = new TreeMap<>(TestUtil.COMPARE_KEY_TO_COLQ);
  expect.put(new Key("1", "", "1"), new Value("1 ".getBytes()));
  expect.put(new Key("1", "", "2"), new Value("-3".getBytes()));
  expect.put(new Key("2", "", "1"), new Value("-1".getBytes()));
  expect.put(new Key("2", "", "2"), new Value("4 ".getBytes()));

  RealMatrix matrix = MemMatrixUtil.buildMatrix(input.entrySet().iterator(), 2);
  Assert.assertEquals(2, matrix.getRowDimension());
  Assert.assertEquals(2, matrix.getColumnDimension());
  Assert.assertEquals(4, matrix.getEntry(0, 0), tol);
  Assert.assertEquals(3, matrix.getEntry(0, 1), tol);
  Assert.assertEquals(1, matrix.getEntry(1, 0), tol);
  Assert.assertEquals(1, matrix.getEntry(1, 1), tol);

  matrix = MemMatrixUtil.doInverse(matrix, -1);
  Assert.assertEquals(2, matrix.getRowDimension());
  Assert.assertEquals(2, matrix.getColumnDimension());
  Assert.assertEquals(1, matrix.getEntry(0, 0), tol);
  Assert.assertEquals(-3, matrix.getEntry(0, 1), tol);
  Assert.assertEquals(-1, matrix.getEntry(1, 0), tol);
  Assert.assertEquals(4, matrix.getEntry(1, 1), tol);

  SortedMap<Key, Value> back =
      MemMatrixUtil.matrixToMap(new TreeMap<Key, Value>(TestUtil.COMPARE_KEY_TO_COLQ), matrix);
  TestUtil.assertEqualDoubleMap(expect, back);
}
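// Sanity-check sketch (not part of the original suite): the expected inverse above can be
// reproduced independently of MemMatrixUtil via Commons Math's LUDecomposition. Assumes
// org.apache.commons.math3.linear.{MatrixUtils, LUDecomposition} on the classpath.
private static void checkInverse2x2Independently() {
  RealMatrix m = MatrixUtils.createRealMatrix(new double[][] {{4, 3}, {1, 1}});
  // det = 4*1 - 3*1 = 1, so the inverse is [[1, -3], [-1, 4]].
  RealMatrix inv = new LUDecomposition(m).getSolver().getInverse();
  Assert.assertEquals(1, inv.getEntry(0, 0), 1e-9);
  Assert.assertEquals(-3, inv.getEntry(0, 1), 1e-9);
  Assert.assertEquals(-1, inv.getEntry(1, 0), 1e-9);
  Assert.assertEquals(4, inv.getEntry(1, 1), 1e-9);
}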
/**
 * Applies a rank transform to each of the columns of <code>matrix</code> using the current
 * <code>rankingAlgorithm</code>.
 *
 * @param matrix matrix to transform
 * @return a rank-transformed matrix
 */
private RealMatrix rankTransform(final RealMatrix matrix) {
  RealMatrix transformed = null;

  if (rankingAlgorithm instanceof NaturalRanking
      && ((NaturalRanking) rankingAlgorithm).getNanStrategy() == NaNStrategy.REMOVED) {
    final Set<Integer> nanPositions = new HashSet<Integer>();
    for (int i = 0; i < matrix.getColumnDimension(); i++) {
      nanPositions.addAll(getNaNPositions(matrix.getColumn(i)));
    }

    // if we have found NaN values, we have to update the matrix size
    if (!nanPositions.isEmpty()) {
      transformed =
          new BlockRealMatrix(
              matrix.getRowDimension() - nanPositions.size(), matrix.getColumnDimension());
      for (int i = 0; i < transformed.getColumnDimension(); i++) {
        transformed.setColumn(i, removeValues(matrix.getColumn(i), nanPositions));
      }
    }
  }

  if (transformed == null) {
    transformed = matrix.copy();
  }

  for (int i = 0; i < transformed.getColumnDimension(); i++) {
    transformed.setColumn(i, rankingAlgorithm.rank(transformed.getColumn(i)));
  }

  return transformed;
}
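// Illustration sketch: the REMOVED NaN strategy that the branch above depends on shortens the
// ranked array, which is why the matrix has to be rebuilt with fewer rows. Assumes
// org.apache.commons.math3.stat.ranking.{NaturalRanking, NaNStrategy}.
private static void demoNaNRemovedRanking() {
  NaturalRanking ranking = new NaturalRanking(NaNStrategy.REMOVED);
  double[] ranks = ranking.rank(new double[] {3.0, Double.NaN, 1.0});
  // NaN is dropped, so only two ranks come back: {2.0, 1.0}.
  System.out.println(java.util.Arrays.toString(ranks));
}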
@Override
public Label predict(Instance instance) {
  Label l = null;
  if (instance.getLabel() instanceof ClassificationLabel || instance.getLabel() == null) {
    // ----------------- declare variables ------------------
    double lambda = 0.0;
    RealVector x_instance = new ArrayRealVector(matrixX.getColumnDimension(), 0);
    double result = 0.0;

    // -------------------------- initialize xi -------------------------
    // Note: feature indices appear to be 1-based in the feature vector.
    for (int idx = 0; idx < matrixX.getColumnDimension(); idx++) {
      x_instance.setEntry(idx, instance.getFeatureVector().get(idx + 1));
    }

    // ------------------ get lambda -----------------------
    for (int j = 0; j < alpha.getDimension(); j++) {
      lambda += alpha.getEntry(j) * kernelFunction(matrixX.getRowVector(j), x_instance);
    }

    // ----------------- make prediction -----------------
    Sigmoid g = new Sigmoid(); // helper function
    result = g.value(lambda);
    l = new ClassificationLabel(result < 0.5 ? 0 : 1);
  } else {
    System.out.println("label type error!");
  }
  return l;
}
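// Threshold sketch (assumes org.apache.commons.math3.analysis.function.Sigmoid): the sigmoid
// maps the kernel score lambda into (0, 1), and predict() cuts at 0.5, i.e. at lambda = 0.
private static void demoSigmoidThreshold() {
  Sigmoid g = new Sigmoid();
  System.out.println(g.value(0.0)); // 0.5: the decision boundary
  System.out.println(g.value(2.0)); // ~0.88 -> label 1
  System.out.println(g.value(-2.0)); // ~0.12 -> label 0
}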
// Makes and scales the matrices V, D, and VT (to avoid ugly decimals): each column is divided
// by the absolute value of its first non-zero entry, so that entry becomes +/-1.
private void makeVDVT(EigenDecomp ed) {
  V = ed.getV();
  D = ed.getD();
  VT = ed.getVT();
  double ref = 0;

  // Scale each column of V (i indexes columns, j indexes rows).
  for (int i = 0; i < V.getColumnDimension(); i++) {
    ref = 0;
    for (int j = 0; j < V.getRowDimension(); j++) {
      if (V.getEntry(j, i) != 0 && ref == 0) {
        ref = V.getEntry(j, i);
      }
      if (ref != 0) {
        V.setEntry(j, i, V.getEntry(j, i) / Math.abs(ref));
      }
    }
  }

  // Scale each column of VT the same way.
  for (int i = 0; i < VT.getColumnDimension(); i++) {
    ref = 0;
    for (int j = 0; j < VT.getRowDimension(); j++) {
      if (VT.getEntry(j, i) != 0 && ref == 0) {
        ref = VT.getEntry(j, i);
      }
      if (ref != 0) {
        VT.setEntry(j, i, VT.getEntry(j, i) / Math.abs(ref));
      }
    }
  }
}
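// Usage sketch: what makeVDVT consumes. EigenDecomp is assumed to wrap something like Commons
// Math's org.apache.commons.math3.linear.EigenDecomposition, which is used directly here.
private static void demoEigenDecomposition() {
  RealMatrix a = MatrixUtils.createRealMatrix(new double[][] {{2, 1}, {1, 2}});
  EigenDecomposition ed = new EigenDecomposition(a);
  RealMatrix v = ed.getV(); // columns are eigenvectors
  RealMatrix d = ed.getD(); // diagonal matrix of eigenvalues (3 and 1 here)
  RealMatrix vt = ed.getVT();
  // A = V * D * V^T for this symmetric matrix.
  System.out.println(v.multiply(d).multiply(vt));
}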
@Test
public void testInverseIdentity() {
  double tol = 0.00001;
  Map<Key, Value> input = new TreeMap<>(TestUtil.COMPARE_KEY_TO_COLQ);
  input.put(new Key("1", "", "1"), new Value("1".getBytes()));
  // input.put(new Key("1", "", "2"), new Value("1".getBytes()));
  // input.put(new Key("2", "", "1"), new Value("1".getBytes()));
  input.put(new Key("2", "", "2"), new Value("1".getBytes()));

  RealMatrix matrix = MemMatrixUtil.buildMatrix(input.entrySet().iterator(), 2);
  Assert.assertEquals(2, matrix.getRowDimension());
  Assert.assertEquals(2, matrix.getColumnDimension());
  Assert.assertEquals(1, matrix.getEntry(0, 0), tol);
  Assert.assertEquals(0, matrix.getEntry(0, 1), tol);
  Assert.assertEquals(0, matrix.getEntry(1, 0), tol);
  Assert.assertEquals(1, matrix.getEntry(1, 1), tol);

  matrix = MemMatrixUtil.doInverse(matrix, -1);
  Assert.assertEquals(2, matrix.getRowDimension());
  Assert.assertEquals(2, matrix.getColumnDimension());
  Assert.assertEquals(1, matrix.getEntry(0, 0), tol);
  Assert.assertEquals(0, matrix.getEntry(0, 1), tol);
  Assert.assertEquals(0, matrix.getEntry(1, 0), tol);
  Assert.assertEquals(1, matrix.getEntry(1, 1), tol);

  SortedMap<Key, Value> back =
      MemMatrixUtil.matrixToMap(new TreeMap<Key, Value>(TestUtil.COMPARE_KEY_TO_COLQ), matrix);
  TestUtil.assertEqualDoubleMap(input, back);
  // Assert.assertEquals(1, Double.parseDouble(new String(back.get(new Key("1", "", "1")).get())), tol);
  // Assert.assertEquals(1, Double.parseDouble(new String(back.get(new Key("2", "", "2")).get())), tol);
}
/**
 * @param weight Weight matrix.
 * @throws NonSquareMatrixException if the argument is not a square matrix.
 */
public Weight(RealMatrix weight) {
  if (weight.getColumnDimension() != weight.getRowDimension()) {
    throw new NonSquareMatrixException(weight.getColumnDimension(), weight.getRowDimension());
  }
  weightMatrix = weight.copy();
}
private RealMatrix normalizeData(RealMatrix matrix, UserProfileEigenModel model) {
  RealMatrix normalizedData =
      new Array2DRowRealMatrix(matrix.getRowDimension(), matrix.getColumnDimension());
  if (LOG.isDebugEnabled()) {
    LOG.debug("model statistics size: " + model.statistics().length);
  }
  for (int i = 0; i < matrix.getRowDimension(); i++) {
    for (int j = 0; j < matrix.getColumnDimension(); j++) {
      // Standard z-score: subtract the column mean and divide by the column stddev.
      double value =
          (matrix.getEntry(i, j) - model.statistics()[j].getMean())
              / model.statistics()[j].getStddev();
      normalizedData.setEntry(i, j, value);
    }
  }
  return normalizedData;
}
public double computeSimilarity(RealMatrix sourceDoc, RealMatrix targetDoc) {
  if (sourceDoc.getRowDimension() != targetDoc.getRowDimension()
      || sourceDoc.getColumnDimension() != targetDoc.getColumnDimension()
      || sourceDoc.getColumnDimension() != 1) {
    throw new IllegalArgumentException(
        "Matrices are not column matrices or not of the same size");
  }

  double[] source = sourceDoc.getColumn(0);
  double[] target = targetDoc.getColumn(0);

  // Cosine similarity: dot(source, target) / (||source|| * ||target||).
  double dotProduct = dot(source, target);
  double normProduct = norm(source) * norm(target);
  return dotProduct / normProduct;
}
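// Reference sketch: the same cosine similarity computed with Commons Math vectors, usable as an
// independent check when testing computeSimilarity. Assumes
// org.apache.commons.math3.linear.ArrayRealVector.
private static double cosineReference(double[] source, double[] target) {
  RealVector s = new ArrayRealVector(source);
  RealVector t = new ArrayRealVector(target);
  // cosineReference(new double[] {1, 0}, new double[] {1, 1}) is about 0.7071.
  return s.dotProduct(t) / (s.getNorm() * t.getNorm());
}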
private List<Integer> findZeroColumns(RealMatrix base) {
  List<Integer> indices = new ArrayList<>();
  for (int i = 0; i < base.getColumnDimension(); i++) {
    if (base.getColumnVector(i).getNorm() == 0) {
      indices.add(i);
    }
  }
  return indices;
}
public static RealMatrix stochasticSubmatrix(RealMatrix data, int batch_size, Random rng) {
  // Assume all data has the size number_samples by number_features.
  int num_samples = data.getRowDimension();
  int num_features = data.getColumnDimension();
  // Number of batches, rounding up so a partial final batch is counted exactly once
  // (a plain "num_samples / batch_size + 1" could select an empty batch when the sample
  // count divides evenly, and getSubMatrix rejects an empty row selection).
  int batch_num = (num_samples + batch_size - 1) / batch_size;

  // Randomly pick a batch index, then collect the row indices belonging to that batch.
  int batch_index = rng.nextInt(batch_num);
  List<Integer> rowIndex_tmp = new ArrayList<Integer>();
  for (int i = 0; i < batch_size; i++) {
    if (batch_size * batch_index + i >= num_samples) {
      break;
    }
    rowIndex_tmp.add(batch_size * batch_index + i);
  }
  int[] rowIndex = TypeConvert.ArrayTointv(rowIndex_tmp);

  // Keep all columns (features).
  int[] columnIndex = new int[num_features];
  for (int j = 0; j < num_features; j++) {
    columnIndex[j] = j;
  }
  return data.getSubMatrix(rowIndex, columnIndex);
}
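// Usage sketch: drawing one mini-batch of rows. "MatrixBatches" is a hypothetical stand-in for
// whatever class hosts stochasticSubmatrix; the seed is fixed only to make the run repeatable.
private static void demoStochasticSubmatrix() {
  RealMatrix data =
      new Array2DRowRealMatrix(
          new double[][] {{1, 2}, {3, 4}, {5, 6}, {7, 8}, {9, 10}}); // 5 samples, 2 features
  RealMatrix batch = MatrixBatches.stochasticSubmatrix(data, 2, new Random(42L));
  // With batch_size = 2 and 5 samples there are 3 batches; the last one has a single row.
  System.out.println(batch.getRowDimension() + " x " + batch.getColumnDimension());
}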
public WeightedLeastSquaresMethod(RealMatrix R, int nFactors, RotationMethod rotationMethod) {
  this.nVariables = R.getColumnDimension();
  this.nParam = nVariables;
  this.nFactors = nFactors;
  this.rotationMethod = rotationMethod;
  this.R = R;
  this.R2 = R.copy();
}
/**
 * Throws MathIllegalArgumentException if the matrix does not have at least two columns and two
 * rows.
 *
 * @param matrix matrix to check for sufficiency
 * @throws MathIllegalArgumentException if there is insufficient data
 */
private void checkSufficientData(final RealMatrix matrix) {
  int nRows = matrix.getRowDimension();
  int nCols = matrix.getColumnDimension();
  if (nRows < 2 || nCols < 2) {
    throw new MathIllegalArgumentException(
        LocalizedFormats.INSUFFICIENT_ROWS_AND_COLUMNS, nRows, nCols);
  }
}
public double sumMatrix(RealMatrix matrix) {
  double sum = 0.0;
  for (int i = 0; i < matrix.getRowDimension(); i++) {
    for (int j = 0; j < matrix.getColumnDimension(); j++) {
      sum += matrix.getEntry(i, j);
    }
  }
  return sum;
}
public double[][] residuals() {
  double[][] resid = new double[nItems][nItems];
  for (int i = 0; i < SIGMA.getRowDimension(); i++) {
    for (int j = 0; j < SIGMA.getColumnDimension(); j++) {
      resid[i][j] = varcov.getEntry(i, j) - SIGMA.getEntry(i, j);
    }
  }
  return resid;
}
public double valueAt(double[] param) {
  double[] sdInv = new double[nVariables];

  for (int i = 0; i < nVariables; i++) {
    R.setEntry(i, i, 1.0 - param[i]);
    sdInv[i] = 1.0 / Sinv.getEntry(i, i);
  }

  DiagonalMatrix diagSdInv = new DiagonalMatrix(sdInv);

  EigenDecomposition eigen = new EigenDecomposition(R);
  RealMatrix eigenVectors = eigen.getV().getSubMatrix(0, nVariables - 1, 0, nFactors - 1);

  double[] ev = new double[nFactors];
  for (int i = 0; i < nFactors; i++) {
    ev[i] = Math.sqrt(eigen.getRealEigenvalue(i));
  }
  DiagonalMatrix evMatrix =
      new DiagonalMatrix(ev); // USE Apache version of Diagonal matrix when upgrade to version 3.2
  RealMatrix LAMBDA = eigenVectors.multiply(evMatrix);
  RealMatrix SIGMA = (LAMBDA.multiply(LAMBDA.transpose()));

  double value = 0.0;
  RealMatrix DIF = R.subtract(SIGMA);
  for (int i = 0; i < DIF.getRowDimension(); i++) {
    for (int j = 0; j < DIF.getColumnDimension(); j++) {
      value = DIF.getEntry(i, j);
      DIF.setEntry(i, j, Math.pow(value, 2));
    }
  }

  RealMatrix RESID = diagSdInv.multiply(DIF).multiply(diagSdInv);

  double sum = 0.0;
  for (int i = 0; i < RESID.getRowDimension(); i++) {
    for (int j = 0; j < RESID.getColumnDimension(); j++) {
      sum += RESID.getEntry(i, j);
    }
  }
  return sum;
}
public double[][] squaredResiduals() {
  double[][] resid = new double[nItems][nItems];
  double temp = 0.0;
  for (int i = 0; i < SIGMA.getRowDimension(); i++) {
    for (int j = 0; j < SIGMA.getColumnDimension(); j++) {
      temp = varcov.getEntry(i, j) - SIGMA.getEntry(i, j);
      resid[i][j] = temp * temp;
    }
  }
  return resid;
}
public double meanSquaredResidual() {
  double ni = nItems;
  double temp = 0.0, sum = 0.0;
  for (int i = 0; i < SIGMA.getRowDimension(); i++) {
    for (int j = 0; j < SIGMA.getColumnDimension(); j++) {
      temp = varcov.getEntry(i, j) - SIGMA.getEntry(i, j);
      sum += temp * temp;
    }
  }
  return sum / (ni * ni);
}
public double sumSquaredElements(RealMatrix matrix) {
  double sum = 0.0;
  double v = 0.0;
  for (int i = 0; i < matrix.getRowDimension(); i++) {
    for (int j = 0; j < matrix.getColumnDimension(); j++) {
      v = matrix.getEntry(i, j);
      sum += (v * v);
    }
  }
  return sum;
}
/**
 * Returns a matrix of standard errors associated with the estimates in the correlation matrix.
 * <br>
 * <code>getCorrelationStandardErrors().getEntry(i,j)</code> is the standard error associated
 * with <code>getCorrelationMatrix().getEntry(i,j)</code>.
 *
 * <p>The formula used to compute the standard error is <br>
 * <code>SE<sub>r</sub> = ((1 - r<sup>2</sup>) / (n - 2))<sup>1/2</sup></code> where <code>r
 * </code> is the estimated correlation coefficient and <code>n</code> is the number of
 * observations in the source dataset.
 *
 * <p>To use this method, one of the constructors that supply an input matrix must have been used
 * to create this instance.
 *
 * @return matrix of correlation standard errors
 * @throws NullPointerException if this instance was created with no data
 */
public RealMatrix getCorrelationStandardErrors() {
  int nVars = correlationMatrix.getColumnDimension();
  double[][] out = new double[nVars][nVars];
  for (int i = 0; i < nVars; i++) {
    for (int j = 0; j < nVars; j++) {
      double r = correlationMatrix.getEntry(i, j);
      out[i][j] = FastMath.sqrt((1 - r * r) / (nObs - 2));
    }
  }
  return new BlockRealMatrix(out);
}
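// Usage sketch: this accessor mirrors Commons Math's PearsonsCorrelation, so the standard
// errors can be obtained from a data matrix whose columns are variables. Assumes
// org.apache.commons.math3.stat.correlation.PearsonsCorrelation.
private static void demoCorrelationStandardErrors() {
  RealMatrix data =
      MatrixUtils.createRealMatrix(
          new double[][] {{1, 2}, {2, 3.9}, {3, 6.1}, {4, 8}, {5, 10.2}}); // n = 5 observations
  PearsonsCorrelation pc = new PearsonsCorrelation(data);
  // SE_r = sqrt((1 - r^2) / (n - 2)) for each off-diagonal entry.
  System.out.println(pc.getCorrelationStandardErrors());
}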
/**
 * Computes the Kendall's Tau rank correlation matrix for the columns of the input matrix.
 *
 * @param matrix matrix with columns representing variables to correlate
 * @return correlation matrix
 */
public RealMatrix computeCorrelationMatrix(final RealMatrix matrix) {
  int nVars = matrix.getColumnDimension();
  RealMatrix outMatrix = new BlockRealMatrix(nVars, nVars);
  for (int i = 0; i < nVars; i++) {
    for (int j = 0; j < i; j++) {
      double corr = correlation(matrix.getColumn(i), matrix.getColumn(j));
      outMatrix.setEntry(i, j, corr);
      outMatrix.setEntry(j, i, corr);
    }
    outMatrix.setEntry(i, i, 1d);
  }
  return outMatrix;
}
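// Usage sketch: Commons Math's public KendallsCorrelation exposes this same matrix computation;
// tau is 1.0 for perfectly concordant columns. Assumes
// org.apache.commons.math3.stat.correlation.KendallsCorrelation.
private static void demoKendallMatrix() {
  RealMatrix data =
      MatrixUtils.createRealMatrix(new double[][] {{1, 10}, {2, 20}, {3, 35}, {4, 70}});
  RealMatrix tau = new KendallsCorrelation().computeCorrelationMatrix(data);
  // 1.0: the second column increases monotonically with the first.
  System.out.println(tau.getEntry(0, 1));
}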
private static double[] calculateColumnInverseMeans(RealMatrix matrix) {
  return IntStream.range(0, matrix.getColumnDimension())
      .mapToDouble(
          i ->
              1.0
                  / IntStream.range(0, matrix.getRowDimension())
                      .mapToDouble(j -> matrix.getEntry(j, i))
                      .average()
                      .orElseThrow(
                          () ->
                              new IllegalArgumentException(
                                  "cannot calculate an average for column " + i)))
      .toArray();
}
/**
 * Compute a covariance matrix from a matrix whose columns represent covariates.
 *
 * @param matrix input matrix (must have at least one column and two rows)
 * @param biasCorrected determines whether or not covariance estimates are bias-corrected
 * @return covariance matrix
 * @throws MathIllegalArgumentException if the matrix does not contain sufficient data
 */
protected RealMatrix computeCovarianceMatrix(RealMatrix matrix, boolean biasCorrected)
    throws MathIllegalArgumentException {
  int dimension = matrix.getColumnDimension();
  Variance variance = new Variance(biasCorrected);
  RealMatrix outMatrix = new BlockRealMatrix(dimension, dimension);
  for (int i = 0; i < dimension; i++) {
    for (int j = 0; j < i; j++) {
      double cov = covariance(matrix.getColumn(i), matrix.getColumn(j), biasCorrected);
      outMatrix.setEntry(i, j, cov);
      outMatrix.setEntry(j, i, cov);
    }
    outMatrix.setEntry(i, i, variance.evaluate(matrix.getColumn(i)));
  }
  return outMatrix;
}
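// Usage sketch: the public entry point for this computation in Commons Math is the Covariance
// class; the bias-corrected estimate divides by n - 1. Assumes
// org.apache.commons.math3.stat.correlation.Covariance.
private static void demoCovarianceMatrix() {
  RealMatrix data =
      MatrixUtils.createRealMatrix(new double[][] {{1, 2}, {2, 1}, {3, 4}, {4, 3}});
  RealMatrix cov = new Covariance(data).getCovarianceMatrix();
  System.out.println(cov); // 2x2 symmetric matrix; the diagonal holds the column variances
}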
private RealMatrix removeZeroColumns(RealMatrix base, List<Integer> zeroColumns) {
  // base is assumed to be square: the same indices are removed from both rows and columns.
  int adjustedDim = base.getRowDimension() - zeroColumns.size();
  if (adjustedDim == 0) {
    return base;
  }
  RealMatrix adjusted = new Array2DRowRealMatrix(adjustedDim, adjustedDim);
  int i = 0, j = 0;
  for (int basei = 0; basei < base.getRowDimension(); basei++) {
    if (zeroColumns.contains(basei)) {
      continue;
    }
    for (int basej = 0; basej < base.getColumnDimension(); basej++) {
      if (zeroColumns.contains(basej)) {
        continue;
      }
      adjusted.setEntry(i, j++, base.getEntry(basei, basej));
    }
    i++;
    j = 0;
  }
  return adjusted;
}
/**
 * Derives a correlation matrix from a covariance matrix.
 *
 * <p>Uses the formula <br>
 * <code>r(X,Y) = cov(X,Y)/s(X)s(Y)</code> where <code>r(·,·)</code> is the correlation
 * coefficient and <code>s(·)</code> means standard deviation.
 *
 * @param covarianceMatrix the covariance matrix
 * @return correlation matrix
 */
public RealMatrix covarianceToCorrelation(RealMatrix covarianceMatrix) {
  int nVars = covarianceMatrix.getColumnDimension();
  RealMatrix outMatrix = new BlockRealMatrix(nVars, nVars);
  for (int i = 0; i < nVars; i++) {
    double sigma = FastMath.sqrt(covarianceMatrix.getEntry(i, i));
    outMatrix.setEntry(i, i, 1d);
    for (int j = 0; j < i; j++) {
      double entry =
          covarianceMatrix.getEntry(i, j)
              / (sigma * FastMath.sqrt(covarianceMatrix.getEntry(j, j)));
      outMatrix.setEntry(i, j, entry);
      outMatrix.setEntry(j, i, entry);
    }
  }
  return outMatrix;
}
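// Worked sketch: for cov = [[4, 2], [2, 9]], s(X) = 2 and s(Y) = 3, so
// r(X,Y) = 2 / (2 * 3) = 0.333... This assumes the method sits in a class like Commons Math's
// PearsonsCorrelation, where covarianceToCorrelation is public.
private static void demoCovarianceToCorrelation() {
  RealMatrix cov = MatrixUtils.createRealMatrix(new double[][] {{4, 2}, {2, 9}});
  RealMatrix corr = new PearsonsCorrelation().covarianceToCorrelation(cov);
  System.out.println(corr.getEntry(0, 1)); // 0.3333...
}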
/**
 * Returns a matrix of p-values associated with the (two-sided) null hypothesis that the
 * corresponding correlation coefficient is zero.
 *
 * <p><code>getCorrelationPValues().getEntry(i,j)</code> is the probability that a random
 * variable distributed as <code>t<sub>n-2</sub></code> takes a value with absolute value greater
 * than or equal to <br>
 * <code>|r|((n - 2) / (1 - r<sup>2</sup>))<sup>1/2</sup></code>
 *
 * <p>The values in the matrix are sometimes referred to as the <i>significance</i> of the
 * corresponding correlation coefficients.
 *
 * <p>To use this method, one of the constructors that supply an input matrix must have been used
 * to create this instance.
 *
 * @return matrix of p-values
 * @throws org.apache.commons.math3.exception.MaxCountExceededException if an error occurs
 *     estimating probabilities
 * @throws NullPointerException if this instance was created with no data
 */
public RealMatrix getCorrelationPValues() {
  TDistribution tDistribution = new TDistribution(nObs - 2);
  int nVars = correlationMatrix.getColumnDimension();
  double[][] out = new double[nVars][nVars];
  for (int i = 0; i < nVars; i++) {
    for (int j = 0; j < nVars; j++) {
      if (i == j) {
        out[i][j] = 0d;
      } else {
        double r = correlationMatrix.getEntry(i, j);
        double t = FastMath.abs(r * FastMath.sqrt((nObs - 2) / (1 - r * r)));
        out[i][j] = 2 * tDistribution.cumulativeProbability(-t);
      }
    }
  }
  return new BlockRealMatrix(out);
}
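// Worked sketch: the p-value formula above, computed directly for one coefficient. With r = 0.9
// and n = 10, t = |r| * sqrt((n - 2) / (1 - r^2)) is about 5.84, giving a two-sided p-value of
// roughly 4e-4. Assumes org.apache.commons.math3.distribution.TDistribution.
private static double twoSidedPValue(double r, int nObs) {
  TDistribution tDist = new TDistribution(nObs - 2);
  double t = FastMath.abs(r * FastMath.sqrt((nObs - 2) / (1 - r * r)));
  return 2 * tDist.cumulativeProbability(-t);
}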
/**
 * Tangent normalize a coverage profile.
 *
 * <p>Notes about the Spark tangent normalization can be found in docs/PoN/
 *
 * @param pon Not {@code null}
 * @param targetFactorNormalizedCounts ReadCountCollection of counts that have already been
 *     normalized fully (typically, including the target factor normalization), i.e. a coverage
 *     profile. The column names should be intact. Not {@code null}. See {@link
 *     TangentNormalizer#createCoverageProfile}
 * @return never {@code null}
 */
private static TangentNormalizationResult tangentNormalize(
    final PoN pon, final ReadCountCollection targetFactorNormalizedCounts, JavaSparkContext ctx) {
  Utils.nonNull(pon, "PoN cannot be null.");
  Utils.nonNull(targetFactorNormalizedCounts, "targetFactorNormalizedCounts cannot be null.");
  Utils.nonNull(
      targetFactorNormalizedCounts.columnNames(),
      "targetFactorNormalizedCounts column names cannot be null.");
  ParamUtils.isPositive(
      targetFactorNormalizedCounts.columnNames().size(),
      "targetFactorNormalizedCounts column names cannot be an empty list.");

  final Case2PoNTargetMapper targetMapper =
      new Case2PoNTargetMapper(targetFactorNormalizedCounts.targets(), pon.getPanelTargetNames());

  // The input counts with rows (targets) sorted so that they match the PoN's order.
  final RealMatrix tangentNormalizationRawInputCounts =
      targetMapper.fromCaseToPoNCounts(targetFactorNormalizedCounts.counts());

  // We prepare the counts for tangent normalization.
  final RealMatrix tangentNormalizationInputCounts =
      composeTangentNormalizationInputMatrix(tangentNormalizationRawInputCounts);

  if (ctx == null) {
    // Calculate the beta-hats for the input read count columns (samples).
    logger.info("Calculating beta hats...");
    final RealMatrix tangentBetaHats =
        pon.betaHats(tangentNormalizationInputCounts, true, EPSILON);

    // Actual tangent normalization step.
    logger.info(
        "Performing actual tangent normalization ("
            + tangentNormalizationInputCounts.getColumnDimension()
            + " columns)...");
    final RealMatrix tangentNormalizedCounts =
        pon.tangentNormalization(tangentNormalizationInputCounts, tangentBetaHats, true);

    // Output the tangent normalized counts.
    logger.info("Post-processing tangent normalization results...");
    final ReadCountCollection tangentNormalized =
        targetMapper.fromPoNtoCaseCountCollection(
            tangentNormalizedCounts, targetFactorNormalizedCounts.columnNames());
    final ReadCountCollection preTangentNormalized =
        targetMapper.fromPoNtoCaseCountCollection(
            tangentNormalizationInputCounts, targetFactorNormalizedCounts.columnNames());

    return new TangentNormalizationResult(
        tangentNormalized, preTangentNormalized, tangentBetaHats, targetFactorNormalizedCounts);
  } else {
    /*
     * Using Spark: the code here is a little more complex for optimization purposes.
     * Please see notes in docs/PoN ...
     *
     *   Ahat^T = (C^T P^T) A^T
     *
     * Therefore, C^T is the RowMatrix.
     *
     *   pinv:  P
     *   panel: A
     *   projection: Ahat
     *   cases: C
     *   betahat: C^T P^T
     *   tangentNormalizedCounts: C - Ahat
     */
    final RealMatrix pinv = pon.getReducedPanelPInverseCounts();
    final RealMatrix panel = pon.getReducedPanelCounts();

    // Make the C^T a distributed matrix (RowMatrix).
    final RowMatrix caseTDistMat =
        SparkConverter.convertRealMatrixToSparkRowMatrix(
            ctx, tangentNormalizationInputCounts.transpose(), TN_NUM_SLICES_SPARK);

    // Spark local matrices (transposed).
    final Matrix pinvTLocalMat =
        new DenseMatrix(
                pinv.getRowDimension(),
                pinv.getColumnDimension(),
                Doubles.concat(pinv.getData()),
                true)
            .transpose();
    final Matrix panelTLocalMat =
        new DenseMatrix(
                panel.getRowDimension(),
                panel.getColumnDimension(),
                Doubles.concat(panel.getData()),
                true)
            .transpose();

    // Calculate the projection transpose in a distributed matrix, then convert to an Apache
    // Commons matrix (not transposed).
    final RowMatrix betahatDistMat = caseTDistMat.multiply(pinvTLocalMat);
    final RowMatrix projectionTDistMat = betahatDistMat.multiply(panelTLocalMat);
    final RealMatrix projection =
        SparkConverter.convertSparkRowMatrixToRealMatrix(
                projectionTDistMat,
                tangentNormalizationInputCounts.transpose().getRowDimension())
            .transpose();

    // Subtract the cases from the projection.
    final RealMatrix tangentNormalizedCounts =
        tangentNormalizationInputCounts.subtract(projection);

    // Construct the result object and return it with the correct targets.
    final ReadCountCollection tangentNormalized =
        targetMapper.fromPoNtoCaseCountCollection(
            tangentNormalizedCounts, targetFactorNormalizedCounts.columnNames());
    final ReadCountCollection preTangentNormalized =
        targetMapper.fromPoNtoCaseCountCollection(
            tangentNormalizationInputCounts, targetFactorNormalizedCounts.columnNames());
    final RealMatrix tangentBetaHats =
        SparkConverter.convertSparkRowMatrixToRealMatrix(
            betahatDistMat, tangentNormalizedCounts.getColumnDimension());

    return new TangentNormalizationResult(
        tangentNormalized,
        preTangentNormalized,
        tangentBetaHats.transpose(),
        targetFactorNormalizedCounts);
  }
}
/////////////////////////////////// Constructor ////////////////////////////////////////////////
public CostFunction(
    List<Pixel> input,
    List<Pixel> average,
    List<Pixel> model,
    RealMatrix inputFeatPts,
    RealMatrix averageFeatPts,
    RealMatrix modelFeatPts,
    float[][] eigenVectorsS,
    float[][] eigenVectorsT,
    Bitmap bmpModel,
    float[] inputAlpha,
    float[] inputBeta,
    float sigmaI,
    float sigmaF) {
  // this.featPtsIndex = readBin83PtIndex(CONFIG_DIRECTORY, INDEX83PT_FILE);
  this.input = input;
  this.average = average;
  this.model = model;
  this.inputFeatPts = inputFeatPts;
  this.averageFeatPts = averageFeatPts;
  this.modelFeatPts = modelFeatPts;
  this.k = inputFeatPts.getRowDimension(); // should be equal to 83
  this.num_points = input.size(); // should be equal to 8489
  this.eigValS = readBinFloat(SHAPE_DIRECTORY, EIG_SHAPE_FILE, 60);
  this.eigValT = readBinFloat(TEXTURE_DIRECTORY, EIG_TEXTURE_FILE, 100);
  /*this.subFSV = readBinSFSV(CONFIG_DIRECTORY, SFSV_FILE);*/
  this.landmarks83Index =
      readBin83PtIndex(CONFIG_DIRECTORY, INDEX83PT_FILE); // we access directly in featureShape
  // file instead of using subFSV file

  // Checking arguments
  if (num_points != 8489) {
    throw new IllegalArgumentException("num_points not equal 8489");
  }
  if (k != 83) {
    throw new IllegalArgumentException("k not equal 83");
  }
  if (input.isEmpty() || average.isEmpty() || model.isEmpty()) {
    throw new IllegalArgumentException("input or average or model list are empty");
  }
  if (input.size() != model.size()) {
    throw new IllegalArgumentException("input and model list do not have the same size");
  }
  if (input.size() != average.size()) {
    throw new IllegalArgumentException("input and average list do not have the same size");
  }
  if (average.size() != model.size()) {
    throw new IllegalArgumentException("average and model list do not have the same size");
  }
  if (averageFeatPts.getRowDimension() != k
      || averageFeatPts.getColumnDimension() != inputFeatPts.getColumnDimension()) {
    throw new IllegalArgumentException(
        "inputFeatPts and averageFeatPts do not have the same size");
  }
  if (modelFeatPts.getRowDimension() != k
      || modelFeatPts.getColumnDimension() != inputFeatPts.getColumnDimension()) {
    throw new IllegalArgumentException("inputFeatPts and modelFeatPts do not have the same size");
  }

  // Initially populate list with 0; the value doesn't matter.
  for (int h = 0; h < 500; h++) {
    this.randomList.add(0);
  }

  this.s = eigenVectorsS;
  this.t = eigenVectorsT;
  this.inputAlpha = inputAlpha;
  this.inputBeta = inputBeta;
  this.sigmaI = sigmaI;
  this.sigmaF = sigmaF;
  this.Iinput = computeIinput(); // is always the same
  this.Imodel = computeImodel();
  this.IinputBeta = computeIinputBeta(); // is always the same
  this.ImodelBeta = computeImodelBeta();
  this.dxModel = computeSobelGx(bmpModel);
  this.dyModel = computeSobelGy(bmpModel);
  // saveBitmaptoPNG(TEXTURE_DIRECTORY, "modelFace2DGx.png", bmpModelGx); // save
  // saveBitmaptoPNG(TEXTURE_DIRECTORY, "modelFace2DGy.png", bmpModelGy); // save
  this.E = computeE();
  this.alpha = inputAlpha; // Initialize
  this.beta = inputBeta; // Initialize
  computeAlpha(); // Compute 60 alpha values output
  computeBeta(); // Compute 100 beta values output
  /*
  Collections.sort(randomList);
  for (int idx : randomList) {
    Log.d(TAG, "randomList = " + idx);
  }
  */
}
@Override
public List<MLCallbackResult> detect(
    final String user,
    final String algorithm,
    UserActivityAggModel userActivity,
    UserProfileEigenModel aModel) {
  RealMatrix inputData = userActivity.matrix();
  LOG.warn(
      "EigenBasedAnomalyDetection predictAnomaly called with dimension: "
          + inputData.getRowDimension()
          + "x"
          + inputData.getColumnDimension());
  if (aModel == null) {
    LOG.warn(
        "nothing to do as the input model does not have required values, returning from evaluating this algorithm..");
    return null;
  }

  List<MLCallbackResult> mlCallbackResults = new ArrayList<MLCallbackResult>();
  RealMatrix normalizedMat = normalizeData(inputData, aModel);

  UserCommandStatistics[] listStats = aModel.statistics();
  int colWithHighVariant = 0;
  for (int j = 0; j < normalizedMat.getColumnDimension(); j++) {
    if (!listStats[j].isLowVariant()) {
      colWithHighVariant++;
    }
  }

  final Map<String, String> context =
      new HashMap<String, String>() {
        {
          put(UserProfileConstants.USER_TAG, user);
          put(UserProfileConstants.ALGORITHM_TAG, algorithm);
        }
      };

  // First pass: flag rows whose low-variant features exceed their mean.
  Map<Integer, String> lineNoWithVariantBasedAnomalyDetection = new HashMap<Integer, String>();
  for (int i = 0; i < normalizedMat.getRowDimension(); i++) {
    MLCallbackResult aResult = new MLCallbackResult();
    aResult.setContext(context);
    for (int j = 0; j < normalizedMat.getColumnDimension(); j++) {
      if (listStats[j].isLowVariant()) {
        if (normalizedMat.getEntry(i, j) > listStats[j].getMean()) {
          lineNoWithVariantBasedAnomalyDetection.put(i, "lowVariantAnomaly");
          aResult.setAnomaly(true);
          aResult.setTimestamp(userActivity.timestamp());
          aResult.setFeature(listStats[j].getCommandName());
          aResult.setAlgorithm(UserProfileConstants.EIGEN_DECOMPOSITION_ALGORITHM);
          List<String> datapoints = new ArrayList<String>();
          double[] rowVals = inputData.getRow(i);
          for (double rowVal : rowVals) {
            datapoints.add(rowVal + "");
          }
          aResult.setDatapoints(datapoints);
          aResult.setId(user);
          mlCallbackResults.add(aResult);
        } else {
          aResult.setAnomaly(false);
          aResult.setTimestamp(userActivity.timestamp());
          mlCallbackResults.add(aResult);
        }
      }
    }
  }

  // Rebuild the matrix with only the high-variant feature columns.
  RealMatrix finalMatWithoutLowVariantFeatures =
      new Array2DRowRealMatrix(normalizedMat.getRowDimension(), colWithHighVariant);
  int finalMatrixRow = 0;
  int finalMatrixCol = 0;
  for (int i = 0; i < normalizedMat.getRowDimension(); i++) {
    for (int j = 0; j < normalizedMat.getColumnDimension(); j++) {
      if (!listStats[j].isLowVariant()) {
        finalMatWithoutLowVariantFeatures.setEntry(
            finalMatrixRow, finalMatrixCol, normalizedMat.getEntry(i, j));
        finalMatrixCol++;
      }
    }
    finalMatrixCol = 0;
    finalMatrixRow++;
  }

  RealVector[] pcs = aModel.principalComponents();
  RealMatrix finalInputMatTranspose = finalMatWithoutLowVariantFeatures.transpose();

  // Second pass: for rows not already flagged, compare the projection onto each principal
  // component against the model's maximum L2 norm for that component.
  for (int i = 0; i < finalMatWithoutLowVariantFeatures.getRowDimension(); i++) {
    if (lineNoWithVariantBasedAnomalyDetection.get(i) == null) {
      MLCallbackResult result = new MLCallbackResult();
      result.setContext(context);
      for (int sz = 0; sz < pcs.length; sz++) {
        double[] pc1 = pcs[sz].toArray();
        RealMatrix pc1Mat = new Array2DRowRealMatrix(pc1);
        RealMatrix transposePC1Mat = pc1Mat.transpose();
        RealMatrix testData =
            pc1Mat.multiply(transposePC1Mat).multiply(finalInputMatTranspose.getColumnMatrix(i));
        RealMatrix testDataTranspose = testData.transpose();
        RealVector iRowVector = testDataTranspose.getRowVector(0);
        RealVector pc1Vector = transposePC1Mat.getRowVector(0);
        double distanceiRowAndPC1 = iRowVector.getDistance(pc1Vector);
        if (distanceiRowAndPC1 > aModel.maximumL2Norm().getEntry(sz)) {
          result.setAnomaly(true);
          result.setFeature(aModel.statistics()[sz].getCommandName());
          result.setTimestamp(System.currentTimeMillis());
          result.setAlgorithm(UserProfileConstants.EIGEN_DECOMPOSITION_ALGORITHM);
          List<String> datapoints = new ArrayList<String>();
          double[] rowVals = inputData.getRow(i);
          for (double rowVal : rowVals) {
            datapoints.add(rowVal + "");
          }
          result.setDatapoints(datapoints);
          result.setId(user);
        }
      }
      mlCallbackResults.add(result);
    }
  }
  return mlCallbackResults;
}
/**
 * Applies the current <code>rankingAlgorithm</code> to each column of <code>matrix</code>,
 * replacing each column's values with their ranks in place.
 *
 * @param matrix the matrix whose columns are rank-transformed in place
 */
private void rankTransform(RealMatrix matrix) {
  for (int i = 0; i < matrix.getColumnDimension(); i++) {
    matrix.setColumn(i, rankingAlgorithm.rank(matrix.getColumn(i)));
  }
}