private Matrix getMatrixA() throws IOException { // Matrix A = new Matrix(this.getNumGoTerms(), this.annotations.sizeGenes()); Matrix A = new Matrix(this.getNumGoTerms(), this.RWC.getColumnDimension()); for (GOTerm currentGoTerm : this.subGoTerms) { // 0. check for NStar value > 0, since this indicates there // is an annotation if (this.getNumberOfAnnotationsStar(currentGoTerm) > 0) { // 0. Get all the genes annotating the current node // this will hodl the difference of the parent set with the child set Set<String> uniqueAnnotations = new HashSet<String>(this.annotations.getProteinsForGOTerm(currentGoTerm.getGOid())); // 1. Get all the genes annotating the children of the current node // 1.0 get all the children Set<GOTerm> children = new HashSet<GOTerm>(); for (String currentRelation : this.relations) { children.addAll(currentGoTerm.getChildrenForRelation(currentRelation)); } // 1.1 get all the genes for the children. Set<String> childrenAnnotations = new HashSet<String>(); for (GOTerm currentChild : children) { childrenAnnotations.addAll(this.annotations.getProteinsForGOTerm(currentChild.getGOid())); } // 2. Traverse the difference and count the number of terms annotating this et of genes. // 2.0 obtain the difference between the two nodes. that is, the annotations // that are unique to the current node.. uniqueAnnotations.removeAll(childrenAnnotations); for (String uniqueAnnotation : uniqueAnnotations) { // this is a tricky one. We are not sure what "directly" means in the paper. // but it should not be a very complicated problem to solve. int count = this.annotations.getGOTermScoresForProteinId(uniqueAnnotation).keySet().size(); // 0. get the protein id. A.set( this.goTermIndex.get(currentGoTerm.getNumericId()), this.proteinIndices.get(uniqueAnnotation), 1.0f / count); } } } this.logger.showMessage("Matrix A computed. % of sparseness = " + A.getSparsenessPercentage()); return A; }
private void setRandomWalkContributionGeneWise(Matrix W) throws IOException { // 0. get matrix A this.logger.showTimedMessage("Getting matrix A"); Matrix A = this.getMatrixA(); // 1. multiply both matrices. this.logger.showTimedMessage("Getting matrix B"); SparseMatrix W_ = W.getSparseMatrix(this.leafIndices, this.allIndices); this.logger.showMessage( "Matrix W_ computed. % of sparseness = " + W_.getSparsenessPercentage()); Matrix B = W_.times(A); // TODO: optimize this, A is always very sparse this.logger.showMessage("Matrix B computed. % of sparseness = " + B.getSparsenessPercentage()); // 2. calculate the RWC // 2.0 traverse all the products. // set the value for all eht rows for this column and this row // for RWC column_index == row_index this.logger.showTimedMessage("Computing RWC matrix as succesive Jaccard indexes"); final int N = this.RWC.getRowDimension(), M = this.RWC.getColumnDimension(); if (this.weightedJaccard) { this.logger.showTimedMessage("Jaccard index, _with_ IC"); // precomputing IC for each leaf final float invMaxAnnot = 1.0f / (float) this.maxNumberOfAnnotations; float IC[] = new float[this.leafIndices.length]; for (int i = 0; i < this.leafIndices.length; i++) { // we need to fetch the information content of the nodes if we use weighted jaccard. IC[i] = (float) -Math.log( this.numAnnotations.get(this.gotermIdByIndex.get(this.leafIndices[i])) * invMaxAnnot); } for (int i = 0; i < N; i++) { float column_i[] = B.getColumn(i); for (int j = i; j < M; j++) { float jaccardIndex = this.getJaccardIndexWithIC(column_i, B.getColumn(j), IC); this.RWC.set(i, j, jaccardIndex); this.RWC.set(j, i, jaccardIndex); } } } else { this.logger.showTimedMessage("Jaccard index, _without_ IC"); float sums[] = new float[N]; for (int i = 0; i < N; i++) { float sum = 0.0f; for (float val : B.getColumn(i)) { sum += val; } sums[i] = sum; } for (int i = 0; i < N; i++) { float column_i[] = B.getColumn(i); for (int j = i; j < M; j++) { float jaccardIndex = this.getJaccardIndexWithoutIC(column_i, B.getColumn(j), sums[i], sums[j]); this.RWC.set(i, j, jaccardIndex); this.RWC.set(j, i, jaccardIndex); } } } this.logger.showTimedMessage("RWC set!"); }