// ************************************************************************** // ************************************************************************** // UTILITY FUNCTIONS // ************************************************************************** // ************************************************************************** private int getNumberOfAnnotations(GOTerm currentGoTerm) { // look for the annotations in the cache, if we find it, return it. int id = currentGoTerm.getNumericId(); if (this.numAnnotations.containsKey(id)) { return this.numAnnotations.get(id); } else { // this number is not cached yet. int num = this.annotations.countNumberOfGenesForGOTerm(currentGoTerm.getGOid()); this.numAnnotations.put(id, num); return num; } }
private Matrix initialiseTransitionProbabilities() { // 1. initialise transitionprobabilities // we use a sparse matrix, so we don't need to put zeroes anywhere. // Matrix P = new Matrix(this.getNumGoTerms(), this.getNumGoTerms()); Matrix P = new SparseMatrix(this.getNumGoTerms(), this.getNumGoTerms()); // 0.1 check if the node is a leaf, if it is, put a 1 into it. for (GOTerm currentGoTerm : this.subGoTerms) { if (this.leafs.contains(currentGoTerm.getNumericId())) { int leafIndex = this.goTermIndex.get(currentGoTerm.getNumericId()); P.set(leafIndex, leafIndex, 1.0f); continue; } } for (GOTerm currentGoTerm : this.subGoTerms) { int N_v = this.getNumberOfAnnotations(currentGoTerm); int N_vStar = this.getNumberOfAnnotationsStar(currentGoTerm); this.setTransitionProbabilitiesNonLeaf(currentGoTerm, N_v, N_vStar, P); } return P; }
private void setAllLeafs() { for (GOTerm term : this.subGoTerms) { boolean isLeaf = true; for (String relation : this.relations) { // is it a leaf for ALL the ontologies? otherwise, // it is not really a leaf.. if (!this.isLeafISM(term, relation)) { isLeaf = false; break; } } if (isLeaf) { this.leafs.add(term.getNumericId()); } } }
private Matrix getMatrixA() throws IOException { // Matrix A = new Matrix(this.getNumGoTerms(), this.annotations.sizeGenes()); Matrix A = new Matrix(this.getNumGoTerms(), this.RWC.getColumnDimension()); for (GOTerm currentGoTerm : this.subGoTerms) { // 0. check for NStar value > 0, since this indicates there // is an annotation if (this.getNumberOfAnnotationsStar(currentGoTerm) > 0) { // 0. Get all the genes annotating the current node // this will hodl the difference of the parent set with the child set Set<String> uniqueAnnotations = new HashSet<String>(this.annotations.getProteinsForGOTerm(currentGoTerm.getGOid())); // 1. Get all the genes annotating the children of the current node // 1.0 get all the children Set<GOTerm> children = new HashSet<GOTerm>(); for (String currentRelation : this.relations) { children.addAll(currentGoTerm.getChildrenForRelation(currentRelation)); } // 1.1 get all the genes for the children. Set<String> childrenAnnotations = new HashSet<String>(); for (GOTerm currentChild : children) { childrenAnnotations.addAll(this.annotations.getProteinsForGOTerm(currentChild.getGOid())); } // 2. Traverse the difference and count the number of terms annotating this et of genes. // 2.0 obtain the difference between the two nodes. that is, the annotations // that are unique to the current node.. uniqueAnnotations.removeAll(childrenAnnotations); for (String uniqueAnnotation : uniqueAnnotations) { // this is a tricky one. We are not sure what "directly" means in the paper. // but it should not be a very complicated problem to solve. int count = this.annotations.getGOTermScoresForProteinId(uniqueAnnotation).keySet().size(); // 0. get the protein id. A.set( this.goTermIndex.get(currentGoTerm.getNumericId()), this.proteinIndices.get(uniqueAnnotation), 1.0f / count); } } } this.logger.showMessage("Matrix A computed. % of sparseness = " + A.getSparsenessPercentage()); return A; }
private int getNumberOfAnnotationsStar(GOTerm currentGoTerm) throws IllegalArgumentException { // look for the annotations in the cache, if we find it, return it. int id = currentGoTerm.getNumericId(); if (this.numAnnotationsStar.containsKey(id)) { return this.numAnnotationsStar.get(id); } else { // this number is not cached yet, so we need to compute it. // 0. get number of annotations for this node. int currentGoTermAnnotationCount = this.getNumberOfAnnotations(currentGoTerm); // 1. get all annotations for the children. Set<GOTerm> children = new HashSet<GOTerm>(); for (String currentRelation : this.relations) { // again, we consider all relations at once. children.addAll(currentGoTerm.getChildrenForRelation(currentRelation)); } Set<String> proteinsAnnotatedToChildren = new HashSet<String>(); for (GOTerm child : children) { proteinsAnnotatedToChildren.addAll(this.annotations.getProteinsForGOTerm(child.getGOid())); } // there is no need to actually make the set difference. Once we // get the number of annotations in the parent, we should just // substract to that number the number of unique annotations in the // children. This would be the number of annotations in the parent // that belong to none of the children. int childrenAnnotationCount = proteinsAnnotatedToChildren.size(); int retVal = currentGoTermAnnotationCount - childrenAnnotationCount; this.numAnnotationsStar.put(id, retVal); return retVal; } }
private void setTransitionProbabilitiesNonLeaf( GOTerm currentGoTerm, int N_v, int N_vStar, Matrix P) { // P(v,c) = (1 - N_v* / N_v) N_c/(Sum{u: v->u} N_u_) // P(v,c) = A * N_C/B if (N_v == N_vStar) { // The fact that N_v is equal to N_vStar // gives a 0.0 in the matrix entries, because of the leading factor // 1-N_v/N_vStar, which does not to be specified due to the sparse // matrix used... return; } float A = (1.0f - ((float) N_vStar / (float) N_v)); // we get all the children // now, this is a tricky one. // the way I understand it so far is that all relations are the same, that is, // the random walker will not differentiate between a part_of and a is_a relation // so, we get ALL the children, adding them all up in a list, disregarding the relations // completely. This is viable, since the random walker would be able to reach that // node following either one of the relations. // children will the contain ALL the children, following every possible relation // the number of children per node is not very large, ArrayList is O(1) insertion, and O(n) // so it's ok. Set<GOTerm> children = new HashSet<GOTerm>(); for (String currentRelation : this.relations) { children.addAll(currentGoTerm.getChildrenForRelation(currentRelation)); } // we count all the annotations in the children // B keesp the sum of N_u for every child. // this number is the same given eery child of v. int N_u = 0; for (GOTerm currentChild : children) { N_u += this.getNumberOfAnnotations(currentChild); } // we need to use two loops. First we compute the total sum of // annotations in the children, and the we modify the matrix P. // keep in mind that these nodes are allways non leaf nodes, therefore, B // should not remain zero unless the children are actually not annotating // any genes. // this is why we specify an initial value for B. This will put // a very low transition probability to that node. // P(v,c) = A * N_C/B final float inv_N_u = 1.0f / N_u; Map<Integer, GOTerm> sortedChildren = new TreeMap<Integer, GOTerm>(); for (GOTerm currentChild : children) { sortedChildren.put(currentChild.getNumericId(), currentChild); } for (int currentChildId : sortedChildren.keySet()) { GOTerm currentChild = sortedChildren.get(currentChildId); int N_c = this.getNumberOfAnnotations(currentChild); // we check whether the annotations are in excess of zero. // if the node does not have any annotations, then there is no need // in putting anything in it, since the structure is a sparse matrix. if (N_c > 0) { float newEntry = A * (N_c * inv_N_u); int v = this.goTermIndex.get(currentGoTerm.getNumericId()); int c = this.goTermIndex.get(currentChild.getNumericId()); P.set(c, v, newEntry); } } }