Exemplo n.º 1
0
  /**
   * Builds the matrix A that links GO terms (rows) to proteins (columns).
   *
   * <p>The matrix is created with {@code getNumGoTerms()} rows and as many columns as {@code RWC}.
   * For every term in {@code subGoTerms} whose {@code getNumberOfAnnotationsStar} value is greater
   * than zero, the proteins annotated to the term but to none of its children (children are
   * gathered over every relation in {@code relations}) each receive the weight {@code 1 / count},
   * where {@code count} is the number of keys returned by {@code getGOTermScoresForProteinId} for
   * that protein. Cells that are never written keep whatever value the {@code Matrix} constructor
   * gives them (presumably zero; confirm against {@code Matrix}).
   *
   * @return the populated matrix A
   * @throws IOException declared by this method; presumably raised by the annotation lookups
   *     (confirm against the annotation store)
   */
  private Matrix getMatrixA() throws IOException {
    Matrix matrixA = new Matrix(this.getNumGoTerms(), this.RWC.getColumnDimension());

    for (GOTerm term : this.subGoTerms) {
      // Only terms with a positive NStar value carry an annotation; skip the rest.
      if (!(this.getNumberOfAnnotationsStar(term) > 0)) {
        continue;
      }

      // Proteins annotating the term itself; this set is reduced below to the proteins that
      // belong to the term but to none of its children.
      Set<String> ownProteins =
          new HashSet<String>(this.annotations.getProteinsForGOTerm(term.getGOid()));

      // Gather the children of the term across every configured relation.
      Set<GOTerm> childTerms = new HashSet<GOTerm>();
      for (String relation : this.relations) {
        childTerms.addAll(term.getChildrenForRelation(relation));
      }

      // Union of the proteins annotating any child.
      Set<String> inheritedProteins = new HashSet<String>();
      for (GOTerm child : childTerms) {
        inheritedProteins.addAll(this.annotations.getProteinsForGOTerm(child.getGOid()));
      }

      // Keep only the annotations that are unique to the current term.
      ownProteins.removeAll(inheritedProteins);

      for (String proteinId : ownProteins) {
        // What "directly" annotating means in the paper is ambiguous; here the weight is split
        // over every GO term that has a score recorded for this protein.
        int termCount = this.annotations.getGOTermScoresForProteinId(proteinId).keySet().size();
        float weight = 1.0f / termCount;
        matrixA.set(
            this.goTermIndex.get(term.getNumericId()), this.proteinIndices.get(proteinId), weight);
      }
    }

    this.logger.showMessage(
        "Matrix A computed. % of sparseness = " + matrixA.getSparsenessPercentage());
    return matrixA;
  }
Exemplo n.º 2
0
  /**
   * Fills {@code RWC} with pairwise Jaccard indexes between the columns of {@code B = W_ * A}.
   *
   * <p>{@code A} comes from {@link #getMatrixA()} and {@code W_} is the sparse sub-matrix of
   * {@code W} selected by {@code leafIndices} and {@code allIndices}. Each index is written
   * symmetrically, to both {@code (i, j)} and {@code (j, i)}. When {@code weightedJaccard} is set,
   * an information-content weight per leaf, {@code -log(numAnnotations / maxNumberOfAnnotations)},
   * is passed to the Jaccard computation; otherwise the precomputed column sums of {@code B} are
   * passed instead.
   *
   * <p>NOTE(review): the loops run rows over {@code RWC.getRowDimension()} and columns over
   * {@code RWC.getColumnDimension()}, and the column-sum array is sized by the row count, so this
   * appears to assume {@code RWC} is square; confirm against where {@code RWC} is allocated.
   *
   * @param W matrix from which the leaf sub-matrix is extracted
   * @throws IOException if {@link #getMatrixA()} fails
   */
  private void setRandomWalkContributionGeneWise(Matrix W) throws IOException {
    // Step 0: matrix A.
    this.logger.showTimedMessage("Getting matrix A");
    Matrix annotationMatrix = this.getMatrixA();

    // Step 1: B = W_ * A.
    this.logger.showTimedMessage("Getting matrix B");
    SparseMatrix leafWalk = W.getSparseMatrix(this.leafIndices, this.allIndices);
    this.logger.showMessage(
        "Matrix W_ computed. % of sparseness = " + leafWalk.getSparsenessPercentage());
    // TODO: optimize this, A is always very sparse
    Matrix product = leafWalk.times(annotationMatrix);
    this.logger.showMessage(
        "Matrix B computed. % of sparseness = " + product.getSparsenessPercentage());

    // Step 2: RWC as the Jaccard index of every pair of columns of B. Only the upper triangle is
    // computed; each value is mirrored into the lower triangle.
    this.logger.showTimedMessage("Computing RWC matrix as succesive Jaccard indexes");
    final int rowCount = this.RWC.getRowDimension();
    final int colCount = this.RWC.getColumnDimension();

    if (this.weightedJaccard) {
      this.logger.showTimedMessage("Jaccard index, _with_ IC");

      // Information content of each leaf, computed once up front.
      final float invMaxAnnot = 1.0f / (float) this.maxNumberOfAnnotations;
      final int leafCount = this.leafIndices.length;
      float[] informationContent = new float[leafCount];
      for (int leaf = 0; leaf < leafCount; leaf++) {
        double annotationRatio =
            this.numAnnotations.get(this.gotermIdByIndex.get(this.leafIndices[leaf]))
                * invMaxAnnot;
        informationContent[leaf] = (float) (-Math.log(annotationRatio));
      }

      for (int row = 0; row < rowCount; row++) {
        float[] rowColumn = product.getColumn(row);
        for (int col = row; col < colCount; col++) {
          float similarity =
              this.getJaccardIndexWithIC(rowColumn, product.getColumn(col), informationContent);
          this.RWC.set(row, col, similarity);
          this.RWC.set(col, row, similarity);
        }
      }
    } else {
      this.logger.showTimedMessage("Jaccard index, _without_ IC");

      // Sum of every column of B, computed once so the pairwise loop can reuse it.
      float[] columnSums = new float[rowCount];
      for (int idx = 0; idx < rowCount; idx++) {
        float[] values = product.getColumn(idx);
        float total = 0.0f;
        for (int k = 0; k < values.length; k++) {
          total += values[k];
        }
        columnSums[idx] = total;
      }

      for (int row = 0; row < rowCount; row++) {
        float[] rowColumn = product.getColumn(row);
        for (int col = row; col < colCount; col++) {
          float similarity =
              this.getJaccardIndexWithoutIC(
                  rowColumn, product.getColumn(col), columnSums[row], columnSums[col]);
          this.RWC.set(row, col, similarity);
          this.RWC.set(col, row, similarity);
        }
      }
    }

    this.logger.showTimedMessage("RWC set!");
  }