コード例 #1
0
  /**
   * Returns descriptive strings for the GO terms with the best direct coverage of the genes that
   * are not yet covered by the already-selected terms, ordered from best to worst coverage.
   *
   * <p>Each string is whatever {@code GOSetTerm.getDescriptiveString()} produces for the term;
   * the intended format is {@code <GOTerm name> (<GOTerm id>): <Number of remaining genes
   * covered>}. If a user gene set has been imported, then the gene set considered for coverage is
   * the user gene set. Otherwise the complete gene set is used.
   *
   * @param selectedGONodes the set of GO term nodes which have already been selected
   * @param numTerms the maximum number of terms to return; values less than or equal to zero
   *     yield an empty array
   * @return an array of strings representing the GO terms with the best direct coverage of the
   *     remaining uncovered genes. The size of the array is the minimum of numTerms and the number
   *     of unselected GO terms that cover at least one remaining gene; it is never negative.
   */
  public String[] getTopTermsStringsDirect(Set<Node> selectedGONodes, int numTerms) {
    // A non-positive request can never yield entries. Returning here also avoids sizing the
    // result array with a negative length (NegativeArraySizeException) further down.
    if (numTerms <= 0) {
      return new String[0];
    }

    CyNetwork network = Cytoscape.getCurrentNetwork();

    // Determine the gene set for which coverage of GO terms should be determined.
    boolean userGenesImported = session.isUserGeneSetImported();
    Set<String> geneIds;
    if (userGenesImported) { // user gene set was imported, so use user gene set
      geneIds = new HashSet<String>(session.getUserGeneSet());
    } else { // no user gene set, use all genes
      geneIds = session.getGaru().getGeneIds();
    }

    // Genes directly covered by the terms that are already selected.
    Set<String> coveredGenes = new HashSet<String>();
    for (Node selected : selectedGONodes) {
      coveredGenes.addAll(
          GOSlimmerUtil.listToSet(
              GOSlimmerUtil.getGenesCoveredByGoNode(selected, false, userGenesImported)));
    }

    // Remaining genes = considered gene set minus covered genes. Work on a copy so that the set
    // handed out by the session is never modified.
    Set<String> uncoveredGenes = new HashSet<String>(geneIds);
    uncoveredGenes.removeAll(coveredGenes);

    // Score every unselected node in the network by how many remaining genes it covers directly.
    ArrayList<GOSetTerm> goTermList = new ArrayList<GOSetTerm>();
    Iterator<?> nodeIterator = network.nodesIterator();
    while (nodeIterator.hasNext()) {
      Node node = (Node) nodeIterator.next();
      if (selectedGONodes.contains(node)) {
        continue;
      }

      Set<String> nodeCoveredGenes =
          new HashSet<String>(
              GOSlimmerUtil.getGenesCoveredByGoNode(node, false, userGenesImported));

      // Walk the smaller set and probe the larger one to cut down on execution time.
      boolean nodeSetIsSmaller = nodeCoveredGenes.size() < uncoveredGenes.size();
      Set<String> walked = nodeSetIsSmaller ? nodeCoveredGenes : uncoveredGenes;
      Set<String> probed = nodeSetIsSmaller ? uncoveredGenes : nodeCoveredGenes;

      int newlyCovered = 0;
      for (String gene : walked) {
        if (probed.contains(gene)) {
          newlyCovered++;
        }
      }

      // Only terms that cover at least one remaining gene are candidates.
      if (newlyCovered > 0) {
        goTermList.add(new GOSetTerm(node, newlyCovered));
      }
    }

    // Sort ascending by GOSetTerm's natural ordering; the best terms end up at the tail.
    Collections.sort(goTermList);

    // Read the sorted list from the tail so the result is ordered best-first.
    int numSortedTerms = goTermList.size();
    int actualNumTerms = Math.min(numTerms, numSortedTerms);
    String[] goTermsStr = new String[actualNumTerms];
    for (int i = 0; i < actualNumTerms; i++) {
      goTermsStr[i] = goTermList.get(numSortedTerms - 1 - i).getDescriptiveString();
    }

    return goTermsStr;
  }
コード例 #2
0
  /**
   * Finds the unselected GO term node whose inferred gene coverage adds the most genes from
   * 'geneIds' that the already-selected nodes do not yet cover.
   *
   * <p>Nodes that are already selected, and nodes for which {@code isAncestorOfNodeInSet} reports
   * true against the selected set, are never candidates.
   *
   * @param selectedGONodes the set of GO term nodes which have already been selected
   * @param geneIds the set of geneIds for which to determine the best inferred coverage
   * @param userGenesImported true if a user gene set has been imported, false otherwise.
   * @return the candidate node covering the largest number of still-uncovered genes. When several
   *     candidates tie, the first one encountered while iterating the network's nodes is kept.
   *     Returns null when the best candidate covers no uncovered gene, or when there is no
   *     candidate at all.
   */
  public Node getNextBestTermInferred(
      Set<Node> selectedGONodes, Set<String> geneIds, boolean userGenesImported) {

    CyNetwork network = Cytoscape.getCurrentNetwork();

    // Collect every gene covered (with inference) by the nodes that are already selected.
    Set<String> alreadyCovered = new HashSet<String>();
    for (Node selected : selectedGONodes) {
      alreadyCovered.addAll(
          GOSlimmerUtil.listToSet(
              GOSlimmerUtil.getGenesCoveredByGoNode(selected, true, userGenesImported)));
    }

    // What is left to cover: the requested genes minus the ones already covered.
    Set<String> remaining = new HashSet<String>(geneIds);
    remaining.removeAll(alreadyCovered);
    int remainingCount = remaining.size();

    // Best candidate so far; -1 means no candidate has been examined yet.
    int topRank = -1;
    Node topRankedNode = null;

    Iterator<?> nodeIterator = network.nodesIterator();
    while (nodeIterator.hasNext()) {
      Node candidate = (Node) nodeIterator.next();

      // Skip selected nodes first, then ancestors of selected nodes.
      if (selectedGONodes.contains(candidate)
          || isAncestorOfNodeInSet(candidate, selectedGONodes)) {
        continue;
      }

      Set<String> candidateGenes =
          new HashSet<String>(
              GOSlimmerUtil.getGenesCoveredByGoNode(candidate, true, userGenesImported));

      // Count the overlap by walking the smaller set and probing the larger one.
      int rank = 0;
      if (candidateGenes.size() < remainingCount) {
        for (String gene : candidateGenes) {
          if (remaining.contains(gene)) {
            rank++;
          }
        }
      } else {
        for (String gene : remaining) {
          if (candidateGenes.contains(gene)) {
            rank++;
          }
        }
      }

      // Strictly greater: an equal score never displaces the current leader.
      if (rank > topRank) {
        topRank = rank;
        topRankedNode = candidate;
      }
    }

    System.out.println("top ranked node: " + topRankedNode + " with a rank of " + topRank);

    // A leader that covers nothing new is not worth selecting.
    return (topRank == 0) ? null : topRankedNode;
  }