コード例 #1
0
ファイル: SOTA.java プロジェクト: SamGG/mev-tm4
  /**
   * Runs one training pass over every gene currently held by {@code trainingNode}.
   *
   * <p>For each member gene the closest daughter cell is looked up (via {@code
   * findMyDaughterCell}) and its centroid is migrated with weight {@code migW}. When the
   * daughter's sister has no children, the daughter's parent and the sister are migrated as well
   * (weights {@code migP} and {@code migS}). Genes are only counted here; this method itself does
   * not add them to any membership list.
   *
   * @param trainingNode the node whose member genes are presented to its children
   * @return a {@code Dimension} constructed as {@code (leftCount, rightCount)}: the number of genes
   *     whose closest daughter was {@code trainingNode.left}, and the number of all others
   */
  private Dimension runNodeEpoch(SOTACell trainingNode) {
    int assignedLeft = 0;
    int assignedRight = 0;

    for (int idx = 0; idx < trainingNode.members.size(); idx++) {
      final int gene = ((Integer) trainingNode.members.elementAt(idx)).intValue();

      // Search is restricted to the two children of the training node.
      final SOTACell winner = findMyDaughterCell(trainingNode, gene);

      // Callers should later verify that neither side ended up empty.
      if (winner == trainingNode.left) {
        assignedLeft++;
      } else {
        assignedRight++;
      }

      winner.migrateCentroid(gene, migW);

      final SOTACell sister = findSister(winner);
      final boolean sisterIsLeaf = sister.left == null && sister.right == null;
      if (sisterIsLeaf) {
        // The sister has no offspring, so the parent and the sister follow the winner.
        winner.parent.migrateCentroid(gene, migP);
        sister.migrateCentroid(gene, migS);
      }
    }

    return new Dimension(assignedLeft, assignedRight);
  }
コード例 #2
0
ファイル: SOTA.java プロジェクト: SamGG/mev-tm4
  /**
   * Splits {@code cellToDivide} into two freshly created daughter cells.
   *
   * <p>The daughters take the place of {@code cellToDivide} in the {@code pred}/{@code succ}
   * linked list (left daughter first, then right daughter), {@code head} is moved to the left
   * daughter if it pointed at the divided cell, the divided cell is unlinked from the list, and
   * both daughters receive a copy of the parent's centroid. {@code numberOfClusters} is
   * incremented by one.
   *
   * @param cellToDivide the cell to replace with two children
   */
  private void divideCell(SOTACell cellToDivide) {
    final SOTACell leftChild = new SOTACell(numberOfSamples, dataMatrix);
    cellToDivide.left = leftChild;
    final SOTACell rightChild = new SOTACell(numberOfSamples, dataMatrix);
    cellToDivide.right = rightChild;

    numberOfClusters++;

    // Tree links.
    leftChild.parent = cellToDivide;
    rightChild.parent = cellToDivide;

    // The two daughters are adjacent in the linked list.
    rightChild.pred = leftChild;
    leftChild.succ = rightChild;

    // Left daughter inherits the divided cell's predecessor (possibly none).
    final SOTACell before = cellToDivide.pred;
    leftChild.pred = before;
    if (before != null) {
      before.succ = leftChild;
    }

    // Right daughter inherits the divided cell's successor (possibly none).
    final SOTACell after = cellToDivide.succ;
    rightChild.succ = after;
    if (after != null) {
      after.pred = rightChild;
    }

    if (head == cellToDivide) {
      head = leftChild;
    }

    // The divided cell no longer takes part in the linked list.
    cellToDivide.succ = null;
    cellToDivide.pred = null;

    // Both daughters start out at the parent's centroid.
    for (int sample = 0; sample < numberOfSamples; sample++) {
      leftChild.centroidGene.set(0, sample, cellToDivide.centroidGene.get(0, sample));
      rightChild.centroidGene.set(0, sample, cellToDivide.centroidGene.get(0, sample));
    }
  }
コード例 #3
0
ファイル: SOTA.java プロジェクト: SamGG/mev-tm4
  /**
   * Finds the cell in the linked list starting at {@code head} whose centroid is closest to the
   * given gene, and records it as that gene's nucleus.
   *
   * <p>Ties are resolved in favor of the cell encountered later in the list (the comparison is
   * {@code <=}). If the closest cell differs from the one currently stored in {@code
   * myNucleus[geneNum]}, the entry is replaced and the gene is added to the new cell via {@code
   * addMember}.
   *
   * @param geneNum row index of the gene in {@code dataMatrix}
   * @return the closest cell; {@code head} if no cell produced a distance that compared {@code <=}
   *     to the running minimum
   */
  private SOTACell findMyCell(int geneNum) {
    SOTACell nearest = head;
    double nearestDist = Double.POSITIVE_INFINITY;

    for (SOTACell cell = head; cell != null; cell = cell.succ) {
      final double dist =
          ExperimentUtil.geneDistance(
              dataMatrix, cell.centroidGene, geneNum, 0, function, factor, absolute);

      if (dist <= nearestDist) {
        nearestDist = dist;
        nearest = cell;
      }
    }

    // Nothing to update when the gene already points at this cell.
    if (myNucleus[geneNum] == nearest) {
      return nearest;
    }

    myNucleus[geneNum] = nearest;
    nearest.addMember(geneNum);
    return nearest;
  }
コード例 #4
0
ファイル: SOTA.java プロジェクト: SamGG/mev-tm4
  /**
   * Finds the closest cell to a gene, searching only within a neighborhood of the training cell.
   *
   * <p>The search root is found by climbing at most {@code level} parents above {@code
   * trainingCell} (stopping early at a cell with no parent). The candidate cells are those
   * collected by {@code getCellsBelow} from that root. The candidate with the strictly smallest
   * distance wins; if no candidate has a distance below positive infinity, {@code trainingCell}
   * is kept. If the winner differs from {@code myNucleus[geneNum]}, the entry is replaced and the
   * gene is added to the winner via {@code addMember}.
   *
   * @param trainingCell the cell the search neighborhood is centered on
   * @param geneNum row index of the gene in {@code dataMatrix}
   * @param level maximum number of parent links to climb before collecting candidates
   * @return the selected cell
   */
  private SOTACell findMyCellInSubTree(SOTACell trainingCell, int geneNum, int level) {

    // Climb to the root, or to 'level' nodes above the training cell, whichever comes first.
    SOTACell ancestor = trainingCell;
    for (int climbed = 0; ancestor.parent != null && climbed < level; climbed++) {
      ancestor = ancestor.parent;
    }

    Vector candidates = new Vector();
    getCellsBelow(candidates, ancestor);

    SOTACell nearest = trainingCell;
    float nearestDist = Float.POSITIVE_INFINITY;

    for (Object entry : candidates) {
      final SOTACell candidate = (SOTACell) entry;
      final float dist =
          ExperimentUtil.geneDistance(
              dataMatrix, candidate.centroidGene, geneNum, 0, function, factor, absolute);
      if (dist < nearestDist) {
        nearestDist = dist;
        nearest = candidate;
      }
    }

    // Only touch the nucleus table and membership when the assignment actually changes.
    if (myNucleus[geneNum] == nearest) {
      return nearest;
    }

    myNucleus[geneNum] = nearest;
    nearest.addMember(geneNum);
    return nearest;
  }
コード例 #5
0
ファイル: SOTA.java プロジェクト: SamGG/mev-tm4
  /**
   * Performs SOTA tree construction given parameters provided in <code>AlgorithmData</code>.
   *
   * <p>The method reads its configuration from {@code data.getParams()}, takes the expression
   * matrix stored under the key {@code "experiment"}, builds a root cell with two children whose
   * centroids are the per-sample mean of all non-NaN values, and then grows the tree with either
   * {@code growSOTUnrestricted()} or {@code growSOT()}. Optional HCL steps run afterwards.
   *
   * <p>NOTE(review): the result matrices and clusters are not added in this method itself; they
   * are presumably attached to {@code inData} by the growth routines — confirm against those
   * methods.
   *
   * @param data the input container; the same instance is kept in {@code inData}, receives the
   *     {@code "factor"} parameter, and is returned
   * @return the {@code AlgorithmData} instance that was passed in
   * @throws AlgorithmException declared by the signature; no explicit throw is visible in this
   *     method
   */
  public AlgorithmData execute(AlgorithmData data) throws AlgorithmException {

    // Get parameters
    // NOTE(review): "epoch-improvement-cutoff", "end-training-diversity" and "run-to-max-cycles"
    // are read without a default value; behavior when they are absent depends on
    // AlgorithmParameters — confirm.
    AlgorithmParameters params = data.getParams();
    sotaGenes = params.getBoolean("sota-cluster-genes", true);
    maxNumEpochs = params.getInt("max-epochs-per-cycle", 1000);
    maxNumCycles = params.getInt("max-number-of-cycles", 10);
    epochCriteria = params.getFloat("epoch-improvement-cutoff");
    endCriteria = params.getFloat("end-training-diversity");
    runToMaxCycles = params.getBoolean("run-to-max-cycles");
    useClusterVariance = params.getBoolean("use-cluster-variance", false);
    function = params.getInt("distance-function", EUCLIDEAN);
    absolute = params.getBoolean("distance-absolute", true);
    calcClusterHCL = params.getBoolean("calcClusterHCL", false);
    calculate_genes = params.getBoolean("calculate-genes", false);
    calculate_experiments = params.getBoolean("calculate-experiments", false);
    calcFullTreeHCL = params.getBoolean("calcFullTreeHCL", false);
    method = params.getInt("method-linkage", 0);
    pValue = params.getFloat("pValue", (float) 0.05);
    migW = params.getFloat("mig_w", (float) 0.01);
    migP = params.getFloat("mig_p", (float) 0.005);
    migS = params.getFloat("mig_s", (float) 0.001);
    neighborhoodLevel = params.getInt("neighborhood-level", 5);

    hcl_function = params.getInt("hcl-distance-function", EUCLIDEAN);
    hcl_absolute = params.getBoolean("hcl-distance-absolute", false);

    inData = data; // keep a handle on AlgorithmData for return

    // Set factor based on function: -1 for the listed metrics, +1 for every other metric
    if ((function == PEARSON)
        || (function == PEARSONUNCENTERED)
        || (function == PEARSONSQARED)
        || (function == COSINE)
        || (function == COVARIANCE)
        || (function == DOTPRODUCT)
        || (function == SPEARMANRANK)
        || (function == KENDALLSTAU)) {
      myFactor = -1.0f;
    } else {
      myFactor = 1.0f;
    }

    factor = (float) 1.0; // scaling factor sent to getDistance methods
    inData.addParam("factor", String.valueOf(myFactor)); // return factor
    endCriteria *= myFactor; // alter polarity of endCriteria based on metric
    treeDiversity = Float.POSITIVE_INFINITY;
    dataMatrix = data.getMatrix("experiment"); // point dataMatrix at supplied matrix
    numberOfGenes = dataMatrix.getRowDimension();
    numberOfSamples = dataMatrix.getColumnDimension();
    myNucleus = new SOTACell[numberOfGenes]; // will be shortcut from gene index to a cell
    cycleDiversity = new Vector();

    // reset max number of cycles if limited by number of genes
    if (maxNumCycles >= numberOfGenes) maxNumCycles = numberOfGenes - 1;

    // if using variability, resample data, select cutoff based on p value supplied
    // (this replaces the sign-adjusted endCriteria computed above)
    if (useClusterVariance) {
      endCriteria = resampleAndGetNewCutoff(dataMatrix, pValue);
    }

    // initialize first cell and two children
    root = new SOTACell(numberOfSamples, dataMatrix);
    root.right = new SOTACell(numberOfSamples, dataMatrix);
    root.left = new SOTACell(numberOfSamples, dataMatrix);
    numberOfClusters = 2;
    root.left.parent = root;
    root.right.parent = root;
    // the linked list of cells starts at the left child and continues to the right child
    head = root.left;
    root.left.succ = root.right;
    root.right.pred = root.left;

    int[] numberOfValidGenesInSample = new int[numberOfSamples];
    // set to zero
    for (int i = 0; i < numberOfSamples; i++) numberOfValidGenesInSample[i] = 0;

    // Initialize root centroid to zeros
    for (int i = 0; i < numberOfSamples; i++) {
      root.centroidGene.set(0, i, 0);
    }

    for (int i = 0; i < numberOfGenes; i++) {
      root.members.add(new Integer(i)); // add all gene indices to root
      myNucleus[i] = root; // set all gene nuclei to point to root
      for (int j = 0; j < numberOfSamples; j++) {
        // NaN entries are skipped so they do not contribute to the sum or the count
        if (!(Float.isNaN(dataMatrix.get(i, j)))) {
          numberOfValidGenesInSample[j]++; // count number of genes with valid data in each sample

          root.centroidGene.set(
              0, j, root.centroidGene.get(0, j) + dataMatrix.get(i, j)); // calculates sum
        }
      }
    }

    mostDiverseCell = root;
    mostVariableCell = root;

    // NOTE(review): a sample with no valid (non-NaN) values has a count of zero here, so the
    // division below is by zero for that column — confirm whether such input can occur.
    for (int j = 0; j < numberOfSamples; j++) {
      root.centroidGene.set(
          0,
          j,
          root.centroidGene.get(0, j) / numberOfValidGenesInSample[j]); // get a mean root centroid
      root.left.centroidGene.set(0, j, root.centroidGene.get(0, j)); // assign to children
      root.right.centroidGene.set(0, j, root.centroidGene.get(0, j));
    }

    // put first value into diversity vector
    initDivSum = getNodeDiversitySum(root);
    cycleDiversity.add(new Float(initDivSum));
    root.cellDiversity = initDivSum / numberOfGenes;
    if (useClusterVariance) root.cellVariance = getNodeVariance(root);

    if (runToMaxCycles) growSOTUnrestricted(); // make tree w/o regard to diversity
    else growSOT(); // Construct tree

    // If performing HCL on samples using all genes
    if (calcFullTreeHCL) {
      calcFullTreeHCL();
    }

    // Code for HCL clustering
    if (calcClusterHCL) {
      calculateClusterHCL(); // calculate HCL trees for SOTA clusters
    }

    return inData; // inData has results incorporated
  }
コード例 #6
0
ファイル: SOTA.java プロジェクト: SamGG/mev-tm4
  /**
   * Recomputes the diversity of every cell in the linked list starting at {@code head} and, when
   * {@code useClusterVariance} is set, the cell variances as well.
   *
   * <p>For each cell, diversity is the sum of the distances between its centroid and each member
   * gene, divided by the member count. Variance is the largest gene-to-gene distance among the
   * members and is only recomputed for cells flagged with {@code changedMembership}; the flag is
   * cleared for every visited cell. {@code mostDiverseCell} and {@code mostVariableCell} start at
   * {@code head} and are replaced by the cell with the largest value among cells holding more
   * than one member. {@code treeDiversity} is set to the sum of all member-to-centroid distances.
   */
  private void setDiversities() {
    double largestDiversity = -1;
    double largestVariance = -1;
    double summedTreeDistance = 0;

    mostDiverseCell = head;
    mostVariableCell = head;

    for (SOTACell cell = head; cell != null; cell = cell.succ) {
      final int population = cell.members.size();
      final boolean hasSeveralMembers = population > 1;

      // Sum of member-to-centroid distances for this cell.
      double summedCellDistance = 0;
      for (int m = 0; m < population; m++) {
        final int gene = ((Integer) (cell.members.elementAt(m))).intValue();
        summedCellDistance +=
            ExperimentUtil.geneDistance(
                dataMatrix, cell.centroidGene, gene, 0, function, factor, absolute);
      }

      cell.cellDiversity = summedCellDistance / population;

      if (hasSeveralMembers && cell.cellDiversity > largestDiversity) {
        largestDiversity = cell.cellDiversity;
        mostDiverseCell = cell;
      }

      summedTreeDistance += summedCellDistance;

      if (useClusterVariance) {
        double variance;

        if (cell.changedMembership) {
          // Membership changed: variance is the maximum pairwise gene-to-gene distance.
          variance = 0;
          for (int a = 0; a < population; a++) {
            final int geneA = ((Integer) (cell.members.elementAt(a))).intValue();
            for (int b = 0; b < population; b++) {
              final int geneB = ((Integer) (cell.members.elementAt(b))).intValue();
              final double pairDistance =
                  ExperimentUtil.geneDistance(
                      dataMatrix, null, geneA, geneB, function, factor, absolute);
              if (pairDistance > variance) {
                variance = pairDistance;
              }
            }
          }
          cell.cellVariance = variance;
        } else {
          // Membership unchanged: reuse the stored variance.
          variance = cell.cellVariance;
        }

        if (hasSeveralMembers && variance > largestVariance) {
          largestVariance = variance;
          mostVariableCell = cell;
        }
      }

      // Variance (if used) is now current for this population.
      cell.changedMembership = false;
    }

    treeDiversity = summedTreeDistance;
  }
コード例 #7
0
ファイル: SOTA.java プロジェクト: SamGG/mev-tm4
  /**
   * Collects the clustering results from the cell list and tree and stores them in {@code inData}.
   *
   * <p>Leaves are threaded from left to right through {@code succ}. If displayed top to bottom the
   * centroids would be reversed, so the list is walked from the tail back to the head (via {@code
   * pred}) and the clusters are accumulated in that reverse order.
   *
   * <p>Entries added to {@code inData}: parameter {@code "cycles"}; cluster {@code "cluster"};
   * matrices {@code "centroid-matrix"}, {@code "cluster-variances"}, {@code "cluster-diversity"},
   * {@code "cycle-diversity"} and {@code "node-heights"}; int arrays {@code "cluster-population"},
   * {@code "left-child"}, {@code "right-child"} and {@code "node-population"}; and, when {@code
   * useClusterVariance} is set, parameter {@code "computed-var-cutoff"}.
   */
  private void getResults() {

    SOTACell curr = head;
    int numCells = 0;
    FloatMatrix centroidFM = new FloatMatrix(numberOfClusters, numberOfSamples);
    FloatMatrix varianceFM = new FloatMatrix(numberOfClusters, numberOfSamples);

    int[] clusterSize = new int[numberOfClusters];
    FloatMatrix clusterDiversity = new FloatMatrix(numberOfClusters, 1);
    int numDiv = cycleDiversity.size();
    FloatMatrix cycleDivFM = new FloatMatrix(numDiv, 1);

    clusters = new Cluster();
    NodeList nodeList = clusters.getNodeList();
    Node newNode;
    int[] clusterMembership;
    int clusterPop;

    // move to tail
    while (curr.succ != null) curr = curr.succ;

    // now curr is at the tail
    // The bound is strict: every per-cluster container above has exactly numberOfClusters
    // slots, so index numberOfClusters must never be written.
    while (numCells < numberOfClusters && curr != null) {

      for (int i = 0; i < numberOfSamples; i++) {
        centroidFM.set(numCells, i, curr.centroidGene.get(0, i));
        varianceFM.set(numCells, i, curr.getColumnVar(i));
      }
      clusterPop = curr.members.size();
      clusterSize[numCells] = clusterPop;
      clusterDiversity.set(
          numCells,
          0,
          (float) curr.cellDiversity
              * (float) myFactor); // alter polarity by myFactor based on metric

      // accumulate cluster probe indices
      clusterMembership = new int[clusterPop];
      for (int i = 0; i < clusterPop; i++) {
        clusterMembership[i] = ((Integer) (curr.members.elementAt(i))).intValue();
      }

      newNode = new Node();
      newNode.setProbesIndexes(clusterMembership);
      nodeList.addNode(newNode);

      numCells++;
      curr = curr.pred;
    }

    // now accumulate cycle diversity information, normalized to the first recorded value
    if (myFactor == 1) {
      float initDiv = ((Float) (cycleDiversity.elementAt(0))).floatValue();
      for (int i = 0; i < numDiv; i++) {
        cycleDivFM.set(i, 0, (((Float) (cycleDiversity.elementAt(i))).floatValue()) / initDiv);
      }
    } else {
      // Each value is shifted by |numberOfGenes * myFactor| before normalizing.
      // NOTE(review): presumably this keeps the values positive for the metrics that use a
      // negative factor — confirm.
      float lowerLim = numberOfGenes * myFactor;
      float initDiv = ((Float) (cycleDiversity.elementAt(0))).floatValue() + Math.abs(lowerLim);
      for (int i = 0; i < numDiv; i++) {
        cycleDivFM.set(
            i,
            0,
            (((Float) (cycleDiversity.elementAt(i))).floatValue() + Math.abs(lowerLim)) / initDiv);
      }
    }
    // put all important information into AlgorithmData
    inData.addParam("cycles", String.valueOf(numberOfClusters));
    inData.addCluster("cluster", clusters);
    inData.addMatrix("centroid-matrix", centroidFM);
    inData.addMatrix("cluster-variances", varianceFM);
    inData.addMatrix("cluster-diversity", clusterDiversity);
    inData.addMatrix("cycle-diversity", cycleDivFM);
    inData.addIntArray("cluster-population", clusterSize);

    // Additions to AlgorithmData to allow drawing arrays
    float[] nodeHeight = new float[numberOfClusters * 2];
    int[] nodePopulation = new int[numberOfClusters * 2];
    int[] leftChild = new int[nodeHeight.length * 2];
    int[] rightChild = new int[nodeHeight.length * 2];

    initializeReturnValues(nodeHeight, nodePopulation, leftChild, rightChild);
    utilCounter = 0;
    loadReturnValues(root, 0, nodeHeight, nodePopulation, leftChild, rightChild);
    inData.addMatrix("node-heights", new FloatMatrix(nodeHeight, nodeHeight.length));
    inData.addIntArray("left-child", leftChild);
    inData.addIntArray("right-child", rightChild);
    inData.addIntArray("node-population", nodePopulation);

    if (useClusterVariance) inData.addParam("computed-var-cutoff", String.valueOf(endCriteria));
  }