コード例 #1
0
ファイル: SOTA.java プロジェクト: SamGG/mev-tm4
  /**
   * Splits the given cell by attaching two freshly created child cells to it.
   *
   * <p>The new children take the cell's place in the pred/succ chain (presumably the ordered
   * list of current leaf cells): the left child inherits the cell's predecessor, the right child
   * inherits its successor, and the two children are linked to each other. If the cell was the
   * chain head, the left child becomes the new head. The cell itself is unlinked from the chain,
   * the cluster counter is incremented by one, and both children start with a copy of the
   * cell's centroid (row 0 of {@code centroidGene}, one entry per sample).
   *
   * @param cellToDivide the cell that receives the two new children
   */
  private void divideCell(SOTACell cellToDivide) {
    final SOTACell leftChild = new SOTACell(numberOfSamples, dataMatrix);
    cellToDivide.left = leftChild;
    final SOTACell rightChild = new SOTACell(numberOfSamples, dataMatrix);
    cellToDivide.right = rightChild;

    // One cell is replaced by two, so the net cluster count grows by one.
    numberOfClusters++;

    leftChild.parent = cellToDivide;
    rightChild.parent = cellToDivide;

    // The two children are neighbours of each other in the chain.
    leftChild.succ = rightChild;
    rightChild.pred = leftChild;

    // Left child takes over the divided cell's predecessor (may be null at the chain start).
    final SOTACell before = cellToDivide.pred;
    leftChild.pred = before;
    if (before != null) {
      before.succ = leftChild;
    }

    // Right child takes over the divided cell's successor (may be null at the chain end).
    final SOTACell after = cellToDivide.succ;
    rightChild.succ = after;
    if (after != null) {
      after.pred = rightChild;
    }

    if (head == cellToDivide) {
      head = leftChild;
    }

    // The divided cell no longer takes part in the chain.
    cellToDivide.succ = null;
    cellToDivide.pred = null;

    // Both children inherit the parent's centroid.
    for (int sample = 0; sample < numberOfSamples; sample++) {
      leftChild.centroidGene.set(0, sample, cellToDivide.centroidGene.get(0, sample));
      rightChild.centroidGene.set(0, sample, cellToDivide.centroidGene.get(0, sample));
    }
  }
コード例 #2
0
ファイル: SOTA.java プロジェクト: SamGG/mev-tm4
  /**
   * Performs SOTA tree construction using the parameters provided in <code>AlgorithmData</code>.
   *
   * <p>The method reads the run parameters, builds a root cell holding every gene together with
   * two child cells, sets the root centroid to the per-sample mean of all non-NaN values, grows
   * the tree, and optionally runs the HCL steps selected by the <code>calcFullTreeHCL</code> and
   * <code>calcClusterHCL</code> parameters.
   *
   * @param data the algorithm input; must supply the parameters read below and a matrix
   *     registered under the key <code>"experiment"</code>
   * @return the same <code>AlgorithmData</code> instance that was passed in, with the
   *     <code>"factor"</code> parameter added (further results are presumably attached by the
   *     tree-growing and HCL helpers)
   * @throws AlgorithmException declared by the signature; no throw is visible in this method, so
   *     it presumably propagates from the helper calls
   */
  public AlgorithmData execute(AlgorithmData data) throws AlgorithmException {

    // Get parameters
    AlgorithmParameters params = data.getParams();
    sotaGenes = params.getBoolean("sota-cluster-genes", true);
    maxNumEpochs = params.getInt("max-epochs-per-cycle", 1000);
    maxNumCycles = params.getInt("max-number-of-cycles", 10);
    epochCriteria = params.getFloat("epoch-improvement-cutoff");
    endCriteria = params.getFloat("end-training-diversity");
    runToMaxCycles = params.getBoolean("run-to-max-cycles");
    useClusterVariance = params.getBoolean("use-cluster-variance", false);
    function = params.getInt("distance-function", EUCLIDEAN);
    absolute = params.getBoolean("distance-absolute", true);
    calcClusterHCL = params.getBoolean("calcClusterHCL", false);
    calculate_genes = params.getBoolean("calculate-genes", false);
    calculate_experiments = params.getBoolean("calculate-experiments", false);
    calcFullTreeHCL = params.getBoolean("calcFullTreeHCL", false);
    method = params.getInt("method-linkage", 0);
    pValue = params.getFloat("pValue", (float) 0.05);
    migW = params.getFloat("mig_w", (float) 0.01);
    migP = params.getFloat("mig_p", (float) 0.005);
    migS = params.getFloat("mig_s", (float) 0.001);
    neighborhoodLevel = params.getInt("neighborhood-level", 5);

    hcl_function = params.getInt("hcl-distance-function", EUCLIDEAN);
    hcl_absolute = params.getBoolean("hcl-distance-absolute", false);

    inData = data; // keep a handle on AlgorithmData for return

    // These metrics get a factor of -1, every other metric gets +1.
    boolean negatedMetric =
        (function == PEARSON)
            || (function == PEARSONUNCENTERED)
            || (function == PEARSONSQARED)
            || (function == COSINE)
            || (function == COVARIANCE)
            || (function == DOTPRODUCT)
            || (function == SPEARMANRANK)
            || (function == KENDALLSTAU);
    myFactor = negatedMetric ? -1.0f : 1.0f;

    factor = (float) 1.0; // scaling factor sent to getDistance methods
    inData.addParam("factor", String.valueOf(myFactor)); // return factor
    endCriteria *= myFactor; // alter polarity of endCriteria based on metric
    treeDiversity = Float.POSITIVE_INFINITY;
    dataMatrix = data.getMatrix("experiment"); // point dataMatrix at supplied matrix
    numberOfGenes = dataMatrix.getRowDimension();
    numberOfSamples = dataMatrix.getColumnDimension();
    myNucleus = new SOTACell[numberOfGenes]; // will be shortcut from gene index to a cell
    cycleDiversity = new Vector();

    // Reset max number of cycles if limited by number of genes
    if (maxNumCycles >= numberOfGenes) {
      maxNumCycles = numberOfGenes - 1;
    }

    // If using variability, resample data and select the cutoff based on the supplied p value.
    // This replaces the endCriteria value computed above.
    if (useClusterVariance) {
      endCriteria = resampleAndGetNewCutoff(dataMatrix, pValue);
    }

    // Initialize first cell and two children
    root = new SOTACell(numberOfSamples, dataMatrix);
    root.right = new SOTACell(numberOfSamples, dataMatrix);
    root.left = new SOTACell(numberOfSamples, dataMatrix);
    numberOfClusters = 2;
    root.left.parent = root;
    root.right.parent = root;
    head = root.left;
    root.left.succ = root.right;
    root.right.pred = root.left;

    // Per-sample count of non-NaN values; Java zero-initializes new int arrays.
    int[] numberOfValidGenesInSample = new int[numberOfSamples];

    // Initialize root centroid to zeros so it can be used as a running sum
    for (int j = 0; j < numberOfSamples; j++) {
      root.centroidGene.set(0, j, 0);
    }

    for (int i = 0; i < numberOfGenes; i++) {
      root.members.add(Integer.valueOf(i)); // add all gene indices to root
      myNucleus[i] = root; // set all gene nuclei to point to root
      for (int j = 0; j < numberOfSamples; j++) {
        float value = dataMatrix.get(i, j);
        if (!Float.isNaN(value)) {
          numberOfValidGenesInSample[j]++; // count genes with valid data in each sample
          root.centroidGene.set(0, j, root.centroidGene.get(0, j) + value); // running sum
        }
      }
    }

    mostDiverseCell = root;
    mostVariableCell = root;

    for (int j = 0; j < numberOfSamples; j++) {
      // Turn the per-sample sum into a mean over the valid (non-NaN) values
      root.centroidGene.set(0, j, root.centroidGene.get(0, j) / numberOfValidGenesInSample[j]);
      // Both children start from the root centroid
      root.left.centroidGene.set(0, j, root.centroidGene.get(0, j));
      root.right.centroidGene.set(0, j, root.centroidGene.get(0, j));
    }

    // Put first value into diversity vector
    initDivSum = getNodeDiversitySum(root);
    // The cast mirrors the narrowing the Float constructor performed and is a no-op for a float.
    cycleDiversity.add(Float.valueOf((float) initDivSum));
    root.cellDiversity = initDivSum / numberOfGenes;
    if (useClusterVariance) {
      root.cellVariance = getNodeVariance(root);
    }

    if (runToMaxCycles) {
      growSOTUnrestricted(); // make tree w/o regard to diversity
    } else {
      growSOT(); // construct tree
    }

    // If performing HCL on samples using all genes
    if (calcFullTreeHCL) {
      calcFullTreeHCL();
    }

    // Code for HCL clustering
    if (calcClusterHCL) {
      calculateClusterHCL(); // calculate HCL trees for SOTA clusters
    }

    return inData; // inData has results incorporated
  }