/**
 * Gives <code>cellToDivide</code> two newly created children and puts them in its
 * place in the <code>pred</code>/<code>succ</code> chain of cells.
 *
 * <p>The left child takes over the divided cell's predecessor link, the right child
 * takes over its successor link, and the two children are linked to each other. If
 * the divided cell was <code>head</code>, the left child becomes the new
 * <code>head</code>. The divided cell's own <code>pred</code>/<code>succ</code> links
 * are cleared, <code>numberOfClusters</code> is incremented by one, and both children
 * receive a copy of the divided cell's centroid values.
 *
 * @param cellToDivide the cell that receives two new children; must not be null
 */
private void divideCell(SOTACell cellToDivide) {
    // Create the children (left first, then right, as before).
    cellToDivide.left = new SOTACell(numberOfSamples, dataMatrix);
    cellToDivide.right = new SOTACell(numberOfSamples, dataMatrix);
    SOTACell leftChild = cellToDivide.left;
    SOTACell rightChild = cellToDivide.right;

    numberOfClusters++;

    leftChild.parent = cellToDivide;
    rightChild.parent = cellToDivide;

    // The two children are adjacent in the chain.
    rightChild.pred = leftChild;
    leftChild.succ = rightChild;

    // Left child inherits the divided cell's predecessor (possibly null).
    leftChild.pred = cellToDivide.pred;
    if (leftChild.pred != null) {
        leftChild.pred.succ = leftChild;
    }

    // Right child inherits the divided cell's successor (possibly null).
    rightChild.succ = cellToDivide.succ;
    if (rightChild.succ != null) {
        rightChild.succ.pred = rightChild;
    }

    if (cellToDivide == head) {
        head = leftChild;
    }

    // The divided cell no longer takes part in the chain.
    cellToDivide.succ = null;
    cellToDivide.pred = null;

    // Both children start from the divided cell's centroid.
    for (int i = 0; i < numberOfSamples; i++) {
        leftChild.centroidGene.set(0, i, cellToDivide.centroidGene.get(0, i));
        rightChild.centroidGene.set(0, i, cellToDivide.centroidGene.get(0, i));
    }
}
/** * Performs SOTA tree construction given parameters provided in <code>AlgorithmData</code>. * Results are returned in AlgorthmData */ public AlgorithmData execute(AlgorithmData data) throws AlgorithmException { // Get parameters AlgorithmParameters params = data.getParams(); sotaGenes = params.getBoolean("sota-cluster-genes", true); maxNumEpochs = params.getInt("max-epochs-per-cycle", 1000); maxNumCycles = params.getInt("max-number-of-cycles", 10); epochCriteria = params.getFloat("epoch-improvement-cutoff"); endCriteria = params.getFloat("end-training-diversity"); runToMaxCycles = params.getBoolean("run-to-max-cycles"); useClusterVariance = params.getBoolean("use-cluster-variance", false); function = params.getInt("distance-function", EUCLIDEAN); absolute = params.getBoolean("distance-absolute", true); calcClusterHCL = params.getBoolean("calcClusterHCL", false); calculate_genes = params.getBoolean("calculate-genes", false); calculate_experiments = params.getBoolean("calculate-experiments", false); calcFullTreeHCL = params.getBoolean("calcFullTreeHCL", false); method = params.getInt("method-linkage", 0); pValue = params.getFloat("pValue", (float) 0.05); migW = params.getFloat("mig_w", (float) 0.01); migP = params.getFloat("mig_p", (float) 0.005); migS = params.getFloat("mig_s", (float) 0.001); neighborhoodLevel = params.getInt("neighborhood-level", 5); hcl_function = params.getInt("hcl-distance-function", EUCLIDEAN); hcl_absolute = params.getBoolean("hcl-distance-absolute", false); inData = data; // keep a handle on AlgorithmData for return // Set factor based on function if ((function == PEARSON) || (function == PEARSONUNCENTERED) || (function == PEARSONSQARED) || (function == COSINE) || (function == COVARIANCE) || (function == DOTPRODUCT) || (function == SPEARMANRANK) || (function == KENDALLSTAU)) { myFactor = -1.0f; } else { myFactor = 1.0f; } factor = (float) 1.0; // scaling factor sent to getDistance methods inData.addParam("factor", 
String.valueOf(myFactor)); // return factor endCriteria *= myFactor; // alter polarity fo endCriteria based on metric treeDiversity = Float.POSITIVE_INFINITY; dataMatrix = data.getMatrix("experiment"); // point dataMatrix at supplied matrix numberOfGenes = dataMatrix.getRowDimension(); numberOfSamples = dataMatrix.getColumnDimension(); myNucleus = new SOTACell[numberOfGenes]; // will be shortcut from gene index to a cell cycleDiversity = new Vector(); // reset max number of cycles if limited by number of genes if (maxNumCycles >= numberOfGenes) maxNumCycles = numberOfGenes - 1; // if using variablility, resample data, select cutoff based on p value supplied if (useClusterVariance) { endCriteria = resampleAndGetNewCutoff(dataMatrix, pValue); } // initialize first cell and two children root = new SOTACell(numberOfSamples, dataMatrix); root.right = new SOTACell(numberOfSamples, dataMatrix); root.left = new SOTACell(numberOfSamples, dataMatrix); numberOfClusters = 2; root.left.parent = root; root.right.parent = root; head = root.left; root.left.succ = root.right; root.right.pred = root.left; int[] numberOfValidGenesInSample = new int[numberOfSamples]; // set to zero for (int i = 0; i < numberOfSamples; i++) numberOfValidGenesInSample[i] = 0; // Inialize centroid root centroid to zeros for (int i = 0; i < numberOfSamples; i++) { root.centroidGene.set(0, i, 0); } for (int i = 0; i < numberOfGenes; i++) { root.members.add(new Integer(i)); // add all gene indices to root myNucleus[i] = root; // set all gene nuclei to point to root for (int j = 0; j < numberOfSamples; j++) { if (!(Float.isNaN(dataMatrix.get(i, j)))) { numberOfValidGenesInSample[j]++; // count number of genes with valid data in each sample root.centroidGene.set( 0, j, root.centroidGene.get(0, j) + dataMatrix.get(i, j)); // calcualtes sum } } } mostDiverseCell = root; mostVariableCell = root; for (int j = 0; j < numberOfSamples; j++) { root.centroidGene.set( 0, j, root.centroidGene.get(0, j) / 
numberOfValidGenesInSample[j]); // get a mean root centroid root.left.centroidGene.set(0, j, root.centroidGene.get(0, j)); // assign to children root.right.centroidGene.set(0, j, root.centroidGene.get(0, j)); } // put first value into diversity vector initDivSum = getNodeDiversitySum(root); cycleDiversity.add(new Float(initDivSum)); root.cellDiversity = initDivSum / numberOfGenes; if (useClusterVariance) root.cellVariance = getNodeVariance(root); if (runToMaxCycles) growSOTUnrestricted(); // make tree w/o regard to diversity else growSOT(); // Construct tree // If performing HCL on samples using all genes if (calcFullTreeHCL) { calcFullTreeHCL(); } // Code for HCL clustering if (calcClusterHCL) { calculateClusterHCL(); // calculate HCL trees for SOTA clusters } return inData; // inData has results incorporated }