/**
 * Chooses which of a parent's two daughter cells is nearer to a gene.
 *
 * @param parentNode cell whose {@code left} and {@code right} children are compared
 * @param geneIndex  gene index handed to {@code ExperimentUtil.geneDistance}
 * @return {@code parentNode.left} when its centroid distance is less than or equal to
 *         that of {@code parentNode.right}; otherwise {@code parentNode.right}
 */
private SOTACell findMyDaughterCell(SOTACell parentNode, int geneIndex) {
    float toLeft =
            ExperimentUtil.geneDistance(
                    dataMatrix, parentNode.left.centroidGene, geneIndex, 0, function, factor, absolute);
    float toRight =
            ExperimentUtil.geneDistance(
                    dataMatrix, parentNode.right.centroidGene, geneIndex, 0, function, factor, absolute);

    // Ties go to the left daughter; a NaN on either side fails the test and yields the right one.
    return (toLeft <= toRight) ? parentNode.left : parentNode.right;
}
/**
 * Returns the longest cumulative centroid-to-centroid distance from any cell in the
 * {@code head}/{@code succ} list up through its chain of {@code parent} links.
 *
 * <p>For every cell reachable from {@code head} via {@code succ}, the distances between
 * each node and its parent are summed until a node without a parent is reached. The
 * largest such sum is returned.
 *
 * @return the maximum accumulated path length, or {@code -1} when the cell list is empty
 */
public double getMaxLeafToRootPath() {
    double maxDist = -1;

    for (SOTACell curr = head; curr != null; curr = curr.succ) {
        double cumDist = 0;

        // Walk upward, adding the distance of every child -> parent edge on the way.
        for (SOTACell traveler = curr; traveler.parent != null; traveler = traveler.parent) {
            cumDist +=
                    ExperimentUtil.geneDistance(
                            traveler.parent.centroidGene,
                            traveler.centroidGene,
                            0,
                            0,
                            function,
                            factor,
                            absolute);
        }

        // Keep the LARGEST path. The previous '<' comparison against the -1 sentinel
        // only fired for sums below -1, so the method returned -1 for any non-negative path lengths.
        if (cumDist > maxDist) {
            maxDist = cumDist;
        }
    }
    return maxDist;
}
/**
 * Scans every cell in the {@code head}/{@code succ} list for the one whose centroid is
 * nearest to the given gene and records that cell as the gene's nucleus.
 *
 * <p>When several cells are equally near, the one visited last wins (the comparison is
 * {@code <=}). If the winning cell differs from {@code myNucleus[geneNum]}, the entry is
 * updated and the gene is added to the cell's members.
 *
 * @param geneNum gene index, also used to index {@code myNucleus}
 * @return the selected cell; {@code head} if no cell's distance passed the comparison
 */
private SOTACell findMyCell(int geneNum) {
    SOTACell nearest = head;
    double bestDist = Float.POSITIVE_INFINITY;

    for (SOTACell cell = head; cell != null; cell = cell.succ) {
        double candidateDist =
                ExperimentUtil.geneDistance(
                        dataMatrix, cell.centroidGene, geneNum, 0, function, factor, absolute);
        if (candidateDist <= bestDist) {
            bestDist = candidateDist;
            nearest = cell;
        }
    }

    // Nothing to record when the gene already belongs to this cell.
    if (myNucleus[geneNum] == nearest) {
        return nearest;
    }
    myNucleus[geneNum] = nearest;
    nearest.addMember(geneNum);
    return nearest;
}
/**
 * Computes the largest pairwise distance between any two members of a cell.
 *
 * <p>Each unordered pair of members is evaluated once. NaN distances are skipped.
 *
 * @param curr cell whose {@code members} are compared pairwise
 * @return the maximum non-NaN pairwise distance found, or {@code 0} when no distance
 *         exceeds zero (including cells with fewer than two members)
 */
private float getNodeVariance(SOTACell curr) {
    final int memberCount = curr.members.size();
    float widest = 0;

    for (int a = 0; a < memberCount; a++) {
        for (int b = a + 1; b < memberCount; b++) {
            int firstGene = ((Integer) curr.members.elementAt(a)).intValue();
            int secondGene = ((Integer) curr.members.elementAt(b)).intValue();
            float pairDist =
                    ExperimentUtil.geneDistance(
                            dataMatrix, null, firstGene, secondGene, function, factor, absolute);
            if (Float.isNaN(pairDist)) {
                continue;
            }
            if (pairDist > widest) {
                widest = pairDist;
            }
        }
    }
    return widest;
}
/** Deterimins variablity cutoff given a p value */ private float resampleAndGetNewCutoff(FloatMatrix origMatrix, float p) { FloatMatrix randMatrix = randomizeMatrix(origMatrix); int rows = origMatrix.getRowDimension(); int cols = origMatrix.getColumnDimension(); int NUM_BINS = 500; int numSamplePoints = 0; int cumCutoff; float[][] distances = new float[rows][rows]; for (int i = 0; i < rows - 1; i++) { for (int j = 0; j < i; j++) { distances[i][j] = ExperimentUtil.geneDistance(randMatrix, null, i, j, function, factor, absolute); } } // now have all gene to gene distances float[] minAndMax = getMinAndMax(distances, rows); float min = minAndMax[0]; float max = minAndMax[1]; float[] cumDist = new float[NUM_BINS]; for (int i = 0; i < NUM_BINS; i++) { cumDist[i] = 0; } for (int i = 0; i < rows - 1; i++) { for (int j = 0; j < i; j++) { cumDist[(int) ((float) (NUM_BINS - 1) * (distances[i][j] - min) / (max - min))]++; numSamplePoints++; } } for (int i = 0; i < NUM_BINS; i++) { cumDist[i] /= (float) numSamplePoints; } // now find the bin that has float cumCount = 0; int bin = 0; // cumCutoff = (int)(numSamplePoints * p); while (cumCount < p) { cumCount += cumDist[bin]; bin++; } return (((float) bin - (float) 0.5) / (float) NUM_BINS) * (max - min) + min; }
// gets sum of min distance of genes in parentNode to child centroids private double getInputError(SOTACell parentNode) { int n = parentNode.members.size(); double cumErr = 0; double d1; double d2; int geneIndex; for (int i = 0; i < n; i++) { geneIndex = ((Integer) (parentNode.members.elementAt(i))).intValue(); d1 = ExperimentUtil.geneDistance( dataMatrix, parentNode.left.centroidGene, geneIndex, 0, function, factor, absolute); d2 = ExperimentUtil.geneDistance( dataMatrix, parentNode.right.centroidGene, geneIndex, 0, function, factor, absolute); if (d1 <= d2) cumErr += d1; else cumErr += d2; } return cumErr / n; }
/**
 * Recursively flattens the subtree under {@code subRoot} into parallel arrays.
 *
 * <p>Slots are handed out in pre-order using the {@code utilCounter} field: a node's
 * left child takes the next free slot, the whole left subtree is written, and then the
 * right child takes the next free slot after that.
 *
 * @param subRoot node to record at {@code index}
 * @param index   array slot assigned to {@code subRoot}
 * @param h       receives the centroid distance from each node to its parent
 *                ({@code 0} for {@code root})
 * @param pop     receives each node's member count
 * @param left    receives the slot of each node's left child (written only when the
 *                node has a left child)
 * @param right   receives the slot of each node's right child (written only when the
 *                node has a left child)
 */
private void loadReturnValues(
        SOTACell subRoot, int index, float[] h, int[] pop, int[] left, int[] right) {
    pop[index] = subRoot.members.size();

    if (subRoot != root) {
        h[index] =
                ExperimentUtil.geneDistance(
                        subRoot.centroidGene,
                        subRoot.parent.centroidGene,
                        0,
                        0,
                        function,
                        factor,
                        absolute);
    } else {
        h[index] = 0;
    }

    // Nothing further to record when there is no left child.
    if (subRoot.left == null) {
        return;
    }

    int leftSlot = utilCounter + 1;
    left[index] = leftSlot;
    utilCounter = leftSlot;
    loadReturnValues(subRoot.left, utilCounter, h, pop, left, right);

    // utilCounter has advanced past the whole left subtree by now.
    int rightSlot = utilCounter + 1;
    right[index] = rightSlot;
    utilCounter = rightSlot;
    loadReturnValues(subRoot.right, utilCounter, h, pop, left, right);
}
/**
 * Sums the distances from a cell's centroid to each of its members, ignoring any
 * distance that is NaN.
 *
 * @param curr cell whose members are measured against {@code curr.centroidGene}
 * @return the total of all non-NaN member-to-centroid distances
 */
private float getNodeDiversitySum(SOTACell curr) {
    final int memberCount = curr.members.size();
    float total = 0;

    for (int k = 0; k < memberCount; k++) {
        int gene = ((Integer) curr.members.elementAt(k)).intValue();
        float toCentroid =
                ExperimentUtil.geneDistance(
                        dataMatrix, curr.centroidGene, gene, 0, function, factor, absolute);
        if (Float.isNaN(toCentroid)) {
            continue;
        }
        total += toCentroid;
    }
    return total;
}
private SOTACell findMyCellInSubTree(SOTACell trainingCell, int geneNum, int level) { SOTACell currCell = trainingCell; SOTACell myCell = trainingCell; int levelIndex = 0; while (currCell.parent != null && levelIndex < level) { currCell = currCell.parent; levelIndex++; } // now currNode is at root, or 'level' number of nodes above the training node Vector cellList = new Vector(); getCellsBelow(cellList, currCell); float minDist = Float.POSITIVE_INFINITY; float currDist; for (int i = 0; i < cellList.size(); i++) { currCell = (SOTACell) (cellList.elementAt(i)); currDist = ExperimentUtil.geneDistance( dataMatrix, currCell.centroidGene, geneNum, 0, function, factor, absolute); if (currDist < minDist) { minDist = currDist; myCell = currCell; } } if (myNucleus[geneNum] != myCell) { myNucleus[geneNum] = myCell; myCell.addMember(geneNum); } return myCell; }
// sets cell diversities, and variances (if required) private void setDiversities() { SOTACell curr = head; double cellSum = 0; double cellVar = 0; double treeSum = 0; double maxCellDiv = -1; double maxCellVar = -1; int numberOfCells = 0; double currDist = 0; mostDiverseCell = head; mostVariableCell = head; while (curr != null) { numberOfCells++; cellSum = 0; // for all members of the node get distance to set cell resource (diversity) for (int i = 0; i < curr.members.size(); i++) { cellSum += ExperimentUtil.geneDistance( dataMatrix, curr.centroidGene, ((Integer) (curr.members.elementAt(i))).intValue(), 0, function, factor, absolute); } curr.cellDiversity = (cellSum / curr.members.size()); if (curr.cellDiversity > maxCellDiv && curr.members.size() > 1) { maxCellDiv = curr.cellDiversity; mostDiverseCell = curr; } treeSum += cellSum; if (useClusterVariance) { // using cell variance, need to find mostVariable cell cellVar = 0; currDist = 0; // get cell varience // if new members have been added if (curr.changedMembership) { // use max gene to gene distance for (int i = 0; i < curr.members.size(); i++) { for (int j = 0; j < curr.members.size(); j++) { currDist = ExperimentUtil.geneDistance( dataMatrix, null, ((Integer) (curr.members.elementAt(i))).intValue(), ((Integer) (curr.members.elementAt(j))).intValue(), function, factor, absolute); // get max dist. to be cellVar if (currDist > cellVar) { cellVar = currDist; } } } curr.cellVariance = cellVar; } else // no change to membership so we dont hve to recalculate variance cellVar = curr.cellVariance; if (cellVar > maxCellVar && curr.members.size() > 1) { maxCellVar = cellVar; mostVariableCell = curr; } } curr.changedMembership = false; // variance already set for current population curr = curr.succ; } treeDiversity = treeSum; }