예제 #1
0
파일: SOTA.java 프로젝트: SamGG/mev-tm4
  /**
   * Selects the child of {@code parentNode} whose centroid has the smaller gene distance to the
   * given gene.
   *
   * @param parentNode cell whose {@code left} and {@code right} children are compared
   * @param geneIndex gene index handed to {@code ExperimentUtil.geneDistance}
   * @return {@code parentNode.left} when its distance is less than or equal to the right child's
   *     distance, otherwise {@code parentNode.right}
   */
  private SOTACell findMyDaughterCell(SOTACell parentNode, int geneIndex) {
    SOTACell leftChild = parentNode.left;
    float leftDistance =
        ExperimentUtil.geneDistance(
            dataMatrix, leftChild.centroidGene, geneIndex, 0, function, factor, absolute);

    SOTACell rightChild = parentNode.right;
    float rightDistance =
        ExperimentUtil.geneDistance(
            dataMatrix, rightChild.centroidGene, geneIndex, 0, function, factor, absolute);

    // Ties go to the left child.
    return (leftDistance <= rightDistance) ? leftChild : rightChild;
  }
예제 #2
0
파일: SOTA.java 프로젝트: SamGG/mev-tm4
  /**
   * Finds the largest accumulated centroid-to-centroid distance along any path that starts at a
   * cell in the {@code head}/{@code succ} list and follows {@code parent} links until a cell with
   * no parent is reached.
   *
   * @return the largest path length found, or -1 when the list starting at {@code head} is empty
   */
  public double getMaxLeafToRootPath() {
    double maxDist = -1;

    for (SOTACell curr = head; curr != null; curr = curr.succ) {
      double cumDist = 0;

      // Sum the distance of every child->parent step above this cell.
      for (SOTACell traveler = curr; traveler.parent != null; traveler = traveler.parent) {
        cumDist +=
            ExperimentUtil.geneDistance(
                traveler.parent.centroidGene,
                traveler.centroidGene,
                0,
                0,
                function,
                factor,
                absolute);
      }

      // Keep the maximum. The earlier '<' comparison could never replace the -1 seed for a
      // non-negative path sum, so the method always reported -1.
      if (cumDist > maxDist) {
        maxDist = cumDist;
      }
    }

    return maxDist;
  }
예제 #3
0
파일: SOTA.java 프로젝트: SamGG/mev-tm4
  /**
   * Scans every cell in the {@code head}/{@code succ} list for the one whose centroid has the
   * smallest gene distance to {@code geneNum}, and records the gene as a member of that cell if it
   * is not already assigned there.
   *
   * @param geneNum index of the gene to place
   * @return the selected cell; when several cells tie, the one latest in the list wins
   */
  private SOTACell findMyCell(int geneNum) {
    SOTACell nearest = head;
    double bestDistance = Float.POSITIVE_INFINITY;

    for (SOTACell candidate = head; candidate != null; candidate = candidate.succ) {
      double distance =
          ExperimentUtil.geneDistance(
              dataMatrix, candidate.centroidGene, geneNum, 0, function, factor, absolute);

      // '<=' means a later cell at the same distance replaces an earlier one.
      if (distance <= bestDistance) {
        bestDistance = distance;
        nearest = candidate;
      }
    }

    // Nothing to update when the gene already belongs to the selected cell.
    if (myNucleus[geneNum] == nearest) {
      return nearest;
    }

    myNucleus[geneNum] = nearest;
    nearest.addMember(geneNum);
    return nearest;
  }
예제 #4
0
파일: SOTA.java 프로젝트: SamGG/mev-tm4
  /**
   * Returns the largest pairwise gene distance among the members of {@code curr}.
   *
   * <p>NaN distances are skipped. The result is 0 when the cell has fewer than two members or
   * when no pair yields a distance greater than 0.
   *
   * @param curr cell whose {@code members} hold gene indices as {@code Integer} values
   * @return the maximum non-NaN distance over all unordered member pairs, never below 0
   */
  private float getNodeVariance(SOTACell curr) {
    final int memberCount = curr.members.size();
    float largest = 0;

    for (int a = 0; a < memberCount; a++) {
      for (int b = a + 1; b < memberCount; b++) {
        int firstGene = ((Integer) curr.members.elementAt(a)).intValue();
        int secondGene = ((Integer) curr.members.elementAt(b)).intValue();

        float pairDistance =
            ExperimentUtil.geneDistance(
                dataMatrix, null, firstGene, secondGene, function, factor, absolute);

        if (Float.isNaN(pairDistance) || pairDistance <= largest) {
          continue;
        }
        largest = pairDistance;
      }
    }
    return largest;
  }
예제 #5
0
파일: SOTA.java 프로젝트: SamGG/mev-tm4
  /**
   * Determines a variability cutoff for a given p value by resampling.
   *
   * <p>The matrix is randomized via {@code randomizeMatrix}, gene-to-gene distances are computed
   * on the randomized copy, and those distances are binned into a normalized histogram spanning
   * the two values returned by {@code getMinAndMax}. Bin fractions are then accumulated from the
   * lowest bin upward until the running total reaches {@code p}.
   *
   * @param origMatrix matrix to randomize; its row count sets the number of genes compared
   * @param p cumulative fraction at which to stop accumulating histogram bins
   * @return {@code ((bin - 0.5) / NUM_BINS) * (max - min) + min}, where {@code bin} is the number
   *     of bins consumed before the running total reached {@code p} (at most {@code NUM_BINS})
   */
  private float resampleAndGetNewCutoff(FloatMatrix origMatrix, float p) {
    final int NUM_BINS = 500;

    FloatMatrix randMatrix = randomizeMatrix(origMatrix);
    int rows = origMatrix.getRowDimension();

    float[][] distances = new float[rows][rows];

    // NOTE(review): the bound 'i < rows - 1' leaves the last row out of the sampled pairs. It is
    // kept as-is because getMinAndMax is not visible here and may assume the same range -
    // confirm before widening it.
    for (int i = 0; i < rows - 1; i++) {
      for (int j = 0; j < i; j++) {
        distances[i][j] =
            ExperimentUtil.geneDistance(randMatrix, null, i, j, function, factor, absolute);
      }
    }

    // now have all gene to gene distances
    float[] minAndMax = getMinAndMax(distances, rows);
    float min = minAndMax[0];
    float max = minAndMax[1];

    // Java zero-initializes new arrays, so no explicit clearing pass is needed.
    float[] cumDist = new float[NUM_BINS];
    int numSamplePoints = 0;

    for (int i = 0; i < rows - 1; i++) {
      for (int j = 0; j < i; j++) {
        cumDist[(int) ((float) (NUM_BINS - 1) * (distances[i][j] - min) / (max - min))]++;
        numSamplePoints++;
      }
    }

    // Convert raw counts to fractions of all sampled pairs.
    for (int i = 0; i < NUM_BINS; i++) {
      cumDist[i] /= (float) numSamplePoints;
    }

    // Walk the bins until the accumulated fraction reaches p. The bound on 'bin' prevents an
    // ArrayIndexOutOfBoundsException when p exceeds the total accumulated fraction (p > 1, or
    // float rounding leaving the sum of fractions slightly below p).
    float cumCount = 0;
    int bin = 0;

    while (cumCount < p && bin < NUM_BINS) {
      cumCount += cumDist[bin];
      bin++;
    }

    return (((float) bin - (float) 0.5) / (float) NUM_BINS) * (max - min) + min;
  }
예제 #6
0
파일: SOTA.java 프로젝트: SamGG/mev-tm4
  // Averages, over the members of parentNode, the smaller of each gene's distances to the
  // left-child centroid and the right-child centroid. The sum is divided by the member count,
  // so the result is a mean rather than a raw sum.
  private double getInputError(SOTACell parentNode) {
    final int memberCount = parentNode.members.size();
    double total = 0;

    for (int m = 0; m < memberCount; m++) {
      int gene = ((Integer) parentNode.members.elementAt(m)).intValue();

      double toLeft =
          ExperimentUtil.geneDistance(
              dataMatrix, parentNode.left.centroidGene, gene, 0, function, factor, absolute);
      double toRight =
          ExperimentUtil.geneDistance(
              dataMatrix, parentNode.right.centroidGene, gene, 0, function, factor, absolute);

      // Ties take the left distance; the comparison form is kept so NaN handling is unchanged.
      total += (toLeft <= toRight) ? toLeft : toRight;
    }
    return total / memberCount;
  }
예제 #7
0
파일: SOTA.java 프로젝트: SamGG/mev-tm4
  /**
   * Recursively fills the parallel output arrays for the subtree rooted at {@code subRoot}.
   *
   * <p>Slot {@code index} receives this node's member count in {@code pop} and its distance to
   * its parent's centroid in {@code h} (0 for {@code root}). When the node has a left child, two
   * new slots are allocated from the shared {@code utilCounter} field - first for the left child,
   * then for the right - and their indices are stored in {@code left[index]} and
   * {@code right[index]}.
   *
   * @param subRoot node being recorded
   * @param index slot in the output arrays assigned to {@code subRoot}
   * @param h receives the node-to-parent centroid distance
   * @param pop receives the number of members of the node
   * @param left receives the slot index of the left child
   * @param right receives the slot index of the right child
   */
  private void loadReturnValues(
      SOTACell subRoot, int index, float[] h, int[] pop, int[] left, int[] right) {
    pop[index] = subRoot.members.size();

    if (subRoot != root) {
      h[index] =
          ExperimentUtil.geneDistance(
              subRoot.centroidGene, subRoot.parent.centroidGene, 0, 0, function, factor, absolute);
    } else {
      h[index] = 0;
    }

    // Only the left link is checked before descending into both children.
    if (subRoot.left == null) {
      return;
    }

    int leftSlot = utilCounter + 1;
    left[index] = leftSlot;
    utilCounter = leftSlot;
    loadReturnValues(subRoot.left, utilCounter, h, pop, left, right);

    // utilCounter may have advanced during the left recursion, so read it again here.
    int rightSlot = utilCounter + 1;
    right[index] = rightSlot;
    utilCounter = rightSlot;
    loadReturnValues(subRoot.right, utilCounter, h, pop, left, right);
  }
예제 #8
0
파일: SOTA.java 프로젝트: SamGG/mev-tm4
  /**
   * Sums the gene distances between the centroid of {@code curr} and each of its members,
   * ignoring any distance that is NaN.
   *
   * @param curr cell whose {@code members} hold gene indices as {@code Integer} values
   * @return the total of all non-NaN member-to-centroid distances; 0 for a cell with no members
   */
  private float getNodeDiversitySum(SOTACell curr) {
    final int memberCount = curr.members.size();
    float total = 0;

    for (int m = 0; m < memberCount; m++) {
      int gene = ((Integer) curr.members.elementAt(m)).intValue();
      float distance =
          ExperimentUtil.geneDistance(
              dataMatrix, curr.centroidGene, gene, 0, function, factor, absolute);

      if (Float.isNaN(distance)) {
        continue;
      }
      total += distance;
    }
    return total;
  }
예제 #9
0
파일: SOTA.java 프로젝트: SamGG/mev-tm4
  /**
   * Places a gene in the closest cell found within a subtree around {@code trainingCell}.
   *
   * <p>The search starts by climbing at most {@code level} parent links from
   * {@code trainingCell} (stopping early at a node with no parent), collects cells from that
   * ancestor via {@code getCellsBelow}, and picks the collected cell whose centroid has the
   * strictly smallest gene distance to {@code geneNum}. The gene is added to that cell if it is
   * not already assigned there.
   *
   * @param trainingCell starting cell; also the result when no collected cell yields a distance
   *     below positive infinity
   * @param geneNum index of the gene to place
   * @param level maximum number of parent links to climb
   * @return the selected cell
   */
  private SOTACell findMyCellInSubTree(SOTACell trainingCell, int geneNum, int level) {

    SOTACell subTreeTop = trainingCell;
    for (int climbed = 0; subTreeTop.parent != null && climbed < level; climbed++) {
      subTreeTop = subTreeTop.parent;
    }

    Vector candidates = new Vector();
    getCellsBelow(candidates, subTreeTop);

    SOTACell nearest = trainingCell;
    float nearestDistance = Float.POSITIVE_INFINITY;

    for (int c = 0; c < candidates.size(); c++) {
      SOTACell candidate = (SOTACell) candidates.elementAt(c);
      float distance =
          ExperimentUtil.geneDistance(
              dataMatrix, candidate.centroidGene, geneNum, 0, function, factor, absolute);

      // Strict '<': the first candidate at a given distance is kept on ties.
      if (distance < nearestDistance) {
        nearestDistance = distance;
        nearest = candidate;
      }
    }

    if (myNucleus[geneNum] == nearest) {
      return nearest;
    }

    myNucleus[geneNum] = nearest;
    nearest.addMember(geneNum);
    return nearest;
  }
예제 #10
0
파일: SOTA.java 프로젝트: SamGG/mev-tm4
  // Walks the head/succ list and, for every cell, stores the mean member-to-centroid distance
  // in cellDiversity. Tracks mostDiverseCell and, when useClusterVariance is set, also
  // cellVariance and mostVariableCell. Finally stores the summed member-to-centroid distance of
  // all cells in treeDiversity.
  private void setDiversities() {
    double treeTotal = 0;
    double topDiversity = -1;
    double topVariance = -1;

    // Both "most" cells default to head and are replaced only by cells with more than one member.
    mostDiverseCell = head;
    mostVariableCell = head;

    for (SOTACell cell = head; cell != null; cell = cell.succ) {

      // Diversity: total distance from each member to this cell's centroid.
      double memberTotal = 0;
      for (int m = 0; m < cell.members.size(); m++) {
        memberTotal +=
            ExperimentUtil.geneDistance(
                dataMatrix,
                cell.centroidGene,
                ((Integer) (cell.members.elementAt(m))).intValue(),
                0,
                function,
                factor,
                absolute);
      }

      cell.cellDiversity = memberTotal / cell.members.size();

      if (cell.cellDiversity > topDiversity && cell.members.size() > 1) {
        topDiversity = cell.cellDiversity;
        mostDiverseCell = cell;
      }

      treeTotal += memberTotal;

      if (useClusterVariance) {
        double variance;

        if (!cell.changedMembership) {
          // Membership is unchanged, so the stored variance is reused.
          variance = cell.cellVariance;
        } else {
          // Variance is the largest gene-to-gene distance over all ordered member pairs.
          variance = 0;
          for (int a = 0; a < cell.members.size(); a++) {
            for (int b = 0; b < cell.members.size(); b++) {
              double pairDistance =
                  ExperimentUtil.geneDistance(
                      dataMatrix,
                      null,
                      ((Integer) (cell.members.elementAt(a))).intValue(),
                      ((Integer) (cell.members.elementAt(b))).intValue(),
                      function,
                      factor,
                      absolute);

              if (pairDistance > variance) {
                variance = pairDistance;
              }
            }
          }
          cell.cellVariance = variance;
        }

        if (variance > topVariance && cell.members.size() > 1) {
          topVariance = variance;
          mostVariableCell = cell;
        }
      }

      // Cleared on every pass, whether or not variance was recomputed.
      cell.changedMembership = false;
    }

    treeDiversity = treeTotal;
  }