/**
 * Sums the per-column {@code kullbackLeiber} values between this matrix and {@code other}.
 *
 * <p>The number of columns visited is taken from {@code other}; column {@code i} of this matrix
 * is compared against column {@code i} of {@code other}.
 *
 * @param other the matrix whose columns are compared against this matrix's columns
 * @return the sum of the column-wise values
 */
public double kullbackLeiber(PositionWeightMatrix other) {
   double divergence = 0;
   int col = 0;
   while (col < other.getNumCol()) {
     divergence += get(col).kullbackLeiber(other.get(col));
     col++;
   }
   return divergence;
 }
  /**
   * Scans {@code region} with windows of {@code size()} bases and reports every window whose
   * background-corrected score reaches {@code minScore}.
   *
   * <p>The score of a window is its log-likelihood under this PWM minus its log-likelihood under
   * an iso-PWM built from {@code background}. Each window is scored as returned by the slider and
   * again after {@code window.reverse()}; the larger of the two scores is kept.
   *
   * @param region the sequence region to scan
   * @param background vector used for every column of the neutral PWM
   * @param minScore minimum (inclusive) score for a window to be reported
   * @return one BED entry per reported window, in slider order
   */
  public List<BED> match(SequenceRegion region, double[] background, float minScore) {
    PositionWeightMatrix neutralPWM = createIsoPWM(background, "neutral");
    WindowSlider slider = WindowSlider.getSlider(region, size(), size() - 1);
    List<BED> hits = new ArrayList<BED>();

    while (slider.hasNext()) {
      SequenceRegion window = slider.next();

      char[] forwardBases = window.getSequenceBases().toCharArray();
      double directScore =
          getLogLikelihood(forwardBases) - neutralPWM.getLogLikelihood(forwardBases);

      // reverse() mutates the window; everything below sees the reversed window.
      window.reverse();
      char[] reverseBases = window.getSequenceBases().toCharArray();
      double reverseScore =
          getLogLikelihood(reverseBases) - neutralPWM.getLogLikelihood(reverseBases);

      double best = Math.max(directScore, reverseScore);
      // Written as a negated >= so that a NaN score is skipped, exactly like a failing >= test.
      if (!(best >= minScore)) {
        continue;
      }

      // The hit is built from the (already reversed) window and shifted by the region start.
      BED hit = new BED(window);
      hit.setStart(hit.getStart() + region.getStart());
      hit.setEnd(hit.getEnd() + region.getStart());
      // true only when the direct score is strictly greater; ties are flagged as reverse.
      hit.setOrientation(directScore > reverseScore);
      hit.setScore(best);
      hit.setChromosome(region.getContainingSequenceId());
      hits.add(hit);
    }
    return hits;
  }
  /**
   * Builds a new PWM with as many columns as this PWM, passing the same vector to
   * {@code addColumn} for every position. The vector is usually a neutral mutation vector.
   *
   * @param column the vector handed to {@code addColumn} for each position
   * @param name the name given to the new PWM
   * @return the newly built PWM
   */
  public PositionWeightMatrix createIsoPWM(double[] column, String name) {
    PositionWeightMatrix isoPwm = new PositionWeightMatrix(name);
    for (int remaining = size(); remaining > 0; remaining--) {
      isoPwm.addColumn(column);
    }
    return isoPwm;
  }
  /**
   * Creates a new PWM carrying this PWM's name and the same columns, in iteration order.
   *
   * <p>Each column object is handed to {@code add} as-is; no per-column copy is made here.
   *
   * @return the new PWM
   */
  public PositionWeightMatrix copy() {
    PositionWeightMatrix duplicate = new PositionWeightMatrix(getName());
    Iterator<PositionWeightColumn> columns = iterator();
    while (columns.hasNext()) {
      duplicate.add(columns.next());
    }
    return duplicate;
  }
  /**
   * Builds a new PWM whose columns are the complements of this PWM's columns in reverse order.
   *
   * <p>The high-information boundaries are mirrored: the new right boundary is derived from this
   * PWM's left boundary and vice versa.
   *
   * @return the reverse-complemented PWM
   */
  public PositionWeightMatrix reverseComplement() {
    PositionWeightMatrix rc = new PositionWeightMatrix(getName());
    int numCol = size();
    // Walk this PWM from its last column to its first, appending each complement.
    for (int src = numCol - 1; src >= 0; src--) {
      rc.add(get(src).getComplement());
    }

    rc.name = name;
    int lastIdx = numCol - 1;
    rc.leftHighInfoStart = lastIdx - rightHighInfoStart;
    rc.rightHighInfoStart = lastIdx - leftHighInfoStart;

    return rc;
  }
 /**
  * Adds the weights of {@code pwm} into {@code m}, cell by cell: entry (row, col) of {@code m} is
  * increased by weight {@code row} of column {@code col} of {@code pwm}.
  *
  * @param m accumulator matrix, updated in place
  * @param pwm the PWM whose weights are added
  */
 protected void addToCentroid(Matrix m, PositionWeightMatrix pwm) {
   int col = 0;
   while (col < m.getColumnDimension()) {
     PositionWeightColumn weights = pwm.get(col);
     int row = 0;
     while (row < m.getRowDimension()) {
       m.set(row, col, m.get(row, col) + weights.getWeight(row));
       row++;
     }
     col++;
   }
 }
  /**
   * Computes the euclidean centroid, it may not be the formal centroid for different metrics but
   * intuitively the average counts should provide a good cluster representative which is what this
   * method intends to return.
   *
   * <p>The weights of all PWMs are summed cell by cell and then divided by the number of PWMs.
   *
   * @param pwmSet - Collection of PWMs from which to compute the centroid; must not be empty
   * @return The euclidean centroid, named "centroid"
   * @throws IllegalArgumentException - When {@code pwmSet} is empty or not all PWMs have the same
   *     number of columns.
   */
  public PositionWeightMatrix centroidOf(Collection<PositionWeightMatrix> pwmSet)
      throws IllegalArgumentException {
    Iterator<PositionWeightMatrix> pwmIt = pwmSet.iterator();
    if (!pwmIt.hasNext()) {
      // Previously this case fell through to a NullPointerException on the null matrix.
      throw new IllegalArgumentException(
          "Error computing centroid. The set of PWMs should not be empty");
    }

    // The first PWM fixes the dimensions of the accumulator.
    PositionWeightMatrix first = pwmIt.next();
    PositionWeightColumn firstCol = first.get(0);
    Matrix centroidMatrix = new Matrix(firstCol.getAlphabetSize(), first.getNumCol());
    addToCentroid(centroidMatrix, first);

    while (pwmIt.hasNext()) {
      PositionWeightMatrix pwm = pwmIt.next();
      if (pwm.getNumCol() != centroidMatrix.getColumnDimension()) {
        throw new IllegalArgumentException(
            "Error computing centroid. All PWMs in set should have the same dimension");
      }
      addToCentroid(centroidMatrix, pwm);
    }

    // Turn the sums into averages. The scaling is done explicitly through get/set rather than
    // through Matrix.times(double), whose return value was previously discarded; in JAMA-style
    // matrices times() returns a new matrix and leaves the receiver unscaled.
    double scale = 1 / (double) pwmSet.size();
    for (int j = 0; j < centroidMatrix.getColumnDimension(); j++) {
      for (int i = 0; i < centroidMatrix.getRowDimension(); i++) {
        centroidMatrix.set(i, j, centroidMatrix.get(i, j) * scale);
      }
    }

    PositionWeightMatrix centroid = new PositionWeightMatrix("centroid");
    for (int j = 0; j < centroidMatrix.getColumnDimension(); j++) {
      centroid.addColumn(centroidMatrix.getColumn(j));
    }
    return centroid;
  }
 /**
  * Computes a sampled distribution of scores for this motif.
  *
  * <p>For each sample a k-mer of {@code size()} symbols is drawn, every symbol uniformly from
  * {@code [0, ALPHABET_SIZE)}, and scored as its log-likelihood under this PWM minus its
  * log-likelihood under an iso-PWM built from the four background values. Note that the
  * background values only enter the score; the k-mers themselves are drawn uniformly.
  *
  * @param backgroundA background value placed first in the background vector
  * @param backgroundC background value placed second in the background vector
  * @param backgroundG background value placed third in the background vector
  * @param backgroundT background value placed fourth in the background vector
  * @param sampleSize number of random k-mers to score
  * @return the scores, one per sampled k-mer, in sampling order
  */
 public List<Double> computeScoreDistribution(
     float backgroundA, float backgroundC, float backgroundG, float backgroundT, int sampleSize) {
   double[] bg = {backgroundA, backgroundC, backgroundG, backgroundT};
   PositionWeightMatrix bgPWM = createIsoPWM(bg, "bg");
   List<Double> dist = new ArrayList<Double>(sampleSize);

   // One generator for the whole run instead of allocating and seeding a new one per sample.
   Random r = new Random();
   int length = size();
   // The buffer is reused across samples; every position is overwritten before scoring.
   int[] kmer = new int[length];
   for (int i = 0; i < sampleSize; i++) {
     for (int j = 0; j < length; j++) {
       kmer[j] = r.nextInt(ALPHABET_SIZE);
     }
     dist.add(getLogLikelihood(kmer) - bgPWM.getLogLikelihood(kmer));
   }
   return dist;
 }
  /**
   * Trims this PWM by dropping leading and trailing columns whose information content is lower
   * than the given threshold. Columns between the first and last retained positions are kept
   * regardless of their information content.
   *
   * @param ic - Edge columns with information content below this value are trimmed.
   * @return A new PWM, with this PWM's name, holding the retained columns.
   */
  public PositionWeightMatrix trimByInformationContent(double ic) {
    // Move the right edge inwards until a column is not below the threshold (or none is left).
    int end = size() - 1;
    for (; end >= 0; end--) {
      if (!(get(end).getInformationContent() < ic)) {
        break;
      }
    }

    // Move the left edge inwards, never past the right edge.
    int begin = 0;
    for (; begin < end; begin++) {
      if (!(get(begin).getInformationContent() < ic)) {
        break;
      }
    }

    PositionWeightMatrix trimmed = new PositionWeightMatrix(getName());
    int pos = begin;
    while (pos <= end) {
      trimmed.add(get(pos));
      pos++;
    }
    return trimmed;
  }
  /**
   * Builds a new PWM, named {@code getName() + "_perm"}, whose columns are the columns of this
   * PWM placed in a random order.
   *
   * <p>When {@code preserveGCDinucleotides} is true, the indices returned by
   * {@code gcDinucleotidesFistIdxs()} are treated as the first column of a two-column unit: the
   * column at such an index and the column right after it are always emitted together, in their
   * original order.
   *
   * @param preserveGCDinucleotides whether the two-column units described above are kept adjacent
   * @return the permuted PWM
   */
  public PositionWeightMatrix permuteColumns(boolean preserveGCDinucleotides) {
    PositionWeightMatrix original = copy();
    // presumably the first-column indices of GC dinucleotides — confirm against
    // gcDinucleotidesFistIdxs(), which is not visible here
    List<Integer> dinucleotidesToPreserveFirstIdx =
        preserveGCDinucleotides ? original.gcDinucleotidesFistIdxs() : new ArrayList<Integer>();
    PositionWeightMatrix permutted = new PositionWeightMatrix(getName() + "_perm");
    Random r = new Random();
    // Indices of columns not yet placed; filled in ascending order, and removals keep that order.
    List<Integer> idxList = new ArrayList<Integer>(original.size());
    for (int i = 0; i < original.size(); i++) {
      idxList.add(i);
    }
    while (idxList.size() > 0) {
      // Draw a random position within the remaining indices and take the index stored there.
      int idxOfIdx = r.nextInt(idxList.size());
      int idx = idxList.remove(idxOfIdx);
      if (dinucleotidesToPreserveFirstIdx.contains(idx - 1)) {
        // The drawn column is the second half of a preserved pair: emit first half, then it.
        PositionWeightColumn col = original.get(idx - 1);
        permutted.add(col);
        PositionWeightColumn nextCol = original.get(idx);
        // Removes by position, not by value.
        // NOTE(review): assumes idx - 1 is still pending and sits right before the drawn
        // position, which presumably requires preserved pairs not to overlap — confirm.
        idxList.remove(idxOfIdx - 1);
        permutted.add(nextCol);
      } else {
        PositionWeightColumn col = original.get(idx);
        permutted.add(col);
        if (dinucleotidesToPreserveFirstIdx.contains(idx)) {
          // The drawn column is the first half of a preserved pair: emit its successor too.
          PositionWeightColumn nextCol = original.get(idx + 1);
          // After the removal above, position idxOfIdx holds the entry that followed idx.
          // NOTE(review): assumes that entry is idx + 1 — confirm.
          idxList.remove(idxOfIdx);
          permutted.add(nextCol);
        }
      }
    }

    return permutted;
  }
// Example #11
// 0
// File: MemeMotif.java  Project: ADerr/ESAT
 /**
  * Forwards the given column to {@code addColumn} on the wrapped {@code pwm}.
  *
  * @param col the column values passed through unchanged
  */
 public void addPWMColumn(double[] col) {
   pwm.addColumn(col);
 }
// Example #12
// 0
// File: MemeMotif.java  Project: ADerr/ESAT
 /**
  * Returns the name reported by the wrapped {@code pwm}.
  *
  * @return the result of {@code pwm.getName()}
  */
 public String getName() {
   return pwm.getName();
 }