/**
 * Accumulates the per-column {@code kullbackLeiber} values between this matrix and another one.
 *
 * <p>Column {@code i} of this matrix is paired with column {@code i} of {@code other}. The number
 * of columns visited is taken from {@code other}, not from this matrix.
 *
 * @param other the matrix whose columns are compared against the columns of this matrix
 * @return the sum of the column-wise values
 */
public double kullbackLeiber(PositionWeightMatrix other) {
  double total = 0;
  int col = 0;
  while (col < other.getNumCol()) {
    total += get(col).kullbackLeiber(other.get(col));
    col++;
  }
  return total;
}
public List<BED> match(SequenceRegion region, double[] background, float minScore) { PositionWeightMatrix neutralPWM = createIsoPWM(background, "neutral"); WindowSlider slider = WindowSlider.getSlider(region, size(), size() - 1); List<BED> scoredWindows = new ArrayList<BED>(); while (slider.hasNext()) { SequenceRegion window = slider.next(); // System.out.println(window + " seq: " + window.getSequenceBases()); char[] windowChrs = window.getSequenceBases().toCharArray(); double directScore = getLogLikelihood(windowChrs) - neutralPWM.getLogLikelihood(windowChrs); window.reverse(); char[] reversedChrs = window.getSequenceBases().toCharArray(); double reverseScore = getLogLikelihood(reversedChrs) - neutralPWM.getLogLikelihood(reversedChrs); double max = Math.max(directScore, reverseScore); if (max >= minScore) { BED scoredWindow = new BED(window); scoredWindow.setStart(scoredWindow.getStart() + region.getStart()); scoredWindow.setEnd(scoredWindow.getEnd() + region.getStart()); scoredWindow.setOrientation(directScore > reverseScore); scoredWindow.setScore(max); scoredWindow.setChromosome(region.getContainingSequenceId()); scoredWindows.add(scoredWindow); } } return scoredWindows; }
/**
 * Builds a PWM with as many columns as this PWM, where every column is created from the same
 * vector (usually a neutral mutation vector).
 *
 * <p>The same {@code column} array instance is handed to {@code addColumn} for each position.
 *
 * @param column the vector used for every column of the new PWM
 * @param name the name given to the new PWM
 * @return the newly created PWM
 */
public PositionWeightMatrix createIsoPWM(double[] column, String name) {
  PositionWeightMatrix iso = new PositionWeightMatrix(name);
  int position = 0;
  while (position < size()) {
    iso.addColumn(column);
    position++;
  }
  return iso;
}
/**
 * Creates a new PWM carrying this PWM's name and columns.
 *
 * <p>The column objects themselves are not duplicated: each column of this PWM is added to the
 * new PWM as is.
 *
 * @return the new PWM
 */
public PositionWeightMatrix copy() {
  PositionWeightMatrix duplicate = new PositionWeightMatrix(getName());
  Iterator<PositionWeightColumn> columns = iterator();
  while (columns.hasNext()) {
    duplicate.add(columns.next());
  }
  return duplicate;
}
/**
 * Builds the reverse complement of this PWM.
 *
 * <p>Columns are visited from last to first and the complement of each one is appended to the
 * result. The high-information boundaries are mirrored: the new right boundary is derived from
 * this PWM's left boundary and vice versa.
 *
 * @return a new PWM with the same name holding the complemented columns in reverse order
 */
public PositionWeightMatrix reverseComplement() {
  PositionWeightMatrix rc = new PositionWeightMatrix(getName());
  int numCol = size();
  for (int source = numCol - 1; source >= 0; source--) {
    rc.add(get(source).getComplement());
  }
  rc.name = name;
  rc.rightHighInfoStart = numCol - 1 - leftHighInfoStart;
  rc.leftHighInfoStart = numCol - 1 - rightHighInfoStart;
  return rc;
}
/**
 * Adds the weights of a PWM into an accumulator matrix, entry by entry.
 *
 * <p>Matrix entry {@code (row, col)} is increased by {@code pwm.get(col).getWeight(row)}. The
 * iteration bounds are the dimensions of {@code m}.
 *
 * @param m the accumulator matrix, modified in place
 * @param pwm the PWM whose weights are added
 */
protected void addToCentroid(Matrix m, PositionWeightMatrix pwm) {
  int col = 0;
  while (col < m.getColumnDimension()) {
    PositionWeightColumn weights = pwm.get(col);
    int row = 0;
    while (row < m.getRowDimension()) {
      double updated = m.get(row, col) + weights.getWeight(row);
      m.set(row, col, updated);
      row++;
    }
    col++;
  }
}
/**
 * Computes the euclidean centroid. It may not be the formal centroid for different metrics, but
 * intuitively the average counts should provide a good cluster representative, which is what
 * this method intends to return.
 *
 * <p>The weights of all PWMs are summed entry by entry and each entry is then multiplied by
 * {@code 1 / pwmSet.size()}.
 *
 * @param pwmSet collection of PWMs from which to compute the centroid
 * @return the euclidean centroid, named {@code "centroid"}
 * @throws IllegalArgumentException when {@code pwmSet} is empty or when not all PWMs have the
 *     same number of columns
 */
public PositionWeightMatrix centroidOf(Collection<PositionWeightMatrix> pwmSet)
    throws IllegalArgumentException {
  Iterator<PositionWeightMatrix> pwmIt = pwmSet.iterator();
  if (!pwmIt.hasNext()) {
    // Previously an empty set fell through to a null matrix dereference.
    throw new IllegalArgumentException(
        "Error computing centroid. The set of PWMs should not be empty");
  }

  // The first PWM fixes the dimensions of the accumulator matrix.
  PositionWeightMatrix first = pwmIt.next();
  PositionWeightColumn firstCol = first.get(0);
  Matrix centroidMatrix = new Matrix(firstCol.getAlphabetSize(), first.getNumCol());
  addToCentroid(centroidMatrix, first);

  while (pwmIt.hasNext()) {
    PositionWeightMatrix pwm = pwmIt.next();
    if (pwm.getNumCol() != centroidMatrix.getColumnDimension()) {
      throw new IllegalArgumentException(
          "Error computing centroid. All PWMs in set should have the same dimension");
    }
    addToCentroid(centroidMatrix, pwm);
  }

  // Scale the sums explicitly. The earlier code called centroidMatrix.times(...) and ignored
  // the result; with a JAMA-style Matrix, times(double) returns a new matrix and leaves the
  // receiver untouched, so the sums were never averaged. Scaling through get/set is correct
  // regardless of how times() behaves.
  double scale = 1 / (double) pwmSet.size();
  for (int col = 0; col < centroidMatrix.getColumnDimension(); col++) {
    for (int row = 0; row < centroidMatrix.getRowDimension(); row++) {
      centroidMatrix.set(row, col, centroidMatrix.get(row, col) * scale);
    }
  }

  PositionWeightMatrix centroid = new PositionWeightMatrix("centroid");
  for (int j = 0; j < centroidMatrix.getColumnDimension(); j++) {
    centroid.addColumn(centroidMatrix.getColumn(j));
  }
  return centroid;
}
/** * Compute Distribution of scores for motif. * * @param s * @param isNumSeq * @return */ public List<Double> computeScoreDistribution( float backgroundA, float backgroundC, float backgroundG, float backgroundT, int sampleSize) { double[] bg = {backgroundA, backgroundC, backgroundG, backgroundT}; PositionWeightMatrix bgPWM = createIsoPWM(bg, "bg"); // System.out.println("bg vector " + backgroundA + "," + backgroundC + "," + backgroundG + "," + // backgroundT); List<Double> dist = new ArrayList<Double>(sampleSize); int[] kmer = new int[size()]; for (int i = 0; i < sampleSize; i++) { Random r = new Random(); for (int j = 0; j < size(); j++) { kmer[j] = r.nextInt(ALPHABET_SIZE); } dist.add(getLogLikelihood(kmer) - bgPWM.getLogLikelihood(kmer)); // System.out.println(printKmer(kmer)+ " -- " + (getLogLikelihood(kmer) - // bgPWM.getLogLikelihood(kmer))); } return dist; }
/**
 * Trims this PWM by dropping leading and trailing columns whose information content is lower
 * than the given threshold.
 *
 * <p>The trailing edge is scanned first, then the leading edge up to (but not past) the last
 * retained column. This PWM is not modified; the retained columns are added to a new PWM with
 * the same name. If every column is below the threshold the result has no columns.
 *
 * @param ic columns below this information content at the PWM edges are trimmed
 * @return the resulting PWM
 */
public PositionWeightMatrix trimByInformationContent(double ic) {
  // Walk backwards from the end until a column that is not below the threshold is found.
  int last = size() - 1;
  while (last >= 0) {
    if (!(get(last).getInformationContent() < ic)) {
      break;
    }
    last--;
  }

  // Walk forwards from the start, never going past the last retained column.
  int first = 0;
  while (first < last) {
    if (!(get(first).getInformationContent() < ic)) {
      break;
    }
    first++;
  }

  PositionWeightMatrix result = new PositionWeightMatrix(getName());
  for (int pos = first; pos <= last; pos++) {
    result.add(get(pos));
  }
  return result;
}
/**
 * Returns a new PWM, named after this one with a {@code "_perm"} suffix, whose columns are the
 * columns of this PWM in a randomly chosen order.
 *
 * <p>When {@code preserveGCDinucleotides} is true, the indices returned by
 * {@code gcDinucleotidesFistIdxs()} are treated as the first column of a two-column pair, and
 * the two columns of a pair are emitted next to each other in their original order.
 *
 * @param preserveGCDinucleotides whether column pairs reported by
 *     {@code gcDinucleotidesFistIdxs()} should be kept adjacent
 * @return the permuted PWM
 */
public PositionWeightMatrix permuteColumns(boolean preserveGCDinucleotides) {
  // Work on a copy so the indices below refer to a stable column list.
  PositionWeightMatrix original = copy();
  List<Integer> dinucleotidesToPreserveFirstIdx =
      preserveGCDinucleotides
          ? original.gcDinucleotidesFistIdxs()
          : new ArrayList<Integer>();
  PositionWeightMatrix permutted = new PositionWeightMatrix(getName() + "_perm");
  Random r = new Random();
  // Column indices not yet emitted. Built in ascending order and only ever shrunk by removals,
  // so it stays in ascending order.
  List<Integer> idxList = new ArrayList<Integer>(original.size());
  for (int i = 0; i < original.size(); i++) {
    idxList.add(i);
  }
  while (idxList.size() > 0) {
    // Pick a random position in the list of remaining indices and take the index stored there.
    int idxOfIdx = r.nextInt(idxList.size());
    int idx = idxList.remove(idxOfIdx);
    if (dinucleotidesToPreserveFirstIdx.contains(idx - 1)) {
      // idx is the second column of a preserved pair: emit the pair as (idx - 1, idx).
      PositionWeightColumn col = original.get(idx - 1);
      permutted.add(col);
      PositionWeightColumn nextCol = original.get(idx);
      // Removal by position: drops the entry just before the one removed above.
      // NOTE(review): assumes that entry is idx - 1, i.e. the partner has not been emitted
      // already; presumably holds when preserved pairs never overlap — confirm.
      idxList.remove(idxOfIdx - 1);
      permutted.add(nextCol);
    } else {
      PositionWeightColumn col = original.get(idx);
      permutted.add(col);
      if (dinucleotidesToPreserveFirstIdx.contains(idx)) {
        // idx is the first column of a preserved pair: emit its partner right after it.
        PositionWeightColumn nextCol = original.get(idx + 1);
        // Removal by position: after idx was removed, its successor shifted into idxOfIdx.
        // NOTE(review): assumes that successor is idx + 1 — confirm pairs never overlap.
        idxList.remove(idxOfIdx);
        permutted.add(nextCol);
      }
    }
  }
  return permutted;
}
/**
 * Forwards a column to {@code pwm.addColumn}.
 *
 * @param col the column values handed to the underlying PWM
 */
public void addPWMColumn(double[] col) {
  pwm.addColumn(col);
}
/**
 * Returns the name reported by the underlying {@code pwm}.
 *
 * @return the value of {@code pwm.getName()}
 */
public String getName() {
  String pwmName = pwm.getName();
  return pwmName;
}