public List<BED> match(SequenceRegion region, double[] background, float minScore) { PositionWeightMatrix neutralPWM = createIsoPWM(background, "neutral"); WindowSlider slider = WindowSlider.getSlider(region, size(), size() - 1); List<BED> scoredWindows = new ArrayList<BED>(); while (slider.hasNext()) { SequenceRegion window = slider.next(); // System.out.println(window + " seq: " + window.getSequenceBases()); char[] windowChrs = window.getSequenceBases().toCharArray(); double directScore = getLogLikelihood(windowChrs) - neutralPWM.getLogLikelihood(windowChrs); window.reverse(); char[] reversedChrs = window.getSequenceBases().toCharArray(); double reverseScore = getLogLikelihood(reversedChrs) - neutralPWM.getLogLikelihood(reversedChrs); double max = Math.max(directScore, reverseScore); if (max >= minScore) { BED scoredWindow = new BED(window); scoredWindow.setStart(scoredWindow.getStart() + region.getStart()); scoredWindow.setEnd(scoredWindow.getEnd() + region.getStart()); scoredWindow.setOrientation(directScore > reverseScore); scoredWindow.setScore(max); scoredWindow.setChromosome(region.getContainingSequenceId()); scoredWindows.add(scoredWindow); } } return scoredWindows; }
/** * Compute Distribution of scores for motif. * * @param s * @param isNumSeq * @return */ public List<Double> computeScoreDistribution( float backgroundA, float backgroundC, float backgroundG, float backgroundT, int sampleSize) { double[] bg = {backgroundA, backgroundC, backgroundG, backgroundT}; PositionWeightMatrix bgPWM = createIsoPWM(bg, "bg"); // System.out.println("bg vector " + backgroundA + "," + backgroundC + "," + backgroundG + "," + // backgroundT); List<Double> dist = new ArrayList<Double>(sampleSize); int[] kmer = new int[size()]; for (int i = 0; i < sampleSize; i++) { Random r = new Random(); for (int j = 0; j < size(); j++) { kmer[j] = r.nextInt(ALPHABET_SIZE); } dist.add(getLogLikelihood(kmer) - bgPWM.getLogLikelihood(kmer)); // System.out.println(printKmer(kmer)+ " -- " + (getLogLikelihood(kmer) - // bgPWM.getLogLikelihood(kmer))); } return dist; }