protected MnaseOutput countMnaseReads( String chr, int pos, ChromScoresFast fwCounts, ChromScoresFast revCounts, boolean chipSeqMetric) throws Exception { double PSEUDOCOUNT = 0.005; // per bp int FRAGLENHIGH = 600; int center = pos; int quarterNuc = (int) ((double) this.footprintSize / 4.0); int halfNuc = (int) ((double) this.footprintSize / 2.0); int assocHalf = (int) ((double) this.assocSize / 2.0); int fraghalf = (int) ((double) FRAGLENHIGH / 2.0); int cycle = halfNuc; int s, e; MnaseOutput out = new MnaseOutput(); // int windHalf = (int)((double)this.normalizationWindow/2.0); // s = center - windHalf; // e = center + windHalf; // out.normWindCount = (double)this.countUnstrandedReads(fwCounts, revCounts,chr, s, e); // // System.err.printf("NormWind Cpg %d\t Getting (all) reads %s at %d-%d\t%d\n", center, // mnasePrefix, s, e,(int)normWindCount); if (chipSeqMetric) { // CHIP-SEQ . THIS SHOULD TAKE A FRAGMENT LEN, BUT 600 is based on K4 IMR90 offsets. System.err.println("Why are we using ChIP-seq metric??"); System.exit(1); // First fw s = center - fraghalf; e = center; int preCountPos = countReads(fwCounts, chr, s, e); // if (preCountPos>0) System.err.printf("Cpg %d\t Getting (+) reads at %d-%d\t%d\n", center, // s, e,preCountPos); // Then rev s = center; e = center + fraghalf; int postCountNeg = countReads(revCounts, chr, s, e); // if (postCountNeg>0) System.err.printf("Cpg %d\t Getting (-) reads at %d-%d\t%d\n", center, // s, e,postCountNeg); out.val = (double) (preCountPos + postCountNeg) / (double) fraghalf; out.rawCounts = preCountPos + postCountNeg; // if (this.normalizationWindow>0.0) // { // out = ((double)(preCountPos+postCountNeg)+(PSEUDOCOUNT*(double)fraghalf)) * // (1.0/((double)normWindCount+(PSEUDOCOUNT*(double)this.normalizationWindow))) * // (((double)this.normalizationWindow)/((double)fraghalf)); // out = Math.log(out)/LOG_OF_TWO; // } } else { // MNASE METRIC // First fw s = center - cycle - assocHalf; e = center - cycle + assocHalf; int preCountPos = countReads(fwCounts, chr, s, e); // System.err.printf("Cpg %d\t Getting (+) reads at %d-%d\t%d\n", center, s, e,preCountPos); // Then rev s = center + cycle - assocHalf; e = center + cycle + assocHalf; int postCountNeg = countReads(revCounts, chr, s, e); // System.err.printf("Cpg %d\t Getting (-) reads at %d-%d\t%d\n", center, s, e,postCountNeg); int phasedCount = preCountPos + postCountNeg; int phasedLen = 2 * assocHalf; out.rawCounts = phasedCount; out.val = phasedCount; // Then unphased int unphasedCount = 0; s = center - assocHalf; e = center + assocHalf; unphasedCount += countUnstrandedReads(fwCounts, revCounts, chr, s, e); int unphasedLen = 2 * assocHalf; // s = center - this.PERIODICITY - assocHalf; // e = center - this.PERIODICITY + assocHalf; // unphasedCount += countUnstrandedReads(fwCounts, revCounts, chr, s, e); // s = center + this.PERIODICITY - assocHalf; // e = center + this.PERIODICITY + assocHalf; // unphasedCount += countUnstrandedReads(fwCounts, revCounts, chr, s, e); // int unphasedLen = 6 * assocHalf; // // System.err.printf("Cpg %d\t Getting unphased reads at %d-%d\t%d\n", center, center - // this.PERIODICITY - assocHalf, // // center + this.PERIODICITY + assocHalf,unphasedCount); // // // out.val = ((double) (phasedCount) + (PSEUDOCOUNT * (double) phasedLen)) * (1.0 / ((double) unphasedCount + (PSEUDOCOUNT * (double) unphasedLen))) * (((double) unphasedLen) / ((double) phasedLen)); out.val = Math.log(out.val) / LOG_OF_TWO; out.val = phasedCount - unphasedCount; out.val = Math.min(preCountPos, postCountNeg) - unphasedCount; } return out; }
public void doMain(String[] args) throws Exception { CmdLineParser parser = new CmdLineParser(this); // if you have a wider console, you could increase the value; // here 80 is also the default parser.setUsageWidth(80); try { parser.parseArgument(args); if ((arguments.size() != 0)) { System.err.println(C_USAGE); parser.printUsage(System.err); System.exit(1); } if ((outPrefix == null)) { System.err.println("Must specify outPrefix and at least one mnasePrefix"); parser.printUsage(System.err); System.err.println(C_USAGE); System.exit(1); } } catch (CmdLineException e) { System.err.println(e.getMessage()); System.err.println(C_USAGE); // print the list of available options parser.printUsage(System.err); System.err.println(); return; } Logger.getLogger(Logger.GLOBAL_LOGGER_NAME).setLevel(Level.SEVERE); // Setup output files and print domain finders ListUtils.setDelim("-"); String mnasePrefixStr = mnasePrefix; String normString = (this.normalizationWindow == 0) ? "" : String.format(".normalized%dbpWind", this.normalizationWindow); String motifString = (this.motif == null) ? "" : String.format(".motif%s", this.motif.toUpperCase()); String withinStr = (this.withinFeat == null) ? "" : String.format(".withinFeat-%s", this.withinFeat); if (this.filterByMotif != null) motifString += String.format( ".filteredByMotif%s%s%s", this.filterByMotif, (this.filterByMotifInvert) ? "-inverted" : "", (this.filterByMotifOffset != 0) ? String.format("-offset%d", this.filterByMotifOffset) : ""); String fwOnlyStr = (this.fwStrandOnly) ? ".fwStrandOnly" : ""; String name = String.format( "nucleosomeReads.%s%s%s.minReads%.2f%s.nuc%d.assoc%d%s.%s.%s", this.outPrefix, motifString, withinStr, this.minReadsPerBp, fwOnlyStr, this.footprintSize, this.assocSize, normString, mnasePrefixStr, methPrefix); String outFn = String.format( "%s%s%s.minReads%.2f%s.nuc%d.assoc%d%s.%s.%s.csv", this.outPrefix, motifString, withinStr, this.minReadsPerBp, fwOnlyStr, this.footprintSize, this.assocSize, normString, mnasePrefixStr, methPrefix); String outFnWig = outFn.replace(".csv", ".wig"); // PrintWriter pw = new PrintWriter(new FileOutputStream(outFn)); PrintWriter pwWig = new PrintWriter(new FileOutputStream(outFnWig)); // pw.printf("%s,%s\n","Meth", "ReadsPerBp"); pwWig.printf("track type=wiggle_0 name=\"%s\" description=\"%s\"\n", name, name); MethylDbQuerier params = new MethylDbQuerier(); params.setMinCTreads(this.minCTreads); params.setUseNonconversionFilter(!this.noNonconvFilter); params.setMaxOppstrandAfrac(this.maxOppStrandAfrac); params.setMaxNextNonGfrac(this.maxNextNonGfrac); if (this.withinFeat != null) params.addFeatFilter(this.withinFeat, this.featFlank); // We use the control table simply to limit the valid Cpg. This is a result of // our incorrect loading of Lister 2009 tables, which contains many Cpgs which // incorrectly yield a meth level of 0 at CpGs not covered in their sequencing data. // This was an artifact of the way they published their data, where they published // a list of methy-C positions without positions containing 0 mC reads, so we had // to add fake positions for all Cs in the genome, and we didn't know which ones // actually had coverage in their data. List<String> methTables = Arrays.asList(methPrefix, controlMethPrefix); // List<String> mnaseTables = Arrays.asList(mnasePrefix); int nSeries = 1; boolean useChipseq[] = new boolean[nSeries]; for (int i = 0; i < nSeries; i++) { // Wow talk about a special case. useChipseq[i] = !mnasePrefix.contains("mnase") && !mnasePrefix.contains("Schones"); } double[] methCounts = new double[METHCOUNTER_LEN]; double[] methTotals = new double[METHCOUNTER_LEN]; int chromNum = 1; for (String chr : Arrays.asList( "chr22")) // MethylDbUtils.CHROMS_MINUS_TWELVE) //MethylDbUtils.SMALL_CHROMS) // // Arrays.asList("chr22")) //,"chr18","chr19","chr20")) // { System.err.printf("On chrom %d (%s)\n", chromNum++, chr); String s = String.format("variableStep\tchrom=%s\n", chr); pwWig.append(s); int chrInt = (new ChromFeatures()).chrom_from_public_str(chr); // Iterator uses DB connection and can use a ton of memory because // it loads all rows at once. This stuff should really be added to iterator // class, but until it is , just iterate here over the chromosome int onCpg = 0; // Get the full array try { { ChromScoresFast counts[] = new ChromScoresFast[2]; ChromScoresFast meths[] = new ChromScoresFast[2]; int offs = 0; // 20000000; //0; int offe = 30000000; // 0; System.err.printf("offs=%d, offe=%d\n", offs, offe); // The mnase counts are the same for both cases counts = MethylDbUtils.chromScoresReadCounts( params, chr, this.mnasePrefix, this.refGenome, offs, offe); // We can filter counts by a particular motif if (this.filterByMotif != null) { ChromScoresFast filterByMotifCounts[] = new ChromScoresFast[2]; filterByMotifCounts = MethylDbUtils.chromScoresMotifCounts( chr, this.refGenome, this.filterByMotif, offs, offe, false); System.err.printf( "%s, filtering + strand for motif %s. Pre-filter count: %.0f\n", chr, this.filterByMotif, counts[0].getScoresTotal(chr)); counts[0].mask(filterByMotifCounts[0], this.filterByMotifInvert); // counts[0].mask(filterByMotifCounts[1],this.filterByMotifInvert); System.err.printf( "%s, filtering + strand for motif %s. Post-filter count: %.0f\n", chr, this.filterByMotif, counts[0].getScoresTotal(chr)); System.err.printf( "%s, filtering - strand for motif %s. Pre-filter count: %.0f\n", chr, this.filterByMotif, counts[1].getScoresTotal(chr)); counts[1].mask(filterByMotifCounts[1], this.filterByMotifInvert); // counts[1].mask(filterByMotifCounts[0],this.filterByMotifInvert); System.err.printf( "%s, filtering - strand for motif %s. Post-filter count: %.0f\n", chr, this.filterByMotif, counts[1].getScoresTotal(chr)); } // The meth differ if it's a motif if (this.autoMnase || this.autoMnaseFw || this.autoMnaseRev) { meths = counts; } else if (this.motif == null) { meths = MethylDbUtils.chromScoresMethLevels( params, chr, this.methPrefix, this.refGenome, offs, offe); } else { char[] seqArr = null; { Sequence seq = GoldAssembly.chromSeq(this.refGenome, chr); String seqStr = seq.seqString(); if (offe > 0) { seqStr = seqStr.substring(offs, offe); } seqArr = seqStr.toUpperCase().toCharArray(); } System.err.println("Seq length=" + seqArr.length); ChromScoresMotifPositions all = new ChromScoresMotifPositions(this.refGenome); System.err.printf( "About to populate both strands for %s for motif %s\n", chr, this.motif); StrandedFeature.Strand motifStrand = (this.fwStrandOnly) ? StrandedFeature.POSITIVE : StrandedFeature.UNKNOWN; all.populate(chr, this.motif, seqArr, offs, motifStrand); meths[0] = all; meths[1] = all; // System.err.printf("Fw score at %d = %d\n",24009253,counts[0].getScore(chr, // 24009253)); // WigOptions wo = new WigOptions(); // wo.f_step = 1; // //counts[0] = counts[0].smooth(50, 30); // counts[0].wigOutput("testFw.wig", wo); // counts[1].wigOutput("testRev.wig", wo); } int minPos = counts[0].chromMinPos(chr); System.err.println("Getting min pos: " + minPos); int maxPos = counts[0].chromMaxPos(chr); System.err.println("Getting max pos: " + maxPos); for (int pos = minPos; pos < maxPos; pos += 1) // this.step) { boolean fwRead = (counts[0].getScore(chr, pos).intValue() >= 1); boolean revRead = (counts[1].getScore(chr, pos).intValue() >= 1); if (this.fwStrandOnly) revRead = false; // fwRead=false; boolean enoughReads = true; MnaseOutput mnaseReads = new MnaseOutput(); mnaseReads.rawCounts = (fwRead) ? 1 : 0; if (this.minReadsPerBp > 0.0) { if (fwRead) { int nucCenter = pos + ((this.PERIODICITY - this.assocSize) / 2); mnaseReads = this.countMnaseReads(chr, nucCenter, counts[0], counts[1], useChipseq[0]); } else if (revRead) { int nucCenter = pos - ((this.PERIODICITY - this.assocSize) / 2); mnaseReads = this.countMnaseReads(chr, nucCenter, counts[0], counts[1], useChipseq[0]); } enoughReads = (mnaseReads.val >= this.minReadsPerBp); } // if (mnaseReads.rawCounts>2) System.err.printf("Raw counts=%d\n", // (int)mnaseReads.rawCounts); // enoughReads = true; if (enoughReads && fwRead) incrementMethCounts(chr, pos, false, methCounts, methTotals, meths, maxPos); if (enoughReads && revRead) incrementMethCounts(chr, pos, true, methCounts, methTotals, meths, maxPos); // if ((pos % 1E6) == 0) System.err.printf("On pos #%d, meth=%d\n", pos, (int) mnaseReads.rawCounts); if (enoughReads) { // pwWig.printf("%d\t%.2f\n",pos, mnaseReads.val); //// pwWig.printf("%d\t%.2f\t%.2f\t%s\n",pos,mnaseReads.rawCounts, // mnaseReads.val, fwRead?"+":"-"); // //// pw.printf("%d,%d", chrInt, pos); ////// pw.printf(",%.3f", mnaseReads.val); //// pw.printf(",%d", (int)mnaseReads.rawCounts); ////// pw.printf(",%.3f", mnaseReads.normWindCount); //// pw.println(); } } // Try to get rid of objects counts[0] = null; counts[1] = null; meths[0] = null; meths[1] = null; System.gc(); } } catch (Exception e) { System.err.printf("%s\nCouldn't do region %s\n", e.toString(), chr); e.printStackTrace(); } } // pw.close(); pwWig.close(); outFn = outFn.replace(".csv", ".methAlign.csv"); PrintWriter pw = new PrintWriter(new FileOutputStream(outFn)); ListUtils.setDelim(","); pw.println(ListUtils.excelLine(methCounts)); pw.println(ListUtils.excelLine(methTotals)); pw.close(); } // Main