public void doMain(String[] args) throws Exception { CmdLineParser parser = new CmdLineParser(this); // if you have a wider console, you could increase the value; // here 80 is also the default parser.setUsageWidth(80); try { parser.parseArgument(args); if ((arguments.size() != 1)) { System.err.println(C_USAGE); parser.printUsage(System.err); System.exit(1); } this.motif = arguments.get(0); if ((outPrefix == null)) { System.err.println("Must specify outPrefix and at least one mnasePrefix"); parser.printUsage(System.err); System.err.println(C_USAGE); System.exit(1); } } catch (CmdLineException e) { System.err.println(e.getMessage()); System.err.println(C_USAGE); // print the list of available options parser.printUsage(System.err); System.err.println(); return; } Logger.getLogger(Logger.GLOBAL_LOGGER_NAME).setLevel(Level.SEVERE); // Setup output files and print domain finders ListUtils.setDelim("-"); String mnasePrefixStr = ""; String strandSec = (this.onlyFwMotifs) ? ".fwStrandOnly" : ".bothStrands"; String normString = (this.normalizationWindow == 0) ? "" : String.format(".normalized%dbpWind", this.normalizationWindow); String motifString = (this.motif == null) ? "" : String.format(".motif%s", this.motif.toUpperCase()); String withinStr = (this.withinFeat == null) ? "" : String.format(".withinFeat-%s", this.withinFeat); String name = String.format( "nucleosomeReads.%s%s%s.minReads%.2f.nuc%d.assoc%d%s.%s.%s", this.outPrefix, motifString, withinStr, this.minReadsPerBp, this.footprintSize, this.assocSize, strandSec, mnasePrefixStr, methPrefix); String outFn = String.format( "%s%s%s.minReads%.2f.nuc%d.assoc%d%s.%s.%s.csv", this.outPrefix, motifString, withinStr, this.minReadsPerBp, this.footprintSize, this.assocSize, strandSec, mnasePrefixStr, methPrefix); MethylDbQuerier params = new MethylDbQuerier(); params.setMinCTreads(this.minCTreads); params.setUseNonconversionFilter(!this.noNonconvFilter); params.setMaxOppstrandAfrac(this.maxOppStrandAfrac); params.setMaxNextNonGfrac(this.maxNextNonGfrac); if (this.withinFeat != null) params.addFeatFilter(this.withinFeat, this.featFlank); // We use the control table simply to limit the valid Cpg. This is a result of // our incorrect loading of Lister 2009 tables, which contains many Cpgs which // incorrectly yield a meth level of 0 at CpGs not covered in their sequencing data. // This was an artifact of the way they published their data, where they published // a list of methy-C positions without positions containing 0 mC reads, so we had // to add fake positions for all Cs in the genome, and we didn't know which ones // actually had coverage in their data. List<String> methTables = Arrays.asList(methPrefix, controlMethPrefix); // List<String> mnaseTables = Arrays.asList(mnasePrefix); int nSeries = 1; double[] methCounts = new double[METHCOUNTER_LEN]; double[] methTotals = new double[METHCOUNTER_LEN]; int chromNum = 1; for (String chr : MethylDbUtils.CHROMS_MINUS_TWELVE) // Arrays.asList("chr21","chr22")) // { System.err.printf("On chrom %d (%s)\n", chromNum++, chr); String s = String.format("variableStep\tchrom=%s\n", chr); int chrInt = (new ChromFeatures()).chrom_from_public_str(chr); // Iterator uses DB connection and can use a ton of memory because // it loads all rows at once. This stuff should really be added to iterator // class, but until it is , just iterate here over the chromosome int onCpg = 0; // Get the full array try { { ChromScoresFast counts[] = new ChromScoresFast[2]; ChromScoresFast meths[] = new ChromScoresFast[2]; int offs = 0; // 20000000; //0; int offe = 0; // 21000000; //0; System.err.printf("offs=%d, offe=%d\n", offs, offe); counts = MethylDbUtils.chromScoresMotifCounts( chr, this.refGenome, this.motif, offs, offe, this.onlyFwMotifs); meths = MethylDbUtils.chromScoresMethLevels( params, chr, this.methPrefix, this.refGenome, offs, offe); int minPos = counts[0].chromMinPos(chr); System.err.println("Getting min pos: " + minPos); int maxPos = counts[0].chromMaxPos(chr); System.err.println("Getting max pos: " + maxPos); for (int pos = minPos; pos < maxPos; pos += 1) // this.step) { boolean fwRead = (counts[0].getScore(chr, pos).intValue() >= 1); boolean revRead = (this.onlyFwMotifs) ? false : (counts[1].getScore(chr, pos).intValue() >= 1); boolean enoughReads = true; // if (mnaseReads.rawCounts>2) System.err.printf("Raw counts=%d\n", // (int)mnaseReads.rawCounts); // enoughReads = true; if (enoughReads && fwRead) incrementMethCounts(chr, pos, false, methCounts, methTotals, meths, maxPos); if (enoughReads && revRead) incrementMethCounts(chr, pos, true, methCounts, methTotals, meths, maxPos); // if ((pos % 1E6) == 0) System.err.printf("On pos #%d\n", pos); } // Try to get rid of objects counts[0] = null; counts[1] = null; meths[0] = null; meths[1] = null; System.gc(); } } catch (Exception e) { System.err.printf("%s\nCouldn't do region %s\n", e.toString(), chr); e.printStackTrace(); } } // pw.close(); outFn = outFn.replace(".csv", ".methAlign.csv"); PrintWriter pw = new PrintWriter(new FileOutputStream(outFn)); ListUtils.setDelim(","); pw.println(ListUtils.excelLine(methCounts)); pw.println(ListUtils.excelLine(methTotals)); pw.close(); } // Main
public void doMain(String[] args) throws Exception { CmdLineParser parser = new CmdLineParser(this); // if you have a wider console, you could increase the value; // here 80 is also the default parser.setUsageWidth(80); try { parser.parseArgument(args); if ((arguments.size() != 0)) { System.err.println(C_USAGE); parser.printUsage(System.err); System.exit(1); } if ((outPrefix == null)) { System.err.println("Must specify outPrefix and at least one mnasePrefix"); parser.printUsage(System.err); System.err.println(C_USAGE); System.exit(1); } } catch (CmdLineException e) { System.err.println(e.getMessage()); System.err.println(C_USAGE); // print the list of available options parser.printUsage(System.err); System.err.println(); return; } Logger.getLogger(Logger.GLOBAL_LOGGER_NAME).setLevel(Level.SEVERE); // Setup output files and print domain finders ListUtils.setDelim("-"); String mnasePrefixStr = mnasePrefix; String normString = (this.normalizationWindow == 0) ? "" : String.format(".normalized%dbpWind", this.normalizationWindow); String motifString = (this.motif == null) ? "" : String.format(".motif%s", this.motif.toUpperCase()); String withinStr = (this.withinFeat == null) ? "" : String.format(".withinFeat-%s", this.withinFeat); if (this.filterByMotif != null) motifString += String.format( ".filteredByMotif%s%s%s", this.filterByMotif, (this.filterByMotifInvert) ? "-inverted" : "", (this.filterByMotifOffset != 0) ? String.format("-offset%d", this.filterByMotifOffset) : ""); String fwOnlyStr = (this.fwStrandOnly) ? ".fwStrandOnly" : ""; String name = String.format( "nucleosomeReads.%s%s%s.minReads%.2f%s.nuc%d.assoc%d%s.%s.%s", this.outPrefix, motifString, withinStr, this.minReadsPerBp, fwOnlyStr, this.footprintSize, this.assocSize, normString, mnasePrefixStr, methPrefix); String outFn = String.format( "%s%s%s.minReads%.2f%s.nuc%d.assoc%d%s.%s.%s.csv", this.outPrefix, motifString, withinStr, this.minReadsPerBp, fwOnlyStr, this.footprintSize, this.assocSize, normString, mnasePrefixStr, methPrefix); String outFnWig = outFn.replace(".csv", ".wig"); // PrintWriter pw = new PrintWriter(new FileOutputStream(outFn)); PrintWriter pwWig = new PrintWriter(new FileOutputStream(outFnWig)); // pw.printf("%s,%s\n","Meth", "ReadsPerBp"); pwWig.printf("track type=wiggle_0 name=\"%s\" description=\"%s\"\n", name, name); MethylDbQuerier params = new MethylDbQuerier(); params.setMinCTreads(this.minCTreads); params.setUseNonconversionFilter(!this.noNonconvFilter); params.setMaxOppstrandAfrac(this.maxOppStrandAfrac); params.setMaxNextNonGfrac(this.maxNextNonGfrac); if (this.withinFeat != null) params.addFeatFilter(this.withinFeat, this.featFlank); // We use the control table simply to limit the valid Cpg. This is a result of // our incorrect loading of Lister 2009 tables, which contains many Cpgs which // incorrectly yield a meth level of 0 at CpGs not covered in their sequencing data. // This was an artifact of the way they published their data, where they published // a list of methy-C positions without positions containing 0 mC reads, so we had // to add fake positions for all Cs in the genome, and we didn't know which ones // actually had coverage in their data. List<String> methTables = Arrays.asList(methPrefix, controlMethPrefix); // List<String> mnaseTables = Arrays.asList(mnasePrefix); int nSeries = 1; boolean useChipseq[] = new boolean[nSeries]; for (int i = 0; i < nSeries; i++) { // Wow talk about a special case. useChipseq[i] = !mnasePrefix.contains("mnase") && !mnasePrefix.contains("Schones"); } double[] methCounts = new double[METHCOUNTER_LEN]; double[] methTotals = new double[METHCOUNTER_LEN]; int chromNum = 1; for (String chr : Arrays.asList( "chr22")) // MethylDbUtils.CHROMS_MINUS_TWELVE) //MethylDbUtils.SMALL_CHROMS) // // Arrays.asList("chr22")) //,"chr18","chr19","chr20")) // { System.err.printf("On chrom %d (%s)\n", chromNum++, chr); String s = String.format("variableStep\tchrom=%s\n", chr); pwWig.append(s); int chrInt = (new ChromFeatures()).chrom_from_public_str(chr); // Iterator uses DB connection and can use a ton of memory because // it loads all rows at once. This stuff should really be added to iterator // class, but until it is , just iterate here over the chromosome int onCpg = 0; // Get the full array try { { ChromScoresFast counts[] = new ChromScoresFast[2]; ChromScoresFast meths[] = new ChromScoresFast[2]; int offs = 0; // 20000000; //0; int offe = 30000000; // 0; System.err.printf("offs=%d, offe=%d\n", offs, offe); // The mnase counts are the same for both cases counts = MethylDbUtils.chromScoresReadCounts( params, chr, this.mnasePrefix, this.refGenome, offs, offe); // We can filter counts by a particular motif if (this.filterByMotif != null) { ChromScoresFast filterByMotifCounts[] = new ChromScoresFast[2]; filterByMotifCounts = MethylDbUtils.chromScoresMotifCounts( chr, this.refGenome, this.filterByMotif, offs, offe, false); System.err.printf( "%s, filtering + strand for motif %s. Pre-filter count: %.0f\n", chr, this.filterByMotif, counts[0].getScoresTotal(chr)); counts[0].mask(filterByMotifCounts[0], this.filterByMotifInvert); // counts[0].mask(filterByMotifCounts[1],this.filterByMotifInvert); System.err.printf( "%s, filtering + strand for motif %s. Post-filter count: %.0f\n", chr, this.filterByMotif, counts[0].getScoresTotal(chr)); System.err.printf( "%s, filtering - strand for motif %s. Pre-filter count: %.0f\n", chr, this.filterByMotif, counts[1].getScoresTotal(chr)); counts[1].mask(filterByMotifCounts[1], this.filterByMotifInvert); // counts[1].mask(filterByMotifCounts[0],this.filterByMotifInvert); System.err.printf( "%s, filtering - strand for motif %s. Post-filter count: %.0f\n", chr, this.filterByMotif, counts[1].getScoresTotal(chr)); } // The meth differ if it's a motif if (this.autoMnase || this.autoMnaseFw || this.autoMnaseRev) { meths = counts; } else if (this.motif == null) { meths = MethylDbUtils.chromScoresMethLevels( params, chr, this.methPrefix, this.refGenome, offs, offe); } else { char[] seqArr = null; { Sequence seq = GoldAssembly.chromSeq(this.refGenome, chr); String seqStr = seq.seqString(); if (offe > 0) { seqStr = seqStr.substring(offs, offe); } seqArr = seqStr.toUpperCase().toCharArray(); } System.err.println("Seq length=" + seqArr.length); ChromScoresMotifPositions all = new ChromScoresMotifPositions(this.refGenome); System.err.printf( "About to populate both strands for %s for motif %s\n", chr, this.motif); StrandedFeature.Strand motifStrand = (this.fwStrandOnly) ? StrandedFeature.POSITIVE : StrandedFeature.UNKNOWN; all.populate(chr, this.motif, seqArr, offs, motifStrand); meths[0] = all; meths[1] = all; // System.err.printf("Fw score at %d = %d\n",24009253,counts[0].getScore(chr, // 24009253)); // WigOptions wo = new WigOptions(); // wo.f_step = 1; // //counts[0] = counts[0].smooth(50, 30); // counts[0].wigOutput("testFw.wig", wo); // counts[1].wigOutput("testRev.wig", wo); } int minPos = counts[0].chromMinPos(chr); System.err.println("Getting min pos: " + minPos); int maxPos = counts[0].chromMaxPos(chr); System.err.println("Getting max pos: " + maxPos); for (int pos = minPos; pos < maxPos; pos += 1) // this.step) { boolean fwRead = (counts[0].getScore(chr, pos).intValue() >= 1); boolean revRead = (counts[1].getScore(chr, pos).intValue() >= 1); if (this.fwStrandOnly) revRead = false; // fwRead=false; boolean enoughReads = true; MnaseOutput mnaseReads = new MnaseOutput(); mnaseReads.rawCounts = (fwRead) ? 1 : 0; if (this.minReadsPerBp > 0.0) { if (fwRead) { int nucCenter = pos + ((this.PERIODICITY - this.assocSize) / 2); mnaseReads = this.countMnaseReads(chr, nucCenter, counts[0], counts[1], useChipseq[0]); } else if (revRead) { int nucCenter = pos - ((this.PERIODICITY - this.assocSize) / 2); mnaseReads = this.countMnaseReads(chr, nucCenter, counts[0], counts[1], useChipseq[0]); } enoughReads = (mnaseReads.val >= this.minReadsPerBp); } // if (mnaseReads.rawCounts>2) System.err.printf("Raw counts=%d\n", // (int)mnaseReads.rawCounts); // enoughReads = true; if (enoughReads && fwRead) incrementMethCounts(chr, pos, false, methCounts, methTotals, meths, maxPos); if (enoughReads && revRead) incrementMethCounts(chr, pos, true, methCounts, methTotals, meths, maxPos); // if ((pos % 1E6) == 0) System.err.printf("On pos #%d, meth=%d\n", pos, (int) mnaseReads.rawCounts); if (enoughReads) { // pwWig.printf("%d\t%.2f\n",pos, mnaseReads.val); //// pwWig.printf("%d\t%.2f\t%.2f\t%s\n",pos,mnaseReads.rawCounts, // mnaseReads.val, fwRead?"+":"-"); // //// pw.printf("%d,%d", chrInt, pos); ////// pw.printf(",%.3f", mnaseReads.val); //// pw.printf(",%d", (int)mnaseReads.rawCounts); ////// pw.printf(",%.3f", mnaseReads.normWindCount); //// pw.println(); } } // Try to get rid of objects counts[0] = null; counts[1] = null; meths[0] = null; meths[1] = null; System.gc(); } } catch (Exception e) { System.err.printf("%s\nCouldn't do region %s\n", e.toString(), chr); e.printStackTrace(); } } // pw.close(); pwWig.close(); outFn = outFn.replace(".csv", ".methAlign.csv"); PrintWriter pw = new PrintWriter(new FileOutputStream(outFn)); ListUtils.setDelim(","); pw.println(ListUtils.excelLine(methCounts)); pw.println(ListUtils.excelLine(methTotals)); pw.close(); } // Main