コード例 #1
0
  /**
   * Command-line entry point: accumulates methylation counts around every position where a
   * sequence motif occurs and writes the two accumulated arrays to a CSV file.
   *
   * <p>Exactly one positional argument (stored as {@code motif}) and a non-null {@code outPrefix}
   * are required; otherwise the usage text is printed and the JVM exits with status 1. For each
   * chromosome in {@code MethylDbUtils.CHROMS_MINUS_TWELVE} the motif counts and methylation
   * levels are loaded, every position in {@code [chromMinPos, chromMaxPos)} is visited, and
   * {@code incrementMethCounts} is called once for each strand whose motif count at that position
   * is at least 1 (the reverse strand is never consulted when {@code onlyFwMotifs} is set). A
   * failure while processing one chromosome is reported on stderr and the remaining chromosomes
   * are still processed.
   *
   * <p>Side effects: sets the global logger level to {@code SEVERE}, changes the {@code ListUtils}
   * delimiter, and writes {@code methCounts} and {@code methTotals} as two lines to
   * {@code <base>.methAlign.csv}, where {@code <base>} is derived from the run parameters.
   *
   * @param args raw command-line arguments, handed to a {@code CmdLineParser} bound to this object
   * @throws Exception if the output file cannot be opened or written, or if anything outside the
   *     per-chromosome try block fails
   */
  public void doMain(String[] args) throws Exception {
    CmdLineParser parser = new CmdLineParser(this);
    // if you have a wider console, you could increase the value;
    // here 80 is also the default
    parser.setUsageWidth(80);
    try {
      parser.parseArgument(args);

      if (arguments.size() != 1) {
        System.err.println(C_USAGE);
        parser.printUsage(System.err);
        System.exit(1);
      }

      this.motif = arguments.get(0);
      if (outPrefix == null) {
        System.err.println("Must specify outPrefix and at least one mnasePrefix");
        parser.printUsage(System.err);
        System.err.println(C_USAGE);
        System.exit(1);
      }
    } catch (CmdLineException e) {
      System.err.println(e.getMessage());
      System.err.println(C_USAGE);
      // print the list of available options
      parser.printUsage(System.err);
      System.err.println();
      // NOTE(review): unlike the checks above, this path returns normally instead of exiting
      // with status 1 -- confirm whether callers rely on that before changing it.
      return;
    }

    Logger.getLogger(Logger.GLOBAL_LOGGER_NAME).setLevel(Level.SEVERE);

    // Build the output file name from the run parameters.
    ListUtils.setDelim("-");
    // This method references no MNase prefix; the slot is left empty, so the generated name
    // contains ".." at that position, exactly as before.
    String mnasePrefixStr = "";
    String strandSec = (this.onlyFwMotifs) ? ".fwStrandOnly" : ".bothStrands";
    String motifString =
        (this.motif == null) ? "" : String.format(".motif%s", this.motif.toUpperCase());
    String withinStr =
        (this.withinFeat == null) ? "" : String.format(".withinFeat-%s", this.withinFeat);

    // The extension is appended to this base instead of being swapped in with
    // String.replace(".csv", ...), which would also rewrite any ".csv" occurring inside
    // outPrefix or methPrefix.
    String outBase =
        String.format(
            "%s%s%s.minReads%.2f.nuc%d.assoc%d%s.%s.%s",
            this.outPrefix,
            motifString,
            withinStr,
            this.minReadsPerBp,
            this.footprintSize,
            this.assocSize,
            strandSec,
            mnasePrefixStr,
            methPrefix);

    MethylDbQuerier params = new MethylDbQuerier();
    params.setMinCTreads(this.minCTreads);
    params.setUseNonconversionFilter(!this.noNonconvFilter);
    params.setMaxOppstrandAfrac(this.maxOppStrandAfrac);
    params.setMaxNextNonGfrac(this.maxNextNonGfrac);
    if (this.withinFeat != null) {
      params.addFeatFilter(this.withinFeat, this.featFlank);
    }

    double[] methCounts = new double[METHCOUNTER_LEN];
    double[] methTotals = new double[METHCOUNTER_LEN];

    int chromNum = 1;
    for (String chr : MethylDbUtils.CHROMS_MINUS_TWELVE) {
      System.err.printf("On chrom %d (%s)\n", chromNum++, chr);

      // NOTE(review): the result of this lookup was never used. The call is kept only in case it
      // has side effects or rejects unknown chromosome names -- confirm and remove.
      new ChromFeatures().chrom_from_public_str(chr);

      // Iterator uses DB connection and can use a ton of memory because
      // it loads all rows at once.  This stuff should really be added to iterator
      // class, but until it is , just iterate here over the chromosome
      try {
        int offs = 0;
        int offe = 0;
        System.err.printf("offs=%d, offe=%d\n", offs, offe);

        ChromScoresFast[] counts =
            MethylDbUtils.chromScoresMotifCounts(
                chr, this.refGenome, this.motif, offs, offe, this.onlyFwMotifs);
        ChromScoresFast[] meths =
            MethylDbUtils.chromScoresMethLevels(
                params, chr, this.methPrefix, this.refGenome, offs, offe);

        int minPos = counts[0].chromMinPos(chr);
        System.err.println("Getting min pos: " + minPos);
        int maxPos = counts[0].chromMaxPos(chr);
        System.err.println("Getting max pos: " + maxPos);

        for (int pos = minPos; pos < maxPos; pos++) {
          boolean fwRead = counts[0].getScore(chr, pos).intValue() >= 1;
          // Short-circuits so counts[1] is not touched in forward-only mode.
          boolean revRead =
              !this.onlyFwMotifs && (counts[1].getScore(chr, pos).intValue() >= 1);

          if (fwRead) {
            incrementMethCounts(chr, pos, false, methCounts, methTotals, meths, maxPos);
          }
          if (revRead) {
            incrementMethCounts(chr, pos, true, methCounts, methTotals, meths, maxPos);
          }

          // Progress report every million positions (integer modulo; no floating point needed).
          if ((pos % 1000000) == 0) {
            System.err.printf("On pos #%d\n", pos);
          }
        }

        // Try to get rid of objects
        counts[0] = null;
        counts[1] = null;
        meths[0] = null;
        meths[1] = null;
        System.gc();
      } catch (Exception e) {
        // Best effort: report the failure and carry on with the next chromosome.
        System.err.printf("%s\nCouldn't do region %s\n", e.toString(), chr);
        e.printStackTrace();
      }
    }

    String methAlignFn = outBase + ".methAlign.csv";
    PrintWriter pw = new PrintWriter(new FileOutputStream(methAlignFn));
    try {
      ListUtils.setDelim(",");
      pw.println(ListUtils.excelLine(methCounts));
      pw.println(ListUtils.excelLine(methTotals));
      // PrintWriter never throws on write failure; surface it so results are not lost silently.
      if (pw.checkError()) {
        throw new java.io.IOException("Error writing " + methAlignFn);
      }
    } finally {
      pw.close();
    }
  } // Main
コード例 #2
0
  /**
   * Command-line entry point: accumulates methylation (or motif, or read-count) signal around
   * read-start positions taken from the {@code mnasePrefix} data set and writes the two
   * accumulated arrays to a CSV file.
   *
   * <p>No positional arguments are accepted, and both {@code outPrefix} and {@code mnasePrefix}
   * must be set; otherwise the usage text is printed and the JVM exits with status 1. For each
   * processed chromosome the per-strand read counts are loaded and optionally masked by
   * {@code filterByMotif}. The array passed to {@code incrementMethCounts} is chosen as follows:
   * the read counts themselves when any {@code autoMnase*} flag is set, methylation levels when
   * {@code motif} is null, and motif positions otherwise. Every position in
   * {@code [chromMinPos, chromMaxPos)} with a read count of at least 1 on a strand triggers one
   * {@code incrementMethCounts} call for that strand, subject to the {@code minReadsPerBp}
   * threshold when it is positive. A failure while processing one chromosome is reported on
   * stderr and processing continues.
   *
   * <p>Side effects: sets the global logger level to {@code SEVERE}, changes the
   * {@code ListUtils} delimiter, writes {@code <base>.wig} (track header and one
   * {@code variableStep} line per chromosome only) and {@code <base>.methAlign.csv}
   * ({@code methCounts} and {@code methTotals}, one line each).
   *
   * @param args raw command-line arguments, handed to a {@code CmdLineParser} bound to this object
   * @throws Exception if an output file cannot be opened or written, or if anything outside the
   *     per-chromosome try block fails
   */
  public void doMain(String[] args) throws Exception {
    CmdLineParser parser = new CmdLineParser(this);
    // if you have a wider console, you could increase the value;
    // here 80 is also the default
    parser.setUsageWidth(80);
    try {
      parser.parseArgument(args);

      if (arguments.size() != 0) {
        System.err.println(C_USAGE);
        parser.printUsage(System.err);
        System.exit(1);
      }

      // mnasePrefix is dereferenced unconditionally further down, so a missing value is rejected
      // here with the usage text rather than failing later with a NullPointerException after the
      // wig file has already been created.
      if ((outPrefix == null) || (mnasePrefix == null)) {
        System.err.println("Must specify outPrefix and at least one mnasePrefix");
        parser.printUsage(System.err);
        System.err.println(C_USAGE);
        System.exit(1);
      }
    } catch (CmdLineException e) {
      System.err.println(e.getMessage());
      System.err.println(C_USAGE);
      // print the list of available options
      parser.printUsage(System.err);
      System.err.println();
      // NOTE(review): unlike the checks above, this path returns normally instead of exiting
      // with status 1 -- confirm whether callers rely on that before changing it.
      return;
    }

    Logger.getLogger(Logger.GLOBAL_LOGGER_NAME).setLevel(Level.SEVERE);

    // Build the output file names from the run parameters.
    ListUtils.setDelim("-");
    String normString =
        (this.normalizationWindow == 0)
            ? ""
            : String.format(".normalized%dbpWind", this.normalizationWindow);
    String motifString =
        (this.motif == null) ? "" : String.format(".motif%s", this.motif.toUpperCase());
    String withinStr =
        (this.withinFeat == null) ? "" : String.format(".withinFeat-%s", this.withinFeat);

    if (this.filterByMotif != null) {
      motifString +=
          String.format(
              ".filteredByMotif%s%s%s",
              this.filterByMotif,
              (this.filterByMotifInvert) ? "-inverted" : "",
              (this.filterByMotifOffset != 0)
                  ? String.format("-offset%d", this.filterByMotifOffset)
                  : "");
    }
    String fwOnlyStr = (this.fwStrandOnly) ? ".fwStrandOnly" : "";

    // Extensions are appended to this base instead of being swapped in with
    // String.replace(".csv", ...), which would also rewrite any ".csv" occurring inside one of
    // the prefixes.
    String outBase =
        String.format(
            "%s%s%s.minReads%.2f%s.nuc%d.assoc%d%s.%s.%s",
            this.outPrefix,
            motifString,
            withinStr,
            this.minReadsPerBp,
            fwOnlyStr,
            this.footprintSize,
            this.assocSize,
            normString,
            this.mnasePrefix,
            methPrefix);
    String name = "nucleosomeReads." + outBase;
    String outFnWig = outBase + ".wig";

    double[] methCounts = new double[METHCOUNTER_LEN];
    double[] methTotals = new double[METHCOUNTER_LEN];

    PrintWriter pwWig = new PrintWriter(new FileOutputStream(outFnWig));
    try {
      pwWig.printf("track type=wiggle_0 name=\"%s\" description=\"%s\"\n", name, name);

      MethylDbQuerier params = new MethylDbQuerier();
      params.setMinCTreads(this.minCTreads);
      params.setUseNonconversionFilter(!this.noNonconvFilter);
      params.setMaxOppstrandAfrac(this.maxOppStrandAfrac);
      params.setMaxNextNonGfrac(this.maxNextNonGfrac);
      if (this.withinFeat != null) {
        params.addFeatFilter(this.withinFeat, this.featFlank);
      }

      // Wow talk about a special case.
      boolean useChipseq = !mnasePrefix.contains("mnase") && !mnasePrefix.contains("Schones");

      // NOTE(review): only chr22 is processed and the region end is fixed at 30,000,000 below;
      // both look like debugging leftovers (earlier revisions referenced
      // MethylDbUtils.CHROMS_MINUS_TWELVE) -- confirm before relying on the output.
      int chromNum = 1;
      for (String chr : Arrays.asList("chr22")) {
        System.err.printf("On chrom %d (%s)\n", chromNum++, chr);
        pwWig.append(String.format("variableStep\tchrom=%s\n", chr));

        // NOTE(review): the result of this lookup was never used. The call is kept only in case
        // it has side effects or rejects unknown chromosome names -- confirm and remove.
        new ChromFeatures().chrom_from_public_str(chr);

        // Iterator uses DB connection and can use a ton of memory because
        // it loads all rows at once.  This stuff should really be added to iterator
        // class, but until it is , just iterate here over the chromosome
        try {
          int offs = 0;
          int offe = 30000000;
          System.err.printf("offs=%d, offe=%d\n", offs, offe);

          // The mnase counts are the same for both cases
          ChromScoresFast[] counts =
              MethylDbUtils.chromScoresReadCounts(
                  params, chr, this.mnasePrefix, this.refGenome, offs, offe);

          // We can filter counts by a particular motif
          if (this.filterByMotif != null) {
            ChromScoresFast[] filterByMotifCounts =
                MethylDbUtils.chromScoresMotifCounts(
                    chr, this.refGenome, this.filterByMotif, offs, offe, false);

            System.err.printf(
                "%s, filtering + strand for motif %s. Pre-filter count: %.0f\n",
                chr, this.filterByMotif, counts[0].getScoresTotal(chr));
            counts[0].mask(filterByMotifCounts[0], this.filterByMotifInvert);
            System.err.printf(
                "%s, filtering + strand for motif %s. Post-filter count: %.0f\n",
                chr, this.filterByMotif, counts[0].getScoresTotal(chr));
            System.err.printf(
                "%s, filtering - strand for motif %s. Pre-filter count: %.0f\n",
                chr, this.filterByMotif, counts[1].getScoresTotal(chr));
            counts[1].mask(filterByMotifCounts[1], this.filterByMotifInvert);
            System.err.printf(
                "%s, filtering - strand for motif %s. Post-filter count: %.0f\n",
                chr, this.filterByMotif, counts[1].getScoresTotal(chr));
          }

          // The meth differ if it's a motif
          ChromScoresFast[] meths;
          if (this.autoMnase || this.autoMnaseFw || this.autoMnaseRev) {
            // Same array object as counts: the read counts are aligned against themselves.
            meths = counts;
          } else if (this.motif == null) {
            meths =
                MethylDbUtils.chromScoresMethLevels(
                    params, chr, this.methPrefix, this.refGenome, offs, offe);
          } else {
            String seqStr = GoldAssembly.chromSeq(this.refGenome, chr).seqString();
            if (offe > 0) {
              seqStr = seqStr.substring(offs, offe);
            }
            char[] seqArr = seqStr.toUpperCase().toCharArray();
            System.err.println("Seq length=" + seqArr.length);

            ChromScoresMotifPositions all = new ChromScoresMotifPositions(this.refGenome);
            System.err.printf(
                "About to populate both strands for %s for motif %s\n", chr, this.motif);
            StrandedFeature.Strand motifStrand =
                (this.fwStrandOnly) ? StrandedFeature.POSITIVE : StrandedFeature.UNKNOWN;
            all.populate(chr, this.motif, seqArr, offs, motifStrand);

            // One motif-position object serves both strand slots.
            meths = new ChromScoresFast[] {all, all};
          }

          int minPos = counts[0].chromMinPos(chr);
          System.err.println("Getting min pos: " + minPos);
          int maxPos = counts[0].chromMaxPos(chr);
          System.err.println("Getting max pos: " + maxPos);

          for (int pos = minPos; pos < maxPos; pos++) {
            boolean fwRead = counts[0].getScore(chr, pos).intValue() >= 1;
            boolean revRead = counts[1].getScore(chr, pos).intValue() >= 1;
            if (this.fwStrandOnly) {
              revRead = false;
            }

            boolean enoughReads = true;
            MnaseOutput mnaseReads = new MnaseOutput();
            mnaseReads.rawCounts = (fwRead) ? 1 : 0;
            if (this.minReadsPerBp > 0.0) {
              // The window centre is shifted downstream of a forward read and upstream of a
              // reverse read by half of (PERIODICITY - assocSize).
              int halfShift = (this.PERIODICITY - this.assocSize) / 2;
              if (fwRead) {
                mnaseReads =
                    this.countMnaseReads(chr, pos + halfShift, counts[0], counts[1], useChipseq);
              } else if (revRead) {
                mnaseReads =
                    this.countMnaseReads(chr, pos - halfShift, counts[0], counts[1], useChipseq);
              }
              enoughReads = (mnaseReads.val >= this.minReadsPerBp);
            }

            if (enoughReads && fwRead) {
              incrementMethCounts(chr, pos, false, methCounts, methTotals, meths, maxPos);
            }
            if (enoughReads && revRead) {
              incrementMethCounts(chr, pos, true, methCounts, methTotals, meths, maxPos);
            }

            // Progress report every million positions (integer modulo; no floating point needed).
            if ((pos % 1000000) == 0) {
              System.err.printf("On pos #%d, meth=%d\n", pos, (int) mnaseReads.rawCounts);
            }
          }

          // Try to get rid of objects
          counts[0] = null;
          counts[1] = null;
          meths[0] = null;
          meths[1] = null;
          System.gc();
        } catch (Exception e) {
          // Best effort: report the failure and carry on with the next chromosome.
          System.err.printf("%s\nCouldn't do region %s\n", e.toString(), chr);
          e.printStackTrace();
        }
      }
    } finally {
      // Close the wig file even when something outside the per-chromosome try block throws.
      pwWig.close();
    }

    String methAlignFn = outBase + ".methAlign.csv";
    PrintWriter pw = new PrintWriter(new FileOutputStream(methAlignFn));
    try {
      ListUtils.setDelim(",");
      pw.println(ListUtils.excelLine(methCounts));
      pw.println(ListUtils.excelLine(methTotals));
      // PrintWriter never throws on write failure; surface it so results are not lost silently.
      if (pw.checkError()) {
        throw new java.io.IOException("Error writing " + methAlignFn);
      }
    } finally {
      pw.close();
    }
  } // Main