Esempio n. 1
1
 /**
  * Parses the command line and, for every bed file found, builds stair-step bar files plus a
  * {@code _<threshold>_Filt.bed} output whose path is the bed file path minus its extension.
  *
  * <p>Bed files that fail to parse (null or empty result) are reported and skipped. An
  * IOException while opening an output file is printed and ends processing.
  *
  * @param args raw command line arguments, handed to processArgs
  */
 public Bed2Bar(String[] args) {
   try {
     processArgs(args);
     // load Window[]
     for (int i = 0; i < bedFiles.length; i++) {
       bedFile = bedFiles[i];
       System.out.println("Parsing " + bedFile.getName());
       bedLinesHash = Bed.parseBedFile(bedFile, true, false);
       if (bedLinesHash == null || bedLinesHash.size() == 0) {
         System.out.println("Problem parsing bed file, skipping!");
         continue;
       }
       barDirectory = IO.makeDirectory(bedFile, "");
       File bedOutFile =
           new File(Misc.removeExtension(bedFile.toString()) + "_" + threshold + "_Filt.bed");
       bedOut = new PrintWriter(new FileWriter(bedOutFile));
       try {
         makeStairStepBarFiles();
       } finally {
         // Close (and flush) this bed file's writer before the next one replaces the field.
         // Closing only once after the loop leaked every writer but the last and threw a
         // NullPointerException when every bed file had been skipped.
         bedOut.close();
       }
     }
     System.out.println("\nDone!\n");
   } catch (IOException e) {
     e.printStackTrace();
   }
 }
 /**
  * Assembles the "organize and clean up" XML fragment: a ReviewDirGenerator element carrying the
  * output directory's canonical path and name, the submitter and the analysis type.
  *
  * @return the XML fragment; if resolving the canonical path fails the error is reported through
  *     Misc.printErrAndExit and null is the fall-through value
  */
 public String fetchCleanupReviewDirLinkGenXml() {
   try {
     StringBuilder xml = new StringBuilder();
     // opening tag with its attributes
     xml.append("<ReviewDirGenerator class='operator.ReviewDirGenerator' destination.dir='");
     xml.append(outputDirectory.getCanonicalPath());
     xml.append("' sample='");
     xml.append(outputDirectory.getName());
     xml.append("' submitter='");
     xml.append(submitter);
     xml.append("' analysis.type='");
     xml.append(analysisType);
     xml.append("'> \n");
     // child elements
     xml.append("    <finalVariants /> \n");
     xml.append("    <ViewerCSV /> \n");
     xml.append("    <InstanceLog class='buffer.InstanceLogFile' /> \n");
     xml.append("    <finalBAM /> \n");
     xml.append("    <QCOutputFile /> \n");
     xml.append("    <bedForVars /> \n");
     xml.append("</ReviewDirGenerator> \n\n");
     return xml.toString();
   } catch (IOException e) {
     e.printStackTrace();
     Misc.printErrAndExit("\nError fetching clean up and review dir links.\n");
   }
   return null;
 }
  /**
   * Prints a "name / exists / path" line for every file resource this run relies on and aborts
   * through Misc.printErrAndExit when any of them is unset or does not exist on disk. The web
   * links directory is only listed when it has been set.
   */
  public void checkPrintFiles() {
    // insertion order is the print order
    LinkedHashMap<String, File> resources = new LinkedHashMap<String, File>();
    resources.put("Reference directory", referenceDir);
    resources.put("Pipeline.jar", pJar);
    resources.put("Pipeline properties", truncPipePropFile);
    resources.put("Coverage QC bed", bedForCoverageQC);
    resources.put("Variant calling bed", bedForVarCalling);
    resources.put("Fasta genome reference", fastaReference);
    resources.put("Unfiltered bam", unfilteredBam);
    resources.put("Final bam", finalBam);
    resources.put("Final vcf", finalVcf);
    if (webRootForLinks != null) {
      resources.put("Web links dir", webRootForLinks);
    }
    resources.put("Final output dir", outputDirectory);

    System.out.println("\nResources (name exists path):");
    boolean anyMissing = false;
    for (String label : resources.keySet()) {
      File resource = resources.get(label);
      // an unset (null) resource counts as missing
      boolean present = resource != null && resource.exists();
      if (!present) {
        anyMissing = true;
      }
      System.out.println(label + "\t" + present + "\t" + resource);
    }
    if (anyMissing) {
      Misc.printErrAndExit("\nMissing resources! See above.");
    }
  }
Esempio n. 4
0
  /**
   * Parses the command line arguments and assigns the matching fields.
   *
   * <p>Recognized flags (matched case-insensitively): {@code -f} bed file or directory,
   * {@code -v} genome version, {@code -s} turn on score summing, {@code -t} threshold (float),
   * {@code -h} print the docs and exit. After parsing, files ending in .bed, .bed.zip or .bed.gz
   * are collected from the {@code -f} location; the method exits with a message when none are
   * found or when no genome version was given.
   *
   * @param args raw command line arguments
   */
  public void processArgs(String[] args) {
    File file = null;
    Pattern pat = Pattern.compile("-[a-z]");
    for (int i = 0; i < args.length; i++) {
      String lcArg = args[i].toLowerCase();
      Matcher mat = pat.matcher(lcArg);
      if (mat.matches()) {
        // Switch on the lower-cased flag: the pattern is matched against lcArg, so an upper-case
        // flag such as -F gets this far and must select the same case as -f instead of falling
        // into "unknown option".
        char test = lcArg.charAt(1);
        try {
          switch (test) {
            case 'f':
              file = new File(args[i + 1]);
              i++;
              break;
            case 'v':
              genomeVersion = args[i + 1];
              i++;
              break;
            case 's':
              sumScores = true;
              break;
            case 't':
              threshold = Float.parseFloat(args[++i]);
              break;
            case 'h':
              printDocs();
              System.exit(0);
            default:
              Misc.printExit("\nError, unknown option! " + mat.group());
          }
        } catch (Exception e) {
          // reached for a missing value (index past the end of args) or an unparsable number
          System.out.print("\nSorry, something doesn't look right with this parameter: -" + test);
          System.out.println();
          System.exit(0);
        }
      }
    }
    if (file == null || file.exists() == false)
      Misc.printErrAndExit("Problem finding your bed files!\n");
    // pull files
    File[][] tot = new File[3][];
    tot[0] = IO.extractFiles(file, ".bed");
    tot[1] = IO.extractFiles(file, ".bed.zip");
    tot[2] = IO.extractFiles(file, ".bed.gz");
    bedFiles = IO.collapseFileArray(tot);
    if (bedFiles == null || bedFiles.length == 0)
      Misc.printErrAndExit("Problem finding your xxx.bed(.zip/.gz OK) files!\n");

    // genome version
    if (genomeVersion == null)
      Misc.printErrAndExit(
          "Please enter a genome version (e.g. H_sapiens_Mar_2006, see http://genome.ucsc.edu/FAQ/FAQreleases)\n");
  }
 /**
  * When the final vcf name ends in ".gz", repoints finalVcf at the same-named file without that
  * suffix in the same directory. If that file is not already on disk it is produced with
  * IO.uncompress and deleteTempVcf is set; a null result from IO.uncompress is fatal.
  */
 private void checkForGzippedVcf() {
   String gzName = finalVcf.getName();
   if (!gzName.endsWith(".gz")) {
     return;
   }
   // strip the three characters of ".gz"
   String plainName = gzName.substring(0, gzName.length() - 3);
   File plainVcf = new File(finalVcf.getParentFile(), plainName);
   if (!plainVcf.exists()) {
     deleteTempVcf = true;
     File result = IO.uncompress(finalVcf, plainVcf);
     if (result == null) {
       Misc.printErrAndExit("ERROR: failed to uncompress -> " + finalVcf);
     }
   }
   finalVcf = plainVcf;
 }
Esempio n. 6
0
  /**
   * Parses the command line arguments and assigns the matching fields.
   *
   * <p>Recognized flags (matched case-insensitively): {@code -f} sgr file or directory,
   * {@code -v} genome version, {@code -s} strand, {@code -t} turn on stair-step output,
   * {@code -h} print the docs and exit. After parsing, files ending in .sgr, .sgr.zip or .sgr.gz
   * are collected from the {@code -f} location; the method exits with a message when the
   * location is unreadable, no files are found, or no genome version was given.
   *
   * @param args raw command line arguments
   */
  public void processArgs(String[] args) {
    Pattern pat = Pattern.compile("-[a-z]");
    File dir = null;
    for (int i = 0; i < args.length; i++) {
      String lcArg = args[i].toLowerCase();
      Matcher mat = pat.matcher(lcArg);
      if (mat.matches()) {
        // Switch on the lower-cased flag: the pattern is matched against lcArg, so an upper-case
        // flag such as -F gets this far and must select the same case as -f instead of falling
        // into "unknown option".
        char test = lcArg.charAt(1);
        try {
          switch (test) {
            case 'f':
              dir = new File(args[i + 1]);
              i++;
              break;
            case 'v':
              genomeVersion = args[i + 1];
              i++;
              break;
            case 's':
              strand = args[++i];
              break;
            case 't':
              stairStep = true;
              break;
            case 'h':
              printDocs();
              System.exit(0);
            default:
              Misc.printExit("\nError: unknown option! " + mat.group());
          }
        } catch (Exception e) {
          // reached when a flag's value is missing (index past the end of args)
          Misc.printExit(
              "\nSorry, something doesn't look right with this parameter: -" + test + "\n");
        }
      }
    }
    if (dir == null || dir.canRead() == false)
      Misc.printExit("\nError: cannot find or read your sgr file/ directory.\n");
    File[][] tot = new File[3][];
    tot[0] = IO.extractFiles(dir, ".sgr");
    tot[1] = IO.extractFiles(dir, ".sgr.zip");
    tot[2] = IO.extractFiles(dir, ".sgr.gz");
    sgrFiles = IO.collapseFileArray(tot);

    if (sgrFiles == null || sgrFiles.length == 0)
      Misc.printExit("\nError: cannot find your xxx.sgr.zip file(s)");
    if (genomeVersion == null)
      Misc.printExit(
          "\nError: you must supply a genome version. Goto http://genome.ucsc.edu/cgi-"
              + "bin/hgGateway load your organism to find the associated genome version.\n");
  }
  /**
   * Assembles the variant output XML fragment: a ViewerFile element that names the annotated csv
   * after the output directory, followed by a QCtoJSON element that names qc.json.
   *
   * @return the two XML elements as one string
   */
  public String fetchVariantOutputXml() {
    StringBuilder xml = new StringBuilder();

    // Write variants to a csv file. No anno.keys='bad.region' attribute is included; the
    // original note says to use the USeq VCFRegionMarker instead.
    xml.append("<ViewerFile class='plugins.writers.varviewer.VarViewerWriter'> \n");
    xml.append("	<VariantPool /> \n");
    xml.append("	<Genes /> \n");
    xml.append("	<ViewerCSV class='buffer.CSVFile' filename='");
    xml.append(outputDirectory.getName());
    xml.append("_annotated.csv' /> \n");
    xml.append("</ViewerFile> \n\n");

    // Create Final JsonQC Section (needs to happen after variant calls)
    xml.append("<QCtoJSON class='operator.qc.QCtoJSON'> \n");
    xml.append("	<rawBamMetrics /> \n");
    xml.append("	<rawdocmetrics /> \n");
    xml.append("	<finalBamMetrics /> \n");
    xml.append("	<finaldocmetrics /> \n");
    xml.append("	<bedForCoverageQC /> \n");
    xml.append("	<filteredVars /> \n");
    xml.append("	<NoCallCSV /> \n");
    xml.append("	<QCOutputFile class='buffer.TextBuffer' filename='qc.json' /> \n");
    xml.append("</QCtoJSON> \n");
    xml.append("\n");

    return xml.toString();
  }
Esempio n. 8
0
  /**
   * Parses the command line, then for each sgr file loads its graphs and writes one
   * {@code <chromosome>.bar} file per graph into a directory named after the sgr file's canonical
   * path minus its extension. An IOException is printed and reported through
   * Misc.printErrAndExit.
   *
   * @param args raw command line arguments, handed to processArgs
   */
  public Sgr2Bar(String[] args) {
    try {
      processArgs(args);

      // echo the settings in effect
      System.out.println("Genome version -> " + genomeVersion);
      System.out.println("Strand -> " + strand);
      System.out.println("Stair Step? -> " + stairStep);
      System.out.println();

      // graph type tag depends on the stair-step switch
      tagValues.put(
          BarParser.GRAPH_TYPE_TAG,
          stairStep ? BarParser.GRAPH_TYPE_STAIRSTEP : BarParser.GRAPH_TYPE_BAR);

      for (File sgrFile : sgrFiles) {
        System.out.println("\tLoading -> " + sgrFile);
        GrGraph[] graphs = GrGraph.loadSgrFile(sgrFile);

        // save directory sits beside the sgr file, same name without the extension
        File saveDir = new File(Misc.removeExtension(sgrFile.getCanonicalPath()));
        saveDir.mkdir();

        System.out.println("\tSaving...");
        for (GrGraph graph : graphs) {
          String chrom = graph.getChromosome();
          barParser.writeBarFile(
              new File(saveDir, chrom + ".bar"),
              graph.getChromosome(),
              genomeVersion,
              strand.charAt(0),
              graph.getBasePositions(),
              graph.getValues(),
              tagValues);
        }
      }
      System.out.println("\nDone!\n");
    } catch (IOException e) {
      e.printStackTrace();
      Misc.printErrAndExit("\nFailed to parse file!");
    }
  }
 /**
  * Cuts a sequence into consecutive chunks of numberOfBases characters (the last chunk may be
  * shorter) and writes each chunk with Seq.writeBinarySequence to a file in indexDirectory
  * named {@code <name>_<start>-<inclusive end>}.
  *
  * <p>If a chunk's file already exists a warning is printed and the method returns without
  * writing that chunk or any later chunk of this sequence.
  *
  * @param name prefix for the chunk file names
  * @param seq the sequence to split
  * @throws IllegalArgumentException if numberOfBases is not positive
  */
 public void splitIndex(String name, String seq) {
   // A chunk size of zero or less never advances start, so the loop below would never end.
   if (numberOfBases <= 0) {
     throw new IllegalArgumentException(
         "Number of bases per chunk must be positive, found " + numberOfBases);
   }
   int start = 0;
   boolean go = true;
   while (go) {
     int end = start + numberOfBases;
     // clamp the final chunk to the sequence length and stop after it
     if (end >= seq.length()) {
       end = seq.length();
       go = false;
     }
     String subSeq = seq.substring(start, end);
     // file name carries the zero-based start and the inclusive end coordinate
     File binarySeq = new File(indexDirectory, name + "_" + start + "-" + (end - 1));
     if (binarySeq.exists()) {
       System.out.println("WARNING, " + binarySeq + " already exists, skipping!");
       return;
     }
     Seq.writeBinarySequence(subSeq, binarySeq);
     start = end;
   }
 }
  /**
   * Expands the truncated pipeline properties file into a complete one and writes it to
   * {@code pipelineProperties.xml} in the output directory.
   *
   * <p>Each line of the truncated file is handled as follows: an {@code <entry>} whose value
   * starts with D, A or B (Data/, Apps/, Bed/) has its value resolved against referenceDir and
   * replaced with that path if the file exists; any other line containing "threads" is replaced
   * with a threads entry holding the configured thread count; everything else is copied as is.
   * The method exits through Misc.printErrAndExit if any referenced file is missing or the
   * output cannot be written.
   */
  private void buildXmlPropertiesFile() {
    System.out.println(
        "\nBuilding and checking your pipeline properties file -> " + truncPipePropFile);
    StringBuilder toPrint = new StringBuilder();

    // walk through the prop file
    String[] prop = IO.loadFileIntoStringArray(truncPipePropFile);
    // A character class needs no '|' separators; "[D|A|B]" also accepted a literal '|'.
    Pattern val = Pattern.compile("(<entry key.+>)([DAB].*)</entry>");
    boolean missingFile = false;
    for (String s : prop) {
      // does it match a file needing prepending? Data/, Apps/, Bed/
      Matcher mat = val.matcher(s);
      if (mat.matches()) {
        File test = new File(referenceDir, mat.group(2));
        if (test.exists()) {
          System.out.println("Found\t" + test);
          toPrint.append(mat.group(1));
          toPrint.append(test.toString());
          toPrint.append("</entry>");
        } else {
          System.out.println("Missing\t" + test);
          missingFile = true;
        }
      }
      // threads?
      else if (s.contains("threads")) {
        toPrint.append("<entry key=\"threads\">").append(threads).append("</entry>");
      }
      // nope just save it
      else {
        toPrint.append(s);
      }
      // every input line, including a dropped missing-file entry, ends with a line return
      toPrint.append("\n");
    }
    // anything missing? if so exit
    if (missingFile)
      Misc.printErrAndExit(
          "\nFailed to find all of the files in your properties file, see above.\n");

    // OK, write it out
    completePipelinePropFile = new File(outputDirectory, "pipelineProperties.xml");
    if (IO.writeString(toPrint.toString(), completePipelinePropFile) == false)
      // report the file that failed to write, not the input file it was derived from
      Misc.printErrAndExit("Problem writing -> " + completePipelinePropFile);
  }
 /**
  * Assembles the CreateBAMLink XML fragment, filling in the output directory name as the sample
  * and the web root directory as web.root.
  *
  * @return the XML fragment
  */
 public String fetchWebRootLinksXml() {
   StringBuilder xml = new StringBuilder();
   xml.append("<CreateBAMLink class='operator.LinkCreator' sample='");
   xml.append(outputDirectory.getName());
   xml.append("' web.root='");
   xml.append(webRootForLinks);
   xml.append("' result.dir='links/'> \n");
   xml.append("        <finalBAM /> \n");
   xml.append("        <bedForVars /> \n");
   xml.append("</CreateBAMLink> \n\n");
   return xml.toString();
 }
  /**
   * Writes the XML template to {@code pipelineTemplate.xml} in the output directory, runs
   * Pipeline.jar on it with the complete properties file, and scans the returned output lines.
   *
   * <p>Any output line that contains "error" (case-insensitive) and does not start with
   * "warning" aborts the run through Misc.printErrAndExit, as does any exception raised while
   * preparing or executing the command.
   */
  private void executePipelineJob() {
    String[] cmd = null;
    try {
      // write out tempTemplate
      File template = new File(outputDirectory, "pipelineTemplate.xml");
      if (IO.writeString(xmlTemplate, template) == false)
        Misc.printErrAndExit("Problem writing -> " + template);

      // build and execute cmd
      cmd =
          new String[] {
            "java",
            "-jar",
            "-Xmx2G",
            pJar.getCanonicalPath(),
            "-props",
            completePipelinePropFile.getCanonicalPath(),
            template.getCanonicalPath()
          };

      String stringCmd = Misc.stringArrayToString(cmd, " ");
      System.out.println("\nExecuting:\n" + stringCmd);
      System.out.println("\nPipelineOutput:");
      String[] out = IO.executeViaProcessBuilder(cmd, true);

      // check output for possible errors
      for (String line : out) {
        String lcLine = line.toLowerCase();
        // watch out for cases where error is mentioned in a warning output line.
        if (lcLine.contains("error") && lcLine.startsWith("warning") == false)
          Misc.printErrAndExit(
              "\n\nERROR found in Pipeline.jar output, see above. Aborting!\n" + line);
      }

    } catch (Exception e) {
      e.printStackTrace();
      // cmd is still null when the failure happened before the array was assigned (e.g. a
      // getCanonicalPath() call threw); don't hand a null array to the string helper.
      String attempted =
          (cmd == null) ? "(command was not assembled)" : Misc.stringArrayToString(cmd, " ");
      Misc.printErrAndExit("ERROR: executing " + attempted);
    }
  }
 /**
  * Parses the command line arguments and assigns the matching fields.
  *
  * <p>Recognized flags (matched case-insensitively): {@code -f} fasta file or directory,
  * {@code -n} number of bases (integer), {@code -i} index directory, {@code -h} print the docs
  * and exit. The method exits with a message when no fasta files were found. When no index
  * directory was given, an "IndexedSequences" directory beside the first fasta file is used and
  * created.
  *
  * @param args raw command line arguments
  */
 public void processArgs(String[] args) {
   Pattern pat = Pattern.compile("-[a-z]");
   for (int i = 0; i < args.length; i++) {
     String lcArg = args[i].toLowerCase();
     Matcher mat = pat.matcher(lcArg);
     if (mat.matches()) {
       // Switch on the lower-cased flag: the pattern is matched against lcArg, so an upper-case
       // flag such as -F gets this far and must select the same case as -f instead of falling
       // into "unknown option".
       char test = lcArg.charAt(1);
       try {
         switch (test) {
           case 'f':
             fastas = IO.extractFiles(args[i + 1], "fasta");
             i++;
             break;
           case 'n':
             numberOfBases = Integer.parseInt(args[i + 1]);
             i++;
             break;
           case 'i':
             indexDirectory = new File(args[i + 1]);
             i++;
             break;
           case 'h':
             printDocs();
             System.exit(0);
           default:
             Misc.printExit("\nError: unknown option! " + mat.group());
         }
       } catch (Exception e) {
         // reached for a missing value (index past the end of args) or an unparsable number
         Misc.printExit(
             "\nSorry, something doesn't look right with this parameter: -" + test + "\n");
       }
     }
   }
   // Without fasta files there is nothing to index, and fastas[0] below would fail with a
   // NullPointerException or ArrayIndexOutOfBoundsException instead of a usable message.
   if (fastas == null || fastas.length == 0)
     Misc.printExit("\nError: cannot find your fasta file(s)!\n");
   // make index directory?
   if (indexDirectory == null) {
     indexDirectory = new File(fastas[0].getParentFile(), "IndexedSequences");
     indexDirectory.mkdir();
   }
 }
Esempio n. 14
0
  /**
   * Writes one zip-compressed stair-step bar file per chromosome found in the parsed bed lines.
   * For every chromosome the windows are fetched, assembleBlocks() and balanceValues() are run,
   * the accumulated bases and values are written to {@code <chromosome>.bar} in barDirectory,
   * and both lists are emptied for the next chromosome. The barDirectory, bedFile and
   * bedLinesHash fields must be set before calling.
   */
  public void makeStairStepBarFiles() {
    // parser and the tags attached to every bar file
    BarParser parser = new BarParser();
    parser.setZipCompress(true);
    HashMap<String, String> tags = new HashMap<String, String>();
    tags.put(BarParser.GRAPH_TYPE_TAG, BarParser.GRAPH_TYPE_STAIRSTEP);
    tags.put(BarParser.GRAPH_TYPE_COLOR_TAG, "#FF00FF"); // fuchsia
    tags.put(BarParser.SOURCE_TAG, bedFile.toString());

    System.out.print("Printing... ");
    for (String chromName : bedLinesHash.keySet()) {
      // the helpers below work off the chromosome and windows fields
      chromosome = chromName;
      System.out.print(chromosome + " ");
      windows = bedLinesHash.get(chromosome);

      // add blocks, then balance by adding max or min at zero base
      assembleBlocks();
      balanceValues();

      parser.writeBarFile(
          new File(barDirectory, chromosome + ".bar"),
          chromosome,
          genomeVersion,
          '.',
          Num.arrayListOfIntegerToInts(bases),
          Num.arrayListOfFloatToArray(values),
          tags);

      // reset the accumulators for the next chromosome
      bases.clear();
      values.clear();
    }
    System.out.println();
  }
 /**
  * Schedules this run's temp files for deletion at JVM exit and prints their names. The original
  * note says this is done here because the pipeline.jar operator isn't working.
  *
  * <p>Files in the current working directory are selected by name: starting with
  * "pipeinstancelog", "nocalls." or "snpeff.", or containing ".DOC.sample", "allDepths." or
  * "plice". When deleteTempVcf is set, the uncompressed final vcf is scheduled as well.
  */
 public void deleteTempFiles() {
   System.out.println("\nRemoving these temp files:");
   File workingDir = new File(System.getProperty("user.dir"));
   File[] toExamine = workingDir.listFiles();
   // listFiles() returns null when the directory cannot be listed; skip the scan in that case
   if (toExamine != null) {
     for (File f : toExamine) {
       String n = f.getName();
       boolean d =
           n.startsWith("pipeinstancelog")
               || n.contains(".DOC.sample")
               || n.contains("allDepths.")
               || n.startsWith("nocalls.")
               || n.startsWith("snpeff.")
               || n.contains("plice");
       if (d) {
         System.out.println("\t" + n);
         f.deleteOnExit();
       }
     }
   }
   // delete the temp uncompressed vcf (required by Pipeline.jar); it was only being listed
   // under "Removing these temp files" without ever being scheduled for deletion
   if (deleteTempVcf) {
     System.out.println("\t" + finalVcf.getName());
     finalVcf.deleteOnExit();
   }
 }
  /**
   * Echoes the USeq version and arguments, parses the command line into the matching fields,
   * prepares the output and web link directories, and runs the field and file checks.
   *
   * <p>Flags are matched case-insensitively. Each flag except {@code -l} (a switch) and
   * {@code -h} (print docs and exit) consumes the following argument as its value. After
   * parsing: the output directory is created and, when no sample id was supplied, its name
   * becomes the sample id; the web root and its "links" sub directory are created when a web
   * root was given; then checkPrintFields(), checkPrintFiles() and checkForGzippedVcf() are run.
   *
   * @param args raw command line arguments
   */
  public void processArgs(String[] args) {
    Pattern pat = Pattern.compile("-[a-z]");
    System.out.println(
        "\n" + IO.fetchUSeqVersion() + " Arguments: " + Misc.stringArrayToString(args, " ") + "\n");
    for (int i = 0; i < args.length; i++) {
      String lcArg = args[i].toLowerCase();
      Matcher mat = pat.matcher(lcArg);
      if (mat.matches()) {
        // Switch on the lower-cased flag: the pattern is matched against lcArg, so an upper-case
        // flag such as -O gets this far and must select the same case as -o instead of falling
        // into "unknown option".
        char test = lcArg.charAt(1);
        try {
          switch (test) {
            case 'o':
              jobId = args[++i];
              break;
            case 's':
              sampleId = args[++i];
              break;
            case 'm':
              submitter = args[++i];
              break;
            case 'y':
              analysisType = args[++i];
              break;
            case 'w':
              webRootForLinks = new File(args[++i]);
              break;
            case 'e':
              snpEffGenome = args[++i];
              break;
            case 'i':
              minimumReadDepth = args[++i];
              break;
            case 't':
              threads = args[++i];
              break;
            case 'l':
              uploadVarsToNGSWeb = true;
              break;
            case 'j':
              pJar = new File(args[++i]);
              break;
            case 'p':
              truncPipePropFile = new File(args[++i]);
              break;
            case 'q':
              bedForCoverageQC = new File(args[++i]);
              break;
            case 'b':
              bedForVarCalling = new File(args[++i]);
              break;
            case 'r':
              fastaReference = new File(args[++i]);
              break;
            case 'u':
              unfilteredBam = new File(args[++i]);
              break;
            case 'f':
              finalBam = new File(args[++i]);
              break;
            case 'v':
              finalVcf = new File(args[++i]);
              break;
            case 'd':
              outputDirectory = new File(args[++i]);
              break;
            case 'c':
              referenceDir = new File(args[++i]);
              break;
            case 'h':
              printDocs();
              System.exit(0);
            default:
              Misc.printErrAndExit("\nProblem, unknown option! " + mat.group());
          }
        } catch (Exception e) {
          // reached when a flag's value is missing (index past the end of args)
          Misc.printErrAndExit(
              "\nSorry, something doesn't look right with this parameter: -" + test + "\n");
        }
      }
    }

    // check output dir and if needed set sampleId
    if (outputDirectory != null) {
      outputDirectory.mkdirs();
      // treat an unset sample id the same as an empty one rather than failing on null
      if (sampleId == null || sampleId.length() == 0) sampleId = outputDirectory.getName();
    }
    // check root directory if needed
    if (webRootForLinks != null) {
      if (webRootForLinks.exists() == false) webRootForLinks.mkdirs();
      // links dir?
      File links = new File(webRootForLinks, "links");
      if (links.exists() == false) links.mkdirs();
    }

    // look for required fields and files
    checkPrintFields();
    checkPrintFiles();
    checkForGzippedVcf();
  }