public Bed2Bar(String[] args) { try { processArgs(args); // load Window[] for (int i = 0; i < bedFiles.length; i++) { bedFile = bedFiles[i]; System.out.println("Parsing " + bedFile.getName()); bedLinesHash = Bed.parseBedFile(bedFile, true, false); if (bedLinesHash == null || bedLinesHash.size() == 0) { System.out.println("Problem parsing bed file, skipping!"); continue; } barDirectory = IO.makeDirectory(bedFile, ""); File bedOutFile = new File(Misc.removeExtension(bedFile.toString()) + "_" + threshold + "_Filt.bed"); bedOut = new PrintWriter(new FileWriter(bedOutFile)); makeStairStepBarFiles(); } bedOut.close(); System.out.println("\nDone!\n"); } catch (IOException e) { e.printStackTrace(); } }
public String fetchCleanupReviewDirLinkGenXml() { String s = null; try { s = // Organize and Clean Up Section "<ReviewDirGenerator class='operator.ReviewDirGenerator' destination.dir='" + outputDirectory.getCanonicalPath() + "' sample='" + outputDirectory.getName() + "' submitter='" + submitter + "' analysis.type='" + analysisType + "'> \n" + " <finalVariants /> \n" + " <ViewerCSV /> \n" + " <InstanceLog class='buffer.InstanceLogFile' /> \n" + " <finalBAM /> \n" + " <QCOutputFile /> \n" + " <bedForVars /> \n" + "</ReviewDirGenerator> \n\n"; } catch (IOException e) { e.printStackTrace(); Misc.printErrAndExit("\nError fetching clean up and review dir links.\n"); } return s; }
/**
 * Prints a name / exists / path row for every file resource the run needs and aborts through
 * {@code Misc.printErrAndExit} when any of them is unset or absent on disk.
 *
 * <p>The web links directory is only checked when one was supplied.
 */
public void checkPrintFiles() {
  // insertion ordered so the report always lists resources in the same sequence
  LinkedHashMap<String, File> resources = new LinkedHashMap<String, File>();
  resources.put("Reference directory", referenceDir);
  resources.put("Pipeline.jar", pJar);
  resources.put("Pipeline properties", truncPipePropFile);
  resources.put("Coverage QC bed", bedForCoverageQC);
  resources.put("Variant calling bed", bedForVarCalling);
  resources.put("Fasta genome reference", fastaReference);
  resources.put("Unfiltered bam", unfilteredBam);
  resources.put("Final bam", finalBam);
  resources.put("Final vcf", finalVcf);
  if (webRootForLinks != null) {
    resources.put("Web links dir", webRootForLinks);
  }
  resources.put("Final output dir", outputDirectory);

  System.out.println("\nResources (name exists path):");
  boolean anyMissing = false;
  for (String label : resources.keySet()) {
    File resource = resources.get(label);
    // an unset resource counts as missing just like one that is not on disk
    boolean present = resource != null && resource.exists();
    if (!present) {
      anyMissing = true;
    }
    System.out.println(label + "\t" + present + "\t" + resource);
  }
  if (anyMissing) {
    Misc.printErrAndExit("\nMissing resources! See above.");
  }
}
/** This method will process each argument and assign new varibles */ public void processArgs(String[] args) { File file = null; Pattern pat = Pattern.compile("-[a-z]"); for (int i = 0; i < args.length; i++) { String lcArg = args[i].toLowerCase(); Matcher mat = pat.matcher(lcArg); if (mat.matches()) { char test = args[i].charAt(1); try { switch (test) { case 'f': file = new File(args[i + 1]); i++; break; case 'v': genomeVersion = args[i + 1]; i++; break; case 's': sumScores = true; break; case 't': threshold = Float.parseFloat(args[++i]); break; case 'h': printDocs(); System.exit(0); default: Misc.printExit("\nError, unknown option! " + mat.group()); } } catch (Exception e) { System.out.print("\nSorry, something doesn't look right with this parameter: -" + test); System.out.println(); System.exit(0); } } } if (file == null || file.exists() == false) Misc.printErrAndExit("Problem finding your bed files!\n"); // pull files File[][] tot = new File[3][]; tot[0] = IO.extractFiles(file, ".bed"); tot[1] = IO.extractFiles(file, ".bed.zip"); tot[2] = IO.extractFiles(file, ".bed.gz"); bedFiles = IO.collapseFileArray(tot); if (bedFiles == null || bedFiles.length == 0) Misc.printErrAndExit("Problem finding your xxx.bed(.zip/.gz OK) files!\n"); // genome version if (genomeVersion == null) Misc.printErrAndExit( "Please enter a genome version (e.g. H_sapiens_Mar_2006, see http://genome.ucsc.edu/FAQ/FAQreleases\n"); }
/**
 * Swaps a gzipped final vcf for its uncompressed sibling.
 *
 * <p>When the final vcf name ends in ".gz", the file of the same name without that suffix in the
 * same directory becomes the final vcf. If that sibling does not exist yet it is created by
 * uncompressing, and {@code deleteTempVcf} is set to record that this run produced it.
 */
private void checkForGzippedVcf() {
  String vcfName = finalVcf.getName();
  if (!vcfName.endsWith(".gz")) {
    return;
  }
  // drop the three characters of ".gz"
  String plainName = vcfName.substring(0, vcfName.length() - 3);
  File uncomp = new File(finalVcf.getParentFile(), plainName);
  if (!uncomp.exists()) {
    deleteTempVcf = true;
    if (IO.uncompress(finalVcf, uncomp) == null) {
      Misc.printErrAndExit("ERROR: failed to uncompress -> " + finalVcf);
    }
  }
  finalVcf = uncomp;
}
/**
 * Reads the command line flags and fills in the matching fields.
 *
 * <p>Recognised flags: {@code -f} sgr file or directory, {@code -v} genome version, {@code -s}
 * strand, {@code -t} stair step graphs, {@code -h} print the docs and exit. After parsing, the
 * sgr files (.sgr, .sgr.zip, .sgr.gz) are collected and the method exits when the source cannot
 * be read, no files are found, or no genome version was given.
 *
 * @param args raw command line arguments
 */
public void processArgs(String[] args) {
  Pattern flagPattern = Pattern.compile("-[a-z]");
  File sgrSource = null;
  for (int i = 0; i < args.length; i++) {
    Matcher flag = flagPattern.matcher(args[i].toLowerCase());
    if (!flag.matches()) {
      continue;
    }
    // the switch sees the character as typed, so upper case flags land in the default branch
    char test = args[i].charAt(1);
    try {
      switch (test) {
        case 'f':
          sgrSource = new File(args[++i]);
          break;
        case 'v':
          genomeVersion = args[++i];
          break;
        case 's':
          strand = args[++i];
          break;
        case 't':
          stairStep = true;
          break;
        case 'h':
          printDocs();
          System.exit(0);
        default:
          Misc.printExit("\nError: unknown option! " + flag.group());
      }
    } catch (Exception e) {
      Misc.printExit(
          "\nSorry, something doesn't look right with this parameter: -" + test + "\n");
    }
  }
  if (sgrSource == null || !sgrSource.canRead()) {
    Misc.printExit("\nError: cannot find or read your sgr file/ directory.\n");
  }

  File[][] byExtension =
      new File[][] {
        IO.extractFiles(sgrSource, ".sgr"),
        IO.extractFiles(sgrSource, ".sgr.zip"),
        IO.extractFiles(sgrSource, ".sgr.gz")
      };
  sgrFiles = IO.collapseFileArray(byExtension);
  if (sgrFiles == null || sgrFiles.length == 0) {
    Misc.printExit("\nError: cannot find your xxx.sgr.zip file(s)");
  }
  if (genomeVersion == null) {
    Misc.printExit(
        "\nError: you must supply a genome version. Goto http://genome.ucsc.edu/cgi-"
            + "bin/hgGateway load your organism to find the associated genome version.\n");
  }
}
public String fetchVariantOutputXml() { String s = // Write variants to csv file no need for including bad.region, use the USeq VCFRegionMarker // instead // "<ViewerFile class='plugins.writers.varviewer.VarViewerWriter' anno.keys='bad.region'> // \n"+ "<ViewerFile class='plugins.writers.varviewer.VarViewerWriter'> \n" + " <VariantPool /> \n" + " <Genes /> \n" + " <ViewerCSV class='buffer.CSVFile' filename='" + outputDirectory.getName() + "_annotated.csv' /> \n" + "</ViewerFile> \n\n" + // Create Final JsonQC Section (needs to happen after variant calls) "<QCtoJSON class='operator.qc.QCtoJSON'> \n" + " <rawBamMetrics /> \n" + " <rawdocmetrics /> \n" + " <finalBamMetrics /> \n" + " <finaldocmetrics /> \n" + " <bedForCoverageQC /> \n" + " <filteredVars /> \n" + " <NoCallCSV /> \n" + " <QCOutputFile class='buffer.TextBuffer' filename='qc.json' /> \n" + "</QCtoJSON> \n" + "\n"; return s; }
// constructor public Sgr2Bar(String[] args) { try { // check for args processArgs(args); System.out.println("Genome version -> " + genomeVersion); System.out.println("Strand -> " + strand); System.out.println("Stair Step? -> " + stairStep); System.out.println(); // load tagValues if (stairStep) tagValues.put(BarParser.GRAPH_TYPE_TAG, BarParser.GRAPH_TYPE_STAIRSTEP); else tagValues.put(BarParser.GRAPH_TYPE_TAG, BarParser.GRAPH_TYPE_BAR); for (int x = 0; x < sgrFiles.length; x++) { System.out.println("\tLoading -> " + sgrFiles[x]); GrGraph[] grs = GrGraph.loadSgrFile(sgrFiles[x]); // make save directory String dirName; dirName = Misc.removeExtension(sgrFiles[x].getCanonicalPath()); File dir = new File(dirName); dir.mkdir(); // print bar files System.out.println("\tSaving..."); for (int i = 0; i < grs.length; i++) { File barFile = new File(dir, grs[i].getChromosome() + ".bar"); barParser.writeBarFile( barFile, grs[i].getChromosome(), genomeVersion, strand.charAt(0), grs[i].getBasePositions(), grs[i].getValues(), tagValues); } } System.out.println("\nDone!\n"); } catch (IOException e) { e.printStackTrace(); Misc.printErrAndExit("\nFailed to parse file!"); } }
/**
 * Cuts a sequence into consecutive chunks of {@code numberOfBases} bases and writes each chunk
 * as a binary sequence file in the index directory.
 *
 * <p>Files are named {@code name_start-stop} with an inclusive stop coordinate. At least one
 * chunk is always attempted. If a chunk's file already exists a warning is printed and the
 * method returns without writing the remaining chunks.
 *
 * @param name prefix for the chunk file names
 * @param seq the sequence to split
 */
public void splitIndex(String name, String seq) {
  int seqLength = seq.length();
  int start = 0;
  do {
    // the final chunk is clipped to the end of the sequence
    int end = Math.min(start + numberOfBases, seqLength);
    String chunk = seq.substring(start, end);
    File chunkFile = new File(indexDirectory, name + "_" + start + "-" + (end - 1));
    if (chunkFile.exists()) {
      System.out.println("WARNING, " + chunkFile + " already exists, skipping!");
      return;
    }
    Seq.writeBinarySequence(chunk, chunkFile);
    start = end;
  } while (start < seqLength);
}
private void buildXmlPropertiesFile() { System.out.println( "\nBuilding and checking your pipeline properties file -> " + truncPipePropFile); StringBuilder toPrint = new StringBuilder(); // walk through the prop file String[] prop = IO.loadFileIntoStringArray(truncPipePropFile); Pattern val = Pattern.compile("(<entry key.+>)([D|A|B].*)</entry>"); boolean missingFile = false; for (String s : prop) { // does it match a file needing prepending? Data/, Apps/, Bed/ Matcher mat = val.matcher(s); if (mat.matches()) { File test = new File(referenceDir, mat.group(2)); if (test.exists()) { System.out.println("Found\t" + test); toPrint.append(mat.group(1)); toPrint.append(test.toString()); toPrint.append("</entry>"); } else { System.out.println("Misssing\t" + test); missingFile = true; } } // threads? else if (s.contains("threads")) toPrint.append("<entry key=\"threads\">" + threads + "</entry>"); // nope just save it and add a line return else toPrint.append(s); toPrint.append("\n"); } // anything missing? if so exit if (missingFile) Misc.printErrAndExit( "\nFailed to find all of the files in your properties file, see above.\n"); // OK, write it out completePipelinePropFile = new File(outputDirectory, "pipelineProperties.xml"); if (IO.writeString(toPrint.toString(), completePipelinePropFile) == false) Misc.printErrAndExit("Problem writing -> " + truncPipePropFile); }
/**
 * Assembles the CreateBAMLink element of the pipeline template, carrying the output directory
 * name as the sample and the web root directory.
 *
 * @return the XML fragment
 */
public String fetchWebRootLinksXml() {
  StringBuilder xml = new StringBuilder();
  xml.append("<CreateBAMLink class='operator.LinkCreator' sample='");
  xml.append(outputDirectory.getName());
  xml.append("' web.root='");
  xml.append(String.valueOf(webRootForLinks));
  xml.append("' result.dir='links/'> \n");
  xml.append(" <finalBAM /> \n");
  xml.append(" <bedForVars /> \n");
  xml.append("</CreateBAMLink> \n\n");
  return xml.toString();
}
private void executePipelineJob() { String[] cmd = null; try { // write out tempTemplate File template = new File(outputDirectory, "pipelineTemplate.xml"); if (IO.writeString(xmlTemplate, template) == false) Misc.printErrAndExit("Problem writing -> " + template); // build and execute cmd cmd = new String[] { "java", "-jar", "-Xmx2G", pJar.getCanonicalPath(), "-props", completePipelinePropFile.getCanonicalPath(), template.getCanonicalPath() }; String stringCmd = Misc.stringArrayToString(cmd, " "); System.out.println("\nExecuting:\n" + stringCmd); System.out.println("\nPipelineOutput:"); String[] out = IO.executeViaProcessBuilder(cmd, true); // check output for possible errors for (String line : out) { String lcLine = line.toLowerCase(); // watch out for cases where error is mentioned in a warning output line. if (lcLine.contains("error") && lcLine.startsWith("warning") == false) Misc.printErrAndExit( "\n\nERROR found in Pipeline.jar output, see above. Aborting!\n" + line); } } catch (Exception e) { e.printStackTrace(); Misc.printErrAndExit("ERROR: executing " + Misc.stringArrayToString(cmd, " ")); } }
/** This method will process each argument and assign new varibles */ public void processArgs(String[] args) { Pattern pat = Pattern.compile("-[a-z]"); for (int i = 0; i < args.length; i++) { String lcArg = args[i].toLowerCase(); Matcher mat = pat.matcher(lcArg); if (mat.matches()) { char test = args[i].charAt(1); try { switch (test) { case 'f': fastas = IO.extractFiles(args[i + 1], "fasta"); i++; break; case 'n': numberOfBases = Integer.parseInt(args[i + 1]); i++; break; case 'i': indexDirectory = new File(args[i + 1]); i++; break; case 'h': printDocs(); System.exit(0); default: Misc.printExit("\nError: unknown option! " + mat.group()); } } catch (Exception e) { Misc.printExit( "\nSorry, something doesn't look right with this parameter: -" + test + "\n"); } } } // make index directory? if (indexDirectory == null) { indexDirectory = new File(fastas[0].getParentFile(), "IndexedSequences"); indexDirectory.mkdir(); } }
/** * Makes a stair step heat map from an array of windows in bar format. One per chromosome. Don't * forget to set the barDirectory and score Index!!!!!!! */ public void makeStairStepBarFiles() { // make bar parser BarParser bp = new BarParser(); bp.setZipCompress(true); HashMap<String, String> tagVals = new HashMap<String, String>(); tagVals.put(BarParser.GRAPH_TYPE_TAG, BarParser.GRAPH_TYPE_STAIRSTEP); tagVals.put(BarParser.GRAPH_TYPE_COLOR_TAG, "#FF00FF"); // fusha tagVals.put(BarParser.SOURCE_TAG, bedFile.toString()); // for each chromosome System.out.print("Printing... "); Iterator<String> it = bedLinesHash.keySet().iterator(); while (it.hasNext()) { chromosome = it.next(); System.out.print(chromosome + " "); windows = bedLinesHash.get(chromosome); // add blocks assembleBlocks(); // balance by adding max or min at zero base balanceValues(); // write bar file File barFile = new File(barDirectory, chromosome + ".bar"); bp.writeBarFile( barFile, chromosome, genomeVersion, '.', Num.arrayListOfIntegerToInts(bases), Num.arrayListOfFloatToArray(values), tagVals); // clear ArrayLists bases.clear(); values.clear(); } System.out.println(); }
// remove temp files, the pipeline.jar operator isn't working. public void deleteTempFiles() { System.out.println("\nRemoving these temp files:"); File workingDir = new File(System.getProperty("user.dir")); File[] toExamine = workingDir.listFiles(); for (File f : toExamine) { boolean d = false; String n = f.getName(); if (n.startsWith("pipeinstancelog")) d = true; else if (n.contains(".DOC.sample")) d = true; else if (n.contains("allDepths.")) d = true; else if (n.startsWith("nocalls.")) d = true; else if (n.startsWith("snpeff.")) d = true; else if (n.contains("plice")) d = true; if (d) { System.out.println("\t" + n); f.deleteOnExit(); } } // delete the temp uncompressed vcf (required by Pipeline.jar) if (deleteTempVcf) System.out.println("\t" + finalVcf.getName()); }
/**
 * Echoes the arguments, reads the command line flags into the matching fields, prepares the
 * output and web link directories and then runs the field, file and gzipped vcf checks.
 *
 * <p>When an output directory is given it is created, and its name becomes the sample id if the
 * sample id is empty. When a web root is given, it and its "links" subdirectory are created if
 * absent.
 *
 * @param args raw command line arguments
 */
public void processArgs(String[] args) {
  Pattern flagPattern = Pattern.compile("-[a-z]");
  System.out.println(
      "\n"
          + IO.fetchUSeqVersion()
          + " Arguments: "
          + Misc.stringArrayToString(args, " ")
          + "\n");
  for (int i = 0; i < args.length; i++) {
    Matcher flag = flagPattern.matcher(args[i].toLowerCase());
    if (!flag.matches()) {
      continue;
    }
    // the switch sees the character as typed, so upper case flags land in the default branch
    char test = args[i].charAt(1);
    try {
      switch (test) {
        case 'o':
          jobId = args[++i];
          break;
        case 's':
          sampleId = args[++i];
          break;
        case 'm':
          submitter = args[++i];
          break;
        case 'y':
          analysisType = args[++i];
          break;
        case 'w':
          webRootForLinks = new File(args[++i]);
          break;
        case 'e':
          snpEffGenome = args[++i];
          break;
        case 'i':
          minimumReadDepth = args[++i];
          break;
        case 't':
          threads = args[++i];
          break;
        case 'l':
          uploadVarsToNGSWeb = true;
          break;
        case 'j':
          pJar = new File(args[++i]);
          break;
        case 'p':
          truncPipePropFile = new File(args[++i]);
          break;
        case 'q':
          bedForCoverageQC = new File(args[++i]);
          break;
        case 'b':
          bedForVarCalling = new File(args[++i]);
          break;
        case 'r':
          fastaReference = new File(args[++i]);
          break;
        case 'u':
          unfilteredBam = new File(args[++i]);
          break;
        case 'f':
          finalBam = new File(args[++i]);
          break;
        case 'v':
          finalVcf = new File(args[++i]);
          break;
        case 'd':
          outputDirectory = new File(args[++i]);
          break;
        case 'c':
          referenceDir = new File(args[++i]);
          break;
        case 'h':
          printDocs();
          System.exit(0);
        default:
          Misc.printErrAndExit("\nProblem, unknown option! " + flag.group());
      }
    } catch (Exception e) {
      Misc.printErrAndExit(
          "\nSorry, something doesn't look right with this parameter: -" + test + "\n");
    }
  }

  // check output dir and if needed set sampleId
  if (outputDirectory != null) {
    outputDirectory.mkdirs();
    if (sampleId.length() == 0) {
      sampleId = outputDirectory.getName();
    }
  }

  // check root directory if needed
  if (webRootForLinks != null) {
    if (!webRootForLinks.exists()) {
      webRootForLinks.mkdirs();
    }
    // links dir?
    File linksDir = new File(webRootForLinks, "links");
    if (!linksDir.exists()) {
      linksDir.mkdirs();
    }
  }

  // look for required fields and files
  checkPrintFields();
  checkPrintFiles();
  checkForGzippedVcf();
}