public Bed2Bar(String[] args) { try { processArgs(args); // load Window[] for (int i = 0; i < bedFiles.length; i++) { bedFile = bedFiles[i]; System.out.println("Parsing " + bedFile.getName()); bedLinesHash = Bed.parseBedFile(bedFile, true, false); if (bedLinesHash == null || bedLinesHash.size() == 0) { System.out.println("Problem parsing bed file, skipping!"); continue; } barDirectory = IO.makeDirectory(bedFile, ""); File bedOutFile = new File(Misc.removeExtension(bedFile.toString()) + "_" + threshold + "_Filt.bed"); bedOut = new PrintWriter(new FileWriter(bedOutFile)); makeStairStepBarFiles(); } bedOut.close(); System.out.println("\nDone!\n"); } catch (IOException e) { e.printStackTrace(); } }
/** This method will process each argument and assign new varibles */ public void processArgs(String[] args) { File file = null; Pattern pat = Pattern.compile("-[a-z]"); for (int i = 0; i < args.length; i++) { String lcArg = args[i].toLowerCase(); Matcher mat = pat.matcher(lcArg); if (mat.matches()) { char test = args[i].charAt(1); try { switch (test) { case 'f': file = new File(args[i + 1]); i++; break; case 'v': genomeVersion = args[i + 1]; i++; break; case 's': sumScores = true; break; case 't': threshold = Float.parseFloat(args[++i]); break; case 'h': printDocs(); System.exit(0); default: Misc.printExit("\nError, unknown option! " + mat.group()); } } catch (Exception e) { System.out.print("\nSorry, something doesn't look right with this parameter: -" + test); System.out.println(); System.exit(0); } } } if (file == null || file.exists() == false) Misc.printErrAndExit("Problem finding your bed files!\n"); // pull files File[][] tot = new File[3][]; tot[0] = IO.extractFiles(file, ".bed"); tot[1] = IO.extractFiles(file, ".bed.zip"); tot[2] = IO.extractFiles(file, ".bed.gz"); bedFiles = IO.collapseFileArray(tot); if (bedFiles == null || bedFiles.length == 0) Misc.printErrAndExit("Problem finding your xxx.bed(.zip/.gz OK) files!\n"); // genome version if (genomeVersion == null) Misc.printErrAndExit( "Please enter a genome version (e.g. H_sapiens_Mar_2006, see http://genome.ucsc.edu/FAQ/FAQreleases\n"); }
/**
 * Parses the command line flags and resolves the sgr files to process.
 *
 * <p>Recognized flags: {@code -f} file or directory to search, {@code -v} genome version,
 * {@code -s} strand, {@code -t} enable stair-step output, {@code -h} print docs and exit.
 * {@code sgrFiles} is filled with every {@code .sgr}, {@code .sgr.zip} and {@code .sgr.gz} file
 * extracted from the {@code -f} location. Problems are reported through {@code Misc.printExit}.
 */
public void processArgs(String[] args) {
  Pattern flagPattern = Pattern.compile("-[a-z]");
  File source = null;

  for (int i = 0; i < args.length; i++) {
    Matcher flagMatcher = flagPattern.matcher(args[i].toLowerCase());
    if (!flagMatcher.matches()) {
      continue;
    }
    // the flag character is read from the raw argument, so its case is preserved
    char test = args[i].charAt(1);
    try {
      switch (test) {
        case 'f':
          source = new File(args[i + 1]);
          i++;
          break;
        case 'v':
          genomeVersion = args[i + 1];
          i++;
          break;
        case 's':
          strand = args[++i];
          break;
        case 't':
          stairStep = true;
          break;
        case 'h':
          printDocs();
          System.exit(0);
        default:
          Misc.printExit("\nError: unknown option! " + flagMatcher.group());
      }
    } catch (Exception e) {
      Misc.printExit(
          "\nSorry, something doesn't look right with this parameter: -" + test + "\n");
    }
  }

  if (source == null || !source.canRead()) {
    Misc.printExit("\nError: cannot find or read your sgr file/ directory.\n");
  }

  // gather plain, zipped and gzipped sgr files into one array
  File[][] byExtension = new File[3][];
  byExtension[0] = IO.extractFiles(source, ".sgr");
  byExtension[1] = IO.extractFiles(source, ".sgr.zip");
  byExtension[2] = IO.extractFiles(source, ".sgr.gz");
  sgrFiles = IO.collapseFileArray(byExtension);
  if (sgrFiles == null || sgrFiles.length == 0) {
    Misc.printExit("\nError: cannot find your xxx.sgr.zip file(s)");
  }

  if (genomeVersion == null) {
    Misc.printExit(
        "\nError: you must supply a genome version. Goto http://genome.ucsc.edu/cgi-"
            + "bin/hgGateway load your organism to find the associated genome version.\n");
  }
}
/** This method will process each argument and assign new variables */ public void processArgs(String[] args) { Pattern pat = Pattern.compile("-[a-z]"); File forExtraction = null; for (int i = 0; i < args.length; i++) { String lcArg = args[i].toLowerCase(); Matcher mat = pat.matcher(lcArg); if (mat.matches()) { char test = args[i].charAt(1); try { switch (test) { case 'v': forExtraction = new File(args[++i]); break; case 'm': minimumSSC = Float.parseFloat(args[++i]); break; case 'a': minimumCount = Integer.parseInt(args[++i]); break; case 'r': minimumAbsFractionChange = Double.parseDouble(args[++i]); break; case 'n': maximumNormalAltFraction = Double.parseDouble(args[++i]); break; default: Misc.printErrAndExit("\nProblem, unknown option! " + mat.group()); } } catch (Exception e) { Misc.printErrAndExit( "\nSorry, something doesn't look right with this parameter: -" + test + "\n"); } } } System.out.println( "\n" + IO.fetchUSeqVersion() + " Arguments: " + Misc.stringArrayToString(args, " ") + "\n"); // pull vcf files if (forExtraction == null || forExtraction.exists() == false) Misc.printErrAndExit( "\nError: please enter a path to a vcf file or directory containing such.\n"); File[][] tot = new File[3][]; tot[0] = IO.extractFiles(forExtraction, ".vcf"); tot[1] = IO.extractFiles(forExtraction, ".vcf.gz"); tot[2] = IO.extractFiles(forExtraction, ".vcf.zip"); vcfFiles = IO.collapseFileArray(tot); if (vcfFiles == null || vcfFiles.length == 0 || vcfFiles[0].canRead() == false) Misc.printExit("\nError: cannot find your xxx.vcf(.zip/.gz OK) file(s)!\n"); }
// constructor public TQuery(String[] args) { try { long startTime = System.currentTimeMillis(); processArgs(args); queryIndex = new QueryIndex(this); queryLoader = new QueryLoader(this); // print some stats on building the engine String diffTime = Num.formatNumberOneFraction(((double) (System.currentTimeMillis() - startTime)) / 1000); int numFiles = vcfDataFiles.length + bedDataFiles.length + mafDataFiles.length; System.err.println("\n" + diffTime + " Sec to build using " + IO.memory() + " of RAM"); System.err.println("\t" + numFiles + "\tData sources loaded"); System.err.println("\t" + dataSources.getRecordsLoaded() + "\tRecords indexed"); System.err.println("\t" + dataSources.getRecordsSkipped() + "\tRecords skipped\n"); // print summary of available filters System.err.println(dataSources.fetchSummary()); queryFilesFromCmdLine(); // release file handles queryLoader.closeTabixReaders(); } catch (Exception e) { e.printStackTrace(); System.err.println("\nProblem with executing the TQuery!"); } }
private void buildXmlPropertiesFile() { System.out.println( "\nBuilding and checking your pipeline properties file -> " + truncPipePropFile); StringBuilder toPrint = new StringBuilder(); // walk through the prop file String[] prop = IO.loadFileIntoStringArray(truncPipePropFile); Pattern val = Pattern.compile("(<entry key.+>)([D|A|B].*)</entry>"); boolean missingFile = false; for (String s : prop) { // does it match a file needing prepending? Data/, Apps/, Bed/ Matcher mat = val.matcher(s); if (mat.matches()) { File test = new File(referenceDir, mat.group(2)); if (test.exists()) { System.out.println("Found\t" + test); toPrint.append(mat.group(1)); toPrint.append(test.toString()); toPrint.append("</entry>"); } else { System.out.println("Misssing\t" + test); missingFile = true; } } // threads? else if (s.contains("threads")) toPrint.append("<entry key=\"threads\">" + threads + "</entry>"); // nope just save it and add a line return else toPrint.append(s); toPrint.append("\n"); } // anything missing? if so exit if (missingFile) Misc.printErrAndExit( "\nFailed to find all of the files in your properties file, see above.\n"); // OK, write it out completePipelinePropFile = new File(outputDirectory, "pipelineProperties.xml"); if (IO.writeString(toPrint.toString(), completePipelinePropFile) == false) Misc.printErrAndExit("Problem writing -> " + truncPipePropFile); }
/**
 * Points {@code finalVcf} at an uncompressed copy when it names a {@code .gz} file.
 *
 * <p>The uncompressed file sits next to the original with the {@code .gz} suffix removed. If it
 * does not exist yet, {@code deleteTempVcf} is set and the file is created with
 * {@code IO.uncompress}; a failed uncompress exits through {@code Misc.printErrAndExit}. Files
 * not ending in {@code .gz} are left untouched.
 */
private void checkForGzippedVcf() {
  String gzName = finalVcf.getName();
  if (!gzName.endsWith(".gz")) {
    return;
  }

  // drop the trailing ".gz" (three characters)
  String plainName = gzName.substring(0, gzName.length() - 3);
  File uncomp = new File(finalVcf.getParentFile(), plainName);

  if (!uncomp.exists()) {
    deleteTempVcf = true;
    if (IO.uncompress(finalVcf, uncomp) == null) {
      Misc.printErrAndExit("ERROR: failed to uncompress -> " + finalVcf);
    }
  }
  finalVcf = uncomp;
}
public boolean bedType(File bedFile) { stranded = false; scored = false; named = false; bed12 = false; boolean bedLineFound = false; try { // read through first 10000 lines // chrom start stop name score strand .... // 0 1 2 3 4 5 BufferedReader in = IO.fetchBufferedReader(bedFile); String[] tokens; String line; int counter = 0; while ((line = in.readLine()) != null) { if (counter++ > 10000) break; // bed line? if (bedLine.matcher(line).matches() == false) continue; bedLineFound = true; // split on tab tokens = tab.split(line); // bed12? if (tokens.length == 12) { bed12 = true; break; } if (tokens.length > 3) { // named? if (badName.matcher(tokens[3]).matches() == false) named = true; if (tokens.length > 4) { // scored? if (tokens[4].equals("0") == false) scored = true; if (tokens.length > 5) { // stranded? if (tokens[5].equals(".") == false) stranded = true; } } } } in.close(); } catch (Exception e) { System.err.println("\nProblem parsing bed type.\n"); e.printStackTrace(); bedLineFound = false; } return bedLineFound; }
private void executePipelineJob() { String[] cmd = null; try { // write out tempTemplate File template = new File(outputDirectory, "pipelineTemplate.xml"); if (IO.writeString(xmlTemplate, template) == false) Misc.printErrAndExit("Problem writing -> " + template); // build and execute cmd cmd = new String[] { "java", "-jar", "-Xmx2G", pJar.getCanonicalPath(), "-props", completePipelinePropFile.getCanonicalPath(), template.getCanonicalPath() }; String stringCmd = Misc.stringArrayToString(cmd, " "); System.out.println("\nExecuting:\n" + stringCmd); System.out.println("\nPipelineOutput:"); String[] out = IO.executeViaProcessBuilder(cmd, true); // check output for possible errors for (String line : out) { String lcLine = line.toLowerCase(); // watch out for cases where error is mentioned in a warning output line. if (lcLine.contains("error") && lcLine.startsWith("warning") == false) Misc.printErrAndExit( "\n\nERROR found in Pipeline.jar output, see above. Aborting!\n" + line); } } catch (Exception e) { e.printStackTrace(); Misc.printErrAndExit("ERROR: executing " + Misc.stringArrayToString(cmd, " ")); } }
/** This method will process each argument and assign new varibles */ public void processArgs(String[] args) { Pattern pat = Pattern.compile("-[a-z]"); for (int i = 0; i < args.length; i++) { String lcArg = args[i].toLowerCase(); Matcher mat = pat.matcher(lcArg); if (mat.matches()) { char test = args[i].charAt(1); try { switch (test) { case 'f': fastas = IO.extractFiles(args[i + 1], "fasta"); i++; break; case 'n': numberOfBases = Integer.parseInt(args[i + 1]); i++; break; case 'i': indexDirectory = new File(args[i + 1]); i++; break; case 'h': printDocs(); System.exit(0); default: Misc.printExit("\nError: unknown option! " + mat.group()); } } catch (Exception e) { Misc.printExit( "\nSorry, something doesn't look right with this parameter: -" + test + "\n"); } } } // make index directory? if (indexDirectory == null) { indexDirectory = new File(fastas[0].getParentFile(), "IndexedSequences"); indexDirectory.mkdir(); } }
/**
 * Parses the command line flags for the pipeline wrapper and validates the resulting settings.
 *
 * <p>The version and argument banner is printed first. Each recognized flag stores the following
 * argument in the matching field, either as a String or wrapped in a {@link File}; {@code -l}
 * is a switch that enables {@code uploadVarsToNGSWeb} and {@code -h} prints the docs and exits.
 *
 * <p>After parsing: the output directory (if given) is created and its name is used as the
 * sample id when that is empty; the web root (if given) and its {@code links} subdirectory are
 * created when absent; then {@code checkPrintFields}, {@code checkPrintFiles} and
 * {@code checkForGzippedVcf} are run.
 */
public void processArgs(String[] args) {
  Pattern flagPattern = Pattern.compile("-[a-z]");
  System.out.println(
      "\n" + IO.fetchUSeqVersion() + " Arguments: " + Misc.stringArrayToString(args, " ") + "\n");

  for (int i = 0; i < args.length; i++) {
    Matcher flagMatcher = flagPattern.matcher(args[i].toLowerCase());
    if (!flagMatcher.matches()) {
      continue;
    }
    // the flag character is read from the raw argument, so its case is preserved
    char test = args[i].charAt(1);
    try {
      switch (test) {
          // plain string settings
        case 'o':
          jobId = args[++i];
          break;
        case 's':
          sampleId = args[++i];
          break;
        case 'm':
          submitter = args[++i];
          break;
        case 'y':
          analysisType = args[++i];
          break;
        case 'e':
          snpEffGenome = args[++i];
          break;
        case 'i':
          minimumReadDepth = args[++i];
          break;
        case 't':
          threads = args[++i];
          break;
          // switch without a value
        case 'l':
          uploadVarsToNGSWeb = true;
          break;
          // file and directory settings
        case 'w':
          webRootForLinks = new File(args[++i]);
          break;
        case 'j':
          pJar = new File(args[++i]);
          break;
        case 'p':
          truncPipePropFile = new File(args[++i]);
          break;
        case 'q':
          bedForCoverageQC = new File(args[++i]);
          break;
        case 'b':
          bedForVarCalling = new File(args[++i]);
          break;
        case 'r':
          fastaReference = new File(args[++i]);
          break;
        case 'u':
          unfilteredBam = new File(args[++i]);
          break;
        case 'f':
          finalBam = new File(args[++i]);
          break;
        case 'v':
          finalVcf = new File(args[++i]);
          break;
        case 'd':
          outputDirectory = new File(args[++i]);
          break;
        case 'c':
          referenceDir = new File(args[++i]);
          break;
        case 'h':
          printDocs();
          System.exit(0);
        default:
          Misc.printErrAndExit("\nProblem, unknown option! " + flagMatcher.group());
      }
    } catch (Exception e) {
      Misc.printErrAndExit(
          "\nSorry, something doesn't look right with this parameter: -" + test + "\n");
    }
  }

  // create the output directory and, if no sample id was given, name the sample after it
  if (outputDirectory != null) {
    outputDirectory.mkdirs();
    if (sampleId.length() == 0) {
      sampleId = outputDirectory.getName();
    }
  }

  // create the web root and its links subdirectory when they are absent
  if (webRootForLinks != null) {
    if (!webRootForLinks.exists()) {
      webRootForLinks.mkdirs();
    }
    File linksDir = new File(webRootForLinks, "links");
    if (!linksDir.exists()) {
      linksDir.mkdirs();
    }
  }

  // look for required fields and files
  checkPrintFields();
  checkPrintFiles();
  checkForGzippedVcf();
}
/** This method will process each argument and assign new variables */ public void processArgs(String[] args) { Pattern pat = Pattern.compile("-[a-z]"); File forExtraction = null; File ucscDir = null; for (int i = 0; i < args.length; i++) { String lcArg = args[i].toLowerCase(); Matcher mat = pat.matcher(lcArg); if (mat.matches()) { char test = args[i].charAt(1); try { switch (test) { case 'b': forExtraction = new File(args[++i]); break; case 'd': ucscDir = new File(args[++i]); break; case 'v': versionedGenome = args[++i]; break; case 'e': verbose = false; break; case 'f': forceConversion = true; break; case 'h': printDocs(); System.exit(0); break; default: USeqUtilities.printExit("\nProblem, unknown option! " + mat.group()); } } catch (Exception e) { USeqUtilities.printExit( "\nSorry, something doesn't look right with this parameter: -" + test + "\n"); } } } if (verbose) System.out.println( "\n" + IO.fetchUSeqVersion() + " Arguments: " + USeqUtilities.stringArrayToString(args, " ") + "\n"); // versioned genome? if (versionedGenome == null) USeqUtilities.printExit( "\nError: you must supply a genome version. Goto http://genome.ucsc.edu/cgi-" + "bin/hgGateway load your organism to find the associated genome version (e.g. 
H_sapiens_Mar_2006, H_sapiens_Feb_2009).\n"); // make files if (ucscDir == null || ucscDir.isDirectory() == false) USeqUtilities.printExit( "\nCannot find your directory containing the UCSC wig2BigWig and bed2BigBed apps -> " + ucscDir); bigWigToWig = new File(ucscDir, "bigWigToWig"); bigBedToBed = new File(ucscDir, "bigBedToBed"); // check files // if (bigWigToBedGraph.canExecute() == false) USeqUtilities.printExit("\nCannot find or execute // -> "+bigWigToBedGraph+"\n"); if (bigWigToWig.canExecute() == false) USeqUtilities.printExit("\nCannot find or execute -> " + bigWigToWig + "\n"); if (bigBedToBed.canExecute() == false) USeqUtilities.printExit("\nCannot find or execute -> " + bigBedToBed + "\n"); // pull ucsc files if (forExtraction == null || forExtraction.exists() == false) Misc.printExit( "\nError: please enter a bw or bb file or directory containing such to convert!\n"); File[][] tot = new File[2][]; tot[0] = USeqUtilities.fetchFilesRecursively(forExtraction, ".bw"); tot[1] = USeqUtilities.fetchFilesRecursively(forExtraction, ".bb"); ucscArchives = IO.collapseFileArray(tot); if (ucscArchives == null || ucscArchives.length == 0 || ucscArchives[0].canRead() == false) Misc.printExit("\nError: cannot find or read any xxx.bb or xxx.bw file(s)!\n"); }
/** This method will process each argument and assign new varibles */ public void processArgs(String[] args) { Pattern pat = Pattern.compile("-[a-z]"); String useqVersion = IO.fetchUSeqVersion(); System.err.println( "\n" + useqVersion + " Arguments: " + Misc.stringArrayToString(args, " ") + "\n"); File tabixDataDir = null; for (int i = 0; i < args.length; i++) { String lcArg = args[i].toLowerCase(); Matcher mat = pat.matcher(lcArg); if (mat.matches()) { char test = args[i].charAt(1); try { switch (test) { case 'c': chrLengthFile = new File(args[++i]); break; case 'd': tabixDataDir = new File(args[++i]); break; case 'n': numberThreads = Integer.parseInt(args[++i]); break; case 'q': numberQueriesInChunk = Integer.parseInt(args[++i]); break; default: Misc.printErrAndExit("\nProblem, unknown option! " + mat.group()); } } catch (Exception e) { Misc.printErrAndExit( "\nSorry, something doesn't look right with this parameter: -" + test + "\n"); } } } if (chrLengthFile == null) Misc.printErrAndExit( "\nError: please provide a bed file of chromosome and their lengths, e.g. 
X 0 155270560\n"); if (tabixDataDir == null || tabixDataDir.isDirectory() == false) Misc.printErrAndExit( "\nError: please provide a directory containing tabix indexed xxx.vcf.gz and xxx.bed.gz files with their associated xxx.gz.tbi indexes"); // pull data sources vcfDataFiles = IO.fetchFilesRecursively(tabixDataDir, "vcf.gz"); bedDataFiles = IO.fetchFilesRecursively(tabixDataDir, "bed.gz"); mafDataFiles = IO.fetchFilesRecursively(tabixDataDir, "maf.txt.gz"); if (vcfDataFiles.length == 0 && bedDataFiles.length == 0 && mafDataFiles.length == 0) Misc.printErrAndExit( "\nError: failed to find any xxx.bed.gz, xxx.vcf.gz, or xxx.maf.txt.gz tabix files in your tabixDataDir -> " + tabixDataDir); // check for index lookForTabixIndex(vcfDataFiles); lookForTabixIndex(bedDataFiles); lookForTabixIndex(mafDataFiles); // threads to use int numAvail = Runtime.getRuntime().availableProcessors(); if (numberThreads < 1) numberThreads = numAvail - 1; System.err.println( numAvail + " Available processors, using " + numberThreads + " threaded loaders\n"); }