Exemplo n.º 1
1
 /**
  * Parses the command line, then processes every bed file found: the file is parsed, a bar
  * directory is requested from {@code IO.makeDirectory}, a threshold-named filtered bed writer is
  * opened, and {@code makeStairStepBarFiles()} is called.
  *
  * <p>Files that fail to parse, or that yield no lines, are reported and skipped.
  *
  * @param args command line arguments, handed to {@code processArgs}
  */
 public Bed2Bar(String[] args) {
   try {
     processArgs(args);
     for (int i = 0; i < bedFiles.length; i++) {
       bedFile = bedFiles[i];
       System.out.println("Parsing " + bedFile.getName());
       bedLinesHash = Bed.parseBedFile(bedFile, true, false);
       if (bedLinesHash == null || bedLinesHash.size() == 0) {
         System.out.println("Problem parsing bed file, skipping!");
         continue;
       }
       barDirectory = IO.makeDirectory(bedFile, "");
       File bedOutFile =
           new File(Misc.removeExtension(bedFile.toString()) + "_" + threshold + "_Filt.bed");
       bedOut = new PrintWriter(new FileWriter(bedOutFile));
       try {
         makeStairStepBarFiles();
       } finally {
         // A new writer is opened for every bed file, so it must be closed per file. Closing
         // only once after the loop leaked every writer but the last and hit a null writer
         // when all files were skipped. PrintWriter.close() is safe to call more than once.
         bedOut.close();
       }
     }
     System.out.println("\nDone!\n");
   } catch (IOException e) {
     e.printStackTrace();
   }
 }
Exemplo n.º 2
0
  /**
   * Processes each command line argument and assigns the matching fields.
   *
   * <p>Recognized options: {@code -f} bed file or directory, {@code -v} genome version, {@code -s}
   * turn on score summing, {@code -t} threshold (float), {@code -h} print the docs and exit.
   * Options are matched against the lower-cased argument, but the switch uses the original
   * character, so an upper-case flag ends up in the unknown-option branch.
   *
   * <p>After parsing, the bed files ({@code .bed}, {@code .bed.zip}, {@code .bed.gz}) are collected
   * into {@code bedFiles}; a missing path, no matching files, or a missing genome version is
   * reported through {@code Misc.printErrAndExit}.
   *
   * @param args raw command line arguments
   */
  public void processArgs(String[] args) {
    File file = null;
    Pattern pat = Pattern.compile("-[a-z]");
    for (int i = 0; i < args.length; i++) {
      String lcArg = args[i].toLowerCase();
      Matcher mat = pat.matcher(lcArg);
      if (mat.matches()) {
        char test = args[i].charAt(1);
        try {
          switch (test) {
            case 'f':
              file = new File(args[i + 1]);
              i++;
              break;
            case 'v':
              genomeVersion = args[i + 1];
              i++;
              break;
            case 's':
              sumScores = true;
              break;
            case 't':
              threshold = Float.parseFloat(args[++i]);
              break;
            case 'h':
              printDocs();
              System.exit(0);
              // never reached at runtime; keeps help from reading as a fall-through to default
              break;
            default:
              Misc.printExit("\nError, unknown option! " + mat.group());
          }
        } catch (Exception e) {
          System.out.print("\nSorry, something doesn't look right with this parameter: -" + test);
          System.out.println();
          // a malformed parameter is a failure, so signal it with a non-zero exit status
          System.exit(1);
        }
      }
    }
    if (file == null || file.exists() == false)
      Misc.printErrAndExit("Problem finding your bed files!\n");
    // pull files
    File[][] tot = new File[3][];
    tot[0] = IO.extractFiles(file, ".bed");
    tot[1] = IO.extractFiles(file, ".bed.zip");
    tot[2] = IO.extractFiles(file, ".bed.gz");
    bedFiles = IO.collapseFileArray(tot);
    if (bedFiles == null || bedFiles.length == 0)
      Misc.printErrAndExit("Problem finding your xxx.bed(.zip/.gz OK) files!\n");

    // genome version
    if (genomeVersion == null)
      Misc.printErrAndExit(
          "Please enter a genome version (e.g. H_sapiens_Mar_2006, see http://genome.ucsc.edu/FAQ/FAQreleases\n");
  }
Exemplo n.º 3
0
  /**
   * Walks the command line and stores each recognized option in its field.
   *
   * <p>Options: {@code -f} sgr file or directory, {@code -v} genome version, {@code -s} strand,
   * {@code -t} switch on stair-step output, {@code -h} print the docs and exit. Afterwards the
   * {@code .sgr}, {@code .sgr.zip} and {@code .sgr.gz} files are gathered into {@code sgrFiles}.
   * Problems are reported through {@code Misc.printExit}.
   *
   * @param args raw command line arguments
   */
  public void processArgs(String[] args) {
    File dir = null;
    Pattern flagPattern = Pattern.compile("-[a-z]");
    for (int i = 0; i < args.length; i++) {
      Matcher flagMatcher = flagPattern.matcher(args[i].toLowerCase());
      if (!flagMatcher.matches()) {
        continue;
      }
      // the switch deliberately looks at the original (not lower-cased) character
      char flag = args[i].charAt(1);
      try {
        switch (flag) {
          case 'f':
            dir = new File(args[i + 1]);
            i++;
            break;
          case 'v':
            genomeVersion = args[i + 1];
            i++;
            break;
          case 's':
            strand = args[++i];
            break;
          case 't':
            stairStep = true;
            break;
          case 'h':
            printDocs();
            System.exit(0);
          default:
            Misc.printExit("\nError: unknown option! " + flagMatcher.group());
        }
      } catch (Exception e) {
        Misc.printExit(
            "\nSorry, something doesn't look right with this parameter: -" + flag + "\n");
      }
    }

    if (dir == null || !dir.canRead()) {
      Misc.printExit("\nError: cannot find or read your sgr file/ directory.\n");
    }

    // collect plain and compressed sgr files
    File[][] byExtension =
        new File[][] {
          IO.extractFiles(dir, ".sgr"),
          IO.extractFiles(dir, ".sgr.zip"),
          IO.extractFiles(dir, ".sgr.gz")
        };
    sgrFiles = IO.collapseFileArray(byExtension);

    if (sgrFiles == null || sgrFiles.length == 0) {
      Misc.printExit("\nError: cannot find your xxx.sgr.zip file(s)");
    }
    if (genomeVersion == null) {
      Misc.printExit(
          "\nError: you must supply a genome version. Goto http://genome.ucsc.edu/cgi-"
              + "bin/hgGateway load your organism to find the associated genome version.\n");
    }
  }
  /**
   * Walks the command line and stores each recognized option in its field.
   *
   * <p>Options: {@code -v} vcf file or directory, {@code -m} minimum SSC (float), {@code -a}
   * minimum count (int), {@code -r} minimum absolute fraction change (double), {@code -n} maximum
   * normal alt fraction (double). The version and arguments are echoed to standard out, then the
   * {@code .vcf}, {@code .vcf.gz} and {@code .vcf.zip} files are gathered into {@code vcfFiles}.
   *
   * @param args raw command line arguments
   */
  public void processArgs(String[] args) {
    Pattern flagPattern = Pattern.compile("-[a-z]");
    File forExtraction = null;
    for (int i = 0; i < args.length; i++) {
      Matcher flagMatcher = flagPattern.matcher(args[i].toLowerCase());
      if (!flagMatcher.matches()) {
        continue;
      }
      // the switch deliberately looks at the original (not lower-cased) character
      char flag = args[i].charAt(1);
      try {
        switch (flag) {
          case 'v':
            forExtraction = new File(args[++i]);
            break;
          case 'm':
            minimumSSC = Float.parseFloat(args[++i]);
            break;
          case 'a':
            minimumCount = Integer.parseInt(args[++i]);
            break;
          case 'r':
            minimumAbsFractionChange = Double.parseDouble(args[++i]);
            break;
          case 'n':
            maximumNormalAltFraction = Double.parseDouble(args[++i]);
            break;
          default:
            Misc.printErrAndExit("\nProblem, unknown option! " + flagMatcher.group());
        }
      } catch (Exception e) {
        Misc.printErrAndExit(
            "\nSorry, something doesn't look right with this parameter: -" + flag + "\n");
      }
    }

    String joinedArgs = Misc.stringArrayToString(args, " ");
    System.out.println("\n" + IO.fetchUSeqVersion() + " Arguments: " + joinedArgs + "\n");

    // pull vcf files
    if (forExtraction == null || !forExtraction.exists()) {
      Misc.printErrAndExit(
          "\nError: please enter a path to a vcf file or directory containing such.\n");
    }
    File[][] byExtension =
        new File[][] {
          IO.extractFiles(forExtraction, ".vcf"),
          IO.extractFiles(forExtraction, ".vcf.gz"),
          IO.extractFiles(forExtraction, ".vcf.zip")
        };
    vcfFiles = IO.collapseFileArray(byExtension);
    if (vcfFiles == null || vcfFiles.length == 0 || !vcfFiles[0].canRead()) {
      Misc.printExit("\nError: cannot find your xxx.vcf(.zip/.gz OK) file(s)!\n");
    }
  }
Exemplo n.º 5
0
  /**
   * Builds the query engine from the command line, prints build statistics and the data source
   * summary to standard error, runs {@code queryFilesFromCmdLine()}, and finally closes the tabix
   * readers held by the query loader.
   *
   * <p>Any exception is printed along with a short failure message; it is not rethrown.
   *
   * @param args command line arguments, handed to {@code processArgs}
   */
  public TQuery(String[] args) {
    try {
      long startTime = System.currentTimeMillis();
      processArgs(args);

      queryIndex = new QueryIndex(this);
      queryLoader = new QueryLoader(this);

      try {
        // print some stats on building the engine
        String diffTime =
            Num.formatNumberOneFraction(
                ((double) (System.currentTimeMillis() - startTime)) / 1000);
        int numFiles = vcfDataFiles.length + bedDataFiles.length + mafDataFiles.length;
        System.err.println("\n" + diffTime + " Sec to build using " + IO.memory() + " of RAM");
        System.err.println("\t" + numFiles + "\tData sources loaded");
        System.err.println("\t" + dataSources.getRecordsLoaded() + "\tRecords indexed");
        System.err.println("\t" + dataSources.getRecordsSkipped() + "\tRecords skipped\n");

        // print summary of available filters
        System.err.println(dataSources.fetchSummary());

        queryFilesFromCmdLine();
      } finally {
        // release file handles even when reporting or querying fails part way through
        queryLoader.closeTabixReaders();
      }

    } catch (Exception e) {
      e.printStackTrace();
      System.err.println("\nProblem with executing the TQuery!");
    }
  }
  /**
   * Expands the truncated pipeline properties file into {@code pipelineProperties.xml} inside the
   * output directory.
   *
   * <p>Each {@code <entry ...>} line whose value starts with D, A or B is treated as a path
   * relative to {@code referenceDir}: the file must exist and the value is replaced with the
   * resolved path. Any line containing "threads" is replaced with a threads entry built from the
   * {@code threads} field. All other lines are copied through unchanged. If any referenced file is
   * missing, or the result cannot be written, the problem is reported through {@code
   * Misc.printErrAndExit}.
   */
  private void buildXmlPropertiesFile() {
    System.out.println(
        "\nBuilding and checking your pipeline properties file -> " + truncPipePropFile);
    StringBuilder toPrint = new StringBuilder();

    // walk through the prop file
    String[] prop = IO.loadFileIntoStringArray(truncPipePropFile);
    if (prop == null) {
      Misc.printErrAndExit("Problem reading -> " + truncPipePropFile);
    }
    // [DAB] rather than [D|A|B]: inside a character class '|' is a literal, so the old pattern
    // also accepted values starting with a pipe character
    Pattern val = Pattern.compile("(<entry key.+>)([DAB].*)</entry>");
    boolean missingFile = false;
    for (String s : prop) {
      // does it match a file needing prepending? Data/, Apps/, Bed/
      Matcher mat = val.matcher(s);
      if (mat.matches()) {
        File test = new File(referenceDir, mat.group(2));
        if (test.exists()) {
          System.out.println("Found\t" + test);
          toPrint.append(mat.group(1));
          toPrint.append(test.toString());
          toPrint.append("</entry>");
        } else {
          System.out.println("Misssing\t" + test);
          missingFile = true;
        }
      }
      // threads?
      else if (s.contains("threads")) {
        toPrint.append("<entry key=\"threads\">").append(threads).append("</entry>");
      }
      // nope just save it
      else {
        toPrint.append(s);
      }
      // every input line, including a missing-file line, contributes a line return
      toPrint.append("\n");
    }
    // anything missing? if so exit
    if (missingFile)
      Misc.printErrAndExit(
          "\nFailed to find all of the files in your properties file, see above.\n");

    // OK, write it out
    completePipelinePropFile = new File(outputDirectory, "pipelineProperties.xml");
    if (IO.writeString(toPrint.toString(), completePipelinePropFile) == false)
      // report the file that failed to be written, not the input file
      Misc.printErrAndExit("Problem writing -> " + completePipelinePropFile);
  }
 /**
  * Swaps {@code finalVcf} for its uncompressed sibling when the name ends in ".gz".
  *
  * <p>The sibling has the same name minus the ".gz" suffix and lives in the same parent
  * directory. If it does not exist yet, {@code deleteTempVcf} is set and the file is produced
  * with {@code IO.uncompress}; a null result from that call is reported through {@code
  * Misc.printErrAndExit}.
  */
 private void checkForGzippedVcf() {
   String vcfName = finalVcf.getName();
   if (!vcfName.endsWith(".gz")) {
     return;
   }

   // drop the three characters of ".gz"
   String plainName = vcfName.substring(0, vcfName.length() - 3);
   File uncomp = new File(finalVcf.getParentFile(), plainName);

   if (!uncomp.exists()) {
     deleteTempVcf = true;
     File result = IO.uncompress(finalVcf, uncomp);
     if (result == null) {
       Misc.printErrAndExit("ERROR: failed to uncompress -> " + finalVcf);
     }
   }
   finalVcf = uncomp;
 }
  /**
   * Scans the start of a bed file and records which optional columns it carries.
   *
   * <p>Resets and then sets the {@code stranded}, {@code scored}, {@code named} and {@code bed12}
   * fields. Only lines matching the {@code bedLine} pattern are considered; scanning stops at the
   * first 12-column line or after roughly the first 10000 lines.
   *
   * @param bedFile file to inspect
   * @return true if at least one bed line was found, false if none was found or parsing failed
   */
  public boolean bedType(File bedFile) {
    stranded = false;
    scored = false;
    named = false;
    bed12 = false;

    boolean bedLineFound = false;
    BufferedReader in = null;

    try {

      // read through first 10000 lines
      // chrom start stop name score strand ....
      //  0     1     2   3     4     5
      in = IO.fetchBufferedReader(bedFile);
      String[] tokens;
      String line;
      int counter = 0;
      while ((line = in.readLine()) != null) {
        if (counter++ > 10000) break;
        // bed line?
        if (bedLine.matcher(line).matches() == false) continue;
        bedLineFound = true;
        // split on tab
        tokens = tab.split(line);
        // bed12?
        if (tokens.length == 12) {
          bed12 = true;
          break;
        }
        if (tokens.length > 3) {
          // named?
          if (badName.matcher(tokens[3]).matches() == false) named = true;
          if (tokens.length > 4) {
            // scored? a literal "0" in every line is read as "no score"
            if (tokens[4].equals("0") == false) scored = true;
            if (tokens.length > 5) {
              // stranded? a "." in every line is read as "no strand"
              if (tokens[5].equals(".") == false) stranded = true;
            }
          }
        }
      }

    } catch (Exception e) {
      System.err.println("\nProblem parsing bed type.\n");
      e.printStackTrace();
      bedLineFound = false;
    } finally {
      // always release the reader; previously it stayed open whenever parsing threw
      if (in != null) {
        try {
          in.close();
        } catch (Exception ignored) {
          // a failed close of a read-only stream does not invalidate what was already parsed
        }
      }
    }

    return bedLineFound;
  }
  /**
   * Writes the XML template to {@code pipelineTemplate.xml} in the output directory, launches the
   * pipeline jar with {@code java -jar -Xmx2G} through {@code IO.executeViaProcessBuilder}, and
   * scans the returned output lines for errors.
   *
   * <p>An output line counts as an error when it contains "error" (case-insensitive) and does not
   * start with "warning". Failures are reported through {@code Misc.printErrAndExit}.
   */
  private void executePipelineJob() {
    String[] cmd = null;
    try {
      // write out tempTemplate
      File template = new File(outputDirectory, "pipelineTemplate.xml");
      if (IO.writeString(xmlTemplate, template) == false)
        Misc.printErrAndExit("Problem writing -> " + template);

      // build and execute cmd
      cmd =
          new String[] {
            "java",
            "-jar",
            "-Xmx2G",
            pJar.getCanonicalPath(),
            "-props",
            completePipelinePropFile.getCanonicalPath(),
            template.getCanonicalPath()
          };

      String stringCmd = Misc.stringArrayToString(cmd, " ");
      System.out.println("\nExecuting:\n" + stringCmd);
      System.out.println("\nPipelineOutput:");
      String[] out = IO.executeViaProcessBuilder(cmd, true);

      // check output for possible errors
      for (String line : out) {
        String lcLine = line.toLowerCase();
        // watch out for cases where error is mentioned in a warning output line.
        if (lcLine.contains("error") && lcLine.startsWith("warning") == false)
          Misc.printErrAndExit(
              "\n\nERROR found in Pipeline.jar output, see above. Aborting!\n" + line);
      }

    } catch (Exception e) {
      e.printStackTrace();
      // cmd is still null when the failure happened while the command was being assembled
      // (e.g. getCanonicalPath threw); don't hand a null array to the formatter
      String attempted =
          (cmd == null) ? "(command could not be built)" : Misc.stringArrayToString(cmd, " ");
      Misc.printErrAndExit("ERROR: executing " + attempted);
    }
  }
Exemplo n.º 10
0
 /**
  * Processes each command line argument and assigns the matching fields.
  *
  * <p>Recognized options: {@code -f} path from which "fasta" files are extracted, {@code -n}
  * number of bases (int), {@code -i} index directory, {@code -h} print the docs and exit. When no
  * index directory is given, an "IndexedSequences" directory is created next to the first fasta
  * file.
  *
  * @param args raw command line arguments
  */
 public void processArgs(String[] args) {
   Pattern pat = Pattern.compile("-[a-z]");
   for (int i = 0; i < args.length; i++) {
     String lcArg = args[i].toLowerCase();
     Matcher mat = pat.matcher(lcArg);
     if (mat.matches()) {
       char test = args[i].charAt(1);
       try {
         switch (test) {
           case 'f':
             fastas = IO.extractFiles(args[i + 1], "fasta");
             i++;
             break;
           case 'n':
             numberOfBases = Integer.parseInt(args[i + 1]);
             i++;
             break;
           case 'i':
             indexDirectory = new File(args[i + 1]);
             i++;
             break;
           case 'h':
             printDocs();
             System.exit(0);
           default:
             Misc.printExit("\nError: unknown option! " + mat.group());
         }
       } catch (Exception e) {
         Misc.printExit(
             "\nSorry, something doesn't look right with this parameter: -" + test + "\n");
       }
     }
   }
   // make index directory?
   if (indexDirectory == null) {
     // the default location is derived from the first fasta file, so one must exist;
     // without this check a missing -f surfaced as a bare NullPointerException
     if (fastas == null || fastas.length == 0) {
       Misc.printExit("\nError: cannot find any fasta files, please provide them with -f.\n");
     }
     indexDirectory = new File(fastas[0].getParentFile(), "IndexedSequences");
     indexDirectory.mkdir();
   }
 }
  /**
   * Echoes the version and arguments, then walks the command line and stores each recognized
   * option in its field.
   *
   * <p>String options: {@code -o} job id, {@code -s} sample id, {@code -m} submitter, {@code -y}
   * analysis type, {@code -e} snpEff genome, {@code -i} minimum read depth, {@code -t} threads.
   * File options: {@code -w} web root for links, {@code -j} pipeline jar, {@code -p} truncated
   * pipeline properties file, {@code -q} bed for coverage QC, {@code -b} bed for variant calling,
   * {@code -r} fasta reference, {@code -u} unfiltered bam, {@code -f} final bam, {@code -v} final
   * vcf, {@code -d} output directory, {@code -c} reference directory. Flags: {@code -l} upload
   * variants to NGSWeb, {@code -h} print the docs and exit.
   *
   * <p>Afterwards the output directory and the web root (plus its "links" sub directory) are
   * created as needed, an empty sample id defaults to the output directory name, and the field,
   * file and gzipped-vcf checks are run.
   *
   * @param args raw command line arguments
   */
  public void processArgs(String[] args) {
    Pattern flagPattern = Pattern.compile("-[a-z]");
    String joinedArgs = Misc.stringArrayToString(args, " ");
    System.out.println("\n" + IO.fetchUSeqVersion() + " Arguments: " + joinedArgs + "\n");

    for (int i = 0; i < args.length; i++) {
      Matcher flagMatcher = flagPattern.matcher(args[i].toLowerCase());
      if (!flagMatcher.matches()) {
        continue;
      }
      // the switch deliberately looks at the original (not lower-cased) character
      char flag = args[i].charAt(1);
      try {
        switch (flag) {
          case 'o':
            jobId = args[++i];
            break;
          case 's':
            sampleId = args[++i];
            break;
          case 'm':
            submitter = args[++i];
            break;
          case 'y':
            analysisType = args[++i];
            break;
          case 'w':
            webRootForLinks = new File(args[++i]);
            break;
          case 'e':
            snpEffGenome = args[++i];
            break;
          case 'i':
            minimumReadDepth = args[++i];
            break;
          case 't':
            threads = args[++i];
            break;
          case 'l':
            uploadVarsToNGSWeb = true;
            break;
          case 'j':
            pJar = new File(args[++i]);
            break;
          case 'p':
            truncPipePropFile = new File(args[++i]);
            break;
          case 'q':
            bedForCoverageQC = new File(args[++i]);
            break;
          case 'b':
            bedForVarCalling = new File(args[++i]);
            break;
          case 'r':
            fastaReference = new File(args[++i]);
            break;
          case 'u':
            unfilteredBam = new File(args[++i]);
            break;
          case 'f':
            finalBam = new File(args[++i]);
            break;
          case 'v':
            finalVcf = new File(args[++i]);
            break;
          case 'd':
            outputDirectory = new File(args[++i]);
            break;
          case 'c':
            referenceDir = new File(args[++i]);
            break;
          case 'h':
            printDocs();
            System.exit(0);
          default:
            Misc.printErrAndExit("\nProblem, unknown option! " + flagMatcher.group());
        }
      } catch (Exception e) {
        Misc.printErrAndExit(
            "\nSorry, something doesn't look right with this parameter: -" + flag + "\n");
      }
    }

    // output directory: create it and, when no sample id was given, borrow its name
    if (outputDirectory != null) {
      outputDirectory.mkdirs();
      if (sampleId.isEmpty()) {
        sampleId = outputDirectory.getName();
      }
    }

    // web root: create it and its "links" sub directory when absent
    if (webRootForLinks != null) {
      if (!webRootForLinks.exists()) {
        webRootForLinks.mkdirs();
      }
      File linksDir = new File(webRootForLinks, "links");
      if (!linksDir.exists()) {
        linksDir.mkdirs();
      }
    }

    // look for required fields and files
    checkPrintFields();
    checkPrintFiles();
    checkForGzippedVcf();
  }
Exemplo n.º 12
0
  /**
   * Processes each command line argument and assigns the matching fields.
   *
   * <p>Recognized options: {@code -b} bw/bb file or directory to convert, {@code -d} directory
   * holding the UCSC executables, {@code -v} versioned genome, {@code -e} turn off verbose output,
   * {@code -f} force conversion, {@code -h} print the docs and exit.
   *
   * <p>After parsing, the {@code bigWigToWig} and {@code bigBedToBed} executables are looked up in
   * the UCSC directory and must be executable, and all {@code .bw} and {@code .bb} files under the
   * extraction path are gathered into {@code ucscArchives}.
   *
   * @param args raw command line arguments
   */
  public void processArgs(String[] args) {
    Pattern pat = Pattern.compile("-[a-z]");
    File forExtraction = null;
    File ucscDir = null;
    for (int i = 0; i < args.length; i++) {
      String lcArg = args[i].toLowerCase();
      Matcher mat = pat.matcher(lcArg);
      if (mat.matches()) {
        char test = args[i].charAt(1);
        try {
          switch (test) {
            case 'b':
              forExtraction = new File(args[++i]);
              break;
            case 'd':
              ucscDir = new File(args[++i]);
              break;
            case 'v':
              versionedGenome = args[++i];
              break;
            case 'e':
              verbose = false;
              break;
            case 'f':
              forceConversion = true;
              break;
            case 'h':
              printDocs();
              System.exit(0);
              break;
            default:
              USeqUtilities.printExit("\nProblem, unknown option! " + mat.group());
          }
        } catch (Exception e) {
          USeqUtilities.printExit(
              "\nSorry, something doesn't look right with this parameter: -" + test + "\n");
        }
      }
    }
    if (verbose)
      System.out.println(
          "\n"
              + IO.fetchUSeqVersion()
              + " Arguments: "
              + USeqUtilities.stringArrayToString(args, " ")
              + "\n");

    // versioned genome?
    if (versionedGenome == null)
      USeqUtilities.printExit(
          "\nError: you must supply a genome version. Goto http://genome.ucsc.edu/cgi-"
              + "bin/hgGateway load your organism to find the associated genome version (e.g. H_sapiens_Mar_2006, H_sapiens_Feb_2009).\n");

    // make files
    if (ucscDir == null || ucscDir.isDirectory() == false)
      // name the executables this method actually requires (bigWigToWig / bigBedToBed)
      USeqUtilities.printExit(
          "\nCannot find your directory containing the UCSC bigWigToWig and bigBedToBed apps -> "
              + ucscDir);
    bigWigToWig = new File(ucscDir, "bigWigToWig");
    bigBedToBed = new File(ucscDir, "bigBedToBed");

    // check files
    if (bigWigToWig.canExecute() == false)
      USeqUtilities.printExit("\nCannot find or execute -> " + bigWigToWig + "\n");
    if (bigBedToBed.canExecute() == false)
      USeqUtilities.printExit("\nCannot find or execute -> " + bigBedToBed + "\n");

    // pull ucsc files
    if (forExtraction == null || forExtraction.exists() == false)
      Misc.printExit(
          "\nError: please enter a bw or bb file or directory containing such to convert!\n");
    File[][] tot = new File[2][];
    tot[0] = USeqUtilities.fetchFilesRecursively(forExtraction, ".bw");
    tot[1] = USeqUtilities.fetchFilesRecursively(forExtraction, ".bb");

    ucscArchives = IO.collapseFileArray(tot);
    if (ucscArchives == null || ucscArchives.length == 0 || ucscArchives[0].canRead() == false)
      Misc.printExit("\nError: cannot find or read any xxx.bb or xxx.bw file(s)!\n");
  }
Exemplo n.º 13
0
  /**
   * Processes each command line argument and assigns the matching fields.
   *
   * <p>Recognized options: {@code -c} chromosome length bed file, {@code -d} directory of tabix
   * indexed data, {@code -n} number of loader threads (int), {@code -q} number of queries per
   * chunk (int). The version and arguments are echoed to standard error first.
   *
   * <p>After parsing, the {@code vcf.gz}, {@code bed.gz} and {@code maf.txt.gz} files under the
   * data directory are collected and passed to {@code lookForTabixIndex}. When fewer than one
   * thread was requested, all but one of the available processors are used, with a floor of one
   * thread.
   *
   * @param args raw command line arguments
   */
  public void processArgs(String[] args) {
    Pattern pat = Pattern.compile("-[a-z]");
    String useqVersion = IO.fetchUSeqVersion();
    System.err.println(
        "\n" + useqVersion + " Arguments: " + Misc.stringArrayToString(args, " ") + "\n");
    File tabixDataDir = null;
    for (int i = 0; i < args.length; i++) {
      String lcArg = args[i].toLowerCase();
      Matcher mat = pat.matcher(lcArg);
      if (mat.matches()) {
        char test = args[i].charAt(1);
        try {
          switch (test) {
            case 'c':
              chrLengthFile = new File(args[++i]);
              break;
            case 'd':
              tabixDataDir = new File(args[++i]);
              break;
            case 'n':
              numberThreads = Integer.parseInt(args[++i]);
              break;
            case 'q':
              numberQueriesInChunk = Integer.parseInt(args[++i]);
              break;
            default:
              Misc.printErrAndExit("\nProblem, unknown option! " + mat.group());
          }
        } catch (Exception e) {
          Misc.printErrAndExit(
              "\nSorry, something doesn't look right with this parameter: -" + test + "\n");
        }
      }
    }
    if (chrLengthFile == null)
      Misc.printErrAndExit(
          "\nError: please provide a bed file of chromosome and their lengths, e.g. X 0 155270560\n");
    if (tabixDataDir == null || tabixDataDir.isDirectory() == false)
      Misc.printErrAndExit(
          "\nError: please provide a directory containing tabix indexed xxx.vcf.gz and xxx.bed.gz files with their associated xxx.gz.tbi indexes");

    // pull data sources
    vcfDataFiles = IO.fetchFilesRecursively(tabixDataDir, "vcf.gz");
    bedDataFiles = IO.fetchFilesRecursively(tabixDataDir, "bed.gz");
    mafDataFiles = IO.fetchFilesRecursively(tabixDataDir, "maf.txt.gz");
    if (vcfDataFiles.length == 0 && bedDataFiles.length == 0 && mafDataFiles.length == 0)
      Misc.printErrAndExit(
          "\nError: failed to find any xxx.bed.gz, xxx.vcf.gz, or xxx.maf.txt.gz tabix files in your tabixDataDir -> "
              + tabixDataDir);

    // check for index
    lookForTabixIndex(vcfDataFiles);
    lookForTabixIndex(bedDataFiles);
    lookForTabixIndex(mafDataFiles);

    // threads to use
    int numAvail = Runtime.getRuntime().availableProcessors();
    // leave one processor free, but never drop to zero loaders on a single-processor machine
    if (numberThreads < 1) numberThreads = Math.max(1, numAvail - 1);
    System.err.println(
        numAvail + " Available processors, using " + numberThreads + " threaded loaders\n");
  }