Esempio n. 1
0
 /**
  * Prepares the region file for the chromosome range {@code r}.
  *
  * <p>When no filter BED file is configured ({@code filterBedFile == null}), the range is written
  * to {@code region} via {@code r.writeToPicardRegionFile} and {@code region} is returned as-is.
  * Otherwise the filter BED file is downloaded from HDFS to a local temporary file (unzipped when
  * it ends with {@code .gz}) and {@code tools.filterExomeBed} produces the region file.
  *
  * @param context task context, used for the HDFS download and its configuration
  * @param r chromosome range the region file is built for
  * @param tools preprocessing tools used to filter the exome BED file
  * @param region path of the region file to write when no filter BED file is configured
  * @return the region file path, or {@code null} when {@code tools.filterExomeBed} returns
  *     {@code null} (logged as an empty region file)
  * @throws URISyntaxException if {@code filterBedFile} is not a valid URI
  * @throws IOException propagated from the file system and file utilities
  * @throws InterruptedException propagated from the called tools
  */
 protected String makeRegionFile(
     Context context, ChromosomeRange r, PreprocessingTools tools, String region)
     throws URISyntaxException, IOException, InterruptedException {
   if (filterBedFile == null) {
     // no exome filter configured: the region file is just the chromosome range
     r.writeToPicardRegionFile(region);
     return region;
   }

   String exomebed = tmpFileBase + "exome.bed";
   if (filterBedFile.endsWith(".gz")) {
     exomebed += ".gz";
   }
   HalvadeFileUtils.downloadFileFromHDFS(
       context,
       FileSystem.get(new URI(filterBedFile), context.getConfiguration()),
       filterBedFile,
       exomebed);
   if (exomebed.endsWith(".gz")) {
     exomebed = HalvadeFileUtils.Unzip(exomebed);
   }

   String filtered = tools.filterExomeBed(exomebed, r);
   // Clean up the local BED copy before the null check so it is also removed when the
   // filtered region turns out to be empty (previously it was left behind on that path).
   HalvadeFileUtils.removeLocalFile(keep, exomebed);
   if (filtered == null) {
     Logger.DEBUG("empty region file, no vcf results!!");
   }
   return filtered;
 }
Esempio n. 2
0
  /**
   * Runs GATK SplitNCigarReads on {@code input}, writing the result to {@code output}, and then
   * removes the input file together with its companion {@code .bai} file.
   *
   * <p>Increments the {@code TOOLS_GATK} counter once for the tool invocation.
   *
   * @param context task context used for status and counter updates
   * @param region region passed through to the GATK call
   * @param gatk GATK tool wrapper
   * @param input path of the input BAM file; removed afterwards via {@code removeLocalFile}
   * @param output path the tool writes to
   * @throws InterruptedException propagated from the GATK call
   */
  protected void splitNTrim(
      Context context, String region, GATKTools gatk, String input, String output)
      throws InterruptedException {
    Logger.DEBUG("run SplitNCigarReads");
    context.setStatus("run SplitNCigarReads");
    context.getCounter(HalvadeCounters.TOOLS_GATK).increment(1);
    gatk.runSplitNCigarReads(input, output, ref, region, newMaxQualScore);

    HalvadeFileUtils.removeLocalFile(keep, input, context, HalvadeCounters.FOUT_GATK_TMP);
    // Escape the dot and anchor at the end: the previous pattern ".bam" was a regex matching any
    // character followed by "bam" anywhere in the path (e.g. a directory called "mybam"), which
    // produced a wrong index path.
    HalvadeFileUtils.removeLocalFile(keep, input.replaceAll("\\.bam$", ".bai"));
  }
Esempio n. 3
0
  /**
   * Preprocesses the records of {@code input} with elPrep and converts the result to an indexed
   * BAM file at {@code output}.
   *
   * <p>Steps, in order: derive the {@code .dict} path from {@code ref}, set up a coordinate-sorted
   * copy of the header, run elPrep ({@code callElPrep} when {@code keep} is set, otherwise
   * {@code streamElPrep}), optionally run featureCounts and upload its output when {@code gff} is
   * set, convert SAM to BAM, build the BAM index and finally remove the temporary files.
   *
   * @param context task context used for status, counters and HDFS access
   * @param tools preprocessing tool wrapper
   * @param input iterator over the SAM records to process
   * @param output path of the resulting BAM file
   * @throws InterruptedException propagated from the called tools
   * @throws IOException propagated from the called tools and file utilities
   * @throws QualityException propagated from the called tools
   * @throws URISyntaxException if {@code outputdir} is not a valid URI
   */
  protected void elPrepPreprocess(
      Context context, PreprocessingTools tools, SAMRecordIterator input, String output)
      throws InterruptedException, IOException, QualityException, URISyntaxException {
    final String dictionaryPath = ref.substring(0, ref.lastIndexOf('.')) + ".dict";
    final String readGroupLine = createReadGroupRecordString(RGID, RGLB, RGPL, RGPU, RGSM);
    final String firstPassSam = tmpFileBase + "-p1.sam";
    final String elPrepSam = tmpFileBase + "-p2.sam";
    final String featureCountsFile = tmpFileBase + "-features.count";

    outHeader = header.clone();
    outHeader.setSortOrder(SAMFileHeader.SortOrder.coordinate);

    Logger.DEBUG("call elPrep");
    context.setStatus("call elPrep");
    // no read group string is handed to elPrep when the input is already BAM
    final String readGroupArg = inputIsBam ? null : readGroupLine;
    final int processed =
        keep
            ? tools.callElPrep(
                firstPassSam, elPrepSam, readGroupArg, threads, input, outHeader, dictionaryPath)
            : tools.streamElPrep(
                context, elPrepSam, readGroupArg, threads, input, outHeader, dictionaryPath);

    Logger.DEBUG(processed + " reads processed in elPrep");
    context.getCounter(HalvadeCounters.IN_PREP_READS).increment(processed);

    if (gff != null) {
      Logger.DEBUG("featureCounts");
      context.setStatus("featureCounts");
      tools.runFeatureCounts(gff, elPrepSam, featureCountsFile, threads);
      final String remoteCounts = outputdir + context.getTaskAttemptID().toString() + ".count";
      HalvadeFileUtils.uploadFileToHDFS(
          context,
          FileSystem.get(new URI(outputdir), context.getConfiguration()),
          featureCountsFile,
          remoteCounts);
    }

    context.setStatus("convert SAM to BAM");
    Logger.DEBUG("convert SAM to BAM");
    tools.callSAMToBAM(elPrepSam, output, threads);

    context.setStatus("build bam index");
    Logger.DEBUG("build bam index");
    tools.runBuildBamIndex(output);

    // temporary files are no longer needed once the indexed BAM exists
    HalvadeFileUtils.removeLocalFile(keep, firstPassSam, context, HalvadeCounters.FOUT_GATK_TMP);
    HalvadeFileUtils.removeLocalFile(keep, elPrepSam, context, HalvadeCounters.FOUT_GATK_TMP);
    HalvadeFileUtils.removeLocalFile(keep, featureCountsFile);
  }
Esempio n. 4
0
  /**
   * Calls variants on {@code input} with the GATK HaplotypeCaller and writes them to
   * {@code output}, then removes the input file together with its companion {@code .bai} file.
   *
   * <p>Increments the {@code TOOLS_GATK} and {@code OUT_VCF_FILES} counters once each.
   *
   * @param context task context used for status and counter updates
   * @param region region passed through to the GATK call
   * @param gatk GATK tool wrapper
   * @param input path of the input BAM file; removed afterwards via {@code removeLocalFile}
   * @param output path the variant caller writes to
   * @throws InterruptedException propagated from the GATK call
   */
  protected void RnaVariantCalling(
      Context context, String region, GATKTools gatk, String input, String output)
      throws InterruptedException {
    // NOTE: this path always uses the HaplotypeCaller; no UnifiedGenotyper choice is made here.
    Logger.DEBUG("run variantCaller");
    context.setStatus("run variantCaller");
    context.getCounter(HalvadeCounters.TOOLS_GATK).increment(1);
    gatk.runHaplotypeCaller(input, output, true, scc, sec, ref, null, region);

    context.setStatus("cleanup");
    context.getCounter(HalvadeCounters.OUT_VCF_FILES).increment(1);

    HalvadeFileUtils.removeLocalFile(keep, input, context, HalvadeCounters.FOUT_GATK_TMP);
    // Escape the dot and anchor at the end: the previous pattern ".bam" was a regex matching any
    // character followed by "bam" anywhere in the path, which could corrupt the index path.
    HalvadeFileUtils.removeLocalFile(keep, input.replaceAll("\\.bam$", ".bai"));
  }
Esempio n. 5
0
 private BWAAlnInstance(Mapper.Context context, String bin)
     throws IOException, URISyntaxException {
   super(context, bin);
   taskId = context.getTaskAttemptID().toString();
   taskId = taskId.substring(taskId.indexOf("m_"));
   ref = HalvadeFileUtils.downloadBWAIndex(context, taskId);
   alnCustomArgs = HalvadeConf.getCustomArgs(context.getConfiguration(), "bwa", "aln");
 }
Esempio n. 6
0
  /**
   * Runs the GATK VariantAnnotator on {@code input}, writing to {@code output}, and removes the
   * input file afterwards.
   *
   * <p>Increments the {@code TOOLS_GATK} counter once for the tool invocation.
   *
   * @param context task context used for status and counter updates
   * @param region region passed through to the GATK call
   * @param gatk GATK tool wrapper
   * @param input path of the input file; removed afterwards via {@code removeLocalFile}
   * @param output path the annotator writes to
   * @throws InterruptedException propagated from the GATK call
   */
  protected void annotateVariants(
      Context context, String region, GATKTools gatk, String input, String output)
      throws InterruptedException {
    final String stage = "run VariantAnnotator";
    Logger.DEBUG(stage);
    context.setStatus(stage);

    context.getCounter(HalvadeCounters.TOOLS_GATK).increment(1);
    gatk.runVariantAnnotator(input, output, ref, region);

    // the un-annotated input is an intermediate file
    HalvadeFileUtils.removeLocalFile(keep, input, context, HalvadeCounters.FOUT_GATK_TMP);
  }
Esempio n. 7
0
  /**
   * Runs the GATK VariantFiltration on {@code input}, writing to {@code output}, and removes the
   * input file afterwards.
   *
   * <p>The filter settings {@code windows}, {@code cluster}, {@code minFS} and {@code maxQD} are
   * taken from the enclosing instance. Increments the {@code TOOLS_GATK} counter once.
   *
   * @param context task context used for status and counter updates
   * @param region region passed through to the GATK call
   * @param gatk GATK tool wrapper
   * @param input path of the input file; removed afterwards via {@code removeLocalFile}
   * @param output path the filter writes to
   * @throws InterruptedException propagated from the GATK call
   */
  // TODO improve annotate/filter
  protected void filterVariants(
      Context context, String region, GATKTools gatk, String input, String output)
      throws InterruptedException {
    final String stage = "run VariantFiltration";
    Logger.DEBUG(stage);
    context.setStatus(stage);

    context.getCounter(HalvadeCounters.TOOLS_GATK).increment(1);
    gatk.runVariantFiltration(input, output, ref, region, windows, cluster, minFS, maxQD);

    // the unfiltered input is an intermediate file
    HalvadeFileUtils.removeLocalFile(keep, input, context, HalvadeCounters.FOUT_GATK_TMP);
  }
Esempio n. 8
0
  /**
   * Performs indel realignment on {@code input} in two GATK steps: RealignerTargetCreator writes
   * an intervals file, which IndelRealigner then uses to produce {@code output}.
   *
   * <p>Increments the {@code TOOLS_GATK} counter once per tool. Afterwards the input file, its
   * companion {@code .bai} file and the intervals file are removed.
   *
   * @param context task context used for status and counter updates
   * @param region region passed through to both GATK calls
   * @param gatk GATK tool wrapper
   * @param input path of the input BAM file; removed afterwards via {@code removeLocalFile}
   * @param output path the realigner writes to
   * @throws InterruptedException propagated from the GATK calls
   */
  protected void indelRealignment(
      Context context, String region, GATKTools gatk, String input, String output)
      throws InterruptedException {
    String targets = tmpFileBase + ".intervals";

    Logger.DEBUG("run RealignerTargetCreator");
    context.setStatus("run RealignerTargetCreator");
    context.getCounter(HalvadeCounters.TOOLS_GATK).increment(1);
    gatk.runRealignerTargetCreator(input, targets, ref, region);

    Logger.DEBUG("run IndelRealigner");
    context.setStatus("run IndelRealigner");
    context.getCounter(HalvadeCounters.TOOLS_GATK).increment(1);
    gatk.runIndelRealigner(input, targets, output, ref, region);

    HalvadeFileUtils.removeLocalFile(keep, input, context, HalvadeCounters.FOUT_GATK_TMP);
    // Escape the dot and anchor at the end: the previous pattern ".bam" was a regex matching any
    // character followed by "bam" anywhere in the path, which could corrupt the index path.
    HalvadeFileUtils.removeLocalFile(keep, input.replaceAll("\\.bam$", ".bai"));
    HalvadeFileUtils.removeLocalFile(keep, targets, context, HalvadeCounters.FOUT_GATK_TMP);
  }
Esempio n. 9
0
  /**
   * Finishes alignment: closes the last BWA aln run, starts BWA sampe/samse, waits for it to
   * complete and then removes the temporary files of this task.
   *
   * <p>The execution time of the sampe/samse process is added to the {@code TIME_BWA_SAMPE}
   * counter. The shared {@code instance} reference is cleared at the end.
   *
   * @throws InterruptedException propagated from joining the stream handler or waiting for the
   *     process
   * @throws ProcessException if the sampe/samse process exits with a non-zero code
   */
  @Override
  public void closeAligner() throws InterruptedException {
    // finish the last BWA aln run
    closeBWAAln();

    // then run sampe/samse and wait for it
    startBWASamXe();
    ssh.join();
    final int exitCode = samxe.waitForCompletion();
    if (exitCode != 0) {
      throw new ProcessException("BWA samXe", exitCode);
    }
    context.getCounter(HalvadeCounters.TIME_BWA_SAMPE).increment(samxe.getExecutionTime());

    // remove all temporary fastq/sai files: for each read number, both file variants
    final boolean[] variants = {true, false};
    for (int readNr = 1; readNr <= 2; readNr++) {
      for (boolean variant : variants) {
        HalvadeFileUtils.removeLocalFile(
            keep,
            getFileName(tmpdir, taskId, variant, readNr),
            context,
            HalvadeCounters.FOUT_BWA_TMP);
      }
    }
    instance = null;
  }
Esempio n. 10
0
  /**
   * Performs base quality score recalibration on {@code input} in two GATK steps:
   * BaseRecalibrator builds a recalibration table from the known sites, and PrintReads applies it
   * to produce {@code output}.
   *
   * <p>The known-sites files are downloaded first. When {@code filterDBsnp} is set, each is
   * filtered to the range {@code r} via {@code tools.filterDBSnps}; files ending with
   * {@code .gz} are unzipped. Increments the {@code TOOLS_GATK} counter once per tool. Afterwards
   * the input file, its companion {@code .bai} file, the table and (only when filtering was
   * applied) the filtered known-sites files are removed.
   *
   * @param context task context used for status, counters and downloads
   * @param region region passed through to both GATK calls
   * @param r chromosome range used to filter the known sites
   * @param tools preprocessing tool wrapper used for the known-sites filtering
   * @param gatk GATK tool wrapper
   * @param input path of the input BAM file; removed afterwards via {@code removeLocalFile}
   * @param output path PrintReads writes to
   * @throws InterruptedException propagated from the called tools
   * @throws IOException propagated from the download and file utilities
   * @throws URISyntaxException propagated from the download utilities
   */
  protected void baseQualityScoreRecalibration(
      Context context,
      String region,
      ChromosomeRange r,
      PreprocessingTools tools,
      GATKTools gatk,
      String input,
      String output)
      throws InterruptedException, IOException, URISyntaxException {
    String table = tmpFileBase + ".table";
    // Only swap the trailing "fasta" extension: the previous unanchored replaceAll("fasta", ...)
    // also rewrote every "fasta" elsewhere in the path (e.g. a ".../fasta/..." directory).
    // Computed once since it does not depend on the loop below.
    String dict = ref.replaceAll("fasta$", "dict");

    // get snp database(s)
    String[] snpslocal = HalvadeFileUtils.downloadSites(context, taskId);
    String[] newKnownSites = new String[snpslocal.length];
    for (int i = 0; i < snpslocal.length; i++) {
      if (filterDBsnp) {
        newKnownSites[i] = tools.filterDBSnps(dict, snpslocal[i], r, tmpFileBase, threads);
      } else {
        newKnownSites[i] = snpslocal[i];
      }
      if (newKnownSites[i].endsWith(".gz")) {
        newKnownSites[i] = HalvadeFileUtils.Unzip(newKnownSites[i]);
      }
    }

    // No explicit BAM index build here; per the original note, GATK v3.0 or higher should create
    // it automatically.
    Logger.DEBUG("run baseRecalibrator");
    context.setStatus("run baseRecalibrator");
    context.getCounter(HalvadeCounters.TOOLS_GATK).increment(1);
    gatk.runBaseRecalibrator(input, table, ref, newKnownSites, region);

    Logger.DEBUG("run printReads");
    context.setStatus("run printReads");
    context.getCounter(HalvadeCounters.TOOLS_GATK).increment(1);
    gatk.runPrintReads(input, output, ref, table, region);

    HalvadeFileUtils.removeLocalFile(keep, input, context, HalvadeCounters.FOUT_GATK_TMP);
    // Escape the dot and anchor at the end: the previous pattern ".bam" was a regex matching any
    // character followed by "bam" anywhere in the path, which could corrupt the index path.
    HalvadeFileUtils.removeLocalFile(keep, input.replaceAll("\\.bam$", ".bai"));
    HalvadeFileUtils.removeLocalFile(keep, table, context, HalvadeCounters.FOUT_GATK_TMP);
    // Only the filtered copies are task-local temporaries; unfiltered downloads are left alone.
    if (filterDBsnp) {
      for (String site : newKnownSites) {
        HalvadeFileUtils.removeLocalFile(keep, site, context, HalvadeCounters.FOUT_GATK_TMP);
      }
    }
  }
Esempio n. 11
0
 /**
  * Initialises the task from the job configuration after the superclass setup has run.
  *
  * <p>Reads the RNA flag, the SCC/SEC values, the filter BED file, the dbSNP filter flag, the
  * UnifiedGenotyper flag, the redistribute flag and the map container count, and downloads the
  * GFF file. {@code tasksLeft} is the configured {@code mapred.map.tasks} value minus
  * {@code taskNr}. When redistribution is enabled and fewer tasks are left than there are
  * containers, {@code threads} is set to 6.
  *
  * @param context task context providing the configuration
  * @throws IOException propagated from the superclass setup and the GFF download
  * @throws InterruptedException propagated from the superclass setup; also thrown, with the
  *     original {@link URISyntaxException} attached as cause, when the GFF download fails on an
  *     invalid URI
  */
 @Override
 protected void setup(Context context) throws IOException, InterruptedException {
   super.setup(context);
   isFirstAttempt = taskId.endsWith("_0");
   isRNA = HalvadeConf.getIsRNA(context.getConfiguration());
   scc = HalvadeConf.getSCC(context.getConfiguration(), isRNA);
   sec = HalvadeConf.getSEC(context.getConfiguration(), isRNA);
   try {
     gff = HalvadeFileUtils.downloadGFF(context, taskId);
   } catch (URISyntaxException ex) {
     Logger.EXCEPTION(ex);
     // InterruptedException has no (message, cause) constructor, so attach the cause explicitly
     // instead of dropping it.
     InterruptedException wrapped =
         new InterruptedException("Error when downloading GFF file");
     wrapped.initCause(ex);
     throw wrapped;
   }
   filterBedFile = HalvadeConf.getFilterBed(context.getConfiguration());
   filterDBsnp = HalvadeConf.getFilterDBSnp(context.getConfiguration());
   useUnifiedGenotyper = HalvadeConf.getUseUnifiedGenotyper(context.getConfiguration());
   redistribute = HalvadeConf.getRedistribute(context.getConfiguration());
   containers = HalvadeConf.getMapContainerCount(context.getConfiguration());
   // number of map tasks that remain counted from this task's number
   tasksLeft = Integer.parseInt(context.getConfiguration().get("mapred.map.tasks")) - taskNr;
   // NOTE(review): presumably the last wave of tasks gets more threads because containers would
   // otherwise sit idle; the value 6 is hard-coded here. Confirm the intent.
   if (redistribute && tasksLeft < containers) {
     threads = 6;
   }
 }
Esempio n. 12
0
  /**
   * Preprocesses the records of {@code input} with the Picard tool chain and writes an indexed
   * BAM file to {@code output}.
   *
   * <p>Steps, in order: write all records to a temporary BAM file, run CleanSam, run
   * MarkDuplicates, optionally run featureCounts and upload its output when {@code gff} is set,
   * then either add read groups (non-BAM input) or convert SAM to BAM (BAM input), build the BAM
   * index and finally remove the temporary files.
   *
   * @param context task context used for status, counters and HDFS access
   * @param tools preprocessing tool wrapper
   * @param input iterator over the SAM records to process
   * @param output path of the resulting BAM file
   * @throws InterruptedException propagated from the called tools
   * @throws QualityException propagated from the called tools
   * @throws IOException propagated from the called tools and file utilities
   * @throws URISyntaxException if {@code outputdir} is not a valid URI
   */
  protected void PicardPreprocess(
      Context context, PreprocessingTools tools, SAMRecordIterator input, String output)
      throws InterruptedException, QualityException, IOException, URISyntaxException {
    outHeader = header.clone();
    outHeader.setSortOrder(SAMFileHeader.SortOrder.coordinate);
    // tmp files
    String tmpOut1 = tmpFileBase + "-p1.bam";
    String tmpOut2 = tmpFileBase + "-p2.bam";
    String tmpOut3 = tmpFileBase + "-p3.sam";
    String fCounts = tmpFileBase + "-features.count";
    String tmpMetrics = tmpFileBase + "-p3-metrics.txt";
    SAMFileWriterFactory factory = new SAMFileWriterFactory();
    if (!inputIsBam) {
      outHeader.addReadGroup(bamrg);
    }
    SAMFileWriter writer = factory.makeBAMWriter(outHeader, true, new File(tmpOut1));

    long startTime = System.currentTimeMillis();

    int count = 0;
    int reads;
    // Close the writer even when reading or writing a record fails, so the underlying file
    // handle is not leaked on the error path.
    try {
      while (input.hasNext()) {
        SAMRecord sam = input.next();
        writer.addAlignment(sam);
        count++;
      }
      reads = input.getCount();
    } finally {
      writer.close();
    }

    context.getCounter(HalvadeCounters.IN_PREP_READS).increment(reads);
    long estimatedTime = System.currentTimeMillis() - startTime;
    context.getCounter(HalvadeCounters.TIME_HADOOP_SAMTOBAM).increment(estimatedTime);
    Logger.DEBUG("time writing " + count + " records to disk: " + estimatedTime / 1000);

    Logger.DEBUG("clean sam");
    context.setStatus("clean sam");
    tools.runCleanSam(tmpOut1, tmpOut2);
    Logger.DEBUG("mark duplicates");
    context.setStatus("mark duplicates");
    tools.runMarkDuplicates(tmpOut2, tmpOut3, tmpMetrics);

    if (gff != null) {
      // tmpOut3 is written as SAM so it can be fed to the feature counting step
      Logger.DEBUG("featureCounts");
      context.setStatus("featureCounts");
      tools.runFeatureCounts(gff, tmpOut3, fCounts, threads);
      HalvadeFileUtils.uploadFileToHDFS(
          context,
          FileSystem.get(new URI(outputdir), context.getConfiguration()),
          fCounts,
          outputdir + context.getTaskAttemptID().toString() + ".count");
    }

    if (!inputIsBam) {
      Logger.DEBUG("add read-group");
      context.setStatus("add read-group");
      tools.runAddOrReplaceReadGroups(tmpOut3, output, RGID, RGLB, RGPL, RGPU, RGSM);
    } else {
      context.setStatus("convert SAM to BAM");
      Logger.DEBUG("convert SAM to BAM");
      tools.callSAMToBAM(tmpOut3, output, threads);
    }

    Logger.DEBUG("build bam index");
    context.setStatus("build bam index");
    tools.runBuildBamIndex(output);

    estimatedTime = System.currentTimeMillis() - startTime;
    Logger.DEBUG("estimated time: " + estimatedTime / 1000);

    // remove all temporary files now!
    HalvadeFileUtils.removeLocalFile(keep, tmpMetrics, context, HalvadeCounters.FOUT_GATK_TMP);
    HalvadeFileUtils.removeLocalFile(keep, tmpOut1, context, HalvadeCounters.FOUT_GATK_TMP);
    HalvadeFileUtils.removeLocalFile(keep, tmpOut2, context, HalvadeCounters.FOUT_GATK_TMP);
    HalvadeFileUtils.removeLocalFile(keep, tmpOut3, context, HalvadeCounters.FOUT_GATK_TMP);
    HalvadeFileUtils.removeLocalFile(keep, fCounts);
  }