/**
 * Returns the file name of the first VCF in {@code vcfList} that satisfies {@code vcfPredicate}.
 *
 * <p>When nothing matches, the outcome depends on {@code allowMissingFiles}: if it is set, an
 * empty string is returned; otherwise the lookup fails.
 *
 * @param vcfPredicate test used to select the VCF of interest
 * @param vcfList candidate VCFs, searched in encounter order
 * @return the matching VCF's file name, or {@code ""} when nothing matches and missing files are
 *     allowed
 * @throws java.util.NoSuchElementException if nothing matches and missing files are not allowed
 */
private String getVcfName(Predicate<? super VcfInfo> vcfPredicate, List<VcfInfo> vcfList) {
  java.util.Optional<VcfInfo> firstMatch = vcfList.stream().filter(vcfPredicate).findFirst();
  if (firstMatch.isPresent()) {
    return firstMatch.get().getFileName();
  }
  if (this.allowMissingFiles) {
    // Previously a placeholder VcfInfo with its file name set to "" was built just to read that
    // value back (assumes VcfInfo.getFileName() returns what setFileName() stored).
    return "";
  }
  // Same exception type that Optional.get() raised before, but with a message that says what
  // was actually missing.
  throw new java.util.NoSuchElementException(
      "No VCF in the supplied list matched the predicate, and missing files are not allowed");
}
/** Build the workflow!! */ @Override public void buildWorkflow() { try { this.init(); // Pull the repo. Job configJob = GitUtils.gitConfig(this.getWorkflow(), this.GITname, this.GITemail); Job copy = this.copyCredentials(configJob); Job pullRepo = GitUtils.pullRepo( this.getWorkflow(), this.GITPemFile, this.JSONrepo, this.JSONrepoName, this.JSONlocation); pullRepo.addParent(copy); Job installTools = this.installTools(copy); // indicate job is in downloading stage. String pathToScripts = this.getWorkflowBaseDir() + "/scripts"; Job move2download = GitUtils.gitMove( "queued-jobs", "downloading-jobs", this.getWorkflow(), this.JSONlocation, this.JSONrepoName, this.JSONfolderName, this.GITname, this.GITemail, this.gitMoveTestMode, this.JSONfileName, pathToScripts, installTools, pullRepo); Job move2running; if (!skipDownload) { move2running = doDownload(pathToScripts, move2download); } else { // If user is skipping download, then we will just move directly to runnning... move2running = GitUtils.gitMove( "downloading-jobs", "running-jobs", this.getWorkflow(), this.JSONlocation, this.JSONrepoName, this.JSONfolderName, this.GITname, this.GITemail, this.gitMoveTestMode, this.JSONfileName, pathToScripts, move2download); } Job statFiles = statInputFiles(move2running); Job sangerPassFilter = this.passFilterWorkflow(Pipeline.sanger, statFiles); Job broadPassFilter = this.passFilterWorkflow(Pipeline.broad, statFiles); Job dkfzemblPassFilter = this.passFilterWorkflow(Pipeline.dkfz_embl, statFiles); Job smufinPassFilter = this.passFilterWorkflow(Pipeline.smufin, statFiles); // ...No, we're not going to filter the Muse SNV file. // update all filenames to include ".pass-filtered." Function<String, String> addPassFilteredSuffix = (x) -> { return x.replace(".vcf.gz", ".pass-filtered.vcf.gz"); }; for (VcfInfo vInfo : this.vcfs) { // ...except for MUSE filenames. 
if (vInfo.getOriginatingPipeline() != Pipeline.muse) { vInfo.setFileName(addPassFilteredSuffix.apply(vInfo.getFileName())); } } // OxoG will run after move2running. Move2running will run after all the jobs that perform // input file downloads and file preprocessing have finished. List<Job> preprocessIndelsJobs = new ArrayList<Job>(this.tumours.size() * 3); for (int i = 0; i < this.tumours.size(); i++) { String tumourAliquotID = tumours.get(i).getAliquotID(); final String vcfNotFoundToken = "VCF_NOT_FOUND"; BiFunction<String, Predicate<VcfInfo>, String> generateVcfName = (s, p) -> "/" + s + "/" + this.vcfs .stream() .filter(vcfMatchesTypePipelineTumour(isIndel, p, tumourAliquotID)) .map(m -> m.getFileName()) .findFirst() .orElse(vcfNotFoundToken); String sangerIndelVcfName = generateVcfName.apply(this.sangerGnosID, CommonPredicates.isSanger); if (!sangerIndelVcfName.endsWith(vcfNotFoundToken)) { Job sangerPreprocessVCF = this.preProcessIndelVCF( sangerPassFilter, Pipeline.sanger, sangerIndelVcfName, this.tumours.get(i).getAliquotID()); preprocessIndelsJobs.add(sangerPreprocessVCF); } String dkfzEmblIndelVcfName = generateVcfName.apply(this.dkfzemblGnosID, CommonPredicates.isDkfzEmbl); if (!dkfzEmblIndelVcfName.endsWith(vcfNotFoundToken)) { Job dkfzEmblPreprocessVCF = this.preProcessIndelVCF( dkfzemblPassFilter, Pipeline.dkfz_embl, dkfzEmblIndelVcfName, this.tumours.get(i).getAliquotID()); preprocessIndelsJobs.add(dkfzEmblPreprocessVCF); } String broadIndelVcfName = generateVcfName.apply(this.broadGnosID, CommonPredicates.isBroad); if (!broadIndelVcfName.endsWith(vcfNotFoundToken)) { Job broadPreprocessVCF = this.preProcessIndelVCF( broadPassFilter, Pipeline.broad, broadIndelVcfName, this.tumours.get(i).getAliquotID()); preprocessIndelsJobs.add(broadPreprocessVCF); } // smufin INDEL VCFs will be in /datastore/vcf/smufin - they will not be nested in a GNOS // ID-named directory. 
String smufinIndelVcfName = generateVcfName.apply("", CommonPredicates.isSmufin); if (!smufinIndelVcfName.endsWith(vcfNotFoundToken)) { Job smufinPreprocessVCF = this.preProcessIndelVCF( smufinPassFilter, Pipeline.smufin, smufinIndelVcfName, this.tumours.get(i).getAliquotID()); preprocessIndelsJobs.add(smufinPreprocessVCF); } } // TODO: This probably doesn't need to be a list anymore. List<Job> combineVCFJobs = new ArrayList<Job>(this.tumours.size()); Job combineVCFJob = this.combineVCFsByType( preprocessIndelsJobs.toArray(new Job[preprocessIndelsJobs.size()])); combineVCFJobs.add(combineVCFJob); List<Job> oxogJobs = new ArrayList<Job>(this.tumours.size()); for (int i = 0; i < this.tumours.size(); i++) { TumourInfo tInf = this.tumours.get(i); Job oxoG; if (i > 0) { // OxoG jobs can put a heavy CPU load on the system (in bursts) so probably better to run // them in sequence. // If there is > 1 OxoG job (i.e. a multi-tumour donor), each OxoG job should have the // prior OxoG job as its parent. oxoG = this.doOxoG( tInf.getTumourBamGnosID() + "/" + tInf.getTumourBAMFileName(), tInf.getAliquotID(), oxogJobs.get(i - 1)); } else { oxoG = this.doOxoG( tInf.getTumourBamGnosID() + "/" + tInf.getTumourBAMFileName(), tInf.getAliquotID()); } for (int j = 0; j < combineVCFJobs.size(); j++) { oxoG.addParent(combineVCFJobs.get(j)); } oxogJobs.add(oxoG); } String pathToNormalMinibam = "/datastore/bam/normal/" + (DownloadMethod.valueOf(this.bamDownloadMethod) != DownloadMethod.filesystemCopy ? this.normalBamGnosID + "/" : "") + this.normalBAMFileName; Job normalVariantBam = this.doVariantBam( BAMType.normal, pathToNormalMinibam, this.normalBAMFileName, this.normalAliquotID, combineVCFJobs.toArray(new Job[combineVCFJobs.size()])); List<Job> parentJobsToAnnotationJobs = new ArrayList<Job>(this.tumours.size()); // create a list of tumour variant-bam jobs. 
List<Job> variantBamJobs = new ArrayList<Job>(this.tumours.size() + 1); for (int i = 0; i < this.tumours.size(); i++) { TumourInfo tInfo = this.tumours.get(i); String pathToTumourMinibam = "/datastore/bam/tumour/" + (DownloadMethod.valueOf(this.bamDownloadMethod) != DownloadMethod.filesystemCopy ? tInfo.getTumourBamGnosID() + "/" : "") + tInfo.getTumourBAMFileName(); Job tumourVariantBam = this.doVariantBam( BAMType.tumour, pathToTumourMinibam, tInfo.getTumourBAMFileName(), tInfo.getAliquotID(), combineVCFJobs.toArray(new Job[combineVCFJobs.size()])); variantBamJobs.add(tumourVariantBam); } variantBamJobs.add(normalVariantBam); // Now that we've built our list of variantbam and oxog jobs, we can set up the proper // parent-child relationships between them. // The idea is to run 1 OxoG at the same time as 2 variantbam jobs. for (int i = 2; i < Math.max(variantBamJobs.size(), variantBamJobs.size()); i += 2) { variantBamJobs.get(i).addParent(variantBamJobs.get(i - 2)); if (i + 1 < variantBamJobs.size()) { variantBamJobs.get(i + 1).addParent(variantBamJobs.get(i - 2)); } } Job minibamSanityCheck = this.getWorkflow().createBashJob("Check minibams"); String moveToFailed = GitUtils.gitMoveCommand( "running-jobs", "failed-jobs", this.JSONlocation + "/" + this.JSONrepoName + "/" + this.JSONfolderName, this.JSONfileName, this.gitMoveTestMode, this.getWorkflowBaseDir() + "/scripts/"); // A list of all pass-filtered files that the minibams will be checked against. // NOTE: muse will not have any pass-filtered files and smufin will only have indel (or // SNV-from-indel) files. String filesToCheck = this.vcfs .stream() .filter( p -> p.getFileName().endsWith(".vcf.gz") && p.getFileName().contains("pass-filter")) .map( m -> "/datastore/vcf/" + m.getOriginatingPipeline().toString() + "/" + (m.getOriginatingPipeline() != Pipeline.smufin ? 
m.getPipelineGnosID() + "/" : "/") + m.getFileName()) .sorted() .reduce("", (a, b) -> a += " " + b); minibamSanityCheck.setCommand( "(bash " + pathToScripts + "/check_minibams.sh " + filesToCheck + ") || " + moveToFailed); variantBamJobs.stream().forEach(job -> minibamSanityCheck.addParent(job)); parentJobsToAnnotationJobs.add(minibamSanityCheck); // set up parent jobs to annotation jobs oxogJobs.stream().forEach(job -> parentJobsToAnnotationJobs.add(job)); List<Job> annotationJobs = new ArrayList<Job>(); if (!this.skipAnnotation) { annotationJobs = this.doAnnotations( parentJobsToAnnotationJobs.toArray(new Job[parentJobsToAnnotationJobs.size()])); } // Now do the Upload. The parents jobs are the Annotation jobs, unless the user set // skipAnnotations=true, so there will // not be any annotation jobs. In that case Upload's parents are the jobs that would have been // parents to Annotation. Job[] parentsToUpload = (annotationJobs != null && annotationJobs.size() > 0) ? annotationJobs.toArray(new Job[annotationJobs.size()]) : parentJobsToAnnotationJobs.toArray(new Job[parentJobsToAnnotationJobs.size()]); // indicate job is in uploading stage. Job move2uploading = this.gitMove("running-jobs", "uploading-jobs", parentsToUpload); Job uploadResults = doUpload(move2uploading); // indicate job is complete. this.gitMove("uploading-jobs", "completed-jobs", uploadResults); // System.out.println(this.filesForUpload); } catch (Exception e) { throw new RuntimeException("Exception caught: " + e.getMessage(), e); } }