/** * Uncompress a file to a temporary file. * * @param context Step context * @param file file to uncompress * @return the path to the uncompressed file * @throws IOException if an error occurs while creating the uncompressed file */ private File uncompressFile(final TaskContext context, final DataFile file) throws IOException { checkNotNull(file, "file argument cannot be null"); final DataFile realFile; final DataProtocol protocol = file.getProtocol(); // Get the underlying file if the file protocol is a storage protocol if (protocol instanceof StorageDataProtocol) { realFile = ((StorageDataProtocol) protocol).getUnderLyingData(file); } else { realFile = file; } final File outputFile = Files.createTempFile( context.getLocalTempDirectory().toPath(), MODULE_NAME + "-", realFile.getExtension()) .toFile(); context.getLogger().fine("Uncompress/copy " + realFile + " to " + outputFile); DataFiles.copy(realFile, new DataFile(outputFile)); return outputFile; }
/** * Uncompress a file if compressed. * * @param context the step context * @param temporaryFiles the list of temporary files * @param file the file to process * @return the absolute path of the file (once uncompressed or not) * @throws IOException if an error occurs while uncompressing the file */ private File uncompressFileIfNecessary( final TaskContext context, List<File> temporaryFiles, final DataFile file) throws IOException { checkNotNull(file, "file argument cannot be null"); final File result; if (file.getCompressionType() != CompressionType.NONE || !file.isLocalFile()) { // Uncompress file final File uncompressedFile = uncompressFile(context, file); // Add the temporary file to the file of the file to remove temporaryFiles.add(uncompressedFile); result = uncompressedFile; } else { result = file.toFile(); } return result; }
/**
 * Public constructor. Open the workflow file with {@code file.open()} and
 * delegate to the constructor of this class that accepts the opened stream.
 *
 * <p>NOTE(review): the stream opened here is not closed by this constructor;
 * presumably the delegated constructor or the parsing code takes ownership of
 * it - confirm.
 *
 * @param file the workflow file, must not be null as it is dereferenced
 *     without any check
 * @throws IOException if an error occurs while opening the file
 */
public CommandWorkflowParser(final DataFile file) throws IOException {
  // An explicit constructor call must be the first statement, so the file is
  // opened inline in the argument
  this(file.open());
}
@Override public void merge(final Iterator<DataFile> inFileIterator, DataFile outFile) throws IOException { final Multiset<String> counts = HashMultiset.create(); final Set<String> emptyCounts = new HashSet<>(); while (inFileIterator.hasNext()) { // Get input file final DataFile inFile = inFileIterator.next(); EoulsanLogger.getLogger().info("Merge " + inFile.getName() + " to " + outFile.getName()); boolean first = true; try (BufferedReader reader = new BufferedReader(new InputStreamReader(inFile.open()))) { String line = null; while ((line = reader.readLine()) != null) { // Do no handle header if (first) { first = false; continue; } final int tabPos = line.indexOf('\t'); // Do not handle empty or invalid lines if (tabPos == -1) { continue; } try { final String id = line.substring(0, tabPos).trim(); final int count = Integer.parseInt(line.substring(tabPos).trim()); if (count == 0) { emptyCounts.add(id); } counts.add(id, count); } catch (NumberFormatException e) { // Do not handle parsing errors } } } } // Write the result file try (Writer writer = new OutputStreamWriter(outFile.create())) { writer.write(ExpressionSplitter.EXPRESSION_FILE_HEADER); // Write the non empty counts for (Multiset.Entry<String> e : counts.entrySet()) { final String id = e.getElement(); // Remove the id from empty counts emptyCounts.remove(id); // Write the entry writer.write(id + '\t' + e.getCount() + '\n'); } // Write the empty counts for (String id : emptyCounts) { writer.write(id + "\t0\n"); } } }
@Override public TaskResult execute(final TaskContext context, final TaskStatus status) { try { final StringBuilder additionalArguments = new StringBuilder(); final Map<String, String> additionalDescription = new HashMap<>(); final List<File> temporaryFiles = new ArrayList<>(); // Search expression parameter is needed if (this.useExpressionStepParameters) { searchExpressionStepParameters(context); } if (this.gtfFile) { // Get the annotation data final Data annotationData = context.getInputData(this.gtfFormat ? ANNOTATION_GTF : ANNOTATION_GFF); // Get the annotation DataFile final DataFile gffFile = annotationData.getDataFile(); final File gffFilePath = uncompressFileIfNecessary(context, temporaryFiles, gffFile); additionalArguments.append("--sjdbGTFfile"); additionalArguments.append(' '); additionalArguments.append(gffFilePath.getAbsolutePath()); additionalArguments.append(' '); additionalDescription.put("sjdbGTFfile", computeMD5SumFile(gffFilePath)); } if (this.overhang != null) { additionalArguments.append("--sjdbOverhang"); additionalArguments.append(' '); additionalArguments.append(this.overhang.toString()); additionalArguments.append(' '); additionalDescription.put("sjdbOverhang", this.overhang.toString()); } if (this.gtfTagExonParentTranscript != null) { additionalArguments.append("--sjdbGTFtagExonParentTranscript"); additionalArguments.append(' '); additionalArguments.append(this.gtfTagExonParentTranscript); additionalArguments.append(' '); additionalDescription.put( "sjdbGTFtagExonParentTranscript", this.gtfTagExonParentTranscript); } if (this.gtfFeatureExon != null) { additionalArguments.append("--sjdbGTFfeatureExon"); additionalArguments.append(' '); additionalArguments.append(this.gtfFeatureExon); additionalArguments.append(' '); additionalDescription.put("sjdbGTFfeatureExon", this.gtfFeatureExon); } if (this.chrStartEndFilename != null) { DataFile chrStartEndFile = new DataFile(this.chrStartEndFilename); if (!chrStartEndFile.exists()) { throw new 
IOException("Unable to read chromosome startend file: " + chrStartEndFile); } final File chrStartEndFilePath = uncompressFileIfNecessary(context, temporaryFiles, chrStartEndFile); additionalArguments.append("--sjdbFileChrStartEnd"); additionalArguments.append(' '); additionalArguments.append(chrStartEndFilePath.getAbsolutePath()); additionalArguments.append(' '); additionalDescription.put("sjdbFileChrStartEnd", computeMD5SumFile(chrStartEndFilePath)); } if (this.genomeSAindexNbases != null) { additionalArguments.append("--genomeSAindexNbases"); additionalArguments.append(' '); additionalArguments.append(this.genomeSAindexNbases.toString()); additionalArguments.append(' '); additionalDescription.put("genomeSAindexNbases", this.genomeSAindexNbases.toString()); } if (this.genomeChrBinNbits != null) { additionalArguments.append("--genomeChrBinNbits"); additionalArguments.append(' '); additionalArguments.append(this.genomeChrBinNbits.toString()); additionalArguments.append(' '); additionalDescription.put("genomeChrBinNbits", this.genomeChrBinNbits.toString()); } status.setProgressMessage(this.mapper.getMapperName() + " index creation"); // Create the index GenomeMapperIndexGeneratorModule.execute( this.mapper, context, additionalArguments.toString(), additionalDescription, Common.getThreadsNumber(this.localThreads, this.maxLocalThreads)); // Remove temporary files for (File temporaryFile : temporaryFiles) { if (!temporaryFile.delete()) { context.getLogger().warning("Cannot remove temporary file: " + temporaryFile); } } } catch (IOException | EoulsanException e) { return status.createTaskResult(e); } return status.createTaskResult(); }