/**
 * Run the FastQC analysis modules over one {@link SequenceFile} and store the outcome.
 *
 * <p>Every sequence in the file is fed to each module, the module results are copied into an
 * {@link AnalysisFastQC} builder, and the built analysis is attached to the sequence file, which
 * is then saved through the repository.
 *
 * @param sequenceFile file to process
 * @throws FileProcessorException if any exception is raised while reading, analysing or saving
 */
private void processSingleFile(SequenceFile sequenceFile) throws FileProcessorException {
  Path inputPath = sequenceFile.getFile();
  String analysisDescription =
      messageSource.getMessage(
          "fastqc.file.processor.analysis.description", null, LocaleContextHolder.getLocale());
  AnalysisFastQC.AnalysisFastQCBuilder analysisBuilder =
      AnalysisFastQC.builder()
          .executionManagerAnalysisId(EXECUTION_MANAGER_ANALYSIS_ID)
          .description(analysisDescription);

  try {
    // Fully qualified: the FastQC reader shares its simple name with the SequenceFile parameter.
    uk.ac.babraham.FastQC.Sequence.SequenceFile reader =
        SequenceFactory.getSequenceFile(inputPath.toFile());

    BasicStats statsModule = new BasicStats();
    PerBaseQualityScores perBaseModule = new PerBaseQualityScores();
    PerSequenceQualityScores perSequenceModule = new PerSequenceQualityScores();
    OverRepresentedSeqs overRepresentedModule = new OverRepresentedSeqs();
    QCModule[] modules = {statsModule, perBaseModule, perSequenceModule, overRepresentedModule};

    logger.debug("Launching FastQC analysis modules on all sequences.");
    // Single pass over the file: each sequence read is handed to every module in turn.
    while (reader.hasNext()) {
      Sequence current = reader.next();
      for (QCModule qcModule : modules) {
        qcModule.processSequence(current);
      }
    }
    logger.debug("Finished FastQC analysis modules.");

    // Transfer each module's results onto the builder.
    handleBasicStats(statsModule, analysisBuilder);
    handlePerBaseQualityScores(perBaseModule, analysisBuilder);
    handlePerSequenceQualityScores(perSequenceModule, analysisBuilder);
    handleDuplicationLevel(overRepresentedModule.duplicationLevelModule(), analysisBuilder);
    Set<OverrepresentedSequence> overrepresented =
        handleOverRepresentedSequences(overRepresentedModule);

    logger.trace("Saving FastQC analysis.");
    analysisBuilder.overrepresentedSequences(overrepresented);

    sequenceFile.setFastQCAnalysis(analysisBuilder.build());
    sequenceFileRepository.save(sequenceFile);
  } catch (Exception cause) {
    // Any failure in the block above is logged and rethrown as the declared checked exception,
    // keeping the original exception as the cause.
    logger.error("FastQC failed to process the sequence file. Stack trace follows.", cause);
    throw new FileProcessorException("FastQC failed to parse the sequence file.", cause);
  }
}
/**
 * Convert the over-represented sequences held by a FastQC module into {@link
 * OverrepresentedSequence} objects.
 *
 * @param seqs the FastQC {@link OverRepresentedSeqs} module to read results from.
 * @return a set of {@link OverrepresentedSequence} built from the module's {@link
 *     OverrepresentedSeq} entries, or an empty set when the module returns {@code null}.
 */
private Set<OverrepresentedSequence> handleOverRepresentedSequences(OverRepresentedSeqs seqs) {
  OverrepresentedSeq[] fastqcEntries = seqs.getOverrepresentedSequences();

  if (fastqcEntries != null) {
    Set<OverrepresentedSequence> converted = new HashSet<>(fastqcEntries.length);
    for (int i = 0; i < fastqcEntries.length; i++) {
      OverrepresentedSeq entry = fastqcEntries[i];
      // Argument order is: sequence text, count, percentage, possible source.
      converted.add(
          new OverrepresentedSequence(
              entry.seq(),
              entry.count(),
              BigDecimal.valueOf(entry.percentage()),
              entry.contaminantHit()));
    }
    return converted;
  }

  // The module returned null, so there is nothing to convert.
  return Collections.emptySet();
}