/** * Check that all the ranges of the features that the Intact entry can contain in Intact are not * in conflict with the new sequence(s) proposed by the blast on Swissprot to replace the Trembl * entry. * * @param context : the context of the protein * @return the trembl accession in the context if the sequences of the Swissprot proteins have * some conflicts with the ranges of some features, the Swissprot accession if there is only * one Swissprot protein with no conflicts and null if there are several Swissprot proteins * with no conflict. * @throws uk.ac.ebi.intact.protein.mapping.actions.exception.ActionProcessingException */ public String runAction(IdentificationContext context) throws ActionProcessingException { // always clear the list of reports from previous actions this.listOfReports.clear(); IntactContext intactContext = IntactContext.getCurrentInstance(); // We need to have a specific context containing the previous Trembl accession which totally // matched the Intact protein and the possible // proteins from Swissprot which can replace the Trembl match if (!(context instanceof FeatureRangeCheckingContext)) { throw new ActionProcessingException( "We can't process a feature range checking if the context is a " + context.getClass().getSimpleName() + " and not a FeatureRangeCheckingContext instance."); } else { FeatureRangeCheckingContext processContext = (FeatureRangeCheckingContext) context; int initialNumberOfBlastProtein = processContext.getResultsOfSwissprotRemapping().size(); // Create a DefaultBlastReport BlastReport<BlastResults> report = getReportsFactory().getBlastReport(ActionName.feature_range_checking); this.listOfReports.add(report); // If there were no Swissprot proteins which can replace the Trembl entry, it is an error and // this action fails if (processContext.getResultsOfSwissprotRemapping().isEmpty()) { Status status = new Status( StatusLabel.FAILED, "We don't have any valid results from the Swissprot-remapping process, so we will keep the Trembl entry " + processContext.getTremblAccession()); report.setStatus(status); return processContext.getTremblAccession(); } DaoFactory factory = intactContext.getDaoFactory(); // get the components involving the Intact entry List<Component> components = factory.getComponentDao().getByInteractorAc(processContext.getIntactAccession()); // If there is no component containing this protein, we don't have conflicts with feature // ranges if (components.isEmpty()) { report.getBlastMatchingProteins().addAll(processContext.getResultsOfSwissprotRemapping()); } else { // to check that at least one component has a feature boolean hasAtLeastOneFeature = false; // to check when a conflict has been detected boolean hasRangeConflict = false; for (Component component : components) { Collection<Feature> features = component.getBindingDomains(); if (!features.isEmpty()) { hasAtLeastOneFeature = true; for (Feature feature : features) { Collection<Range> ranges = feature.getRanges(); for (Range range : ranges) { // undetermined ranges are not affected by the new sequence if (!range.isUndetermined()) { for (BlastResults protein : processContext.getResultsOfSwissprotRemapping()) { if (!checkRangeValidWithNewSequence(range, protein, report)) { hasRangeConflict = true; } else { // The sequence of this Blast protein is not in conflict with the range, we // can keep it among the Blast results in the report report.addBlastMatchingProtein(protein); } } } } } } } // If all the components didn't contain any feature, there is no conflict, we can keep the // previous blast results if (!hasAtLeastOneFeature) { report.getBlastMatchingProteins().addAll(processContext.getResultsOfSwissprotRemapping()); } else { // there is no conflict, we can keep the previous blast results if (!hasRangeConflict) { report .getBlastMatchingProteins() .addAll(processContext.getResultsOfSwissprotRemapping()); } } } if (report.getBlastMatchingProteins().isEmpty()) { Status status = new Status( StatusLabel.FAILED, "The swissprot remapping is not possible as there are some conflicts between the sequence of the Swissprot entry and some feature ranges of the protein " + processContext.getIntactAccession() + ". We will keep the Trembl entry " + processContext.getTremblAccession()); report.setStatus(status); return processContext.getTremblAccession(); } else if (report.getBlastMatchingProteins().size() < initialNumberOfBlastProtein) { Status status = new Status( StatusLabel.TO_BE_REVIEWED, processContext.getResultsOfSwissprotRemapping().size() + " Swissprot entries on the initial " + initialNumberOfBlastProtein + " matching Swissprot proteins have a conflict between their sequence and some feature ranges of the protein " + processContext.getIntactAccession()); report.setStatus(status); return processContext.getTremblAccession(); } else { if (report.getBlastMatchingProteins().size() == 1) { BlastResults swissprot = report.getBlastMatchingProteins().iterator().next(); Status status = new Status( StatusLabel.COMPLETED, "We don't have any conflicts between the sequence of the Swissprot entry " + swissprot.getAccession() + " and the feature ranges of the protein " + processContext.getIntactAccession()); report.setStatus(status); return swissprot.getAccession(); } else { Status status = new Status( StatusLabel.COMPLETED, "We don't have any conflicts between the sequence(s) of the " + report.getBlastMatchingProteins().size() + " possible Swissprot proteins and the feature ranges of the protein " + processContext.getIntactAccession()); report.setStatus(status); ArrayList<BlastResults> proteins = new ArrayList<BlastResults>(); // merge the isoforms proteins.addAll(report.getBlastMatchingProteins()); Set<String> accessions = mergeIsoformsFromBlastProteins(proteins); if (accessions.size() == 1) { return accessions.iterator().next(); } else { return processContext.getTremblAccession(); } } } } }
/** * Checks that the sequence of the BlastProtein is not in conflict with the feature ranges that * the protein can have in Intact * * @param range : the range of the feature to check * @param protein : the protein hit which could replace the old protein in Intact * @return true if there is no conflict between the sequence of this BlastProtein and the range */ private boolean checkRangeValidWithNewSequence( Range range, BlastResults protein, BlastReport report) { // The difference between the previous start position and the new one in the new sequence int diffStart = protein.getStartQuery() - protein.getStartMatch(); // The difference between the previous end position and the new one in the new sequence int diffEnd = protein.getEndQuery() - protein.getEndMatch(); // Shift the ranges in consequence int startFrom = range.getFromIntervalStart() - diffStart; int startTo = range.getToIntervalStart() - diffStart; int endFrom = range.getFromIntervalEnd() - diffStart; int endTo = range.getToIntervalEnd() - diffStart; // No ranges should be before the new start positions if (startFrom < protein.getStartMatch() || range.getFromIntervalStart() < protein.getStartQuery()) { report.addWarning( "The feature range is " + range.getFromIntervalStart() + "-" + range.getToIntervalStart() + " and the alignment with the Swissprot sequence starts after " + range.getFromIntervalStart() + ". We can't change the previous sequence with the sequence of the Swissprot entry because it will be incoherent with the current feature(s) of the protein."); return false; } // No ranges should be before the new start positions else if (startTo > protein.getEndMatch() || range.getToIntervalStart() > protein.getEndQuery()) { report.addWarning( "The feature range is " + range.getFromIntervalStart() + "-" + range.getToIntervalStart() + " and the alignment with the Swissprot sequence finishes before " + range.getToIntervalStart() + ". We can't change the previous sequence with the sequence of the Swissprot entry because it will be incoherent with the current feature(s) of the protein."); return false; } // No ranges should be after the new end positions else if (endFrom < protein.getStartMatch() || range.getFromIntervalEnd() < protein.getStartQuery()) { report.addWarning( "The feature range is " + range.getFromIntervalEnd() + "-" + range.getToIntervalEnd() + " and the alignment with the Swissprot sequence starts after " + range.getFromIntervalStart() + ". We can't change the previous sequence with the sequence of the Swissprot entry because it will be incoherent with the current feature(s) of the protein."); return false; } // No ranges should be after the new end positions else if (endTo > protein.getEndMatch() || range.getToIntervalEnd() > protein.getEndQuery()) { report.addWarning( "The feature range is " + range.getFromIntervalEnd() + "-" + range.getToIntervalEnd() + " and the alignment with the Swissprot sequence finishes before " + range.getToIntervalStart() + ". We can't change the previous sequence with the sequence of the Swissprot entry because it will be incoherent with the current feature(s) of the protein."); return false; } else { String rangeSequence = range.getFullSequence(); if (rangeSequence == null) { rangeSequence = range.getFullSequence(); } // Check that the amino acids involved in the feature ranges are identical if (startFrom > 0 && endTo > 0) { String rangeNewSequence = protein.getSequence().substring(startFrom - 1, endTo); if (rangeSequence == null) { report.addWarning( "The feature " + range.getFeature().getAc() + " doesn't contain any sequence or full sequence but has range positions. This entry needs to ce checked by a curator"); return false; } if (!rangeSequence.equals(rangeNewSequence)) { report.addWarning( "The sequence of the Swissprot entry from " + range.getFromIntervalStart() + " to " + range.getToIntervalStart() + " is different from the previous feature sequence, we can't replace the previous sequence with the sequence of the Swissprot entry."); return false; } } } return true; }
public void writeResults( FastaSequence fastaSequence, IdentificationResults<? extends MappingReport> identificationResults) throws IOException { if (identificationResults == null) { throw new IllegalArgumentException("Identification results are expected"); } if (fastaSequence == null) { throw new IllegalArgumentException("A fast sequence is expected"); } BufferedWriter writer = new BufferedWriter(new FileWriter(ouptutFile, true)); // PICR/swissprot remapping could identify one single uniprot id. We want to give the blast // results of swissprot remapping if necessary if (identificationResults.hasUniqueUniprotId()) { // fast identifier writer.write(fastaSequence.getIdentifier()); // we retrieved the uniprot id from PICR writer.write(NEW_COLUMN); writer.write(identificationResults.getFinalUniprotId()); // no other possible uniprot ids from PICR writer.write(NEW_COLUMN); writer.write(EMPTY); // check if last report is not blast remapping to be reviewed MappingReport report = identificationResults.getLastAction(); if (report != null && report.getStatusLabel().equals(StatusLabel.TO_BE_REVIEWED) && report.getName().equals(ActionName.BLAST_Swissprot_Remapping)) { BlastReport<BlastResults> blastReport = (BlastReport<BlastResults>) report; // we have one result that can match Set<BlastResults> blastResults = blastReport.getBlastMatchingProteins(); if (blastResults.size() == 1) { BlastResults blastResult = blastResults.iterator().next(); writeBlastResults(writer, blastResult, fastaSequence); } // we don't write the other results else { writeEmpytBlastResults(writer, fastaSequence); } } // no swissprot remapping to review else { writeEmpytBlastResults(writer, fastaSequence); } } else { // PICR sequence to be reviewed ? if yes, no blast to report MappingReport report = identificationResults.getLastAction(); if (report != null && report.getStatusLabel().equals(StatusLabel.TO_BE_REVIEWED) && (report.getName().equals(ActionName.PICR_sequence_Swissprot) || report.getName().equals(ActionName.PICR_sequence_Trembl))) { // fasta identifier writer.write(fastaSequence.getIdentifier()); // no unique uniprot id writer.write(NEW_COLUMN); writer.write(EMPTY); // possible uniprot from PICR writer.write(NEW_COLUMN); writer.write(StringUtils.join(report.getPossibleAccessions(), ", ")); writeEmpytBlastResults(writer, fastaSequence); } // for each blast to be reviewed, we need to write a line with blast results else if (report != null && report.getStatusLabel().equals(StatusLabel.TO_BE_REVIEWED) && report instanceof BlastReport) { BlastReport<BlastResults> blastReport = (BlastReport<BlastResults>) report; if (blastReport.getBlastMatchingProteins().isEmpty()) { writeEmptyResults(fastaSequence); } else { for (BlastResults blastResult : blastReport.getBlastMatchingProteins()) { // fasta identifier writer.write(fastaSequence.getIdentifier()); // no unique uniprot id writer.write(NEW_COLUMN); writer.write(EMPTY); // no other possible uniprot ids from PICR writer.write(NEW_COLUMN); writer.write(EMPTY); writeBlastResults(writer, blastResult, fastaSequence); } } } // no blast results else { writeEmptyResults(fastaSequence); } } writer.flush(); writer.close(); }