public static float getQueryCoveragePercentFor(BlastResults protein, String sequence) {
  // Purpose: percentage of the query sequence covered by the alignment, computed as
  // (endQuery - startQuery + 1) / sequence.length() * 100.
  //
  // protein  : the blast hit carrying the query start/end positions of the alignment
  // sequence : the query sequence the alignment is measured against
  // returns  : the coverage as a percentage, or 0 when there is no hit or no usable sequence
  //
  // Without the sequence guards a null sequence throws a NullPointerException and an empty one
  // makes the float division yield Infinity/NaN, which would then end up in the written report.
  if (protein == null || sequence == null || sequence.length() == 0) {
    return 0;
  }

  // Both positions are inclusive, hence the + 1.
  int alignedLength = protein.getEndQuery() - protein.getStartQuery() + 1;
  return (float) alignedLength / (float) sequence.length() * 100;
}
/**
 * Computes the percentage of the match sequence covered by the alignment, i.e.
 * {@code (endMatch - startMatch + 1) / matchSequence.length() * 100}.
 *
 * @param protein the blast protein; may be null
 * @return the sequence coverage of the alignment for the match sequence, as a percentage; 0 when
 *     {@code protein} is null or when it carries no (or an empty) sequence
 */
public static float getMatchCoveragePercentFor(BlastResults protein) {
  if (protein == null) {
    return 0;
  }

  // A missing sequence would throw a NullPointerException and an empty one would make the float
  // division yield Infinity/NaN; report "no coverage" instead.
  String matchSequence = protein.getSequence();
  if (matchSequence == null || matchSequence.length() == 0) {
    return 0;
  }

  // Both positions are inclusive, hence the + 1.
  int alignedLength = protein.getEndMatch() - protein.getStartMatch() + 1;
  return (float) alignedLength / (float) matchSequence.length() * 100;
}
private void writeBlastResults( Writer writer, BlastResults blastResult, FastaSequence fastaSequence) throws IOException { // candidate uniprot writer.write(NEW_COLUMN); writer.write(blastResult.getAccession()); // write identity writer.write(NEW_COLUMN); writer.write(Float.toString(blastResult.getIdentity())); // write sequence coverages writer.write(NEW_COLUMN); writer.write( Float.toString(getQueryCoveragePercentFor(blastResult, fastaSequence.getSequence()))); writer.write(NEW_COLUMN); writer.write(Float.toString(getMatchCoveragePercentFor(blastResult))); // write start/end writer.write(NEW_COLUMN); writer.write(Integer.toString(blastResult.getStartQuery())); writer.write("-"); writer.write(Integer.toString(blastResult.getEndQuery())); writer.write(NEW_COLUMN); writer.write(Integer.toString(blastResult.getStartMatch())); writer.write("-"); writer.write(Integer.toString(blastResult.getEndMatch())); // write query sequence writer.write(NEW_COLUMN); writer.write(fastaSequence.getSequence()); // write alignment writer.write(NEW_COLUMN); writer.write(blastResult.getAlignment()); // write matching sequence writer.write(NEW_COLUMN); writer.write(blastResult.getSequence()); writer.write(NEW_LINE); writer.flush(); }
/** * Checks that the sequence of the BlastProtein is not in conflict with the feature ranges that * the protein can have in Intact * * @param range : the range of the feature to check * @param protein : the protein hit which could replace the old protein in Intact * @return true if there is no conflict between the sequence of this BlastProtein and the range */ private boolean checkRangeValidWithNewSequence( Range range, BlastResults protein, BlastReport report) { // The difference between the previous start position and the new one in the new sequence int diffStart = protein.getStartQuery() - protein.getStartMatch(); // The difference between the previous end position and the new one in the new sequence int diffEnd = protein.getEndQuery() - protein.getEndMatch(); // Shift the ranges in consequence int startFrom = range.getFromIntervalStart() - diffStart; int startTo = range.getToIntervalStart() - diffStart; int endFrom = range.getFromIntervalEnd() - diffStart; int endTo = range.getToIntervalEnd() - diffStart; // No ranges should be before the new start positions if (startFrom < protein.getStartMatch() || range.getFromIntervalStart() < protein.getStartQuery()) { report.addWarning( "The feature range is " + range.getFromIntervalStart() + "-" + range.getToIntervalStart() + " and the alignment with the Swissprot sequence starts after " + range.getFromIntervalStart() + ". We can't change the previous sequence with the sequence of the Swissprot entry because it will be incoherent with the current feature(s) of the protein."); return false; } // No ranges should be before the new start positions else if (startTo > protein.getEndMatch() || range.getToIntervalStart() > protein.getEndQuery()) { report.addWarning( "The feature range is " + range.getFromIntervalStart() + "-" + range.getToIntervalStart() + " and the alignment with the Swissprot sequence finishes before " + range.getToIntervalStart() + ". 
We can't change the previous sequence with the sequence of the Swissprot entry because it will be incoherent with the current feature(s) of the protein."); return false; } // No ranges should be after the new end positions else if (endFrom < protein.getStartMatch() || range.getFromIntervalEnd() < protein.getStartQuery()) { report.addWarning( "The feature range is " + range.getFromIntervalEnd() + "-" + range.getToIntervalEnd() + " and the alignment with the Swissprot sequence starts after " + range.getFromIntervalStart() + ". We can't change the previous sequence with the sequence of the Swissprot entry because it will be incoherent with the current feature(s) of the protein."); return false; } // No ranges should be after the new end positions else if (endTo > protein.getEndMatch() || range.getToIntervalEnd() > protein.getEndQuery()) { report.addWarning( "The feature range is " + range.getFromIntervalEnd() + "-" + range.getToIntervalEnd() + " and the alignment with the Swissprot sequence finishes before " + range.getToIntervalStart() + ". We can't change the previous sequence with the sequence of the Swissprot entry because it will be incoherent with the current feature(s) of the protein."); return false; } else { String rangeSequence = range.getFullSequence(); if (rangeSequence == null) { rangeSequence = range.getFullSequence(); } // Check that the amino acids involved in the feature ranges are identical if (startFrom > 0 && endTo > 0) { String rangeNewSequence = protein.getSequence().substring(startFrom - 1, endTo); if (rangeSequence == null) { report.addWarning( "The feature " + range.getFeature().getAc() + " doesn't contain any sequence or full sequence but has range positions. 
This entry needs to ce checked by a curator"); return false; } if (!rangeSequence.equals(rangeNewSequence)) { report.addWarning( "The sequence of the Swissprot entry from " + range.getFromIntervalStart() + " to " + range.getToIntervalStart() + " is different from the previous feature sequence, we can't replace the previous sequence with the sequence of the Swissprot entry."); return false; } } } return true; }
/** * Check that all the ranges of the features that the Intact entry can contain in Intact are not * in conflict with the new sequence(s) proposed by the blast on Swissprot to replace the Trembl * entry. * * @param context : the context of the protein * @return the trembl accession in the context if the sequences of the Swissprot proteins have * some conflicts with the ranges of some features, the Swissprot accession if there is only * one Swissprot protein with no conflicts and null if there are several Swissprot proteins * with no conflict. * @throws uk.ac.ebi.intact.protein.mapping.actions.exception.ActionProcessingException */ public String runAction(IdentificationContext context) throws ActionProcessingException { // always clear the list of reports from previous actions this.listOfReports.clear(); IntactContext intactContext = IntactContext.getCurrentInstance(); // We need to have a specific context containing the previous Trembl accession which totally // matched the Intact protein and the possible // proteins from Swissprot which can replace the Trembl match if (!(context instanceof FeatureRangeCheckingContext)) { throw new ActionProcessingException( "We can't process a feature range checking if the context is a " + context.getClass().getSimpleName() + " and not a FeatureRangeCheckingContext instance."); } else { FeatureRangeCheckingContext processContext = (FeatureRangeCheckingContext) context; int initialNumberOfBlastProtein = processContext.getResultsOfSwissprotRemapping().size(); // Create a DefaultBlastReport BlastReport<BlastResults> report = getReportsFactory().getBlastReport(ActionName.feature_range_checking); this.listOfReports.add(report); // If there were no Swissprot proteins which can replace the Trembl entry, it is an error and // this action fails if (processContext.getResultsOfSwissprotRemapping().isEmpty()) { Status status = new Status( StatusLabel.FAILED, "We don't have any valid results from the Swissprot-remapping process, so we will 
keep the Trembl entry " + processContext.getTremblAccession()); report.setStatus(status); return processContext.getTremblAccession(); } DaoFactory factory = intactContext.getDaoFactory(); // get the components involving the Intact entry List<Component> components = factory.getComponentDao().getByInteractorAc(processContext.getIntactAccession()); // If there is no component containing this protein, we don't have conflicts with feature // ranges if (components.isEmpty()) { report.getBlastMatchingProteins().addAll(processContext.getResultsOfSwissprotRemapping()); } else { // to check that at least one component has a feature boolean hasAtLeastOneFeature = false; // to check when a conflict has been detected boolean hasRangeConflict = false; for (Component component : components) { Collection<Feature> features = component.getBindingDomains(); if (!features.isEmpty()) { hasAtLeastOneFeature = true; for (Feature feature : features) { Collection<Range> ranges = feature.getRanges(); for (Range range : ranges) { // undetermined ranges are not affected by the new sequence if (!range.isUndetermined()) { for (BlastResults protein : processContext.getResultsOfSwissprotRemapping()) { if (!checkRangeValidWithNewSequence(range, protein, report)) { hasRangeConflict = true; } else { // The sequence of this Blast protein is not in conflict with the range, we // can keep it among the Blast results in the report report.addBlastMatchingProtein(protein); } } } } } } } // If all the components didn't contain any feature, there is no conflict, we can keep the // previous blast results if (!hasAtLeastOneFeature) { report.getBlastMatchingProteins().addAll(processContext.getResultsOfSwissprotRemapping()); } else { // there is no conflict, we can keep the previous blast results if (!hasRangeConflict) { report .getBlastMatchingProteins() .addAll(processContext.getResultsOfSwissprotRemapping()); } } } if (report.getBlastMatchingProteins().isEmpty()) { Status status = new Status( 
StatusLabel.FAILED, "The swissprot remapping is not possible as there are some conflicts between the sequence of the Swissprot entry and some feature ranges of the protein " + processContext.getIntactAccession() + ". We will keep the Trembl entry " + processContext.getTremblAccession()); report.setStatus(status); return processContext.getTremblAccession(); } else if (report.getBlastMatchingProteins().size() < initialNumberOfBlastProtein) { Status status = new Status( StatusLabel.TO_BE_REVIEWED, processContext.getResultsOfSwissprotRemapping().size() + " Swissprot entries on the initial " + initialNumberOfBlastProtein + " matching Swissprot proteins have a conflict between their sequence and some feature ranges of the protein " + processContext.getIntactAccession()); report.setStatus(status); return processContext.getTremblAccession(); } else { if (report.getBlastMatchingProteins().size() == 1) { BlastResults swissprot = report.getBlastMatchingProteins().iterator().next(); Status status = new Status( StatusLabel.COMPLETED, "We don't have any conflicts between the sequence of the Swissprot entry " + swissprot.getAccession() + " and the feature ranges of the protein " + processContext.getIntactAccession()); report.setStatus(status); return swissprot.getAccession(); } else { Status status = new Status( StatusLabel.COMPLETED, "We don't have any conflicts between the sequence(s) of the " + report.getBlastMatchingProteins().size() + " possible Swissprot proteins and the feature ranges of the protein " + processContext.getIntactAccession()); report.setStatus(status); ArrayList<BlastResults> proteins = new ArrayList<BlastResults>(); // merge the isoforms proteins.addAll(report.getBlastMatchingProteins()); Set<String> accessions = mergeIsoformsFromBlastProteins(proteins); if (accessions.size() == 1) { return accessions.iterator().next(); } else { return processContext.getTremblAccession(); } } } } }