/**
 * Loads every Protein whose primary key lies in the closed interval [bottom, top], fetching the
 * matches and cross references of each protein as part of the same query (both associations are
 * join-fetched).
 *
 * <p>When trace logging is enabled, the range, the number of proteins found, and the ID, MD5,
 * match count and cross reference identifiers of each protein are logged.
 *
 * @param bottom range lower bound (included)
 * @param top range upper bound (included)
 * @return the list of proteins produced by the query, with matches and cross references fetched.
 */
@Override
@Transactional(readOnly = true)
@SuppressWarnings("unchecked")
public List<Protein> getProteinsAndMatchesAndCrossReferencesBetweenIds(long bottom, long top) {
    final Query rangeQuery =
            entityManager
                    .createQuery(
                            "select distinct p from Protein p "
                                    + "left outer join fetch p.matches "
                                    + "left outer join fetch p.crossReferences "
                                    + "where p.id >= :bottom and p.id <= :top")
                    .setParameter("bottom", bottom)
                    .setParameter("top", top);
    final List<Protein> proteinsInRange = (List<Protein>) rangeQuery.getResultList();

    // Everything below is diagnostic output only; skip it unless trace logging is on.
    if (!LOGGER.isTraceEnabled()) {
        return proteinsInRange;
    }

    LOGGER.trace("Querying proteins with IDs in range: " + bottom + " to " + top);
    LOGGER.trace("Matching protein count: " + proteinsInRange.size());
    for (Protein found : proteinsInRange) {
        LOGGER.trace("Protein ID: " + found.getId() + " MD5: " + found.getMd5());
        LOGGER.trace("Has " + found.getMatches().size() + " matches");
        for (ProteinXref crossReference : found.getCrossReferences()) {
            LOGGER.trace("Xref: " + crossReference.getIdentifier());
        }
    }
    return proteinsInRange;
}
/**
 * Marshals a Protein carrying a single Phobius match (one location, 1..2, signature
 * SIGNAL_PEPTIDE) and checks that the expected elements and attributes appear in the XML.
 *
 * <p>A marshalling failure makes the test fail instead of being logged and ignored, so the
 * assertions can never be skipped silently.
 */
@Test
public void testMarshalPhobiusMatch() {
    init();

    // Build the fixture: one protein with one Phobius match at a single location.
    final Set<PhobiusMatch.PhobiusLocation> locations =
            new HashSet<PhobiusMatch.PhobiusLocation>();
    locations.add(new PhobiusMatch.PhobiusLocation(1, 2));
    final Signature signature =
            new Signature.Builder("SIGNAL_PEPTIDE").name("Signal Peptide").build();
    final PhobiusMatch match = new PhobiusMatch(signature, locations);
    final Protein protein = new Protein("aaa");
    protein.addMatch(match);

    final String result;
    try {
        result = marshal(marshaller, protein);
    } catch (IOException e) {
        // Previously this was only logged, which let the test pass without asserting anything.
        throw new IllegalStateException("Couldn't marshal protein object!", e);
    }

    if (LOGGER.isDebugEnabled()) {
        LOGGER.debug(result);
    }

    assertNotNull("XML result shouldn't be null!", result);
    assertTrue(result.contains("<matches>"));
    assertTrue(result.contains("<phobius-match>"));
    assertTrue(result.contains("<signature"));
    assertTrue(result.contains("name=\"Signal Peptide\""));
    assertTrue(result.contains("ac=\"SIGNAL_PEPTIDE\""));
    assertTrue(result.contains("<locations>"));
    assertTrue(result.contains("<phobius-location"));
    assertTrue(result.contains("end=\"2\""));
    assertTrue(result.contains("start=\"1\""));
}
/** * Writes out all protein matches for the specified protein (GFF formatted). * * @param protein containing matches to be written out * @return the number of rows printed (i.e. the number of Locations on Matches). * @throws java.io.IOException in the event of I/O problem writing out the file. */ public int write(Protein protein) throws IOException { List<String> proteinIdsForGFF = getProteinAccessions(protein); int sequenceLength = protein.getSequenceLength(); String md5 = protein.getMd5(); String date = dmyFormat.format(new Date()); Set<Match> matches = protein.getMatches(); // Write sequence region information for (String proteinIdForGFF : proteinIdsForGFF) { if (matches.size() > 0) { // Check if protein accessions are GFF3 valid proteinIdForGFF = ProteinMatchesGFFResultWriter.getValidGFF3SeqId(proteinIdForGFF); // Write sequence-region super.gffWriter.write("##sequence-region " + proteinIdForGFF + " 1 " + sequenceLength); if (writeFullGFF) { writeReferenceLine(proteinIdForGFF, sequenceLength, md5); addFASTASeqToMap(proteinIdForGFF, protein.getSequence()); } processMatches(matches, proteinIdForGFF, date, protein, proteinIdForGFF, writeFullGFF); } // end match size check } return 0; }
/** * Writes out protein view to an zipped and compressed HTML file. * * @param protein containing matches to be written out * @return the number of rows printed (i.e. the number of Locations on Matches). * @throws java.io.IOException in the event of I/O problem writing out the file. */ public int write(final Protein protein) throws IOException { checkEntryHierarchy(); if (entryHierarchy != null) { for (ProteinXref xref : protein.getCrossReferences()) { final SimpleProtein simpleProtein = SimpleProtein.valueOf(protein, xref, entryHierarchy); if (simpleProtein != null) { // Build model for FreeMarker final SimpleHash model = buildModelMap(simpleProtein, entryHierarchy); // Render template and write result to a file Writer writer = null; try { final Template temp = freeMarkerConfig.getTemplate(freeMarkerTemplate); checkTempDirectory(tempDirectory); if (!tempDirectory.endsWith("/")) { tempDirectory = tempDirectory + "/"; } UrlFriendlyIdGenerator gen = UrlFriendlyIdGenerator.getInstance(); String urlFriendlyId = gen.generate(xref.getIdentifier()); final File newResultFile = new File(tempDirectory + urlFriendlyId + ".svg"); resultFiles.add(newResultFile); writer = new PrintWriter(new FileWriter(newResultFile)); temp.process(model, writer); writer.flush(); } catch (TemplateException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { if (writer != null) { writer.close(); } } } } } return 0; }
/** * Inserts new Proteins. If there are Protein objects with the same MD5 / sequence in the * database, this method updates these proteins, rather than inserting the new ones. * * <p>Note that this method inserts the new Protein objects AND and new Xrefs (possibly updating * an existing Protein object if necessary with the new Xref.) * * @param newProteins being a List of new Protein objects to insert * @return a new List<Protein> containing all of the inserted / updated Protein objects. (Allows * the caller to retrieve the primary keys for the proteins). */ @Transactional @SuppressWarnings("unchecked") public PersistedProteins insertNewProteins(Collection<Protein> newProteins) { PersistedProteins persistentProteins = new PersistedProteins(); if (newProteins.size() > 0) { // Create a List of MD5s (just as Strings) to query the database with final List<String> newMd5s = new ArrayList<String>(newProteins.size()); for (Protein newProtein : newProteins) { newMd5s.add(newProtein.getMd5()); if (LOGGER.isDebugEnabled()) { LOGGER.debug("MD5 of new protein: " + newProtein.getMd5()); } } // Retrieve any proteins AND associated xrefs that have the same MD5 as one of the 'new' // proteins // being inserted and place in a Map of MD5 to Protein object. final Map<String, Protein> md5ToExistingProtein = new HashMap<String, Protein>(); final Query query = entityManager.createQuery( "select p from Protein p left outer join fetch P.crossReferences where p.md5 in (:md5)"); query.setParameter("md5", newMd5s); for (Protein existingProtein : (List<Protein>) query.getResultList()) { if (LOGGER.isDebugEnabled()) { LOGGER.debug("Found 1 existing protein with MD5: " + existingProtein.getMd5()); } md5ToExistingProtein.put(existingProtein.getMd5(), existingProtein); } // Now have the List of 'new' proteins, and a list of existing proteins that match // them. Insert / update proteins as appropriate. for (Protein candidate : newProteins) { // PROTEIN ALREADY EXISTS in the DB. 
- update cross references and save. if (md5ToExistingProtein.keySet().contains(candidate.getMd5())) { // This protein is already in the database - add any new Xrefs and update. Protein existingProtein = md5ToExistingProtein.get(candidate.getMd5()); boolean updateRequired = false; if (candidate.getCrossReferences() != null) { if (LOGGER.isTraceEnabled()) { LOGGER.trace("Protein TO BE STORED has xrefs:"); } for (ProteinXref xref : candidate.getCrossReferences()) { if (LOGGER.isTraceEnabled()) { LOGGER.trace(xref.getIdentifier()); } // Add any NEW cross references. if (!existingProtein.getCrossReferences().contains(xref)) { if (LOGGER.isTraceEnabled()) { LOGGER.trace( "Adding " + xref.getIdentifier() + " and setting updateRequired = true"); } existingProtein.addCrossReference(xref); updateRequired = true; } } } if (updateRequired) { // PROTEIN is NOT new, but CHANGED (new Xrefs) if (LOGGER.isTraceEnabled()) { LOGGER.trace("Merging protein with new Xrefs: " + existingProtein.getMd5()); } entityManager.merge(existingProtein); } persistentProteins.addPreExistingProtein(existingProtein); } // PROTEIN IS NEW - save it. else { if (LOGGER.isTraceEnabled()) { LOGGER.trace("Saving new protein: " + candidate.getMd5()); } entityManager.persist(candidate); persistentProteins.addNewProtein(candidate); // Check for this new protein next time through the loop, just in case the new source of // proteins is redundant (e.g. a FASTA file with sequences repeated). md5ToExistingProtein.put(candidate.getMd5(), candidate); } } } // Finally return all the persisted Protein objects (new or existing) entityManager.flush(); return persistentProteins; }