/**
  * Loads every Protein whose ID lies within the given closed range, fetching the matches and
  * cross references of each protein as part of the same query.
  *
  * @param bottom range lower bound (included)
  * @param top range upper bound (included)
  * @return the list of Proteins in the range, as returned by the query, with their matches and
  *     cross references fetch-joined.
  */
 @Override
 @Transactional(readOnly = true)
 @SuppressWarnings("unchecked")
 public List<Protein> getProteinsAndMatchesAndCrossReferencesBetweenIds(long bottom, long top) {
   final String jpql =
       "select distinct p from Protein p "
           + "left outer join fetch p.matches "
           + "left outer join fetch p.crossReferences where p.id >= :bottom and p.id <= :top";
   final Query rangeQuery = entityManager.createQuery(jpql);
   rangeQuery.setParameter("bottom", bottom);
   rangeQuery.setParameter("top", top);
   final List<Protein> proteinsInRange = (List<Protein>) rangeQuery.getResultList();

   // Everything below is diagnostic output only; skip it entirely unless tracing is switched on.
   if (!LOGGER.isTraceEnabled()) {
     return proteinsInRange;
   }

   LOGGER.trace("Querying proteins with IDs in range: " + bottom + " to " + top);
   LOGGER.trace("Matching protein count: " + proteinsInRange.size());
   for (Protein found : proteinsInRange) {
     LOGGER.trace("Protein ID: " + found.getId() + " MD5: " + found.getMd5());
     LOGGER.trace("Has " + found.getMatches().size() + " matches");
     for (ProteinXref crossReference : found.getCrossReferences()) {
       LOGGER.trace("Xref: " + crossReference.getIdentifier());
     }
   }
   return proteinsInRange;
 }
 /**
  * Marshals a Protein carrying a single Phobius match to XML and checks that the expected
  * elements and attributes appear in the output.
  *
  * <p>A marshalling failure fails the test; it is not merely logged, because otherwise the test
  * would pass without any assertion having been evaluated.
  */
 @Test
 public void testMarshalPhobiusMatch() {
   init();
   final Set<PhobiusMatch.PhobiusLocation> locations = new HashSet<PhobiusMatch.PhobiusLocation>();
   locations.add(new PhobiusMatch.PhobiusLocation(1, 2));
   final Signature signature =
       new Signature.Builder("SIGNAL_PEPTIDE").name("Signal Peptide").build();
   final PhobiusMatch match = new PhobiusMatch(signature, locations);
   final Protein protein = new Protein("aaa");
   protein.addMatch(match);

   final String result;
   try {
     result = marshal(marshaller, protein);
   } catch (IOException e) {
     // Surface the I/O problem as a test failure, keeping the original exception as the cause.
     final AssertionError failure = new AssertionError("Couldn't marshal protein object!");
     failure.initCause(e);
     throw failure;
   }

   if (LOGGER.isDebugEnabled()) {
     LOGGER.debug(result);
   }
   assertNotNull("XML result shouldn't be null!", result);
   assertTrue(result.contains("<matches>"));
   assertTrue(result.contains("<phobius-match>"));
   assertTrue(result.contains("<signature"));
   assertTrue(result.contains("name=\"Signal Peptide\""));
   assertTrue(result.contains("ac=\"SIGNAL_PEPTIDE\""));
   assertTrue(result.contains("<locations>"));
   assertTrue(result.contains("<phobius-location"));
   assertTrue(result.contains("end=\"2\""));
   assertTrue(result.contains("start=\"1\""));
 }
  /**
   * Writes the GFF output for one protein. For each accession of the protein, and only when the
   * protein has at least one match, a "##sequence-region" line is written followed by the
   * protein's matches; in full-GFF mode a reference line is also written and the sequence is
   * recorded for the FASTA section.
   *
   * @param protein containing matches to be written out
   * @return always 0 in the current implementation; written rows are not counted here.
   * @throws java.io.IOException in the event of I/O problem writing out the file.
   */
  public int write(Protein protein) throws IOException {
    final List<String> accessions = getProteinAccessions(protein);

    final int length = protein.getSequenceLength();
    final String proteinMd5 = protein.getMd5();
    final String runDate = dmyFormat.format(new Date());
    final Set<Match> proteinMatches = protein.getMatches();

    for (String accession : accessions) {
      // Proteins without matches produce no output at all for any accession.
      if (proteinMatches.isEmpty()) {
        continue;
      }
      // Convert the accession into a sequence ID that is valid in GFF3.
      final String seqId = ProteinMatchesGFFResultWriter.getValidGFF3SeqId(accession);

      super.gffWriter.write("##sequence-region " + seqId + " 1 " + length);
      if (writeFullGFF) {
        writeReferenceLine(seqId, length, proteinMd5);
        addFASTASeqToMap(seqId, protein.getSequence());
      }
      processMatches(proteinMatches, seqId, runDate, protein, seqId, writeFullGFF);
    }
    return 0;
  }
  /**
   * Renders the protein view through the FreeMarker template, once per cross reference of the
   * protein, writing each rendering to its own ".svg" file in the temporary directory and
   * recording every such file in {@code resultFiles}.
   *
   * <p>Nothing is written when no entry hierarchy is available after {@code
   * checkEntryHierarchy()}, and cross references for which no SimpleProtein can be built are
   * skipped.
   *
   * @param protein containing matches to be written out
   * @return always 0 in the current implementation; written rows are not counted here.
   * @throws java.io.IOException if the template cannot be loaded, the result file cannot be
   *     created or written, or the template fails to render (in which case the underlying
   *     TemplateException is attached as the cause).
   */
  public int write(final Protein protein) throws IOException {
    checkEntryHierarchy();

    if (entryHierarchy == null) {
      return 0;
    }

    for (ProteinXref xref : protein.getCrossReferences()) {
      final SimpleProtein simpleProtein = SimpleProtein.valueOf(protein, xref, entryHierarchy);
      if (simpleProtein == null) {
        continue;
      }
      // Build model for FreeMarker
      final SimpleHash model = buildModelMap(simpleProtein, entryHierarchy);

      final Template temp = freeMarkerConfig.getTemplate(freeMarkerTemplate);
      checkTempDirectory(tempDirectory);
      if (!tempDirectory.endsWith("/")) {
        tempDirectory = tempDirectory + "/";
      }

      final UrlFriendlyIdGenerator gen = UrlFriendlyIdGenerator.getInstance();
      final String urlFriendlyId = gen.generate(xref.getIdentifier());
      final File newResultFile = new File(tempDirectory + urlFriendlyId + ".svg");
      // Registered before writing, as before, so the file is tracked even if rendering fails.
      resultFiles.add(newResultFile);

      // Write through the FileWriter directly: a PrintWriter wrapper never throws IOException,
      // which would hide write failures (e.g. a full disk) that happen during rendering.
      final Writer writer = new FileWriter(newResultFile);
      try {
        temp.process(model, writer);
        writer.flush();
      } catch (TemplateException e) {
        // Report rendering problems through the declared exception type instead of dropping them.
        throw new IOException(
            "Failed to render FreeMarker template to " + newResultFile.getPath(), e);
      } finally {
        writer.close();
      }
    }
    return 0;
  }
  /**
   * Inserts new Proteins. If there are Protein objects with the same MD5 / sequence in the
   * database, this method updates these proteins, rather than inserting the new ones.
   *
   * <p>Note that this method inserts the new Protein objects AND any new Xrefs (possibly updating
   * an existing Protein object if necessary with the new Xref.)
   *
   * <p>A protein persisted earlier in the same call is treated as existing for later candidates
   * with the same MD5, so redundant input (e.g. repeated sequences) is not persisted twice.
   *
   * @param newProteins being a Collection of new Protein objects to insert
   * @return a PersistedProteins object to which every newly persisted protein has been added as
   *     a new protein and every protein found to exist already as a pre-existing protein.
   */
  @Transactional
  @SuppressWarnings("unchecked")
  public PersistedProteins insertNewProteins(Collection<Protein> newProteins) {
    PersistedProteins persistentProteins = new PersistedProteins();
    if (!newProteins.isEmpty()) {
      // Create a List of MD5s (just as Strings) to query the database with
      final List<String> newMd5s = new ArrayList<String>(newProteins.size());
      for (Protein newProtein : newProteins) {
        newMd5s.add(newProtein.getMd5());
        if (LOGGER.isDebugEnabled()) {
          LOGGER.debug("MD5 of new protein: " + newProtein.getMd5());
        }
      }

      // Retrieve any proteins AND associated xrefs that have the same MD5 as one of the 'new'
      // proteins being inserted and place in a Map of MD5 to Protein object.
      final Map<String, Protein> md5ToExistingProtein = new HashMap<String, Protein>();
      // The fetch join must refer to the alias exactly as declared ('p'); alias case handling
      // differs between JPA providers, so a differently-cased alias is not portable.
      final Query query =
          entityManager.createQuery(
              "select p from Protein p left outer join fetch p.crossReferences where p.md5 in (:md5)");
      query.setParameter("md5", newMd5s);
      for (Protein existingProtein : (List<Protein>) query.getResultList()) {
        if (LOGGER.isDebugEnabled()) {
          LOGGER.debug("Found 1 existing protein with MD5: " + existingProtein.getMd5());
        }
        md5ToExistingProtein.put(existingProtein.getMd5(), existingProtein);
      }

      // Now have the List of 'new' proteins, and a list of existing proteins that match
      // them. Insert / update proteins as appropriate.
      for (Protein candidate : newProteins) {
        final String candidateMd5 = candidate.getMd5();
        // Only non-null proteins are ever stored in the map, so a null result means "not known".
        final Protein existingProtein = md5ToExistingProtein.get(candidateMd5);

        if (existingProtein != null) {
          // PROTEIN ALREADY EXISTS in the DB. - add any new Xrefs and update.
          boolean updateRequired = false;
          if (candidate.getCrossReferences() != null) {
            if (LOGGER.isTraceEnabled()) {
              LOGGER.trace("Protein TO BE STORED has xrefs:");
            }
            for (ProteinXref xref : candidate.getCrossReferences()) {
              if (LOGGER.isTraceEnabled()) {
                LOGGER.trace(xref.getIdentifier());
              }
              // Add any NEW cross references.
              if (!existingProtein.getCrossReferences().contains(xref)) {
                if (LOGGER.isTraceEnabled()) {
                  LOGGER.trace(
                      "Adding " + xref.getIdentifier() + " and setting updateRequired = true");
                }
                existingProtein.addCrossReference(xref);
                updateRequired = true;
              }
            }
          }
          if (updateRequired) {
            // PROTEIN is NOT new, but CHANGED (new Xrefs)
            if (LOGGER.isTraceEnabled()) {
              LOGGER.trace("Merging protein with new Xrefs: " + existingProtein.getMd5());
            }
            entityManager.merge(existingProtein);
          }
          persistentProteins.addPreExistingProtein(existingProtein);
        } else {
          // PROTEIN IS NEW - save it.
          if (LOGGER.isTraceEnabled()) {
            LOGGER.trace("Saving new protein: " + candidateMd5);
          }
          entityManager.persist(candidate);
          persistentProteins.addNewProtein(candidate);
          // Check for this new protein next time through the loop, just in case the new source of
          // proteins is redundant (e.g. a FASTA file with sequences repeated).
          md5ToExistingProtein.put(candidateMd5, candidate);
        }
      }
    }
    // Finally return all the persisted Protein objects (new or existing)
    entityManager.flush();
    return persistentProteins;
  }