/**
  * Persists a collection of Protein instances by delegating to {@link
  * #insertNewProteins(Collection)}.
  *
  * @param newInstances the Protein instances to persist.
  * @return a single Collection holding the proteins reported as newly persisted followed by the
  *     proteins reported as pre-existing. This MAY NOT contain the same objects as were passed
  *     in: where a matching protein was already stored, the stored object is returned in place
  *     of the one supplied.
  */
 @Override
 @Transactional
 public Collection<Protein> insert(Collection<Protein> newInstances) {
   final PersistedProteins outcome = insertNewProteins(newInstances);

   // Gather both groups (brand new first, then already stored) into one result collection.
   final Collection<Protein> combined = new ArrayList<Protein>();
   combined.addAll(outcome.getNewProteins());
   combined.addAll(outcome.getPreExistingProteins());

   entityManager.flush();
   return combined;
 }
  /**
   * Persists the given proteins, reusing any protein with the same MD5 that is already stored.
   *
   * <p>The MD5s of all supplied proteins are looked up in a single query. Then, for each
   * candidate protein:
   *
   * <ul>
   *   <li>if a protein with the same MD5 was found (or was persisted earlier during this same
   *       call), every cross reference the stored protein does not yet contain is added to it,
   *       the stored protein is merged if anything was added, and it is recorded as
   *       pre-existing;
   *   <li>otherwise the candidate is persisted and recorded as new.
   * </ul>
   *
   * <p>The entity manager is flushed before the method returns.
   *
   * @param newProteins the Protein objects to insert; {@code null} or an empty collection yields
   *     an empty {@link PersistedProteins}.
   * @return a {@link PersistedProteins} holding the newly persisted proteins and the pre-existing
   *     proteins that were matched. (Allows the caller to retrieve the primary keys for the
   *     proteins.)
   */
  @Transactional
  public PersistedProteins insertNewProteins(Collection<Protein> newProteins) {
    final PersistedProteins persistentProteins = new PersistedProteins();
    if (newProteins != null && !newProteins.isEmpty()) {
      // Create a List of MD5s (just as Strings) to query the database with.
      final List<String> newMd5s = new ArrayList<String>(newProteins.size());
      for (Protein newProtein : newProteins) {
        newMd5s.add(newProtein.getMd5());
        if (LOGGER.isDebugEnabled()) {
          LOGGER.debug("MD5 of new protein: " + newProtein.getMd5());
        }
      }

      // Retrieve any proteins AND associated xrefs that have the same MD5 as one of the 'new'
      // proteins being inserted and place them in a Map of MD5 to Protein object.
      // The fetch-join path uses the alias exactly as declared ('p'): alias case handling
      // differs between JPA providers, so the declaration and its uses must match.
      final Map<String, Protein> md5ToExistingProtein = new HashMap<String, Protein>();
      final Query query =
          entityManager.createQuery(
              "select p from Protein p left outer join fetch p.crossReferences where p.md5 in (:md5)");
      query.setParameter("md5", newMd5s);
      // getResultList() is untyped here, hence the cast; the query selects Protein only.
      @SuppressWarnings("unchecked")
      final List<Protein> matchingProteins = (List<Protein>) query.getResultList();
      for (Protein existingProtein : matchingProteins) {
        if (LOGGER.isDebugEnabled()) {
          LOGGER.debug("Found 1 existing protein with MD5: " + existingProtein.getMd5());
        }
        md5ToExistingProtein.put(existingProtein.getMd5(), existingProtein);
      }

      // Now have the collection of 'new' proteins, and a map of existing proteins that match
      // them. Insert / update proteins as appropriate.
      for (Protein candidate : newProteins) {
        // Single lookup: the map never holds null values, so null means "not seen yet".
        final Protein existingProtein = md5ToExistingProtein.get(candidate.getMd5());

        if (existingProtein != null) {
          // PROTEIN ALREADY EXISTS in the DB (or was persisted earlier in this loop) -
          // add any new Xrefs and update.
          boolean updateRequired = false;
          if (candidate.getCrossReferences() != null) {
            if (LOGGER.isTraceEnabled()) {
              LOGGER.trace("Protein TO BE STORED has xrefs:");
            }
            for (ProteinXref xref : candidate.getCrossReferences()) {
              if (LOGGER.isTraceEnabled()) {
                LOGGER.trace(xref.getIdentifier());
              }
              // Add any NEW cross references.
              if (!existingProtein.getCrossReferences().contains(xref)) {
                if (LOGGER.isTraceEnabled()) {
                  LOGGER.trace(
                      "Adding " + xref.getIdentifier() + " and setting updateRequired = true");
                }
                existingProtein.addCrossReference(xref);
                updateRequired = true;
              }
            }
          }
          if (updateRequired) {
            // PROTEIN is NOT new, but CHANGED (new Xrefs).
            if (LOGGER.isTraceEnabled()) {
              LOGGER.trace("Merging protein with new Xrefs: " + existingProtein.getMd5());
            }
            entityManager.merge(existingProtein);
          }
          persistentProteins.addPreExistingProtein(existingProtein);
        } else {
          // PROTEIN IS NEW - save it.
          if (LOGGER.isTraceEnabled()) {
            LOGGER.trace("Saving new protein: " + candidate.getMd5());
          }
          entityManager.persist(candidate);
          persistentProteins.addNewProtein(candidate);
          // Check for this new protein next time through the loop, just in case the new source of
          // proteins is redundant (e.g. a FASTA file with sequences repeated).
          md5ToExistingProtein.put(candidate.getMd5(), candidate);
        }
      }
    }
    // Finally flush and return all the persisted Protein objects (new or existing).
    entityManager.flush();
    return persistentProteins;
  }