/**
   * This implementation does the following: a) infers missing species and genera, b) sorts by
   * scientific name, c) does a homonym-aware merge to fill holes in a classification (e.g. a,-,c,d
   * and a,b,-,d merge to a,b,c,d) and d) builds the normalized parent-child structure.
   */
  public List<NormClassification> normalize(List<DenormClassification> denorm) {
    List<NormClassification> result = new ArrayList<NormClassification>();

    // infer missing values
    DenormClassificationUtils.inferSpecies(denorm);
    DenormClassificationUtils.inferGenera(denorm);

    Map<LINNEAN_RANK, Set<String>> homonyms = createHomonymCache();
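    // names known to be homonyms, keyed by rank; the merges below use this so that a name shared
    // at some rank is not treated as the same taxon when it is a known homonym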

    // respecting homonyms, merge the higher classifications into as few as possible
    // a,-,c,d
    // a,b,-,d
    // would merge to a,b,c,d for example
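    // merge from the most specific rank up to kingdom (the single letter constants stand for
    // subspecies, species, genus, family, order, class, phylum and kingdom)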
    long time = System.currentTimeMillis();
    sortAndMerge(LINNEAN_RANK.SS, denorm, homonyms);
    sortAndMerge(LINNEAN_RANK.S, denorm, homonyms);
    sortAndMerge(LINNEAN_RANK.G, denorm, homonyms);
    sortAndMerge(LINNEAN_RANK.F, denorm, homonyms);
    sortAndMerge(LINNEAN_RANK.O, denorm, homonyms);
    sortAndMerge(LINNEAN_RANK.C, denorm, homonyms);
    sortAndMerge(LINNEAN_RANK.P, denorm, homonyms);
    sortAndMerge(LINNEAN_RANK.K, denorm, homonyms);
    LOG.info(
        "Completed classification merging at all ranks in {} sec(s)",
        (1 + System.currentTimeMillis() - time) / 1000);

    // now resort to ensure correct ordering from the bottom up
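    // this groups rows that share a higher classification next to each other, so the tree can be
    // built below by comparing each row only with its immediate predecessor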
    Collections.sort(denorm, DenormClassificationUtils.FULL_COMPARATOR);

    time = System.currentTimeMillis();
    LOG.info("Building normalized tree structure for {} classifications", denorm.size());
    int id = 1;
    Map<Integer, NormClassification> norm = new HashMap<Integer, NormClassification>();
    Map<LINNEAN_RANK, Integer> parentIds = new HashMap<LINNEAN_RANK, Integer>();
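    // parentIds records, per rank, the id of the most recently created taxon, so that taxa at
    // lower ranks can be attached to their nearest existing ancestor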
    DenormClassification prev = null;
    for (DenormClassification curr : denorm) {
      // the first row has no predecessor, so its whole classification is treated as new
      boolean change = prev == null;

      // find the highest rank at which curr and prev differ
      LINNEAN_RANK deviation =
          change ? LINNEAN_RANK.K : DenormClassificationUtils.rankOfDeviation(curr, prev);
      if (LOG.isDebugEnabled()) {
        LOG.debug(
            "Deviation with previous is at rank[{}] for curr[{}] prev[{}]",
            new Object[] {deviation, curr, prev});
      }

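      // recreate taxa at the deviation rank and everything below it (assuming the boolean flag
      // makes ranksLowerThan inclusive of the deviation rank); higher ranks are shared with the
      // previous row, so their parentIds entries remain valid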
      for (LINNEAN_RANK r : LinneanRank.ranksLowerThan(deviation, true)) {
        // clear the parentIds entries that are no longer valid for this row
        parentIds.put(r, null);

        String name = curr.get(r);
        if (StringUtils.isNotBlank(name)) {
          // find the parent id to use for this taxon
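          // keep the last non-null id seen; assuming ranksHigherThan iterates from kingdom down
          // towards r, this is the nearest ancestor present in this row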
          Integer parentId = null;
          for (LINNEAN_RANK p : LinneanRank.ranksHigherThan(r, false)) {
            parentId = parentIds.get(p) == null ? parentId : parentIds.get(p);
          }

          // create the taxon
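          // the author only belongs on the terminal name: the subspecies when present, otherwise
          // the species itself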
          String author = null;
          if ((LINNEAN_RANK.S == r && StringUtils.isBlank(curr.get(LINNEAN_RANK.SS)))
              || LINNEAN_RANK.SS == r) {
            author = curr.getAuthor();
          }
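          // register the taxon and remember it as the parent candidate for lower ranks in this row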
          NormClassification nc = new NormClassification(id, parentId, name, author, r.toString());
          norm.put(id, nc);
          parentIds.put(r, id);
          id++;

          // we have just created the concept, but if this is the most significant (lowest ranked)
          // taxon in the row, then we need to attach any payloads to the newly created concept
          boolean more = false;
          for (LINNEAN_RANK r2 : LinneanRank.ranksLowerThan(r, false)) {
            more |= StringUtils.isNotBlank(curr.get(r2));
          }
          if (!more) {
            LOG.debug("Adding payloads from [{}] into [{}]", curr.toString(), nc.toString());
            nc.getPayloads().addAll(curr.getPayloads());
          }
        }
      }

      // handle the special case when you have
      // "a",null,null,null,null,"f","g","h","i"
      // "a",null,null,null,null,"f","g","j","i"
      // "a",null,null,null,null,"f","g",null,"i"
      // on the 3rd row, we have already created the species, but need to apply the author and
      // update the payloads
      if (LINNEAN_RANK.SS == deviation
          && prev != null
          && StringUtils.isBlank(curr.getSubspecies())
          // we don't want the second row to go in here
          && StringUtils.equals(curr.get(LINNEAN_RANK.S), prev.get(LINNEAN_RANK.S))) {
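        // nothing was created for this row (only the blank subspecies differed), so id - 1 still
        // points at the last taxon created for an earlier row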
        NormClassification prevNorm = norm.get(id - 1);

        // iterate back to the species concept
        while (prevNorm != null
            && !StringUtils.equals(prevNorm.getRank(), LINNEAN_RANK.S.toString())) {
          if (prevNorm.getParentId() == null) {
            prevNorm = null;
            break;
          } else {
            prevNorm = norm.get(prevNorm.getParentId());
          }
        }
        LOG.debug("Previous species: {}", prevNorm);
        if (prevNorm != null && StringUtils.equals(prevNorm.getRank(), LINNEAN_RANK.S.toString())) {
          LOG.debug(
              "Updating previous species concept with new author[{}]: {}",
              curr.getAuthor(),
              prevNorm);
          prevNorm.setAuthor(curr.getAuthor());
          LOG.debug(
              "Updating previous payloads from [{}] into previous [{}]",
              curr.toString(),
              prevNorm.toString());
          prevNorm.getPayloads().addAll(curr.getPayloads());
        }
      }

      prev = curr;
    }
    result.addAll(norm.values());
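    // ids are assigned in creation order and a parent is always created before its children, so
    // sorting by id keeps every parent ahead of its descendants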
    Collections.sort(
        result,
        new Comparator<NormClassification>() {

          @Override
          public int compare(NormClassification o1, NormClassification o2) {
            return o1.getId().compareTo(o2.getId());
          }
        });
    LOG.info(
        "Built normalized tree structure for {} classifications in {} sec(s)",
        denorm.size(),
        (1 + System.currentTimeMillis() - time) / 1000);

    return result;
  }