/**
   * Merges higher taxa across runs of classifications that share the same value at a rank.
   *
   * <p>The list is walked once. Consecutive entries whose value at {@code rank} is equal form a
   * group, and every completed group (including the trailing one) is handed to
   * {@code inferHigherTaxa}. The intended outcome for a group sharing the value {@code d} is:
   *
   * <pre>
   *   a,-,c,d
   *   a,b,-,d     merges into     a,b,c,d      (no conflicts exist)
   *   -,b,-,d
   * </pre>
   *
   * whereas adding the conflicting row {@code e,-,-,d} to the same input produces
   * {@code a,b,c,d} and {@code e,-,-,d}.
   *
   * @param rank the rank whose value delimits the groups
   * @param denorm the classifications to merge across; only neighbouring entries are compared, so
   *     the list must already be sorted by {@code rank} before calling
   * @param homonyms homonym names per rank, handed on unchanged to {@code inferHigherTaxa}
   */
  public void merge(
      final LINNEAN_RANK rank,
      List<DenormClassification> denorm,
      Map<LINNEAN_RANK, Set<String>> homonyms) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Starting merging classifications[{}] at rank[{}]", denorm.size(), rank);
      DenormClassificationUtils.debug(denorm);
    }

    // the run of adjacent rows that currently share a value at the rank
    List<DenormClassification> run = new ArrayList<DenormClassification>();

    // the row visited on the previous iteration; null only before the first row
    DenormClassification last = null;

    for (DenormClassification row : denorm) {
      boolean valueChanged =
          last != null && !StringUtils.equals(row.get(rank), last.get(rank));
      if (valueChanged) {
        // the run is complete: process it, then start collecting the next one
        inferHigherTaxa(denorm, run, rank, homonyms);
        run.clear();
      }
      run.add(row);
      last = row;
    }

    // the final run never sees a value change, so flush it explicitly
    inferHigherTaxa(denorm, run, rank, homonyms);
  }
  /**
   * Fills in blank higher taxa for a group of classifications that all share the same value at the
   * declared rank, wherever exactly one unambiguous value can be found.
   *
   * <p>Groups holding zero or one classification, and groups whose first member is blank at
   * {@code rank}, are left untouched. Otherwise, for each rank returned by
   * {@code LinneanRank.ranksHigherThan(rank, false)}, in the order returned:
   *
   * <ol>
   *   <li>the group members that are blank at that rank are the records to fill in;
   *   <li>the distinct classifications of the group that have a value at that rank are the
   *       candidates;
   *   <li>for each blank record, a candidate contributes its value only when
   *       {@code DenormClassificationUtils.haveConflict} is false and
   *       {@code DenormClassificationUtils.shareHigherTaxonomy} is true for the pair;
   *   <li>if {@code homonymInLowerRank} flags the record nothing is inferred; otherwise, if
   *       exactly one distinct value was contributed it is written to the record; otherwise the
   *       record's value at {@code rank} is registered in {@code homonyms}.
   * </ol>
   *
   * @param taxonomy the complete list being merged; not read by the current implementation
   * @param group classifications sharing a value at {@code rank}; members are modified in place
   * @param rank the rank the group was formed on. NOTE(review): earlier documentation described
   *     this as inclusive (passing genus infers kingdom, phylum, class, order, family and genus);
   *     confirm against the meaning of the boolean passed to {@code ranksHigherThan}
   * @param homonyms homonym names per rank; consulted before inferring and extended when a record
   *     cannot be resolved. A set is created on demand if none is registered for {@code rank}
   */
  private void inferHigherTaxa(
      List<DenormClassification> taxonomy,
      List<DenormClassification> group,
      final LINNEAN_RANK rank,
      Map<LINNEAN_RANK, Set<String>> homonyms) {

    if (group.size() <= 1) {
      LOG.debug(
          "Nothing to merge at rank[{}] since there is/are {} classification(s)",
          rank,
          group.size());
      return;
    }
    if (StringUtils.isBlank(group.get(0).get(rank))) {
      LOG.debug(
          "Skipping merging of group since the group represents a group with null at the rank");
      return;
    }

    LOG.info(
        "Merging classifications[{}] at rank[{}] for group: {}",
        new Object[] {group.size(), rank, group.get(0).get(rank)});
    if (LOG.isDebugEnabled()) {
      DenormClassificationUtils.debug(group);
    }

    // we know we will receive a lot of duplicates, so extract them for performance
    Map<String, DenormClassification> distinctClassifications =
        distinctClassifications(group, rank);

    // Ranks are visited in the order ranksHigherThan supplies them. The ambiguity handling below
    // relies on higher ranks being settled first (presumably highest rank first -- TODO confirm).
    for (LINNEAN_RANK r : LinneanRank.ranksHigherThan(rank, false)) {

      // the "sparse" classifications are the group members with nothing at the rank in question
      List<DenormClassification> sparseRecords = new ArrayList<DenormClassification>();
      for (DenormClassification d : group) {
        if (StringUtils.isBlank(d.get(r))) {
          sparseRecords.add(d);
        }
      }

      // nothing to fill in at this rank
      if (sparseRecords.isEmpty()) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("No classification(s) is/are empty at rank[{}]", r);
          DenormClassificationUtils.debug(group);
        }
        continue;
      }
      if (LOG.isDebugEnabled()) {
        LOG.debug("{} classification(s) is/are empty at rank[{}]", sparseRecords.size(), r);
        DenormClassificationUtils.debug(group);
      }

      // A distinct classification with a value at this rank is a candidate. The list is built
      // once per rank, before any record is updated, so values inferred below do not feed back
      // into the candidates for the same rank.
      List<DenormClassification> candidates = new ArrayList<DenormClassification>();
      for (DenormClassification dc : distinctClassifications.values()) {
        if (StringUtils.isNotBlank(dc.get(r))) {
          candidates.add(dc);
        }
      }
      LOG.debug(
          "{} classification(s) is/are potential candidate(s) from which rank[{}] might be inferred",
          candidates.size(),
          r);

      // without candidates there is nothing to infer from (and no homonym is registered)
      if (candidates.isEmpty()) {
        continue;
      }

      for (DenormClassification d : sparseRecords) {
        inferRankForRecord(d, r, rank, candidates, homonyms);
      }
    }
  }

  /**
   * Attempts to set the value at rank {@code r} on a single sparse record, registering the
   * record's value at {@code rank} as a homonym when the candidates do not agree on one value.
   */
  private void inferRankForRecord(
      DenormClassification d,
      LINNEAN_RANK r,
      LINNEAN_RANK rank,
      List<DenormClassification> candidates,
      Map<LINNEAN_RANK, Set<String>> homonyms) {
    LOG.debug("Attempting to infer rank[{}] for: {}", r, d);

    Set<String> potentials = collectInferenceOptions(d, r, candidates);

    // Within this rank we now have the options, but consider working at the 3rd column in:
    //   a - c d
    //   - b c d
    //   e - - d
    // The first two rows could be completed from each other, ignorant of the third row, which
    // carries the same lower-rank name "d" with an empty 3rd column and a different higher
    // taxonomy. To counter this, look for homonyms at any lower rank before inferring.
    boolean homonymFound = false;
    if (StringUtils.isNotBlank(d.get(rank))) {
      homonymFound = homonymInLowerRank(rank, homonyms, d);
    }

    if (homonymFound) { // if homonyms exist, one cannot make inferences
      LOG.debug("Homonyms found, so rank[{}] cannot be inferred for: {}", r, d);
    } else if (potentials.size() == 1) { // if there is only one option, use it
      String value = potentials.iterator().next();
      LOG.debug(
          "{} classification(s) unanimously provided option[{}] at rank[{}] for: {}",
          new Object[] {candidates.size(), value, r, d});
      d.set(r, value);
    } else {
      LOG.debug(
          "{} classification(s) provided {} options at rank[{}], so cannot be inferred for: {}",
          new Object[] {candidates.size(), potentials.size(), r, d});
      // this group represents a homonym, which is stored for future decisions when merging
      // higher taxa
      String name = d.get(rank);
      if (StringUtils.isNotBlank(name)) {
        LOG.debug("Adding homonym[{}] to rank[{}]", name, rank);
        registerHomonym(homonyms, rank, name);
      }
    }
  }

  /**
   * Returns the distinct values at rank {@code r} offered by candidates that neither conflict
   * with the sparse record nor differ from it in higher taxonomy.
   */
  private Set<String> collectInferenceOptions(
      DenormClassification d, LINNEAN_RANK r, List<DenormClassification> candidates) {
    Set<String> potentials = new HashSet<String>();
    for (DenormClassification candidate : candidates) {
      if (DenormClassificationUtils.haveConflict(d, candidate, r)) {
        // a conflict in higher taxonomy rules the candidate out
        LOG.debug("Ignoring candidate from potential options due to a conflict: {}", candidate);

      } else if (DenormClassificationUtils.shareHigherTaxonomy(d, candidate, r)) {
        LOG.debug(
            "Adding option[{}] for rank[{}] from candidate: {}",
            new Object[] {candidate.get(r), r, candidate});
        potentials.add(candidate.get(r));

      } else {
        // since we are doing ranks in order, the higher taxa must be identical, or
        // we have ambiguity. Consider:
        //   a,b,c
        //   -,-,c
        //   d,-,c
        // On the middle rank and the second row, there is no conflict with a,b,c but "b"
        // cannot be assumed.
        LOG.debug("Ignoring candidate from potential options due to ambiguity: {}", candidate);
      }
    }
    return potentials;
  }

  /**
   * Adds a name to the homonym set for a rank, creating the set first when the map has no entry
   * for that rank so that an unprepared map does not abort the merge.
   */
  private void registerHomonym(
      Map<LINNEAN_RANK, Set<String>> homonyms, LINNEAN_RANK rank, String name) {
    Set<String> names = homonyms.get(rank);
    if (names == null) {
      names = new HashSet<String>();
      homonyms.put(rank, names);
    }
    names.add(name);
  }