/**
 * Creates a {@link SetUniqueList} around the supplied list, using that list to keep element order.
 *
 * <p>Duplicate elements in the list are dropped, keeping the occurrence with the lowest index.
 * Uniqueness is tracked with a <code>HashSet</code>.
 *
 * <p>A non-empty list is snapshotted, cleared, and then refilled through the new
 * {@link SetUniqueList}, so the caller's list is modified by this call.
 *
 * @param <E> the element type
 * @param list the list to decorate, must not be null
 * @return a new {@link SetUniqueList}
 * @throws NullPointerException if list is null
 * @since 4.0
 */
public static <E> SetUniqueList<E> setUniqueList(final List<E> list) {
    if (null == list) {
        throw new NullPointerException("List must not be null");
    }

    final SetUniqueList<E> decorated;
    if (!list.isEmpty()) {
        // Snapshot the contents first: the list is emptied and then re-populated through the
        // decorator, which is what filters out the duplicates.
        final List<E> snapshot = new ArrayList<E>(list);
        list.clear();
        decorated = new SetUniqueList<E>(list, new HashSet<E>());
        decorated.addAll(snapshot);
    } else {
        // Nothing to de-duplicate; wrap the empty list as-is.
        decorated = new SetUniqueList<E>(list, new HashSet<E>());
    }
    return decorated;
}
/** * Project all of the normals used to create the PoN into the reduced panel. * * @param pon Not {@code null}. * @param ctx Spark context, {@code null} indicates no spark context available, which is * supported. * @return never {@code null}. Result will contain multiple columns in each of the * ReadCountCollection attributes. */ public static TangentNormalizationResult tangentNormalizeNormalsInPoN( final PoN pon, final JavaSparkContext ctx) { // Get the list of sample names in a modifiable List final List<String> sampleNamesCopy = new ArrayList<>(pon.getSampleNames()); logger.info("Loading normalized counts..."); final ReadCountCollection coverageProfile = new ReadCountCollection( SetUniqueList.setUniqueList(pon.getTargets()), SetUniqueList.setUniqueList(sampleNamesCopy), pon.getNormalizedCounts()); logger.info("Tangent normalizing the normals (normalized by target factors) ..."); // For each sample in the PoN, tangent normalize against the qc reduced PoN. return TangentNormalizer.tangentNormalize(pon, coverageProfile, ctx); }