/**
 * Builds a reference genome from an already opened {@link SequencesReader}. This is the
 * constructor most callers should use when such a reader is at hand.
 *
 * <p>The reference description is obtained through {@code getReferenceReader} and is closed
 * again once parsing has finished, whether or not parsing succeeded.
 *
 * @param genome SDF containing sequences and reference specification file.
 * @param sex the sex this instance is built for.
 * @param fallback type of default ReferenceGenome to create when no reference file is present;
 *     if null and there is no reference an error will be thrown.
 * @throws IOException on an actual I/O error or on problems in the file definition.
 */
public ReferenceGenome(final SequencesReader genome, final Sex sex, DefaultFallback fallback)
    throws IOException {
  // Open the reference description first; the sequence length map is only built afterwards.
  try (BufferedReader referenceReader = getReferenceReader(genome, fallback)) {
    parse(
        ReaderUtils.getSequenceLengthMap(genome),
        referenceReader,
        sex);
  }
}
/**
 * Test-oriented constructor that takes the reference description directly from a
 * caller-supplied reader.
 *
 * <p>The supplied reader is handed to {@code parse} as is; this constructor does not close it.
 *
 * @param genome supplies the names of the sequences.
 * @param reference contents of the description of the reference genome.
 * @param sex the sex this instance is built for.
 * @throws IOException on an actual I/O error or on problems in the file definition.
 */
public ReferenceGenome(final SequencesReader genome, final Reader reference, final Sex sex)
    throws IOException {
  parse(
      ReaderUtils.getSequenceLengthMap(genome),
      reference,
      sex);
}
public void testExclusionCases() throws IOException { final StringReader reader = new StringReader(PSEUDO_REF_2); final MockSequencesReader genome = new MockSequencesReader(SequenceType.DNA, 3, 300000); final ReferenceGenome rg = new ReferenceGenome(genome, reader, Sex.MALE); final Map<String, Long> sequenceNameMap = ReaderUtils.getSequenceNameMap(genome); // front overlap checkExclusionAndPadding( rg, sequenceNameMap, new HashingRegion[] {new HashingRegion(1, 5000, 1, 59999, 4500, 59999)}, new HashingRegion[] {new HashingRegion(1, 5000, 1, 65000, 4500, 65500)}); // rear overlap checkExclusionAndPadding( rg, sequenceNameMap, new HashingRegion[] {new HashingRegion(1, 80000, 1, 90000, 80000, 90500)}, new HashingRegion[] {new HashingRegion(1, 70000, 1, 90000, 69500, 90500)}); // region entirely within PAR region checkExclusionAndPadding( rg, sequenceNameMap, new HashingRegion[] {}, new HashingRegion[] {new HashingRegion(1, 70000, 1, 75000, 69500, 75500)}); // region entirely within PAR region checkExclusionAndPadding( rg, sequenceNameMap, new HashingRegion[] {}, new HashingRegion[] {new HashingRegion(1, 60000, 1, 80000, 59500, 80500)}); // region entirely within PAR region checkExclusionAndPadding( rg, sequenceNameMap, new HashingRegion[] {}, new HashingRegion[] {new HashingRegion(1, 59999, 1, 80000, 59500, 80500)}); // region entirely consumes PAR region checkExclusionAndPadding( rg, sequenceNameMap, new HashingRegion[] { new HashingRegion(1, 55000, 1, 59999, 54500, 59999), new HashingRegion(1, 80000, 1, 85000, 80000, 85500) }, new HashingRegion[] {new HashingRegion(1, 55000, 1, 85000, 54500, 85500)}); // region on edges checkExclusionAndPadding( rg, sequenceNameMap, new HashingRegion[] { new HashingRegion(1, 55000, 1, 59999, 54500, 59999), new HashingRegion(1, 80000, 1, 85000, 80000, 85500) }, new HashingRegion[] { new HashingRegion(1, 55000, 1, 60000, 54500, 60500), new HashingRegion(1, 60000, 1, 80000, 59500, 80500), new HashingRegion(1, 80000, 1, 85000, 79500, 
85500) }); // front overlap padding checkExclusionAndPadding( rg, sequenceNameMap, new HashingRegion[] {new HashingRegion(1, 5000, 1, 59999, 4500, 59999)}, new HashingRegion[] {new HashingRegion(1, 5000, 1, 65000, 4500, 70000)}); // rear overlap padding checkExclusionAndPadding( rg, sequenceNameMap, new HashingRegion[] {new HashingRegion(1, 80000, 1, 90000, 80000, 95000)}, new HashingRegion[] {new HashingRegion(1, 70000, 1, 90000, 65000, 95000)}); checkExclusionAndPadding( rg, sequenceNameMap, new HashingRegion[] {new HashingRegion(0, 50000, 1, 59999, 49500, 59999)}, new HashingRegion[] {new HashingRegion(0, 50000, 1, 65000, 49500, 65500)}); checkExclusionAndPadding( rg, sequenceNameMap, new HashingRegion[] {new HashingRegion(1, 80000, 2, 50000, 80000, 50500)}, new HashingRegion[] {new HashingRegion(1, 70000, 2, 50000, 69500, 50500)}); checkExclusionAndPadding( rg, sequenceNameMap, new HashingRegion[] { new HashingRegion(0, 50000, 1, 59999, 49500, 59999), new HashingRegion(1, 80000, 1, 90000, 80000, 90500) }, new HashingRegion[] {new HashingRegion(0, 50000, 1, 90000, 49500, 90500)}); checkExclusionAndPadding( rg, sequenceNameMap, new HashingRegion[] { new HashingRegion(1, 50000, 1, 59999, 49500, 59999), new HashingRegion(1, 80000, 2, 50000, 80000, 50500) }, new HashingRegion[] {new HashingRegion(1, 50000, 2, 50000, 49500, 50500)}); checkExclusionAndPadding( rg, sequenceNameMap, new HashingRegion[] { new HashingRegion(0, 50000, 1, 59999, 49500, 59999), new HashingRegion(1, 80000, 2, 50000, 80000, 50500) }, new HashingRegion[] {new HashingRegion(0, 50000, 2, 50000, 49500, 50500)}); checkExclusionAndPadding( rg, sequenceNameMap, new HashingRegion[] {new HashingRegion(0, 50000, 1, 59999, 49500, 59999)}, new HashingRegion[] {new HashingRegion(0, 50000, 1, 70000, 49500, 70500)}); checkExclusionAndPadding( rg, sequenceNameMap, new HashingRegion[] {new HashingRegion(1, 80000, 2, 50000, 80000, 50500)}, new HashingRegion[] {new HashingRegion(1, 70000, 2, 50000, 69500, 
50500)}); }