示例#1
0
 /**
  * Builds a reference genome from an already opened {@link SequencesReader}. This is the
  * constructor most callers should use when they hold a reader for the SDF.
  *
  * <p>The reference description is obtained through {@code getReferenceReader(genome, fallback)}
  * and handed to {@code parse} together with the sequence length map of {@code genome}. The
  * reference reader is closed as soon as parsing completes, whether or not it succeeds.
  *
  * @param genome SDF containing sequences and reference specification file.
  * @param sex the sex this instance is built for.
  * @param fallback type of default ReferenceGenome to create when no reference file is present;
  *     if null and there is no reference an error will be thrown
  * @throws IOException when actual I/O error or problems in file definition.
  */
 public ReferenceGenome(final SequencesReader genome, final Sex sex, DefaultFallback fallback)
     throws IOException {
   // try-with-resources guarantees the reference reader is released after parsing.
   try (BufferedReader referenceReader = getReferenceReader(genome, fallback)) {
     parse(ReaderUtils.getSequenceLengthMap(genome), referenceReader, sex);
   }
 }
示例#2
0
 /**
  * Constructor for testing: builds a reference genome from an explicitly supplied reference
  * description instead of one looked up from the SDF.
  *
  * <p>The sequence length map of {@code genome} and the supplied {@code reference} are passed
  * straight to {@code parse}. NOTE(review): this constructor does not close {@code reference}
  * itself; presumably the caller keeps ownership of the reader - confirm against {@code parse}.
  *
  * @param genome with names of sequences.
  * @param reference contents of description of reference genome.
  * @param sex the sex this instance is built for.
  * @throws IOException when actual I/O error or problems in file definition.
  */
 public ReferenceGenome(final SequencesReader genome, final Reader reference, final Sex sex)
     throws IOException {
   parse(ReaderUtils.getSequenceLengthMap(genome), reference, sex);
 }
示例#3
0
  /**
   * Runs {@code checkExclusionAndPadding} over a series of regions against a male
   * {@link ReferenceGenome} parsed from {@code PSEUDO_REF_2}.
   *
   * <p>NOTE(review): in every call the third argument looks like the expected result and the
   * fourth the regions supplied as input; judging by the expected values, the excluded span lies
   * between positions 59999 and 80000 of sequence 1. Confirm against
   * {@code checkExclusionAndPadding} and {@code PSEUDO_REF_2}.
   *
   * @throws IOException if building the reference genome or reading the mock genome fails.
   */
  public void testExclusionCases() throws IOException {
    // Presumably a mock genome of 3 DNA sequences - TODO confirm the meaning of the arguments.
    final MockSequencesReader sequences = new MockSequencesReader(SequenceType.DNA, 3, 300000);
    final ReferenceGenome referenceGenome =
        new ReferenceGenome(sequences, new StringReader(PSEUDO_REF_2), Sex.MALE);
    final Map<String, Long> nameToId = ReaderUtils.getSequenceNameMap(sequences);

    // --- Regions confined to a single sequence ---

    // front overlap
    checkExclusionAndPadding(
        referenceGenome,
        nameToId,
        new HashingRegion[] {new HashingRegion(1, 5000, 1, 59999, 4500, 59999)},
        new HashingRegion[] {new HashingRegion(1, 5000, 1, 65000, 4500, 65500)});
    // rear overlap
    checkExclusionAndPadding(
        referenceGenome,
        nameToId,
        new HashingRegion[] {new HashingRegion(1, 80000, 1, 90000, 80000, 90500)},
        new HashingRegion[] {new HashingRegion(1, 70000, 1, 90000, 69500, 90500)});
    // region strictly inside the PAR region: nothing is expected to remain
    checkExclusionAndPadding(
        referenceGenome,
        nameToId,
        new HashingRegion[0],
        new HashingRegion[] {new HashingRegion(1, 70000, 1, 75000, 69500, 75500)});
    // region inside the PAR region, starting at 60000 and ending at 80000
    checkExclusionAndPadding(
        referenceGenome,
        nameToId,
        new HashingRegion[0],
        new HashingRegion[] {new HashingRegion(1, 60000, 1, 80000, 59500, 80500)});
    // region inside the PAR region, starting one position earlier at 59999
    checkExclusionAndPadding(
        referenceGenome,
        nameToId,
        new HashingRegion[0],
        new HashingRegion[] {new HashingRegion(1, 59999, 1, 80000, 59500, 80500)});
    // region entirely consumes PAR region: expected to be split into two pieces
    checkExclusionAndPadding(
        referenceGenome,
        nameToId,
        new HashingRegion[] {
          new HashingRegion(1, 55000, 1, 59999, 54500, 59999),
          new HashingRegion(1, 80000, 1, 85000, 80000, 85500)
        },
        new HashingRegion[] {new HashingRegion(1, 55000, 1, 85000, 54500, 85500)});
    // regions on edges
    checkExclusionAndPadding(
        referenceGenome,
        nameToId,
        new HashingRegion[] {
          new HashingRegion(1, 55000, 1, 59999, 54500, 59999),
          new HashingRegion(1, 80000, 1, 85000, 80000, 85500)
        },
        new HashingRegion[] {
          new HashingRegion(1, 55000, 1, 60000, 54500, 60500),
          new HashingRegion(1, 60000, 1, 80000, 59500, 80500),
          new HashingRegion(1, 80000, 1, 85000, 79500, 85500)
        });

    // --- Same overlaps, with larger values in the last two constructor arguments ---

    // front overlap padding
    checkExclusionAndPadding(
        referenceGenome,
        nameToId,
        new HashingRegion[] {new HashingRegion(1, 5000, 1, 59999, 4500, 59999)},
        new HashingRegion[] {new HashingRegion(1, 5000, 1, 65000, 4500, 70000)});
    // rear overlap padding
    checkExclusionAndPadding(
        referenceGenome,
        nameToId,
        new HashingRegion[] {new HashingRegion(1, 80000, 1, 90000, 80000, 95000)},
        new HashingRegion[] {new HashingRegion(1, 70000, 1, 90000, 65000, 95000)});

    // --- Regions whose first and third constructor arguments differ ---
    // (presumably start/end sequence ids, i.e. regions spanning sequences - TODO confirm)

    // starts in sequence 0 and ends past the front edge in sequence 1
    checkExclusionAndPadding(
        referenceGenome,
        nameToId,
        new HashingRegion[] {new HashingRegion(0, 50000, 1, 59999, 49500, 59999)},
        new HashingRegion[] {new HashingRegion(0, 50000, 1, 65000, 49500, 65500)});
    // starts before the rear edge in sequence 1 and ends in sequence 2
    checkExclusionAndPadding(
        referenceGenome,
        nameToId,
        new HashingRegion[] {new HashingRegion(1, 80000, 2, 50000, 80000, 50500)},
        new HashingRegion[] {new HashingRegion(1, 70000, 2, 50000, 69500, 50500)});
    // starts in sequence 0 and covers the whole 59999..80000 span of sequence 1
    checkExclusionAndPadding(
        referenceGenome,
        nameToId,
        new HashingRegion[] {
          new HashingRegion(0, 50000, 1, 59999, 49500, 59999),
          new HashingRegion(1, 80000, 1, 90000, 80000, 90500)
        },
        new HashingRegion[] {new HashingRegion(0, 50000, 1, 90000, 49500, 90500)});
    // covers the whole 59999..80000 span of sequence 1 and ends in sequence 2
    checkExclusionAndPadding(
        referenceGenome,
        nameToId,
        new HashingRegion[] {
          new HashingRegion(1, 50000, 1, 59999, 49500, 59999),
          new HashingRegion(1, 80000, 2, 50000, 80000, 50500)
        },
        new HashingRegion[] {new HashingRegion(1, 50000, 2, 50000, 49500, 50500)});
    // runs from sequence 0 through all of sequence 1 into sequence 2
    checkExclusionAndPadding(
        referenceGenome,
        nameToId,
        new HashingRegion[] {
          new HashingRegion(0, 50000, 1, 59999, 49500, 59999),
          new HashingRegion(1, 80000, 2, 50000, 80000, 50500)
        },
        new HashingRegion[] {new HashingRegion(0, 50000, 2, 50000, 49500, 50500)});

    // starts in sequence 0 and ends at 70000 in sequence 1
    checkExclusionAndPadding(
        referenceGenome,
        nameToId,
        new HashingRegion[] {new HashingRegion(0, 50000, 1, 59999, 49500, 59999)},
        new HashingRegion[] {new HashingRegion(0, 50000, 1, 70000, 49500, 70500)});
    // same inputs as the earlier sequence 1 -> sequence 2 case; kept as in the original test
    checkExclusionAndPadding(
        referenceGenome,
        nameToId,
        new HashingRegion[] {new HashingRegion(1, 80000, 2, 50000, 80000, 50500)},
        new HashingRegion[] {new HashingRegion(1, 70000, 2, 50000, 69500, 50500)});
  }