/**
   * Create a factory with the specified options, one that favors using QSeqs over all other files.
   *
   * @param basecallDirectory The baseCalls directory of a complete Illumina directory. Files are
   *     found by searching relative to this folder (some of them higher up in the directory tree).
   * @param barcodesDirectory The directory containing barcode files extracted by
   *     'ExtractIlluminaBarcodes' (optional; if null, basecallDirectory is used)
   * @param lane Which lane to iterate over.
   * @param readStructure The read structure to which output clusters will conform. When not using
   *     QSeqs, EAMSS masking (see BclParser) is run on individual reads as found in the
   *     readStructure; if the readStructure specified does not match the readStructure implied by
   *     the sequencer's output, then the quality scores output may differ from what would be found
   *     in a run's QSeq files.
   * @param dataTypesArg Which data types to read
   */
  public IlluminaDataProviderFactory(
      final File basecallDirectory,
      File barcodesDirectory,
      final int lane,
      final ReadStructure readStructure,
      final BclQualityEvaluationStrategy bclQualityEvaluationStrategy,
      final IlluminaDataType... dataTypesArg) {
    this.basecallDirectory = basecallDirectory;
    this.barcodesDirectory = barcodesDirectory;
    this.bclQualityEvaluationStrategy = bclQualityEvaluationStrategy;

    this.lane = lane;
    /* The types of data that will be returned by any IlluminaDataProviders created by this factory.

    Note: In previous versions, data of types not specified might be returned if a specified data
    type resided in QSeqs (since QSeqs span multiple data types). This is no longer the case: you
    MUST specify all data types that should be returned. */
    final Set<IlluminaDataType> dataTypes =
        Collections.unmodifiableSet(new HashSet<IlluminaDataType>(Arrays.asList(dataTypesArg)));

    if (dataTypes.isEmpty()) {
      throw new PicardException(
          "No data types have been specified for basecall output "
              + basecallDirectory
              + ", lane "
              + lane);
    }

    this.fileUtil = new IlluminaFileUtil(basecallDirectory, barcodesDirectory, lane);

    // Find which requested IlluminaDataTypes we have files for and select the most preferred file
    // format available for each type.
    formatToDataTypes = determineFormats(dataTypes, fileUtil);

    // Find any IlluminaDataType with NO available file format and, if any exist, throw an
    // exception.
    final Set<IlluminaDataType> unmatchedDataTypes =
        findUnmatchedTypes(dataTypes, formatToDataTypes);
    if (!unmatchedDataTypes.isEmpty()) {
      throw new PicardException(
          "Could not find a format with available files for the following data types: "
              + StringUtil.join(", ", new ArrayList<IlluminaDataType>(unmatchedDataTypes)));
    }

    log.debug(
        "The following file formats will be used by IlluminaDataProvider: "
            + StringUtil.join(", ", formatToDataTypes.keySet()));

    availableTiles =
        fileUtil.getActualTiles(new ArrayList<SupportedIlluminaFormat>(formatToDataTypes.keySet()));
    if (availableTiles.isEmpty()) {
      throw new PicardException(
          "No available tiles were found, make sure that "
              + basecallDirectory.getAbsolutePath()
              + " has a lane "
              + lane);
    }

    outputMapping = new OutputMapping(readStructure);
  }
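  /*
   * A minimal construction sketch. The paths, lane, read structure ("151T8B151T"), and
   * minimum-quality threshold below are assumptions for illustration, not values taken from
   * this file:
   */
  public static IlluminaDataProviderFactory exampleFactory() {
    final File basecalls = new File("/seq/run1/Data/Intensities/BaseCalls"); // hypothetical path
    return new IlluminaDataProviderFactory(
        basecalls,
        null, // no separate barcodes directory; basecallDirectory is used
        1, // lane
        new ReadStructure("151T8B151T"), // assumed paired 151bp reads with an 8bp sample barcode
        new BclQualityEvaluationStrategy(2),
        IlluminaDataType.BaseCalls,
        IlluminaDataType.QualityScores,
        IlluminaDataType.Position);
  }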
  /**
   * Decode a single line in a SAM text file.
   *
   * @param line line to decode.
   * @return A SAMReadFeature modeling that line.
   */
  @Override
  public SAMReadFeature decode(String line) {
    // we may be asked to process a header line; ignore it
    if (line.startsWith("@")) return null;

    String[] tokens = new String[expectedTokenCount];

    // split the line
    int count = ParsingUtils.splitWhitespace(line, tokens);

    // check to see if we've parsed the string into the right number of tokens (expectedTokenCount)
    if (count != expectedTokenCount)
      throw new CodecLineParsingException(
          "the SAM read line didn't have the expected number of tokens "
              + "(expected = "
              + expectedTokenCount
              + ", saw = "
              + count
              + " on "
              + "line = "
              + line
              + ")");

    final String readName = tokens[0];
    final int flags = Integer.parseInt(tokens[1]);
    final String contigName = tokens[2];
    final int alignmentStart = Integer.parseInt(tokens[3]);
    final int mapQ = Integer.parseInt(tokens[4]);
    final String cigarString = tokens[5];
    final String mateContigName = tokens[6];
    final int mateAlignmentStart = Integer.parseInt(tokens[7]);
    final int inferredInsertSize = Integer.parseInt(tokens[8]);
    final byte[] bases = StringUtil.stringToBytes(tokens[9]);
    final byte[] qualities = StringUtil.stringToBytes(tokens[10]);

    // Infer the alignment end.
    Cigar cigar = TextCigarCodec.decode(cigarString);
    int alignmentEnd = alignmentStart + cigar.getReferenceLength() - 1;

    // Remove printable character conversion from the qualities.
    for (int i = 0; i < qualities.length; i++) qualities[i] -= 33;

    return new SAMReadFeature(
        readName,
        flags,
        contigName,
        alignmentStart,
        alignmentEnd,
        mapQ,
        cigarString,
        mateContigName,
        mateAlignmentStart,
        inferredInsertSize,
        bases,
        qualities);
  }
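  /*
   * A usage sketch for decode(). The SAM line is hand-made, and the enclosing class name
   * (SAMReadCodec here) is an assumption for illustration:
   */
  public static void exampleDecode(final SAMReadCodec codec) {
    // 11 tab-separated fields: name, flags, contig, start, mapQ, CIGAR, mate contig,
    // mate start, insert size, bases, printable qualities
    final SAMReadFeature feature =
        codec.decode("read1\t0\tchr1\t100\t60\t8M\t*\t0\t0\tACGTACGT\tIIIIIIII");
    // alignmentEnd is inferred from the CIGAR: 100 + 8 - 1 = 107
  }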
 @Test(expectedExceptions = {SAMException.class})
 public void testUnmergeableSequenceDictionary() {
   final String sd1 = sq1 + sq2 + sq5;
   final String sd2 = sq2 + sq3 + sq4 + sq1;
   SAMFileReader reader1 =
       new SAMFileReader(new ByteArrayInputStream(StringUtil.stringToBytes(sd1)));
   SAMFileReader reader2 =
       new SAMFileReader(new ByteArrayInputStream(StringUtil.stringToBytes(sd2)));
   final List<SAMFileHeader> inputHeaders =
       Arrays.asList(reader1.getFileHeader(), reader2.getFileHeader());
   new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate, inputHeaders, true);
 }
  private void saveResults(
      final MetricsFile<?, Integer> metrics,
      final SAMFileHeader readsHeader,
      final String inputFileName) {
    MetricsUtils.saveMetrics(metrics, out, getAuthHolder());

    if (metrics.getAllHistograms().isEmpty()) {
      logger.warn("No valid bases found in input file.");
    } else if (chartOutput != null) {
      // Now run R to generate a chart

      // If we're working with a single library, assign that library's name
      // as a suffix to the plot title
      final List<SAMReadGroupRecord> readGroups = readsHeader.getReadGroups();

      /*
       * A subtitle for the plot, usually corresponding to a library.
       */
      String plotSubtitle = "";
      if (readGroups.size() == 1) {
        plotSubtitle = StringUtil.asEmptyIfNull(readGroups.get(0).getLibrary());
      }
      final RScriptExecutor executor = new RScriptExecutor();
      executor.addScript(new Resource(MeanQualityByCycle.R_SCRIPT, MeanQualityByCycle.class));
      executor.addArgs(out, chartOutput.getAbsolutePath(), inputFileName, plotSubtitle);
      executor.exec();
    }
  }
  @Override
  protected void setup(final SAMFileHeader header, final File samFile) {
    final String outext =
        (null != FILE_EXTENSION) ? FILE_EXTENSION : ""; // Add a file extension if desired
    preAdapterSummaryOut =
        new File(OUTPUT + SequencingArtifactMetrics.PRE_ADAPTER_SUMMARY_EXT + outext);
    preAdapterDetailsOut =
        new File(OUTPUT + SequencingArtifactMetrics.PRE_ADAPTER_DETAILS_EXT + outext);
    baitBiasSummaryOut =
        new File(OUTPUT + SequencingArtifactMetrics.BAIT_BIAS_SUMMARY_EXT + outext);
    baitBiasDetailsOut =
        new File(OUTPUT + SequencingArtifactMetrics.BAIT_BIAS_DETAILS_EXT + outext);

    IOUtil.assertFileIsWritable(preAdapterSummaryOut);
    IOUtil.assertFileIsWritable(preAdapterDetailsOut);
    IOUtil.assertFileIsWritable(baitBiasSummaryOut);
    IOUtil.assertFileIsWritable(baitBiasDetailsOut);

    for (final SAMReadGroupRecord rec : header.getReadGroups()) {
      samples.add(getOrElse(rec.getSample(), UNKNOWN_SAMPLE));
      libraries.add(getOrElse(rec.getLibrary(), UNKNOWN_LIBRARY));
    }

    if (INTERVALS != null) {
      IOUtil.assertFileIsReadable(INTERVALS);
      intervalMask =
          new IntervalListReferenceSequenceMask(IntervalList.fromFile(INTERVALS).uniqued());
    }

    if (DB_SNP != null) {
      IOUtil.assertFileIsReadable(DB_SNP);
      dbSnpMask = new DbSnpBitSetUtil(DB_SNP, header.getSequenceDictionary());
    }

    // set record-level filters
    final List<SamRecordFilter> filters = new ArrayList<SamRecordFilter>();
    filters.add(new FailsVendorReadQualityFilter());
    filters.add(new NotPrimaryAlignmentFilter());
    filters.add(new DuplicateReadFilter());
    filters.add(new AlignedFilter(true)); // discard unmapped reads
    filters.add(new MappingQualityFilter(MINIMUM_MAPPING_QUALITY));
    if (!INCLUDE_UNPAIRED) {
      final int effectiveMaxInsertSize =
          (MAXIMUM_INSERT_SIZE == 0) ? Integer.MAX_VALUE : MAXIMUM_INSERT_SIZE;
      filters.add(new InsertSizeFilter(MINIMUM_INSERT_SIZE, effectiveMaxInsertSize));
    }
    recordFilter = new AggregateFilter(filters);

    // set up the artifact counters
    final String sampleAlias = StringUtil.join(",", new ArrayList<String>(samples));
    for (final String library : libraries) {
      artifactCounters.put(
          library, new ArtifactCounter(sampleAlias, library, CONTEXT_SIZE, TANDEM_READS));
    }
  }
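  /*
   * A sketch of how the AggregateFilter assembled above is typically applied while iterating
   * reads. The reader parameter and the acceptRecord call are hypothetical stand-ins for the
   * real per-read accumulation:
   */
  private void exampleApplyFilters(final SAMFileReader reader) {
    for (final SAMRecord rec : reader) {
      // drops vendor-failed, secondary, duplicate, unmapped, and low-MAPQ reads in one pass
      if (recordFilter.filterOut(rec)) continue;
      // acceptRecord(rec); // hypothetical: feed surviving reads to the ArtifactCounters
    }
  }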
  @Test(dataProvider = "data")
  public void testProgramGroupAndReadGroupMerge(final File[] inputFiles, final File expectedOutputFile)
      throws IOException {

    final BufferedReader reader = new BufferedReader(new FileReader(expectedOutputFile));

    final StringBuilder expectedOutputBuilder = new StringBuilder();
    String line;
    while ((line = reader.readLine()) != null) {
      expectedOutputBuilder.append(line).append('\n');
    }
    reader.close(); // close the reader rather than leaking the file handle
    final String expected_output = expectedOutputBuilder.toString();

    final List<SAMFileReader> readers = new ArrayList<SAMFileReader>();
    final List<SAMFileHeader> headers = new ArrayList<SAMFileHeader>();
    for (final File inFile : inputFiles) {
      IOUtil.assertFileIsReadable(inFile);
      final SAMFileReader in = new SAMFileReader(inFile);
      // We are now checking for zero-length reads, so suppress complaint about that.
      in.setValidationStringency(ValidationStringency.SILENT);
      readers.add(in);
      headers.add(in.getFileHeader());
    }
    final MergingSamRecordIterator iterator;

    final SamFileHeaderMerger headerMerger =
        new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate, headers, true);
    iterator = new MergingSamRecordIterator(headerMerger, readers, false);

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    SAMFileWriter writer =
        new SAMFileWriterFactory().makeSAMWriter(headerMerger.getMergedHeader(), true, baos);
    while (iterator.hasNext()) {
      writer.addAlignment(iterator.next());
    }
    writer.close();

    String actual_output = StringUtil.bytesToString(baos.toByteArray());

    List<String> actual = Arrays.asList(actual_output.split("\\n"));
    List<String> expected = Arrays.asList(expected_output.split("\\n"));
    for (int i = 0; i < expected.size(); i++) {
      if (expected.get(i).startsWith("@")) {
        Assert.assertTrue(headersEquivalent(actual.get(i), expected.get(i)));
      } else {
        List<String> expectedSamParts = Arrays.asList(expected.get(i).split("\\s+"));
        List<String> actualSamParts = Arrays.asList(actual.get(i).split("\\s+"));
        for (String exp : expectedSamParts) {
          Assert.assertTrue(actualSamParts.contains(exp));
        }
        for (String act : actualSamParts) {
          Assert.assertTrue(expectedSamParts.contains(act));
        }
      }
    }
  }
  /**
   * Assert that expectedCols are present
   *
   * @param actualCols The columns present in the MULTIPLEX_PARAMS file
   * @param expectedCols The columns that are REQUIRED
   */
  private void assertExpectedColumns(final Set<String> actualCols, final Set<String> expectedCols) {
    final Set<String> missingColumns = new HashSet<String>(expectedCols);
    missingColumns.removeAll(actualCols);

    if (!missingColumns.isEmpty()) {
      throw new PicardException(
          String.format(
              "MULTIPLEX_PARAMS file %s is missing the following columns: %s.",
              MULTIPLEX_PARAMS.getAbsolutePath(), StringUtil.join(", ", missingColumns)));
    }
  }
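  /*
   * Example (hypothetical column sets): a MULTIPLEX_PARAMS file providing only OUTPUT_PREFIX
   * and BARCODE_1 fails this check when BARCODE_2 is also required:
   *
   *   assertExpectedColumns(
   *       CollectionUtil.makeSet("OUTPUT_PREFIX", "BARCODE_1"),
   *       CollectionUtil.makeSet("OUTPUT_PREFIX", "BARCODE_1", "BARCODE_2"));
   *   // -> PicardException: "... is missing the following columns: BARCODE_2."
   */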
 @Test
 public void testSequenceDictionaryMerge() {
   final String sd1 = sq1 + sq2 + sq5;
   final String sd2 = sq2 + sq3 + sq4;
   SAMFileReader reader1 =
       new SAMFileReader(new ByteArrayInputStream(StringUtil.stringToBytes(sd1)));
   SAMFileReader reader2 =
       new SAMFileReader(new ByteArrayInputStream(StringUtil.stringToBytes(sd2)));
   final List<SAMFileHeader> inputHeaders =
       Arrays.asList(reader1.getFileHeader(), reader2.getFileHeader());
   SamFileHeaderMerger merger =
       new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate, inputHeaders, true);
   final SAMFileHeader mergedHeader = merger.getMergedHeader();
   for (final SAMFileHeader inputHeader : inputHeaders) {
     int prevTargetIndex = -1;
     for (final SAMSequenceRecord sequenceRecord :
         inputHeader.getSequenceDictionary().getSequences()) {
       final int targetIndex = mergedHeader.getSequenceIndex(sequenceRecord.getSequenceName());
       Assert.assertNotEquals(targetIndex, -1);
       Assert.assertTrue(prevTargetIndex < targetIndex);
       prevTargetIndex = targetIndex;
     }
   }
 }
 private void makeFastqRecords(
     final FastqRecord[] recs,
     final int[] indices,
     final ClusterData cluster,
     final boolean appendReadNumberSuffix) {
    for (int i = 0; i < indices.length; ++i) {
     final ReadData readData = cluster.getRead(indices[i]);
     final String readBases = StringUtil.bytesToString(readData.getBases()).replace('.', 'N');
     final String readName =
         readNameEncoder.generateReadName(cluster, appendReadNumberSuffix ? i + 1 : null);
     recs[i] =
         new FastqRecord(
             readName, readBases, null, SAMUtils.phredToFastq(readData.getQualities()));
   }
 }
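 /*
  * For reference, SAMUtils.phredToFastq applies the standard Phred+33 encoding when building
  * the quality string above; e.g. (hypothetical values):
  *
  *   SAMUtils.phredToFastq(new byte[] {0, 20, 40})  ->  "!5I"
  */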
 @Test
 public void testTrailingWhitespace() throws Exception {
   final File fasta = File.createTempFile("test", ".fasta");
   fasta.deleteOnExit();
   final PrintWriter writer = new PrintWriter(fasta);
   final String chr1 = "chr1";
   writer.println(">" + chr1);
   final String sequence = "ACGTACGT";
   writer.println(sequence);
   writer.println(sequence + " \t");
   writer.close();
   final FastaSequenceFile fastaReader = new FastaSequenceFile(fasta, true);
   final ReferenceSequence referenceSequence = fastaReader.nextSequence();
   Assert.assertEquals(referenceSequence.getName(), chr1);
   Assert.assertEquals(
       StringUtil.bytesToString(referenceSequence.getBases()), sequence + sequence);
 }
  /**
   * For each line in the MULTIPLEX_PARAMS file create a FastqRecordsWriter and put it in the
   * sampleBarcodeFastqWriterMap map, where the key to the map is the concatenation of all
   * sampleBarcodes in order for the given line.
   */
  private void populateWritersFromMultiplexParams() {
    final TabbedTextFileWithHeaderParser libraryParamsParser =
        new TabbedTextFileWithHeaderParser(MULTIPLEX_PARAMS);

    final Set<String> expectedColumnLabels = CollectionUtil.makeSet("OUTPUT_PREFIX");
    final List<String> sampleBarcodeColumnLabels = new ArrayList<String>();
    for (int i = 1; i <= readStructure.sampleBarcodes.length(); i++) {
      sampleBarcodeColumnLabels.add("BARCODE_" + i);
    }

    expectedColumnLabels.addAll(sampleBarcodeColumnLabels);
    assertExpectedColumns(libraryParamsParser.columnLabels(), expectedColumnLabels);

    for (final TabbedTextFileWithHeaderParser.Row row : libraryParamsParser) {
      List<String> sampleBarcodeValues = null;

      if (!sampleBarcodeColumnLabels.isEmpty()) {
        sampleBarcodeValues = new ArrayList<String>();
        for (final String sampleBarcodeLabel : sampleBarcodeColumnLabels) {
          sampleBarcodeValues.add(row.getField(sampleBarcodeLabel));
        }
      }

      final String key =
          (sampleBarcodeValues == null || sampleBarcodeValues.contains("N"))
              ? null
              : StringUtil.join("", sampleBarcodeValues);
      // A duplicate key also catches more than one line in a non-barcoded MULTIPLEX_PARAMS file.
      if (sampleBarcodeFastqWriterMap.containsKey(key)) {
        throw new PicardException(
            "Row for barcode "
                + key
                + " appears more than once in MULTIPLEX_PARAMS file "
                + MULTIPLEX_PARAMS);
      }

      final FastqRecordsWriter writer = buildWriter(new File(row.getField("OUTPUT_PREFIX")));
      sampleBarcodeFastqWriterMap.put(key, writer);
    }
    if (sampleBarcodeFastqWriterMap.isEmpty()) {
      throw new PicardException(
          "MULTIPLEX_PARAMS file " + MULTIPLEX_PARAMS + " does have any data rows.");
    }
    libraryParamsParser.close();
  }
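  /*
   * For reference, a MULTIPLEX_PARAMS file for a run with one 8-base sample barcode might look
   * like this (tab-separated; values are hypothetical):
   *
   *   OUTPUT_PREFIX   BARCODE_1
   *   sampleA         ACGTACGT
   *   sampleB         TGCATGCA
   *
   * With no BARCODE_* columns at all, the single allowed row maps to the null key and receives
   * every read.
   */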
  /**
   * Call this method to create a ClusterData iterator over the specified tiles.
   *
   * @param requestedTiles The tiles to iterate over, or null to iterate over all available tiles.
   * @return An iterator for reading the Illumina basecall output for the lane specified in the
   *     constructor.
   */
  public IlluminaDataProvider makeDataProvider(List<Integer> requestedTiles) {
    if (requestedTiles == null) {
      requestedTiles = availableTiles;
    } else {
      if (requestedTiles.isEmpty()) {
        throw new PicardException(
            "Zero-length tile list supplied to makeDataProvider; specify at least one tile or pass null to use all available tiles.");
      }
    }

    final Map<IlluminaParser, Set<IlluminaDataType>> parsersToDataType =
        new HashMap<IlluminaParser, Set<IlluminaDataType>>();
    for (final Map.Entry<SupportedIlluminaFormat, Set<IlluminaDataType>> fmToDt :
        formatToDataTypes.entrySet()) {
      parsersToDataType.put(makeParser(fmToDt.getKey(), requestedTiles), fmToDt.getValue());
    }

    log.debug(
        "The following parsers will be used by IlluminaDataProvider: "
            + StringUtil.join(", ", parsersToDataType.keySet()));

    return new IlluminaDataProvider(outputMapping, parsersToDataType, basecallDirectory, lane);
  }
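  /*
   * A usage sketch (the factory is assumed to be constructed as shown earlier; passing null
   * requests all available tiles):
   */
  public static void exampleIterate(final IlluminaDataProviderFactory factory) {
    final IlluminaDataProvider provider = factory.makeDataProvider(null);
    while (provider.hasNext()) {
      final ClusterData cluster = provider.next();
      // e.g. cluster.getRead(0) for the first read, cluster.isPf() for the PF flag
    }
  }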
  @Override
  protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
    IOUtil.assertFileIsReadable(CHAIN);
    IOUtil.assertFileIsWritable(OUTPUT);
    IOUtil.assertFileIsWritable(REJECT);

    ////////////////////////////////////////////////////////////////////////
    // Setup the inputs
    ////////////////////////////////////////////////////////////////////////
    final LiftOver liftOver = new LiftOver(CHAIN);
    final VCFFileReader in = new VCFFileReader(INPUT, false);

    logger.info("Loading up the target reference genome.");
    final ReferenceSequenceFileWalker walker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);
    final Map<String, byte[]> refSeqs = new HashMap<>();
    for (final SAMSequenceRecord rec : walker.getSequenceDictionary().getSequences()) {
      refSeqs.put(rec.getSequenceName(), walker.get(rec.getSequenceIndex()).getBases());
    }
    CloserUtil.close(walker);

    ////////////////////////////////////////////////////////////////////////
    // Setup the outputs
    ////////////////////////////////////////////////////////////////////////
    final VCFHeader inHeader = in.getFileHeader();
    final VCFHeader outHeader = new VCFHeader(inHeader);
    outHeader.setSequenceDictionary(walker.getSequenceDictionary());
    final VariantContextWriter out =
        new VariantContextWriterBuilder()
            .setOption(Options.INDEX_ON_THE_FLY)
            .setOutputFile(OUTPUT)
            .setReferenceDictionary(walker.getSequenceDictionary())
            .build();
    out.writeHeader(outHeader);

    final VariantContextWriter rejects =
        new VariantContextWriterBuilder()
            .setOutputFile(REJECT)
            .unsetOption(Options.INDEX_ON_THE_FLY)
            .build();
    final VCFHeader rejectHeader = new VCFHeader(in.getFileHeader());
    for (final VCFFilterHeaderLine line : FILTERS) rejectHeader.addMetaDataLine(line);
    rejects.writeHeader(rejectHeader);

    ////////////////////////////////////////////////////////////////////////
    // Read the input VCF, lift the records over and write to the sorting
    // collection.
    ////////////////////////////////////////////////////////////////////////
    long failedLiftover = 0, failedAlleleCheck = 0, total = 0;
    logger.info("Lifting variants over and sorting.");

    final SortingCollection<VariantContext> sorter =
        SortingCollection.newInstance(
            VariantContext.class,
            new VCFRecordCodec(outHeader),
            outHeader.getVCFRecordComparator(),
            MAX_RECORDS_IN_RAM,
            TMP_DIR);

    ProgressLogger progress = new ProgressLogger(logger, 1000000, "read");

    for (final VariantContext ctx : in) {
      ++total;
      final Interval source =
          new Interval(
              ctx.getContig(),
              ctx.getStart(),
              ctx.getEnd(),
              false,
              ctx.getContig() + ":" + ctx.getStart() + "-" + ctx.getEnd());
      final Interval target = liftOver.liftOver(source, 1.0);

      if (target == null) {
        rejects.add(new VariantContextBuilder(ctx).filter(FILTER_CANNOT_LIFTOVER).make());
        failedLiftover++;
      } else {
        // Fix the alleles if we went from positive to negative strand
        final List<Allele> alleles = new ArrayList<>();
        for (final Allele oldAllele : ctx.getAlleles()) {
          if (target.isPositiveStrand() || oldAllele.isSymbolic()) {
            alleles.add(oldAllele);
          } else {
            alleles.add(
                Allele.create(
                    SequenceUtil.reverseComplement(oldAllele.getBaseString()),
                    oldAllele.isReference()));
          }
        }

        // Build the new variant context
        final VariantContextBuilder builder =
            new VariantContextBuilder(
                ctx.getSource(), target.getContig(), target.getStart(), target.getEnd(), alleles);

        builder.id(ctx.getID());
        builder.attributes(ctx.getAttributes());
        builder.genotypes(ctx.getGenotypes());
        builder.filters(ctx.getFilters());
        builder.log10PError(ctx.getLog10PError());

        // Check that the reference allele still agrees with the reference sequence
        boolean mismatchesReference = false;
        for (final Allele allele : builder.getAlleles()) {
          if (allele.isReference()) {
            final byte[] ref = refSeqs.get(target.getContig());
            final String refString =
                StringUtil.bytesToString(ref, target.getStart() - 1, target.length());

            if (!refString.equalsIgnoreCase(allele.getBaseString())) {
              mismatchesReference = true;
            }

            break;
          }
        }

        if (mismatchesReference) {
          rejects.add(new VariantContextBuilder(ctx).filter(FILTER_MISMATCHING_REF_ALLELE).make());
          failedAlleleCheck++;
        } else {
          sorter.add(builder.make());
        }
      }

      progress.record(ctx.getContig(), ctx.getStart());
    }

    final NumberFormat pfmt = new DecimalFormat("0.0000%");
    final String pct = pfmt.format((failedLiftover + failedAlleleCheck) / (double) total);
    logger.info("Processed ", total, " variants.");
    logger.info(Long.toString(failedLiftover), " variants failed to liftover.");
    logger.info(
        Long.toString(failedAlleleCheck),
        " variants lifted over but had mismatching reference alleles after lift over.");
    logger.info(pct, " of variants were not successfully lifted over and were written to the reject file.");

    rejects.close();
    in.close();

    ////////////////////////////////////////////////////////////////////////
    // Write the sorted outputs to the final output file
    ////////////////////////////////////////////////////////////////////////
    sorter.doneAdding();
    progress = new ProgressLogger(logger, 1000000, "written");
    logger.info("Writing out sorted records to final VCF.");

    for (final VariantContext ctx : sorter) {
      out.add(ctx);
      progress.record(ctx.getContig(), ctx.getStart());
    }
    out.close();
    sorter.cleanup();

    return null;
  }
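  /*
   * To illustrate the allele fix-up above: a reference allele lifted onto the negative strand
   * is replaced by its reverse complement (values hypothetical):
   *
   *   SequenceUtil.reverseComplement("AC")  ->  "GT"
   *   Allele.create("GT", true)             // the new reference allele
   */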
 /**
  * Returns the bases represented by this ReferenceSequence as a String. Since this will copy the
  * bases and convert them to two-byte characters, this should not be used on very long reference
  * sequences, but as a convenience when manipulating short sequences returned by {@link
  * ReferenceSequenceFile#getSubsequenceAt(String, long, long)}.
  *
  * @return The set of bases represented by this ReferenceSequence, as a String
  */
 public String getBaseString() {
   return StringUtil.bytesToString(bases);
 }
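 /*
  * A usage sketch (hypothetical reference file and coordinates):
  *
  *   final ReferenceSequence seq = referenceFile.getSubsequenceAt("chr1", 100, 107);
  *   final String bases = seq.getBaseString(); // e.g. "ACGTACGT"
  */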