private void scanQseqLine(Text line, Text key, SequencedFragment fragment) { setFieldPositionsAndLengths(line); // Build the key. We concatenate all fields from 0 to 5 (machine to y-pos) // and then the read number, replacing the tabs with colons. key.clear(); // append up and including field[5] key.append(line.getBytes(), 0, fieldPositions[5] + fieldLengths[5]); // replace tabs with : byte[] bytes = key.getBytes(); int temporaryEnd = key.getLength(); for (int i = 0; i < temporaryEnd; ++i) if (bytes[i] == '\t') bytes[i] = ':'; // append the read number key.append( line.getBytes(), fieldPositions[7] - 1, fieldLengths[7] + 1); // +/- 1 to catch the preceding tab. // convert the tab preceding the read number into a : key.getBytes()[temporaryEnd] = ':'; // now the fragment try { fragment.clear(); fragment.setInstrument(Text.decode(line.getBytes(), fieldPositions[0], fieldLengths[0])); fragment.setRunNumber( Integer.parseInt(Text.decode(line.getBytes(), fieldPositions[1], fieldLengths[1]))); // fragment.setFlowcellId(); fragment.setLane( Integer.parseInt(Text.decode(line.getBytes(), fieldPositions[2], fieldLengths[2]))); fragment.setTile( Integer.parseInt(Text.decode(line.getBytes(), fieldPositions[3], fieldLengths[3]))); fragment.setXpos( Integer.parseInt(Text.decode(line.getBytes(), fieldPositions[4], fieldLengths[4]))); fragment.setYpos( Integer.parseInt(Text.decode(line.getBytes(), fieldPositions[5], fieldLengths[5]))); fragment.setRead( Integer.parseInt(Text.decode(line.getBytes(), fieldPositions[7], fieldLengths[7]))); fragment.setFilterPassed(line.getBytes()[fieldPositions[10]] != '0'); // fragment.setControlNumber(); if (fieldLengths[6] > 0 && line.getBytes()[fieldPositions[6]] == '0') // 0 is a null index sequence fragment.setIndexSequence(null); else fragment.setIndexSequence( Text.decode(line.getBytes(), fieldPositions[6], fieldLengths[6]).replace('.', 'N')); } catch (CharacterCodingException e) { throw new FormatException( "Invalid character format at " + makePositionMessage(this.pos - line.getLength()) + "; line: " + line); } fragment.getSequence().append(line.getBytes(), fieldPositions[8], fieldLengths[8]); fragment.getQuality().append(line.getBytes(), fieldPositions[9], fieldLengths[9]); }
/* * This method applies some transformations to the read and quality data. * * <ul> * <li>'.' in the read are converted to 'N'</li> * <li>the base quality encoding is converted to 'sanger', unless otherwise * requested by the configuration.</li> * </ul> * * @exception FormatException Thrown if the record contains base quality scores * outside the range allowed by the format. */ private void postProcessSequencedFragment(SequencedFragment fragment) { byte[] bytes = fragment.getSequence().getBytes(); // replace . with N for (int i = 0; i < fieldLengths[8]; ++i) if (bytes[i] == '.') bytes[i] = 'N'; if (qualityEncoding == BaseQualityEncoding.Illumina) { // convert illumina to sanger scale SequencedFragment.convertQuality( fragment.getQuality(), BaseQualityEncoding.Illumina, BaseQualityEncoding.Sanger); } else // sanger qualities. { int outOfRangeElement = SequencedFragment.verifyQuality(fragment.getQuality(), BaseQualityEncoding.Sanger); if (outOfRangeElement >= 0) { throw new FormatException( "qseq base quality score out of range for Sanger Phred+33 format (found " + (fragment.getQuality().getBytes()[outOfRangeElement] - FormatConstants.SANGER_OFFSET) + ").\n" + "Although Sanger format has been requested, maybe qualities are in Illumina Phred+64 format?\n"); } } }