/** * Finds the adaptor boundary around the read and returns the first base inside the adaptor that * is closest to the read boundary. If the read is in the positive strand, this is the first base * after the end of the fragment (Picard calls it 'insert'), if the read is in the negative * strand, this is the first base before the beginning of the fragment. * * <p>There are two cases we need to treat here: * * <p>1) Our read is in the reverse strand : * * <p><----------------------| * |---------------------> * * <p>in these cases, the adaptor boundary is at the mate start (minus one) * * <p>2) Our read is in the forward strand : * * <p>|----------------------> * <----------------------| * * <p>in these cases the adaptor boundary is at the start of the read plus the inferred insert * size (plus one) * * @param read the read being tested for the adaptor boundary * @return the reference coordinate for the adaptor boundary (effectively the first base IN the * adaptor, closest to the read. NULL if the read is unmapped or the mate is mapped to another * contig. */ public static Integer getAdaptorBoundary(final SAMRecord read) { final int MAXIMUM_ADAPTOR_LENGTH = 8; final int insertSize = Math.abs( read .getInferredInsertSize()); // the inferred insert size can be negative if the mate // is mapped before the read (so we take the absolute // value) if (insertSize == 0 || read .getReadUnmappedFlag()) // no adaptors in reads with mates in another chromosome or // unmapped pairs return null; Integer adaptorBoundary; // the reference coordinate for the adaptor boundary (effectively the first // base IN the adaptor, closest to the read) if (read.getReadNegativeStrandFlag()) adaptorBoundary = read.getMateAlignmentStart() - 1; // case 1 (see header) else adaptorBoundary = read.getAlignmentStart() + insertSize + 1; // case 2 (see header) if ((adaptorBoundary < read.getAlignmentStart() - MAXIMUM_ADAPTOR_LENGTH) || (adaptorBoundary > read.getAlignmentEnd() + MAXIMUM_ADAPTOR_LENGTH)) adaptorBoundary = null; // we are being conservative by not allowing the adaptor boundary to go beyond what // we belive is the maximum size of an adaptor return adaptorBoundary; }
public void find_coverage(SAMResource sres) { int start_base = sres.region.range.start; int end_base = sres.region.range.end; int coverage_len = (end_base - start_base) + 1; int i, end, ref_i, read_i, len; int[] coverage = new int[coverage_len]; Arrays.fill(coverage, 0); WorkingFile wf = null; if (outfile != null) { try { wf = new WorkingFile(outfile); ps = wf.getPrintStream(); } catch (Exception e) { System.err.println("I/O error: " + e); // debug e.printStackTrace(); System.exit(1); } } try { // // gather coverage info: // CloseableIterator<SAMRecord> iterator = sres.get_iterator(); int read_count = 0; int ref_min = -1; int ref_max = -1; while (iterator.hasNext()) { SAMRecord sr = iterator.next(); read_count++; // System.err.println(sr.getReadName() + ": " + sr.getAlignmentStart() + "-" + // sr.getAlignmentEnd()); // debug if (sr.getReadUnmappedFlag()) continue; if (sr.getDuplicateReadFlag()) { if (verbose_mode) System.err.println( sr.getReadName() + "." + (sr.getReadNegativeStrandFlag() ? "R" : "F") + " ignoring, duplicate"); continue; } byte[] read = sr.getReadBases(); byte[] quals = sr.getBaseQualities(); for (AlignmentBlock ab : sr.getAlignmentBlocks()) { len = ab.getLength(); read_i = ab.getReadStart() - 1; ref_i = ab.getReferenceStart() - start_base; if (ref_min == -1 || ref_i < ref_min) ref_min = ref_i; for (i = read_i, end = read_i + len; i < end; i++, ref_i++) { if (ref_i >= 0 && ref_i < coverage_len) { if (quals[i] >= MIN_QUALITY) { if (verbose_mode) System.err.println( sr.getReadName() + "." + (sr.getReadNegativeStrandFlag() ? "R" : "F") + " hit at " + (ref_i + start_base) + " as=" + sr.getAlignmentStart() + " ae=" + sr.getAlignmentEnd()); coverage[ref_i]++; } else if (verbose_mode) { System.err.println( sr.getReadName() + "." + (sr.getReadNegativeStrandFlag() ? "R" : "F") + " qual_reject at " + (ref_i + start_base) + " as=" + sr.getAlignmentStart() + " ae=" + sr.getAlignmentEnd()); } } } if (ref_max == -1 || ref_i > ref_max) ref_max = ref_i; } } sres.close(); System.err.println( "records:" + read_count + " ref_min:" + (ref_min + start_base) + " ref_max:" + (ref_max + start_base)); // debug // // report coverage info: // for (i = 0; i < coverage.length; i++) { if (name != null) ps.print(name + ","); ps.println((i + start_base) + "," + coverage[i]); // debug } if (wf != null) wf.finish(); } catch (Exception e) { System.err.println("ERROR: " + e); // debug e.printStackTrace(); } }