Пример #1
0
  /**
   * Finds the adaptor boundary around the read and returns the first base inside the adaptor that
   * is closest to the read boundary. If the read is in the positive strand, this is the first base
   * after the end of the fragment (Picard calls it 'insert'), if the read is in the negative
   * strand, this is the first base before the beginning of the fragment.
   *
   * <p>There are two cases we need to treat here:
   *
   * <p>1) Our read is in the reverse strand :
   *
   * <p><----------------------| * |--------------------->
   *
   * <p>in these cases, the adaptor boundary is at the mate start (minus one)
   *
   * <p>2) Our read is in the forward strand :
   *
   * <p>|----------------------> * <----------------------|
   *
   * <p>in these cases the adaptor boundary is at the start of the read plus the inferred insert
   * size (plus one)
   *
   * @param read the read being tested for the adaptor boundary
   * @return the reference coordinate for the adaptor boundary (effectively the first base IN the
   *     adaptor, closest to the read. NULL if the read is unmapped or the mate is mapped to another
   *     contig.
   */
  public static Integer getAdaptorBoundary(final SAMRecord read) {
    final int MAXIMUM_ADAPTOR_LENGTH = 8;
    final int insertSize =
        Math.abs(
            read
                .getInferredInsertSize()); // the inferred insert size can be negative if the mate
                                           // is mapped before the read (so we take the absolute
                                           // value)

    if (insertSize == 0
        || read
            .getReadUnmappedFlag()) // no adaptors in reads with mates in another chromosome or
                                    // unmapped pairs
    return null;

    Integer
        adaptorBoundary; // the reference coordinate for the adaptor boundary (effectively the first
                         // base IN the adaptor, closest to the read)
    if (read.getReadNegativeStrandFlag())
      adaptorBoundary = read.getMateAlignmentStart() - 1; // case 1 (see header)
    else adaptorBoundary = read.getAlignmentStart() + insertSize + 1; // case 2 (see header)

    if ((adaptorBoundary < read.getAlignmentStart() - MAXIMUM_ADAPTOR_LENGTH)
        || (adaptorBoundary > read.getAlignmentEnd() + MAXIMUM_ADAPTOR_LENGTH))
      adaptorBoundary =
          null; // we are being conservative by not allowing the adaptor boundary to go beyond what
                // we belive is the maximum size of an adaptor

    return adaptorBoundary;
  }
Пример #2
0
  public void find_coverage(SAMResource sres) {
    int start_base = sres.region.range.start;
    int end_base = sres.region.range.end;

    int coverage_len = (end_base - start_base) + 1;
    int i, end, ref_i, read_i, len;

    int[] coverage = new int[coverage_len];
    Arrays.fill(coverage, 0);

    WorkingFile wf = null;
    if (outfile != null) {
      try {
        wf = new WorkingFile(outfile);
        ps = wf.getPrintStream();
      } catch (Exception e) {
        System.err.println("I/O error: " + e); // debug
        e.printStackTrace();
        System.exit(1);
      }
    }

    try {
      //
      //  gather coverage info:
      //
      CloseableIterator<SAMRecord> iterator = sres.get_iterator();
      int read_count = 0;
      int ref_min = -1;
      int ref_max = -1;

      while (iterator.hasNext()) {
        SAMRecord sr = iterator.next();
        read_count++;

        //	System.err.println(sr.getReadName() + ": " + sr.getAlignmentStart() + "-" +
        // sr.getAlignmentEnd());  // debug

        if (sr.getReadUnmappedFlag()) continue;
        if (sr.getDuplicateReadFlag()) {
          if (verbose_mode)
            System.err.println(
                sr.getReadName()
                    + "."
                    + (sr.getReadNegativeStrandFlag() ? "R" : "F")
                    + " ignoring, duplicate");
          continue;
        }

        byte[] read = sr.getReadBases();
        byte[] quals = sr.getBaseQualities();

        for (AlignmentBlock ab : sr.getAlignmentBlocks()) {
          len = ab.getLength();
          read_i = ab.getReadStart() - 1;
          ref_i = ab.getReferenceStart() - start_base;

          if (ref_min == -1 || ref_i < ref_min) ref_min = ref_i;

          for (i = read_i, end = read_i + len; i < end; i++, ref_i++) {
            if (ref_i >= 0 && ref_i < coverage_len) {
              if (quals[i] >= MIN_QUALITY) {
                if (verbose_mode)
                  System.err.println(
                      sr.getReadName()
                          + "."
                          + (sr.getReadNegativeStrandFlag() ? "R" : "F")
                          + " hit at "
                          + (ref_i + start_base)
                          + " as="
                          + sr.getAlignmentStart()
                          + " ae="
                          + sr.getAlignmentEnd());
                coverage[ref_i]++;
              } else if (verbose_mode) {
                System.err.println(
                    sr.getReadName()
                        + "."
                        + (sr.getReadNegativeStrandFlag() ? "R" : "F")
                        + " qual_reject at "
                        + (ref_i + start_base)
                        + " as="
                        + sr.getAlignmentStart()
                        + " ae="
                        + sr.getAlignmentEnd());
              }
            }
          }
          if (ref_max == -1 || ref_i > ref_max) ref_max = ref_i;
        }
      }
      sres.close();
      System.err.println(
          "records:"
              + read_count
              + " ref_min:"
              + (ref_min + start_base)
              + " ref_max:"
              + (ref_max + start_base)); // debug

      //
      //  report coverage info:
      //
      for (i = 0; i < coverage.length; i++) {
        if (name != null) ps.print(name + ",");
        ps.println((i + start_base) + "," + coverage[i]); // debug
      }
      if (wf != null) wf.finish();

    } catch (Exception e) {
      System.err.println("ERROR: " + e); // debug
      e.printStackTrace();
    }
  }