コード例 #1
0
ファイル: ReadUtils.java プロジェクト: johandahlberg/gatk
  /**
   * Finds the adaptor boundary around the read and returns the first base inside the adaptor that
   * is closest to the read boundary. If the read is in the positive strand, this is the first base
   * after the end of the fragment (Picard calls it 'insert'), if the read is in the negative
   * strand, this is the first base before the beginning of the fragment.
   *
   * <p>There are two cases we need to treat here:
   *
   * <p>1) Our read is in the reverse strand :
   *
   * <p><----------------------| * |--------------------->
   *
   * <p>in these cases, the adaptor boundary is at the mate start (minus one)
   *
   * <p>2) Our read is in the forward strand :
   *
   * <p>|----------------------> * <----------------------|
   *
   * <p>in these cases the adaptor boundary is at the start of the read plus the inferred insert
   * size (plus one)
   *
   * @param read the read being tested for the adaptor boundary
   * @return the reference coordinate for the adaptor boundary (effectively the first base IN the
   *     adaptor, closest to the read. NULL if the read is unmapped or the mate is mapped to another
   *     contig.
   */
  public static Integer getAdaptorBoundary(final SAMRecord read) {
    final int MAXIMUM_ADAPTOR_LENGTH = 8;
    final int insertSize =
        Math.abs(
            read
                .getInferredInsertSize()); // the inferred insert size can be negative if the mate
                                           // is mapped before the read (so we take the absolute
                                           // value)

    if (insertSize == 0
        || read
            .getReadUnmappedFlag()) // no adaptors in reads with mates in another chromosome or
                                    // unmapped pairs
    return null;

    Integer
        adaptorBoundary; // the reference coordinate for the adaptor boundary (effectively the first
                         // base IN the adaptor, closest to the read)
    if (read.getReadNegativeStrandFlag())
      adaptorBoundary = read.getMateAlignmentStart() - 1; // case 1 (see header)
    else adaptorBoundary = read.getAlignmentStart() + insertSize + 1; // case 2 (see header)

    if ((adaptorBoundary < read.getAlignmentStart() - MAXIMUM_ADAPTOR_LENGTH)
        || (adaptorBoundary > read.getAlignmentEnd() + MAXIMUM_ADAPTOR_LENGTH))
      adaptorBoundary =
          null; // we are being conservative by not allowing the adaptor boundary to go beyond what
                // we belive is the maximum size of an adaptor

    return adaptorBoundary;
  }
コード例 #2
0
    public void set(MouseEvent e, SAMRecord sr) {
      if (sr == null) return;
      StringBuffer text = new StringBuffer();
      text.append("<html>");

      if (sr != null) {
        text.append(
            MessageManager.getString("shortreadtrack.name") + " " + sr.getReadName() + "<br/>");
        text.append(
            MessageManager.getString("shortreadtrack.len") + " " + sr.getReadLength() + "<br/>");
        text.append(
            MessageManager.getString("shortreadtrack.cigar") + " " + sr.getCigarString() + "<br/>");
        text.append(
            MessageManager.getString("shortreadtrack.sequence")
                + " "
                + rerun(sr.getReadString())
                + "<br/>");
        text.append(
            MessageManager.getString("shortreadtrack.paired")
                + " "
                + sr.getReadPairedFlag()
                + "<br/>");
        if (sr.getReadPairedFlag()) {
          if (!sr.getMateUnmappedFlag())
            text.append(
                MessageManager.getString("shortreadtrack.mate")
                    + " "
                    + sr.getMateReferenceName()
                    + ":"
                    + sr.getMateAlignmentStart()
                    + "<br/>");
          else text.append(MessageManager.getString("shortreadtrack.mate_missing") + "<br/>");
          text.append(
              MessageManager.getString("shortreadtrack.second") + " " + sr.getFirstOfPairFlag());
        }
        // text.append("<br/>");
      }
      text.append("</html>");
      if (!text.toString().equals(floater.getText())) {
        floater.setText(text.toString());
        this.pack();
      }
      setLocation(e.getXOnScreen() + 5, e.getYOnScreen() + 5);

      if (!isVisible()) {
        setVisible(true);
      }
    }
コード例 #3
0
ファイル: Biostar59647.java プロジェクト: hangelwen/jvarkit
  @Override
  public int doWork(String[] args) {
    File refFile = null;
    com.github.lindenb.jvarkit.util.cli.GetOpt getopt =
        new com.github.lindenb.jvarkit.util.cli.GetOpt();
    int c;
    while ((c = getopt.getopt(args, "hvL:r:")) != -1) {
      switch (c) {
        case 'h':
          printUsage();
          return 0;
        case 'v':
          System.out.println(getVersion());
          return 0;
        case 'L':
          getLogger().setLevel(java.util.logging.Level.parse(getopt.getOptArg()));
          break;
        case 'r':
          refFile = new File(getopt.getOptArg());
          break;
        case ':':
          System.err.println("Missing argument for option -" + getopt.getOptOpt());
          return -1;
        default:
          System.err.println("Unknown option -" + getopt.getOptOpt());
          return -1;
      }
    }

    if (refFile == null) {
      error("Undefined REF file");
      return -1;
    }
    File bamFile = null;
    if (getopt.getOptInd() + 1 != args.length) {
      info("reading from stdin.");
    } else {
      bamFile = new File(args[getopt.getOptInd()]);
    }

    IndexedFastaSequenceFile indexedFastaSequenceFile = null;
    SAMFileReader samFileReader = null;

    try {
      GenomicSequence genomicSequence = null;
      indexedFastaSequenceFile = new IndexedFastaSequenceFile(refFile);
      SAMFileReader.setDefaultValidationStringency(ValidationStringency.SILENT);
      samFileReader = null;
      if (bamFile == null) {
        samFileReader = new SAMFileReader(System.in);
      } else {
        samFileReader = new SAMFileReader(bamFile);
      }
      XMLOutputFactory xmlfactory = XMLOutputFactory.newInstance();
      XMLStreamWriter w = xmlfactory.createXMLStreamWriter(System.out, "UTF-8");
      w.writeStartDocument("UTF-8", "1.0");
      w.writeStartElement("sam");
      w.writeComment(getProgramCommandLine());
      w.writeAttribute("ref", (bamFile == null ? "stdin" : bamFile.getPath()));
      w.writeAttribute("bam", args[1]);

      SAMRecordIterator iter = samFileReader.iterator();
      while (iter.hasNext()) {
        SAMRecord rec = iter.next();

        final byte readbases[] = rec.getReadBases();
        w.writeStartElement("read");

        w.writeStartElement("name");
        w.writeCharacters(rec.getReadName());
        w.writeEndElement();
        w.writeStartElement("sequence");
        w.writeCharacters(new String(readbases));
        w.writeEndElement();
        w.writeStartElement("flags");
        w.writeAttribute("paired", String.valueOf(rec.getReadPairedFlag()));
        w.writeAttribute(
            "failsVendorQual", String.valueOf(rec.getReadFailsVendorQualityCheckFlag()));
        w.writeAttribute("mapped", String.valueOf(!rec.getReadUnmappedFlag()));
        w.writeAttribute("strand", (rec.getReadNegativeStrandFlag() ? "-" : "+"));

        if (rec.getReadPairedFlag()) {
          w.writeAttribute("mate-mapped", String.valueOf(!rec.getMateUnmappedFlag()));
          w.writeAttribute("mate-strand", (rec.getMateNegativeStrandFlag() ? "-" : "+"));
          w.writeAttribute("proper-pair", String.valueOf(rec.getProperPairFlag()));
        }

        w.writeCharacters(String.valueOf(rec.getFlags()));
        w.writeEndElement();
        if (!rec.getReadUnmappedFlag()) {
          w.writeStartElement("qual");
          w.writeCharacters(String.valueOf(rec.getMappingQuality()));
          w.writeEndElement();

          w.writeStartElement("chrom");
          w.writeAttribute("index", String.valueOf(rec.getReferenceIndex()));
          w.writeCharacters(rec.getReferenceName());
          w.writeEndElement();
          w.writeStartElement("pos");
          w.writeCharacters(String.valueOf(rec.getAlignmentStart()));
          w.writeEndElement();
          w.writeStartElement("cigar");
          w.writeCharacters(rec.getCigarString());
          w.writeEndElement();
        }

        if (!rec.getMateUnmappedFlag()) {
          w.writeStartElement("mate-chrom");
          w.writeAttribute("index", String.valueOf(rec.getMateReferenceIndex()));
          w.writeCharacters(rec.getMateReferenceName());
          w.writeEndElement();
          w.writeStartElement("mate-pos");
          w.writeCharacters(String.valueOf(rec.getMateAlignmentStart()));
          w.writeEndElement();
        }

        if (!rec.getReadUnmappedFlag()) {
          if (genomicSequence == null
              || genomicSequence.getChrom().equals(rec.getReferenceName())) {
            genomicSequence = new GenomicSequence(indexedFastaSequenceFile, rec.getReferenceName());
          }

          w.writeStartElement("align");

          int readIndex = 0;
          int refIndex = rec.getAlignmentStart();

          for (final CigarElement e : rec.getCigar().getCigarElements()) {
            switch (e.getOperator()) {
              case H:
                break; // ignore hard clips
              case P:
                break; // ignore pads
              case I: // cont.
              case S:
                {
                  final int length = e.getLength();
                  for (int i = 0; i < length; ++i) {
                    w.writeEmptyElement(e.getOperator().name());
                    w.writeAttribute("read-index", String.valueOf(readIndex + 1));
                    if (readIndex >= 0 && readIndex < readbases.length) {
                      w.writeAttribute("read-base", String.valueOf((char) (readbases[readIndex])));
                    }
                    readIndex++;
                  }
                  break;
                }
              case N: // cont. -- reference skip
              case D:
                {
                  final int length = e.getLength();
                  for (int i = 0; i < length; ++i) {
                    w.writeEmptyElement(e.getOperator().name());
                    w.writeAttribute("ref-index", String.valueOf(refIndex));
                    if (refIndex >= 1 && refIndex <= genomicSequence.length()) {
                      w.writeAttribute(
                          "ref-base", String.valueOf(genomicSequence.charAt(refIndex - 1)));
                    }
                    refIndex++;
                  }
                  break;
                }
              case M:
              case EQ:
              case X:
                {
                  final int length = e.getLength();
                  for (int i = 0; i < length; ++i) {
                    w.writeEmptyElement(e.getOperator().name());
                    char baseRead = '\0';
                    if (readIndex >= 0 && readIndex < readbases.length) {
                      baseRead = (char) (rec.getReadBases()[readIndex]);
                      w.writeAttribute("read-index", String.valueOf(readIndex + 1));
                      w.writeAttribute("read-base", String.valueOf(baseRead));
                    }
                    w.writeAttribute("ref-index", String.valueOf(refIndex));
                    if (refIndex >= 1 && refIndex <= genomicSequence.length()) {
                      char baseRef = genomicSequence.charAt(refIndex - 1);
                      w.writeAttribute("ref-base", String.valueOf(baseRef));
                      if (Character.toUpperCase(baseRef) != Character.toUpperCase(baseRead)) {
                        w.writeAttribute("mismatch", "true");
                      }
                    }

                    refIndex++;
                    readIndex++;
                  }
                  break;
                }

              default:
                throw new IllegalStateException(
                    "Case statement didn't deal with cigar op: " + e.getOperator());
            }
          }
        }

        w.writeEndElement();

        w.writeEndElement();

        iter.close();
        w.writeEndElement();
      }
      w.writeEndElement();
      w.writeEndDocument();
      w.flush();
      w.close();
    } catch (Exception err) {
      error(err);
      return -1;
    } finally {
      CloserUtil.close(samFileReader);
      CloserUtil.close(indexedFastaSequenceFile);
    }
    return 0;
  }