Ejemplo n.º 1
0
  /**
   * Copies every variant from {@code r} to {@code w}, adding an INFO attribute named {@code TAG}
   * to the variants for which at least one record returned by {@code bedReader} passes the
   * overlap test below.
   *
   * <p>The attribute value is the set of distinct, non-empty strings produced by {@code
   * parsedFormat} for the matching records, each escaped with {@code VCFUtils.escapeInfoField}.
   * Variants without any annotation are written unchanged.
   *
   * @param inputSource name of the input; not used by this implementation
   * @param r source of the header and of the variants
   * @param w destination of the new header and of the variants
   * @throws IOException declared by the overridden method
   */
  @Override
  protected void doWork(String inputSource, VcfIterator r, VariantContextWriter w)
      throws IOException {
    VCFHeader header = r.getHeader();

    // The output header is the input header plus the new INFO line and the provenance lines.
    VCFHeader h2 = new VCFHeader(header.getMetaDataInInputOrder(), header.getSampleNamesInOrder());
    h2.addMetaDataLine(
        new VCFInfoHeaderLine(
            TAG,
            VCFHeaderLineCount.UNBOUNDED,
            VCFHeaderLineType.String,
            "metadata added from " + TABIX + " . Format was " + FORMAT));
    h2.addMetaDataLine(
        new VCFHeaderLine(
            getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
    h2.addMetaDataLine(
        new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
    h2.addMetaDataLine(
        new VCFHeaderLine(
            getClass().getSimpleName() + "HtsJdkVersion", HtsjdkVersion.getVersion()));
    h2.addMetaDataLine(
        new VCFHeaderLine(getClass().getSimpleName() + "HtsJdkHome", HtsjdkVersion.getHome()));

    SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
    w.writeHeader(h2);
    while (r.hasNext()) {
      VariantContext ctx = progress.watch(r.next());
      Set<String> annotations = new HashSet<String>();

      // The query window is one base wider than the variant on each side; the exact overlap is
      // re-checked record by record below.
      CloseableIterator<BedLine> iter =
          this.bedReader.iterator(ctx.getContig(), ctx.getStart() - 1, ctx.getEnd() + 1);
      try {
        while (iter.hasNext()) {
          BedLine bedLine = iter.next();

          // NOTE(review): these comparisons look like a 1-based inclusive (VCF) versus 0-based
          // half-open (BED) overlap test - confirm the coordinate convention of BedLine.
          if (!ctx.getContig().equals(bedLine.getContig())) continue;
          if (ctx.getStart() - 1 >= bedLine.getEnd()) continue;
          if (ctx.getEnd() - 1 < bedLine.getStart()) continue;

          String newannot = this.parsedFormat.toString(bedLine);
          if (!newannot.isEmpty()) annotations.add(VCFUtils.escapeInfoField(newannot));
        }
      } finally {
        // Release the iterator even when decoding or formatting a record fails.
        CloserUtil.close(iter);
      }

      if (annotations.isEmpty()) {
        // Nothing to add: the variant is forwarded as is. This path neither increments the
        // variant count nor checks the output for errors, as in the original implementation.
        w.add(ctx);
        continue;
      }
      VariantContextBuilder vcb = new VariantContextBuilder(ctx);
      vcb.attribute(TAG, annotations.toArray());
      w.add(vcb.make());
      incrVariantCount();
      if (checkOutputError()) break;
    }
    progress.finish();
  }
Ejemplo n.º 2
0
  /**
   * Writes to {@code out} the records of {@code VCF} accepted by {@link
   * #isDeNovoCandidate(VariantContext)} and logs how many were written.
   *
   * <p>The writer is closed even when reading or writing a record fails.
   */
  @Override
  public void execute() {
    VariantContextWriter vcw =
        new VariantContextWriterBuilder()
            .setOutputFile(out)
            .clearIndexCreator()
            .setReferenceDictionary(VCF.getFileHeader().getSequenceDictionary())
            .build();

    int num = 0;
    try {
      VCFHeader vh = VCF.getFileHeader();
      vh.addMetaDataLine(
          new VCFInfoHeaderLine(
              "region", 1, VCFHeaderLineType.String, "region type in which the variant occurs"));

      vcw.writeHeader(VCF.getFileHeader());

      // NOTE(review): the region map is loaded but never consulted in this method, and no record
      // receives the "region" INFO attribute declared above - confirm whether annotating the
      // records was intended. The call is kept in case loadRegions() has side effects.
      IntervalTreeMap<String> itm = loadRegions();

      for (VariantContext vc : VCF) {
        if (isDeNovoCandidate(vc)) {
          vcw.add(vc);

          num++;
        }
      }
    } finally {
      vcw.close();
    }

    log.info("num: {}", num);
  }

  /**
   * Tells whether a record is unfiltered, called in the three samples, heterozygous in {@code
   * CHILD}, homozygous reference in {@code MOTHER} and {@code FATHER}, and above the quality and
   * depth thresholds.
   *
   * <p>The GQ (resp. DP) thresholds are only enforced when the child genotype carries a GQ (resp.
   * DP) value; when enforced, all three samples must be strictly above the threshold.
   *
   * @param vc the record to test
   * @return {@code true} if the record passes every test
   */
  private boolean isDeNovoCandidate(VariantContext vc) {
    if (vc.isFiltered()) {
      return false;
    }
    if (!vc.getGenotype(CHILD).isCalled()
        || !vc.getGenotype(MOTHER).isCalled()
        || !vc.getGenotype(FATHER).isCalled()) {
      return false;
    }
    if (!vc.getGenotype(CHILD).getType().equals(GenotypeType.HET)
        || !vc.getGenotype(MOTHER).getType().equals(GenotypeType.HOM_REF)
        || !vc.getGenotype(FATHER).getType().equals(GenotypeType.HOM_REF)) {
      return false;
    }
    if (vc.getGenotype(CHILD).hasAnyAttribute("GQ")
        && !(vc.getGenotype(CHILD).getGQ() > GQ_THRESHOLD
            && vc.getGenotype(MOTHER).getGQ() > GQ_THRESHOLD
            && vc.getGenotype(FATHER).getGQ() > GQ_THRESHOLD)) {
      return false;
    }
    return !vc.getGenotype(CHILD).hasAnyAttribute("DP")
        || (vc.getGenotype(CHILD).getDP() > DP_THRESHOLD
            && vc.getGenotype(MOTHER).getDP() > DP_THRESHOLD
            && vc.getGenotype(FATHER).getDP() > DP_THRESHOLD);
  }
Ejemplo n.º 3
0
  /**
   * Lifts the variants of {@code INPUT} over with the chain file {@code CHAIN}, writes the
   * successfully lifted records, sorted, to {@code OUTPUT} and the others to {@code REJECT}.
   *
   * <p>A record is rejected with {@code FILTER_CANNOT_LIFTOVER} when the chain gives no target
   * interval, and with {@code FILTER_MISMATCHING_REF_ALLELE} when its reference allele differs
   * (ignoring case) from the bases of {@code REFERENCE_SEQUENCE} at the target interval. When the
   * target interval is on the negative strand, the non-symbolic alleles are reverse-complemented.
   *
   * @return always {@code null}
   * @throws IllegalStateException if a lifted interval lands on a contig that is absent from the
   *     target reference
   */
  @Override
  protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
    IOUtil.assertFileIsReadable(CHAIN);
    IOUtil.assertFileIsWritable(OUTPUT);
    IOUtil.assertFileIsWritable(REJECT);

    ////////////////////////////////////////////////////////////////////////
    // Setup the inputs
    ////////////////////////////////////////////////////////////////////////
    final LiftOver liftOver = new LiftOver(CHAIN);
    final VCFFileReader in = new VCFFileReader(INPUT, false);

    logger.info("Loading up the target reference genome.");
    final ReferenceSequenceFileWalker walker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);
    final Map<String, byte[]> refSeqs = new HashMap<>();

    ////////////////////////////////////////////////////////////////////////
    // Setup the outputs
    ////////////////////////////////////////////////////////////////////////
    final VCFHeader inHeader = in.getFileHeader();
    final VCFHeader outHeader = new VCFHeader(inHeader);
    final VariantContextWriter out;
    try {
      // Every contig of the target reference is held in memory for the allele check below.
      for (final SAMSequenceRecord rec : walker.getSequenceDictionary().getSequences()) {
        refSeqs.put(rec.getSequenceName(), walker.get(rec.getSequenceIndex()).getBases());
      }
      outHeader.setSequenceDictionary(walker.getSequenceDictionary());
      out =
          new VariantContextWriterBuilder()
              .setOption(Options.INDEX_ON_THE_FLY)
              .setOutputFile(OUTPUT)
              .setReferenceDictionary(walker.getSequenceDictionary())
              .build();
    } finally {
      // The walker is released only after the last read of its sequence dictionary, instead of
      // being queried after it has been closed.
      CloserUtil.close(walker);
    }
    out.writeHeader(outHeader);

    final VariantContextWriter rejects =
        new VariantContextWriterBuilder()
            .setOutputFile(REJECT)
            .unsetOption(Options.INDEX_ON_THE_FLY)
            .build();
    final VCFHeader rejectHeader = new VCFHeader(in.getFileHeader());
    for (final VCFFilterHeaderLine line : FILTERS) rejectHeader.addMetaDataLine(line);
    rejects.writeHeader(rejectHeader);

    ////////////////////////////////////////////////////////////////////////
    // Read the input VCF, lift the records over and write to the sorting
    // collection.
    ////////////////////////////////////////////////////////////////////////
    long failedLiftover = 0, failedAlleleCheck = 0, total = 0;
    logger.info("Lifting variants over and sorting.");

    final SortingCollection<VariantContext> sorter =
        SortingCollection.newInstance(
            VariantContext.class,
            new VCFRecordCodec(outHeader),
            outHeader.getVCFRecordComparator(),
            MAX_RECORDS_IN_RAM,
            TMP_DIR);

    ProgressLogger progress = new ProgressLogger(logger, 1000000, "read");

    for (final VariantContext ctx : in) {
      ++total;
      final Interval source =
          new Interval(
              ctx.getContig(),
              ctx.getStart(),
              ctx.getEnd(),
              false,
              ctx.getContig() + ":" + ctx.getStart() + "-" + ctx.getEnd());
      final Interval target = liftOver.liftOver(source, 1.0);

      if (target == null) {
        rejects.add(new VariantContextBuilder(ctx).filter(FILTER_CANNOT_LIFTOVER).make());
        failedLiftover++;
      } else {
        // Fix the alleles if we went from positive to negative strand
        final List<Allele> alleles = new ArrayList<>();
        for (final Allele oldAllele : ctx.getAlleles()) {
          if (target.isPositiveStrand() || oldAllele.isSymbolic()) {
            alleles.add(oldAllele);
          } else {
            alleles.add(
                Allele.create(
                    SequenceUtil.reverseComplement(oldAllele.getBaseString()),
                    oldAllele.isReference()));
          }
        }

        // Build the new variant context
        final VariantContextBuilder builder =
            new VariantContextBuilder(
                ctx.getSource(), target.getContig(), target.getStart(), target.getEnd(), alleles);

        builder.id(ctx.getID());
        builder.attributes(ctx.getAttributes());
        builder.genotypes(ctx.getGenotypes());
        builder.filters(ctx.getFilters());
        builder.log10PError(ctx.getLog10PError());

        // Check that the reference allele still agrees with the reference sequence
        boolean mismatchesReference = false;
        for (final Allele allele : builder.getAlleles()) {
          if (allele.isReference()) {
            final byte[] ref = refSeqs.get(target.getContig());
            if (ref == null) {
              // Fail with the name of the contig rather than with a bare NullPointerException.
              throw new IllegalStateException(
                  "Encountered a contig, "
                      + target.getContig()
                      + ", that is not part of the target reference.");
            }
            final String refString =
                StringUtil.bytesToString(ref, target.getStart() - 1, target.length());

            if (!refString.equalsIgnoreCase(allele.getBaseString())) {
              mismatchesReference = true;
            }

            // Only the first reference allele is compared.
            break;
          }
        }

        if (mismatchesReference) {
          rejects.add(new VariantContextBuilder(ctx).filter(FILTER_MISMATCHING_REF_ALLELE).make());
          failedAlleleCheck++;
        } else {
          sorter.add(builder.make());
        }
      }

      progress.record(ctx.getContig(), ctx.getStart());
    }

    final NumberFormat pfmt = new DecimalFormat("0.0000%");
    // An empty input would otherwise divide zero by zero and report a NaN percentage.
    final double failedFraction =
        total == 0 ? 0.0 : (failedLiftover + failedAlleleCheck) / (double) total;
    final String pct = pfmt.format(failedFraction);
    logger.info("Processed ", total, " variants.");
    logger.info(Long.toString(failedLiftover), " variants failed to liftover.");
    logger.info(
        Long.toString(failedAlleleCheck),
        " variants lifted over but had mismatching reference alleles after lift over.");
    logger.info(pct, " of variants were not successfully lifted over and written to the output.");

    rejects.close();
    in.close();

    ////////////////////////////////////////////////////////////////////////
    // Write the sorted outputs to the final output file
    ////////////////////////////////////////////////////////////////////////
    sorter.doneAdding();
    progress = new ProgressLogger(logger, 1000000, "written");
    logger.info("Writing out sorted records to final VCF.");

    for (final VariantContext ctx : sorter) {
      out.add(ctx);
      progress.record(ctx.getContig(), ctx.getStart());
    }
    out.close();
    sorter.cleanup();

    return null;
  }
  /**
   * Writes to {@code out} the variants of {@code in} having at least one prediction whose SO
   * terms are accepted by {@code hasUserTem}; the selection is negated when {@code
   * isInverseResult()} is true.
   *
   * <p>The SnpEff predictions are examined first, then the VEP ones, then those of {@code
   * MyPredictionParser}; a later source is only consulted when the earlier ones did not match.
   *
   * @param source name of the input; not used by this implementation
   * @param in source of the header and of the variants
   * @param out destination of the new header and of the selected variants
   * @throws IOException declared by the overridden method
   */
  @Override
  protected void doWork(String source, VcfIterator in, VariantContextWriter out)
      throws IOException {
    final VCFHeader inputHeader = in.getHeader();

    // Output header: a copy of the input header plus four provenance lines.
    final VCFHeader outputHeader = new VCFHeader(inputHeader);
    final String toolName = getClass().getSimpleName();
    outputHeader.addMetaDataLine(
        new VCFHeaderLine(toolName + "CmdLine", String.valueOf(getProgramCommandLine())));
    outputHeader.addMetaDataLine(
        new VCFHeaderLine(toolName + "Version", String.valueOf(getVersion())));
    outputHeader.addMetaDataLine(
        new VCFHeaderLine(toolName + "HtsJdkVersion", HtsjdkVersion.getVersion()));
    outputHeader.addMetaDataLine(
        new VCFHeaderLine(toolName + "HtsJdkHome", HtsjdkVersion.getHome()));
    out.writeHeader(outputHeader);

    // The three parsers are built from the input header, not from the modified copy.
    final VepPredictionParser vepParser = new VepPredictionParser(inputHeader);
    final SnpEffPredictionParser snpEffparser = new SnpEffPredictionParser(inputHeader);
    final MyPredictionParser myPredParser = new MyPredictionParser(inputHeader);
    final SAMSequenceDictionaryProgress progress =
        new SAMSequenceDictionaryProgress(inputHeader.getSequenceDictionary());

    while (in.hasNext()) {
      this.checkKnimeCancelled();

      final VariantContext variant = progress.watch(in.next());

      boolean matched = false;
      for (SnpEffPredictionParser.SnpEffPrediction prediction :
          snpEffparser.getPredictions(variant)) {
        matched = hasUserTem(prediction.getSOTerms());
        if (matched) {
          break;
        }
      }
      if (!matched) {
        for (VepPredictionParser.VepPrediction prediction : vepParser.getPredictions(variant)) {
          matched = hasUserTem(prediction.getSOTerms());
          if (matched) {
            break;
          }
        }
      }
      if (!matched) {
        for (MyPredictionParser.MyPrediction prediction : myPredParser.getPredictions(variant)) {
          matched = hasUserTem(prediction.getSOTerms());
          if (matched) {
            break;
          }
        }
      }

      // In inverse mode the variants that did NOT match are the ones written.
      final boolean selected = isInverseResult() ? !matched : matched;
      if (selected) {
        incrVariantCount();
        out.add(variant);
      }
      if (checkOutputError()) {
        break;
      }
    }
    progress.finish();
  }
Ejemplo n.º 5
0
  /**
   * Copies every variant from {@code r} to {@code w} after transferring the ID and/or the INFO
   * fields listed in {@code INFO_IDS} from the records of the tabix-indexed VCF {@code TABIX}
   * having the same start and end.
   *
   * <p>When a tabix record has the same reference and alternate alleles as the input variant it
   * is the only one used; otherwise every record at the same position is a candidate, subject to
   * {@code REF_ALLELE_MATTERS} and {@code ALT_ALLELES_MATTERS}. An existing INFO value is only
   * overwritten when {@code REPLACE_INFO_FIELD} is set. When {@code ALT_CONFLICT_FLAG} is not
   * null, that flag is set on variants for which a used record has different alternate alleles.
   *
   * <p>The tabix reader is closed even when processing fails.
   *
   * @param r source of the header and of the variants
   * @param w destination of the new header and of the variants
   * @throws IOException if the tabix file cannot be opened or read
   */
  @Override
  protected void doWork(VcfIterator r, VariantContextWriter w) throws IOException {
    AbstractVCFCodec codeIn3 = VCFUtils.createDefaultVCFCodec();
    String line;

    StringWriter sw = new StringWriter();
    LOG.info("opening tabix file: " + this.TABIX);
    TabixReader tabix = new TabixReader(this.TABIX);
    try {
      // Collect the header lines of the tabix file so that they can be parsed as a VCF header.
      while ((line = tabix.readLine()) != null) {
        if (!line.startsWith(VCFHeader.HEADER_INDICATOR)) {
          break;
        }
        sw.append(line).append("\n");
      }
      VCFHeader header3 =
          (VCFHeader)
              codeIn3.readActualHeader(
                  new LineIteratorImpl(
                      LineReaderUtil.fromBufferedStream(
                          new ByteArrayInputStream(sw.toString().getBytes()))));
      VCFHeader header1 = r.getHeader();

      VCFHeader h2 =
          new VCFHeader(header1.getMetaDataInInputOrder(), header1.getSampleNamesInOrder());
      for (String infoId : this.INFO_IDS) {
        VCFInfoHeaderLine vihl = header3.getInfoHeaderLine(infoId);
        if (vihl == null) {
          LOG.warn("Not INFO=" + infoId + " in " + TABIX);
          continue;
        }
        if (h2.getInfoHeaderLine(infoId) != null) {
          LOG.warn("Input already contains INFO=" + vihl);
        }
        h2.addMetaDataLine(vihl);
      }

      if (ALT_CONFLICT_FLAG != null) {
        // A Flag INFO field carries no value, so its declared count is 0.
        h2.addMetaDataLine(
            new VCFInfoHeaderLine(
                ALT_CONFLICT_FLAG,
                0,
                VCFHeaderLineType.Flag,
                "conflict ALT allele with " + this.TABIX));
      }

      w.writeHeader(h2);
      while (r.hasNext()) {
        VariantContext ctx1 = r.next();

        VariantContextBuilder vcb = new VariantContextBuilder(ctx1);
        String line2;
        String BEST_ID = null;
        boolean best_id_match_alt = false;

        List<VariantContext> variantsList = new ArrayList<VariantContext>();

        final String region = ctx1.getChr() + ":" + (ctx1.getStart()) + "-" + (ctx1.getEnd());
        int[] array = tabix.parseReg(region);
        TabixReader.Iterator iter = null;

        if (array != null
            && array.length == 3
            && array[0] != -1
            && array[1] >= 0
            && array[2] >= 0) {
          iter = tabix.query(array[0], array[1], array[2]);
        } else {
          LOG.info("Cannot get " + region);
        }

        while (iter != null && (line2 = iter.next()) != null) {
          VariantContext ctx3 = codeIn3.decode(line2);
          if (ctx3.getStart() != ctx1.getStart()) continue;
          if (ctx3.getEnd() != ctx1.getEnd()) continue;

          if (ctx1.getReference().equals(ctx3.getReference())
              && ctx1.getAlternateAlleles().equals(ctx3.getAlternateAlleles())) {
            // Exact allele match: discard the partial matches collected so far.
            variantsList.clear();
            variantsList.add(ctx3);
            break;
          } else {
            variantsList.add(ctx3);
          }
        }

        for (VariantContext ctx3 : variantsList) {

          if (this.REF_ALLELE_MATTERS && !ctx1.getReference().equals(ctx3.getReference())) {
            continue;
          }
          if (this.ALT_ALLELES_MATTERS
              && !ctx1.getAlternateAlleles().equals(ctx3.getAlternateAlleles())) {
            continue;
          }

          if (ctx3.getID() != null && this.REPLACE_ID) {
            if (BEST_ID != null && best_id_match_alt) {
              // An ID coming from a record with identical ALT alleles is never replaced.
            } else {
              BEST_ID = ctx3.getID();
              best_id_match_alt = ctx1.getAlternateAlleles().equals(ctx3.getAlternateAlleles());
            }
          }

          for (String id : this.INFO_IDS) {
            Object info3 = ctx3.getAttribute(id);
            if (info3 == null) {
              continue;
            }
            Object info1 = ctx1.getAttribute(id);
            if (info1 != null && !this.REPLACE_INFO_FIELD) {
              continue;
            }

            vcb.attribute(id, info3);
          }

          if (ALT_CONFLICT_FLAG != null
              && !ctx1.getAlternateAlleles().equals(ctx3.getAlternateAlleles())) {
            vcb.attribute(ALT_CONFLICT_FLAG, true);
          }
        }
        if (BEST_ID != null) {
          vcb.id(BEST_ID);
        }
        w.add(vcb.make());
      }
    } finally {
      tabix.close();
    }
  }
Ejemplo n.º 6
0
  /**
   * Copies every variant from {@code r} to {@code w}, trimming the bases shared by the reference
   * and the alternate allele of bi-allelic indels.
   *
   * <p>Only variants with exactly one alternate allele, whose two alleles are made of A, T, G and
   * C only and differ in length, can be trimmed. Identical trailing bases are removed first, then
   * identical leading bases, always leaving at least one base in the shorter allele. A trimmed
   * variant gets the INFO attribute {@code INDELFIXED} holding the original
   * "REF|ALT|start"; every other variant is written unchanged.
   *
   * @param r source of the header and of the variants
   * @param w destination of the new header and of the variants
   * @throws IOException declared by the overridden method
   */
  @Override
  protected void doWork(VcfIterator r, VariantContextWriter w) throws IOException {
    final String TAG = "INDELFIXED";
    long fixedCount = 0L;

    final VCFHeader inputHeader = r.getHeader();
    final VCFHeader outputHeader =
        new VCFHeader(inputHeader.getMetaDataInInputOrder(), inputHeader.getSampleNamesInOrder());
    outputHeader.addMetaDataLine(
        new VCFInfoHeaderLine(TAG, 1, VCFHeaderLineType.String, "Fix Indels for @SolenaLS."));
    w.writeHeader(outputHeader);

    final Pattern dna = Pattern.compile("[ATGCatgc]+");
    while (r.hasNext()) {
      final VariantContext ctx = r.next();
      final List<Allele> altAlleles = ctx.getAlternateAlleles();

      final boolean plainBiallelic =
          altAlleles.size() == 1
              && dna.matcher(ctx.getReference().getBaseString()).matches()
              && dna.matcher(altAlleles.get(0).getBaseString()).matches();
      if (!plainBiallelic) {
        w.add(ctx);
        continue;
      }

      final String originalRef = ctx.getReference().getBaseString();
      final String originalAlt = altAlleles.get(0).getBaseString();
      final StringBuilder ref = new StringBuilder(originalRef.toUpperCase());
      final StringBuilder alt = new StringBuilder(originalAlt.toUpperCase());

      // Trimming removes one base from both alleles at a time, so their length difference never
      // changes: a single test "lengths differ and the shorter allele keeps more than one base"
      // covers both the deletion (ALT shorter) and the insertion (REF shorter) case.

      // Right side, e.g. REF=TGCTGCGGGGGCCGCTGCGGGGG ALT=TGCTGCGGGGG
      int trimmedRight = 0;
      while (ref.length() != alt.length()
          && Math.min(ref.length(), alt.length()) > 1
          && ref.charAt(ref.length() - 1) == alt.charAt(alt.length() - 1)) {
        ref.setLength(ref.length() - 1);
        alt.setLength(alt.length() - 1);
        trimmedRight++;
      }

      // Left side
      int trimmedLeft = 0;
      while (ref.length() != alt.length()
          && Math.min(ref.length(), alt.length()) > 1
          && ref.charAt(0) == alt.charAt(0)) {
        ref.deleteCharAt(0);
        alt.deleteCharAt(0);
        trimmedLeft++;
      }

      if (trimmedRight == 0 && trimmedLeft == 0) {
        w.add(ctx);
        continue;
      }

      final Allele newRef = Allele.create(ref.toString(), true);
      final Allele newAlt = Allele.create(alt.toString(), false);
      final List<Allele> trimmedAlleles = Arrays.asList(newRef, newAlt);

      // Each base removed on the left moves the start forward, each base removed on the right
      // moves the end backward.
      final VariantContextBuilder builder = new VariantContextBuilder(ctx);
      builder.attribute(TAG, originalRef + "|" + originalAlt + "|" + ctx.getStart());
      builder.start(ctx.getStart() + trimmedLeft);
      builder.stop(ctx.getEnd() - trimmedRight);
      builder.alleles(trimmedAlleles);

      fixedCount++;

      final VariantContext ctx2 = builder.make();
      try {
        w.add(ctx2);
      } catch (TribbleException err) {
        // The rewritten record could not be written: report it and emit the original instead.
        error(err, "Cannot convert new context:" + ctx2 + " old context:" + ctx);
        w.add(ctx);
      }
    }

    info("indels changed:" + fixedCount);
  }