Esempio n. 1
0
  public Map<String, Object> annotate(
      RefMetaDataTracker tracker,
      AnnotatorCompatible walker,
      ReferenceContext ref,
      Map<String, AlignmentContext> stratifiedContexts,
      VariantContext vc) {
    if (mendelianViolation == null) {
      if (checkAndSetSamples(((Walker) walker).getSampleDB())) {
        mendelianViolation =
            new MendelianViolation(((VariantAnnotator) walker).minGenotypeQualityP);
      } else {
        throw new UserException(
            "Mendelian violation annotation can only be used from the Variant Annotator, and must be provided a valid PED file (-ped) from the command line containing only 1 trio.");
      }
    }

    Map<String, Object> toRet = new HashMap<String, Object>(1);
    boolean hasAppropriateGenotypes =
        vc.hasGenotype(motherId)
            && vc.getGenotype(motherId).hasLikelihoods()
            && vc.hasGenotype(fatherId)
            && vc.getGenotype(fatherId).hasLikelihoods()
            && vc.hasGenotype(childId)
            && vc.getGenotype(childId).hasLikelihoods();
    if (hasAppropriateGenotypes)
      toRet.put(
          "MVLR", mendelianViolation.violationLikelihoodRatio(vc, motherId, fatherId, childId));

    return toRet;
  }
Esempio n. 2
0
  /**
   * Subset VC record if necessary and emit the modified record (provided it satisfies criteria for
   * printing)
   *
   * @param tracker the ROD tracker
   * @param ref reference information
   * @param context alignment info
   * @return 1 if the record was printed to the output file, 0 if otherwise
   */
  @Override
  public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
    if (tracker == null) return 0;

    Collection<VariantContext> vcs =
        tracker.getValues(variantCollection.variants, context.getLocation());

    if (vcs == null || vcs.size() == 0) {
      return 0;
    }

    for (VariantContext vc : vcs) {
      if (MENDELIAN_VIOLATIONS) {
        boolean foundMV = false;
        for (MendelianViolation mv : mvSet) {
          if (mv.isViolation(vc)) {
            foundMV = true;
            // System.out.println(vc.toString());
            if (outMVFile != null)
              outMVFileStream.format(
                  "MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, "
                      + "childG=%s childGL=%s\n",
                  vc.getChr(),
                  vc.getStart(),
                  vc.getReference().getDisplayString(),
                  vc.getAlternateAllele(0).getDisplayString(),
                  vc.getChromosomeCount(vc.getAlternateAllele(0)),
                  mv.getSampleMom(),
                  mv.getSampleDad(),
                  mv.getSampleChild(),
                  vc.getGenotype(mv.getSampleMom()).toBriefString(),
                  vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(),
                  vc.getGenotype(mv.getSampleDad()).toBriefString(),
                  vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(),
                  vc.getGenotype(mv.getSampleChild()).toBriefString(),
                  vc.getGenotype(mv.getSampleChild()).getLikelihoods().getAsString());
          }
        }

        if (!foundMV) break;
      }
      if (DISCORDANCE_ONLY) {
        Collection<VariantContext> compVCs =
            tracker.getValues(discordanceTrack, context.getLocation());
        if (!isDiscordant(vc, compVCs)) return 0;
      }
      if (CONCORDANCE_ONLY) {
        Collection<VariantContext> compVCs =
            tracker.getValues(concordanceTrack, context.getLocation());
        if (!isConcordant(vc, compVCs)) return 0;
      }

      if (alleleRestriction.equals(NumberAlleleRestriction.BIALLELIC) && !vc.isBiallelic())
        continue;

      if (alleleRestriction.equals(NumberAlleleRestriction.MULTIALLELIC) && vc.isBiallelic())
        continue;

      if (!selectedTypes.contains(vc.getType())) continue;

      VariantContext sub = subsetRecord(vc, samples);
      if ((sub.isPolymorphic() || !EXCLUDE_NON_VARIANTS)
          && (!sub.isFiltered() || !EXCLUDE_FILTERED)) {
        for (VariantContextUtils.JexlVCMatchExp jexl : jexls) {
          if (!VariantContextUtils.match(sub, jexl)) {
            return 0;
          }
        }
        if (SELECT_RANDOM_NUMBER) {
          randomlyAddVariant(++variantNumber, sub, ref.getBase());
        } else if (!SELECT_RANDOM_FRACTION
            || (GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) {
          vcfWriter.add(sub);
        }
      }
    }

    return 1;
  }
Esempio n. 3
0
  /** Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher */
  public void initialize() {
    // Get list of samples to include in the output
    List<String> rodNames = Arrays.asList(variantCollection.variants.getName());

    Map<String, VCFHeader> vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames);
    TreeSet<String> vcfSamples =
        new TreeSet<String>(
            SampleUtils.getSampleList(
                vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));

    Collection<String> samplesFromFile = SampleUtils.getSamplesFromFiles(sampleFiles);
    Collection<String> samplesFromExpressions =
        SampleUtils.matchSamplesExpressions(vcfSamples, sampleExpressions);

    // first, add any requested samples
    samples.addAll(samplesFromFile);
    samples.addAll(samplesFromExpressions);
    samples.addAll(sampleNames);

    // if none were requested, we want all of them
    if (samples.isEmpty()) {
      samples.addAll(vcfSamples);
      NO_SAMPLES_SPECIFIED = true;
    }

    // now, exclude any requested samples
    Collection<String> XLsamplesFromFile = SampleUtils.getSamplesFromFiles(XLsampleFiles);
    samples.removeAll(XLsamplesFromFile);
    samples.removeAll(XLsampleNames);

    if (samples.size() == 0 && !NO_SAMPLES_SPECIFIED)
      throw new UserException(
          "All samples requested to be included were also requested to be excluded.");

    for (String sample : samples) logger.info("Including sample '" + sample + "'");

    // if user specified types to include, add these, otherwise, add all possible variant context
    // types to list of vc types to include
    if (TYPES_TO_INCLUDE.isEmpty()) {

      for (VariantContext.Type t : VariantContext.Type.values()) selectedTypes.add(t);

    } else {
      for (VariantContext.Type t : TYPES_TO_INCLUDE) selectedTypes.add(t);
    }
    // Initialize VCF header
    Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), logger);
    headerLines.add(new VCFHeaderLine("source", "SelectVariants"));

    if (KEEP_ORIGINAL_CHR_COUNTS) {
      headerLines.add(
          new VCFFormatHeaderLine("AC_Orig", 1, VCFHeaderLineType.Integer, "Original AC"));
      headerLines.add(
          new VCFFormatHeaderLine("AF_Orig", 1, VCFHeaderLineType.Float, "Original AF"));
      headerLines.add(
          new VCFFormatHeaderLine("AN_Orig", 1, VCFHeaderLineType.Integer, "Original AN"));
    }
    vcfWriter.writeHeader(new VCFHeader(headerLines, samples));

    for (int i = 0; i < SELECT_EXPRESSIONS.size(); i++) {
      // It's not necessary that the user supply select names for the JEXL expressions, since those
      // expressions will only be needed for omitting records.  Make up the select names here.
      selectNames.add(String.format("select-%d", i));
    }

    jexls = VariantContextUtils.initializeMatchExps(selectNames, SELECT_EXPRESSIONS);

    // Look at the parameters to decide which analysis to perform
    DISCORDANCE_ONLY = discordanceTrack.isBound();
    if (DISCORDANCE_ONLY)
      logger.info(
          "Selecting only variants discordant with the track: " + discordanceTrack.getName());

    CONCORDANCE_ONLY = concordanceTrack.isBound();
    if (CONCORDANCE_ONLY)
      logger.info(
          "Selecting only variants concordant with the track: " + concordanceTrack.getName());

    if (MENDELIAN_VIOLATIONS) {
      if (FAMILY_STRUCTURE_FILE != null) {
        try {
          for (final String line : new XReadLines(FAMILY_STRUCTURE_FILE)) {
            MendelianViolation mv =
                new MendelianViolation(line, MENDELIAN_VIOLATION_QUAL_THRESHOLD);
            if (samples.contains(mv.getSampleChild())
                && samples.contains(mv.getSampleDad())
                && samples.contains(mv.getSampleMom())) mvSet.add(mv);
          }
        } catch (FileNotFoundException e) {
          throw new UserException.CouldNotReadInputFile(FAMILY_STRUCTURE_FILE, e);
        }
        if (outMVFile != null)
          try {
            outMVFileStream = new PrintStream(outMVFile);
          } catch (FileNotFoundException e) {
            throw new UserException.CouldNotCreateOutputFile(
                outMVFile, "Can't open output file", e);
          }
      } else
        mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD));
    } else if (!FAMILY_STRUCTURE.isEmpty()) {
      mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD));
      MENDELIAN_VIOLATIONS = true;
    }

    SELECT_RANDOM_NUMBER = numRandom > 0;
    if (SELECT_RANDOM_NUMBER) {
      logger.info("Selecting " + numRandom + " variants at random from the variant track");
      variantArray = new RandomVariantStructure[numRandom];
    }

    SELECT_RANDOM_FRACTION = fractionRandom > 0;
    if (SELECT_RANDOM_FRACTION)
      logger.info(
          "Selecting approximately "
              + 100.0 * fractionRandom
              + "% of the variants at random from the variant track");
  }