private VariantContext getDbsnp(String rsID) { if (dbsnpIterator == null) { if (dbsnp == null) throw new UserException.BadInput( "No dbSNP rod was provided, but one is needed to decipher the correct indel alleles from the HapMap records"); RMDTrackBuilder builder = new RMDTrackBuilder( getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(), getToolkit().getGenomeLocParser(), getToolkit().getArguments().unsafe, getToolkit().getArguments().disableAutoIndexCreationAndLockingWhenReadingRods, null); dbsnpIterator = builder .createInstanceOfTrack(VCFCodec.class, new File(dbsnp.dbsnp.getSource())) .getIterator(); // Note that we should really use some sort of seekable iterator here so that the search // doesn't take forever // (but it's complicated because the hapmap location doesn't match the dbsnp location, so we // don't know where to seek to) } while (dbsnpIterator.hasNext()) { GATKFeature feature = dbsnpIterator.next(); VariantContext vc = (VariantContext) feature.getUnderlyingObject(); if (vc.getID().equals(rsID)) return vc; } return null; }
private Collection<VariantContext> getVariantContexts( RefMetaDataTracker tracker, ReferenceContext ref) { List<Feature> features = tracker.getValues(variants, ref.getLocus()); List<VariantContext> VCs = new ArrayList<VariantContext>(features.size()); for (Feature record : features) { if (VariantContextAdaptors.canBeConvertedToVariantContext(record)) { // we need to special case the HapMap format because indels aren't handled correctly if (record instanceof RawHapMapFeature) { // is it an indel? RawHapMapFeature hapmap = (RawHapMapFeature) record; if (hapmap.getAlleles()[0].equals(RawHapMapFeature.NULL_ALLELE_STRING) || hapmap.getAlleles()[1].equals(RawHapMapFeature.NULL_ALLELE_STRING)) { // get the dbsnp object corresponding to this record (needed to help us distinguish // between insertions and deletions) VariantContext dbsnpVC = getDbsnp(hapmap.getName()); if (dbsnpVC == null || dbsnpVC.isMixed()) continue; Map<String, Allele> alleleMap = new HashMap<String, Allele>(2); alleleMap.put( RawHapMapFeature.DELETION, Allele.create(ref.getBase(), dbsnpVC.isSimpleInsertion())); alleleMap.put( RawHapMapFeature.INSERTION, Allele.create( (char) ref.getBase() + ((RawHapMapFeature) record).getAlleles()[1], !dbsnpVC.isSimpleInsertion())); hapmap.setActualAlleles(alleleMap); // also, use the correct positioning for insertions hapmap.updatePosition(dbsnpVC.getStart()); if (hapmap.getStart() < ref.getWindow().getStart()) { logger.warn( "Hapmap record at " + ref.getLocus() + " represents an indel too large to be converted; skipping..."); continue; } } } // ok, we might actually be able to turn this record in a variant context VariantContext vc = VariantContextAdaptors.toVariantContext(variants.getName(), record, ref); if (vc != null) // sometimes the track has odd stuff in it that can't be converted VCs.add(vc); } } return VCs; }
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if (tracker == null || !BaseUtils.isRegularBase(ref.getBase())) return 0; Collection<VariantContext> contexts = getVariantContexts(tracker, ref); for (VariantContext vc : contexts) { VariantContextBuilder builder = new VariantContextBuilder(vc); // set the appropriate sample name if necessary if (sampleName != null && vc.hasGenotypes() && vc.hasGenotype(variants.getName())) { Genotype g = new GenotypeBuilder(vc.getGenotype(variants.getName())).name(sampleName).make(); builder.genotypes(g); } final VariantContext withID = variantOverlapAnnotator.annotateRsID(tracker, builder.make()); writeRecord(withID, tracker, ref.getLocus()); } return 1; }
private void writeRecord(VariantContext vc, RefMetaDataTracker tracker, GenomeLoc loc) { if (!wroteHeader) { wroteHeader = true; // setup the header fields Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>(); hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), Arrays.asList(variants.getName()))); hInfo.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_KEY)); allowedGenotypeFormatStrings.add(VCFConstants.GENOTYPE_KEY); for (VCFHeaderLine field : hInfo) { if (field instanceof VCFFormatHeaderLine) { allowedGenotypeFormatStrings.add(((VCFFormatHeaderLine) field).getID()); } } samples = new LinkedHashSet<String>(); if (sampleName != null) { samples.add(sampleName); } else { // try VCF first samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getName())); if (samples.isEmpty()) { List<Feature> features = tracker.getValues(variants, loc); if (features.size() == 0) throw new IllegalStateException( "No rod data is present, but we just created a VariantContext"); Feature f = features.get(0); if (f instanceof RawHapMapFeature) samples.addAll(Arrays.asList(((RawHapMapFeature) f).getSampleIDs())); else samples.addAll(vc.getSampleNames()); } } vcfwriter.writeHeader(new VCFHeader(hInfo, samples)); } vc = GATKVariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatStrings); vcfwriter.add(vc); }