private void loadVCFListFile(ResourceLocator locator, List<Track> newTracks, Genome genome) throws IOException, TribbleIndexNotFoundException { TribbleListFeatureSource src = new TribbleListFeatureSource(locator.getPath(), genome); VCFHeader header = (VCFHeader) src.getHeader(); // Test if the input VCF file contains methylation rate data: // This is determined by testing for the presence of two sample format fields: MR and GB, used // in the // rendering of methylation rate. // MR is the methylation rate on a scale of 0 to 100% and GB is the number of bases that pass // filter for the position. GB is needed to avoid displaying positions for which limited // coverage // prevents reliable estimation of methylation rate. boolean enableMethylationRateSupport = (header.getFormatHeaderLine("MR") != null && header.getFormatHeaderLine("GB") != null); List<String> allSamples = new ArrayList(header.getGenotypeSamples()); VariantTrack t = new VariantTrack(locator, src, allSamples, enableMethylationRateSupport); // VCF tracks handle their own margin t.setMargin(0); newTracks.add(t); }
/**
 * Streams the input VCF and, for each variant, collects formatted annotations from BED lines
 * that overlap it (via {@code this.bedReader}), writing them into the INFO field {@code TAG}.
 * Variants with no overlapping BED annotation are passed through unchanged.
 */
@Override
protected void doWork(String inputSource, VcfIterator r, VariantContextWriter w)
    throws IOException {
  VCFHeader header = r.getHeader();
  // Clone the input header and declare the new INFO field plus tool-provenance metadata lines.
  VCFHeader h2 = new VCFHeader(header.getMetaDataInInputOrder(), header.getSampleNamesInOrder());
  h2.addMetaDataLine(
      new VCFInfoHeaderLine(
          TAG,
          VCFHeaderLineCount.UNBOUNDED,
          VCFHeaderLineType.String,
          "metadata added from " + TABIX + " . Format was " + FORMAT));
  h2.addMetaDataLine(
      new VCFHeaderLine(
          getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
  h2.addMetaDataLine(
      new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
  h2.addMetaDataLine(
      new VCFHeaderLine(
          getClass().getSimpleName() + "HtsJdkVersion", HtsjdkVersion.getVersion()));
  h2.addMetaDataLine(
      new VCFHeaderLine(getClass().getSimpleName() + "HtsJdkHome", HtsjdkVersion.getHome()));
  SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
  w.writeHeader(h2);
  while (r.hasNext()) {
    VariantContext ctx = progress.watch(r.next());
    Set<String> annotations = new HashSet<String>();
    // Query the BED reader with a 1bp pad on each side, then re-check overlap exactly below.
    CloseableIterator<BedLine> iter =
        this.bedReader.iterator(ctx.getContig(), ctx.getStart() - 1, ctx.getEnd() + 1);
    while (iter.hasNext()) {
      BedLine bedLine = iter.next();
      if (!ctx.getContig().equals(bedLine.getContig())) continue;
      // Overlap tests below shift the variant's 1-based coordinates by -1 to compare
      // against the BED line's coordinates — assumes BED 0-based half-open; TODO confirm
      // against BedLine's contract.
      if (ctx.getStart() - 1 >= bedLine.getEnd()) continue;
      if (ctx.getEnd() - 1 < bedLine.getStart()) continue;
      String newannot = this.parsedFormat.toString(bedLine);
      // Escape the value so it is safe inside a VCF INFO field.
      if (!newannot.isEmpty()) annotations.add(VCFUtils.escapeInfoField(newannot));
    }
    CloserUtil.close(iter);
    if (annotations.isEmpty()) {
      // No overlapping annotation: pass the variant through untouched.
      w.add(ctx);
      continue;
    }
    VariantContextBuilder vcb = new VariantContextBuilder(ctx);
    vcb.attribute(TAG, annotations.toArray());
    w.add(vcb.make());
    incrVariantCount();
    if (checkOutputError()) break;
  }
  progress.finish();
}
/**
 * Validates the SnpEff rod binding and its VCF header, and copies the SnpEff version and
 * command-line header lines (under renamed keys) into the output VCF header. Sets
 * {@code canAnnotate = false} and returns early whenever a validation step fails.
 */
@Override
public void initialize(
    AnnotatorCompatible walker, GenomeAnalysisEngine toolkit, Set<VCFHeaderLine> headerLines) {
  // Make sure that we actually have a valid SnpEff rod binding (just in case the user
  // specified -A SnpEff without providing a SnpEff rod via --snpEffFile):
  if (!isValidRodBinding(walker.getSnpEffRodBinding())) {
    canAnnotate = false;
    return;
  }

  RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding();

  // Make sure that the SnpEff version number and command-line header lines are present in
  // the VCF header of the SnpEff rod, and that the file was generated by a supported
  // version of SnpEff:
  VCFHeader snpEffVCFHeader =
      GATKVCFUtils.getVCFHeadersFromRods(toolkit, Arrays.asList(snpEffRodBinding.getName()))
          .get(snpEffRodBinding.getName());
  VCFHeaderLine snpEffVersionLine =
      snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_VERSION_LINE_KEY);
  VCFHeaderLine snpEffCommandLine =
      snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_COMMAND_LINE_KEY);

  if (!isValidSnpEffVersionAndCommandLine(snpEffVersionLine, snpEffCommandLine)) {
    canAnnotate = false;
    return;
  }

  // If everything looks ok, add the SnpEff version number and command-line header lines to
  // the header of the VCF output file, changing the key names so that our output file won't
  // be mistaken in the future for a SnpEff output file:
  headerLines.add(
      new VCFHeaderLine(OUTPUT_VCF_HEADER_VERSION_LINE_KEY, snpEffVersionLine.getValue()));
  headerLines.add(
      new VCFHeaderLine(OUTPUT_VCF_HEADER_COMMAND_LINE_KEY, snpEffCommandLine.getValue()));

  // Can only be called from VariantAnnotator.
  // NOTE(review): this type check runs AFTER the output header lines were already added
  // above, so a non-VariantAnnotator caller still gets the new header lines even though
  // annotation is disabled. Also, the `walker != null` branch below is moot: walker was
  // already dereferenced at the top of this method and would have thrown NPE if null.
  // Confirm whether this check should be hoisted to the start of the method.
  if (!(walker instanceof VariantAnnotator)) {
    if (walker != null)
      logger.warn(
          "Annotation will not be calculated, must be called from VariantAnnotator, not "
              + walker.getClass().getName());
    else logger.warn("Annotation will not be calculated, must be called from VariantAnnotator");
    canAnnotate = false;
    return;
  }
}
/**
 * Writes to {@code out} the unfiltered trio sites where the CHILD genotype is HET and both
 * MOTHER and FATHER are HOM_REF, subject to GQ and DP thresholds, and logs how many records
 * were written.
 */
@Override
public void execute() {
  VariantContextWriter vcw =
      new VariantContextWriterBuilder()
          .setOutputFile(out)
          .clearIndexCreator()
          .setReferenceDictionary(VCF.getFileHeader().getSequenceDictionary())
          .build();

  VCFHeader vh = VCF.getFileHeader();
  // NOTE(review): this "region" INFO line is declared in the header, but nothing below
  // ever sets a "region" attribute on the written variants — confirm whether region
  // annotation was intended but left unfinished.
  vh.addMetaDataLine(
      new VCFInfoHeaderLine(
          "region", 1, VCFHeaderLineType.String, "region type in which the variant occurs"));
  // `vh` aliases VCF.getFileHeader(), so the header written here includes the line added above.
  vcw.writeHeader(VCF.getFileHeader());

  // NOTE(review): `itm` is loaded but never read in this method — confirm whether it should
  // be used (presumably with the unused "region" INFO field above).
  IntervalTreeMap<String> itm = loadRegions();

  int num = 0;
  for (VariantContext vc : VCF) {
    // Keep unfiltered sites with a fully-called trio: child HET, both parents HOM_REF.
    // NOTE(review): the hasAnyAttribute("GQ"/"DP") presence test is made only on the CHILD
    // genotype, yet the thresholds are applied to all three samples; a parent genotype
    // missing GQ/DP would fail the > THRESHOLD comparison rather than being skipped —
    // confirm this is the intended behavior.
    if (!vc.isFiltered()
        && vc.getGenotype(CHILD).isCalled()
        && vc.getGenotype(MOTHER).isCalled()
        && vc.getGenotype(FATHER).isCalled()
        && vc.getGenotype(CHILD).getType().equals(GenotypeType.HET)
        && vc.getGenotype(MOTHER).getType().equals(GenotypeType.HOM_REF)
        && vc.getGenotype(FATHER).getType().equals(GenotypeType.HOM_REF)
        && (!vc.getGenotype(CHILD).hasAnyAttribute("GQ")
            || (vc.getGenotype(CHILD).getGQ() > GQ_THRESHOLD
                && vc.getGenotype(MOTHER).getGQ() > GQ_THRESHOLD
                && vc.getGenotype(FATHER).getGQ() > GQ_THRESHOLD))
        && (!vc.getGenotype(CHILD).hasAnyAttribute("DP")
            || (vc.getGenotype(CHILD).getDP() > DP_THRESHOLD
                && vc.getGenotype(MOTHER).getDP() > DP_THRESHOLD
                && vc.getGenotype(FATHER).getDP() > DP_THRESHOLD))) {
      vcw.add(vc);
      num++;
    }
  }
  vcw.close();

  log.info("num: {}", num);
}
/**
 * Bundles a VCF header with a list of variant contexts for testing. When any of the supplied
 * contexts carry genotypes, the header is rebuilt so its sample list reflects exactly the
 * samples present in {@code vcs}; otherwise the header is stored as given.
 */
public VariantContextTestData(final VCFHeader header, final List<VariantContext> vcs) {
  final Set<String> sampleNames = new HashSet<String>();
  for (final VariantContext context : vcs) {
    if (context.hasGenotypes()) {
      sampleNames.addAll(context.getSampleNames());
    }
  }
  if (sampleNames.isEmpty()) {
    this.header = header;
  } else {
    this.header = new VCFHeader(header.getMetaDataInSortedOrder(), sampleNames);
  }
  this.vcs = vcs;
}
public static void assertEquals(final VCFHeader actual, final VCFHeader expected) { Assert.assertEquals( actual.getMetaDataInSortedOrder().size(), expected.getMetaDataInSortedOrder().size(), "No VCF header lines"); // for some reason set.equals() is returning false but all paired elements are .equals(). // Perhaps compare to is busted? // Assert.assertEquals(actual.getMetaDataInInputOrder(), expected.getMetaDataInInputOrder()); final List<VCFHeaderLine> actualLines = new ArrayList<VCFHeaderLine>(actual.getMetaDataInSortedOrder()); final List<VCFHeaderLine> expectedLines = new ArrayList<VCFHeaderLine>(expected.getMetaDataInSortedOrder()); for (int i = 0; i < actualLines.size(); i++) { Assert.assertEquals(actualLines.get(i), expectedLines.get(i), "VCF header lines"); } }
@Override protected Object doWork() { IOUtil.assertFileIsReadable(INPUT); IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE); IOUtil.assertFileIsReadable(CHAIN); IOUtil.assertFileIsWritable(OUTPUT); IOUtil.assertFileIsWritable(REJECT); //////////////////////////////////////////////////////////////////////// // Setup the inputs //////////////////////////////////////////////////////////////////////// final LiftOver liftOver = new LiftOver(CHAIN); final VCFFileReader in = new VCFFileReader(INPUT, false); logger.info("Loading up the target reference genome."); final ReferenceSequenceFileWalker walker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE); final Map<String, byte[]> refSeqs = new HashMap<>(); for (final SAMSequenceRecord rec : walker.getSequenceDictionary().getSequences()) { refSeqs.put(rec.getSequenceName(), walker.get(rec.getSequenceIndex()).getBases()); } CloserUtil.close(walker); //////////////////////////////////////////////////////////////////////// // Setup the outputs //////////////////////////////////////////////////////////////////////// final VCFHeader inHeader = in.getFileHeader(); final VCFHeader outHeader = new VCFHeader(inHeader); outHeader.setSequenceDictionary(walker.getSequenceDictionary()); final VariantContextWriter out = new VariantContextWriterBuilder() .setOption(Options.INDEX_ON_THE_FLY) .setOutputFile(OUTPUT) .setReferenceDictionary(walker.getSequenceDictionary()) .build(); out.writeHeader(outHeader); final VariantContextWriter rejects = new VariantContextWriterBuilder() .setOutputFile(REJECT) .unsetOption(Options.INDEX_ON_THE_FLY) .build(); final VCFHeader rejectHeader = new VCFHeader(in.getFileHeader()); for (final VCFFilterHeaderLine line : FILTERS) rejectHeader.addMetaDataLine(line); rejects.writeHeader(rejectHeader); //////////////////////////////////////////////////////////////////////// // Read the input VCF, lift the records over and write to the sorting // collection. 
//////////////////////////////////////////////////////////////////////// long failedLiftover = 0, failedAlleleCheck = 0, total = 0; logger.info("Lifting variants over and sorting."); final SortingCollection<VariantContext> sorter = SortingCollection.newInstance( VariantContext.class, new VCFRecordCodec(outHeader), outHeader.getVCFRecordComparator(), MAX_RECORDS_IN_RAM, TMP_DIR); ProgressLogger progress = new ProgressLogger(logger, 1000000, "read"); for (final VariantContext ctx : in) { ++total; final Interval source = new Interval( ctx.getContig(), ctx.getStart(), ctx.getEnd(), false, ctx.getContig() + ":" + ctx.getStart() + "-" + ctx.getEnd()); final Interval target = liftOver.liftOver(source, 1.0); if (target == null) { rejects.add(new VariantContextBuilder(ctx).filter(FILTER_CANNOT_LIFTOVER).make()); failedLiftover++; } else { // Fix the alleles if we went from positive to negative strand final List<Allele> alleles = new ArrayList<>(); for (final Allele oldAllele : ctx.getAlleles()) { if (target.isPositiveStrand() || oldAllele.isSymbolic()) { alleles.add(oldAllele); } else { alleles.add( Allele.create( SequenceUtil.reverseComplement(oldAllele.getBaseString()), oldAllele.isReference())); } } // Build the new variant context final VariantContextBuilder builder = new VariantContextBuilder( ctx.getSource(), target.getContig(), target.getStart(), target.getEnd(), alleles); builder.id(ctx.getID()); builder.attributes(ctx.getAttributes()); builder.genotypes(ctx.getGenotypes()); builder.filters(ctx.getFilters()); builder.log10PError(ctx.getLog10PError()); // Check that the reference allele still agrees with the reference sequence boolean mismatchesReference = false; for (final Allele allele : builder.getAlleles()) { if (allele.isReference()) { final byte[] ref = refSeqs.get(target.getContig()); final String refString = StringUtil.bytesToString(ref, target.getStart() - 1, target.length()); if (!refString.equalsIgnoreCase(allele.getBaseString())) { mismatchesReference = 
true; } break; } } if (mismatchesReference) { rejects.add(new VariantContextBuilder(ctx).filter(FILTER_MISMATCHING_REF_ALLELE).make()); failedAlleleCheck++; } else { sorter.add(builder.make()); } } progress.record(ctx.getContig(), ctx.getStart()); } final NumberFormat pfmt = new DecimalFormat("0.0000%"); final String pct = pfmt.format((failedLiftover + failedAlleleCheck) / (double) total); logger.info("Processed ", total, " variants."); logger.info(Long.toString(failedLiftover), " variants failed to liftover."); logger.info( Long.toString(failedAlleleCheck), " variants lifted over but had mismatching reference alleles after lift over."); logger.info(pct, " of variants were not successfully lifted over and written to the output."); rejects.close(); in.close(); //////////////////////////////////////////////////////////////////////// // Write the sorted outputs to the final output file //////////////////////////////////////////////////////////////////////// sorter.doneAdding(); progress = new ProgressLogger(logger, 1000000, "written"); logger.info("Writing out sorted records to final VCF."); for (final VariantContext ctx : sorter) { out.add(ctx); progress.record(ctx.getContig(), ctx.getStart()); } out.close(); sorter.cleanup(); return null; }
/**
 * Filters variants on the Sequence Ontology terms found in their SnpEff, VEP, or custom
 * prediction annotations: a variant is kept when any of its predictions carries a user-selected
 * SO term (or, when {@code isInverseResult()}, when none does).
 */
@Override
protected void doWork(String source, VcfIterator in, VariantContextWriter out)
    throws IOException {
  try {
    VCFHeader header = in.getHeader();
    // Clone the input header and record tool-provenance metadata lines.
    VCFHeader h2 = new VCFHeader(header);
    h2.addMetaDataLine(
        new VCFHeaderLine(
            getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
    h2.addMetaDataLine(
        new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
    h2.addMetaDataLine(
        new VCFHeaderLine(
            getClass().getSimpleName() + "HtsJdkVersion", HtsjdkVersion.getVersion()));
    h2.addMetaDataLine(
        new VCFHeaderLine(getClass().getSimpleName() + "HtsJdkHome", HtsjdkVersion.getHome()));
    out.writeHeader(h2);
    // One parser per supported annotation dialect, all configured from the input header.
    final VepPredictionParser vepParser = new VepPredictionParser(header);
    final SnpEffPredictionParser snpEffparser = new SnpEffPredictionParser(header);
    final MyPredictionParser myPredParser = new MyPredictionParser(header);
    SAMSequenceDictionaryProgress progress =
        new SAMSequenceDictionaryProgress(header.getSequenceDictionary());
    while (in.hasNext()) {
      this.checkKnimeCancelled();
      VariantContext ctx = progress.watch(in.next());
      // Check SnpEff first, then VEP, then the custom parser, short-circuiting on first hit.
      boolean keep = false;
      for (SnpEffPredictionParser.SnpEffPrediction pred : snpEffparser.getPredictions(ctx)) {
        if (hasUserTem(pred.getSOTerms())) {
          keep = true;
          break;
        }
      }
      if (!keep) {
        for (VepPredictionParser.VepPrediction pred : vepParser.getPredictions(ctx)) {
          if (hasUserTem(pred.getSOTerms())) {
            keep = true;
            break;
          }
        }
      }
      if (!keep) {
        for (MyPredictionParser.MyPrediction pred : myPredParser.getPredictions(ctx)) {
          if (hasUserTem(pred.getSOTerms())) {
            keep = true;
            break;
          }
        }
      }
      if (isInverseResult()) keep = !keep;
      if (keep) {
        incrVariantCount();
        out.add(ctx);
      }
      if (checkOutputError()) break;
    }
    progress.finish();
  } finally {
    // NOTE(review): this finally block is empty — nothing is closed or cleaned up here;
    // confirm whether cleanup was intended (the caller presumably owns in/out).
  }
}
/**
 * Merges the sample columns of exactly two coordinate-sorted VCFs: sites present in both files
 * with identical biallelic REF/ALT are merged (GT-only FORMAT, INFO dropped) and written to the
 * output; all other sites are skipped. Duplicate sample IDs across the two inputs are rejected.
 */
@Override
public void runCommand() {
  logger.info("MergeVCFColumnsCommand");

  /*
   * Assumptions
   * (1) Only two vcfs that are sorted with the same contig order
   * (2) if contigs on it same order, then we will just skip that contig
   * (3) No overlapping samples allowed
   *
   * Output:
   * A vcf where intersecting sites are merged together and will only return biallelic markers
   * the info field will be cleared
   * the only GT FORMAT field will be there
   */
  Collection<File> vcfs = applicationOptions.getVcfs();
  String outfile = applicationOptions.getOutFile();

  if (vcfs.size() != 2) {
    throw new IllegalArgumentException("This function requires exactly two vcfs");
  }

  Iterator<File> vcfFileIter = vcfs.iterator();
  File vcf1 = vcfFileIter.next();
  File vcf2 = vcfFileIter.next();
  VCFFileReader reader1 = new VCFFileReader(vcf1, false);
  VCFFileReader reader2 = new VCFFileReader(vcf2, false);
  Iterator<VariantContext> iter1 = reader1.iterator();
  Iterator<VariantContext> iter2 = reader2.iterator();
  VariantContextComparator comparator = new VariantContextComparator();

  /*
   * Merge headers: all samples from file 1 followed by all samples from file 2.
   */
  VCFHeader header1 = reader1.getFileHeader();
  VCFHeader header2 = reader2.getFileHeader();
  List<String> samples1 = header1.getGenotypeSamples();
  List<String> samples2 = header2.getGenotypeSamples();
  List<String> mergedSamples = new ArrayList<>(samples1.size() + samples2.size());
  mergedSamples.addAll(samples1);
  mergedSamples.addAll(samples2);

  // Validate that there are no duplicates
  HashSet<String> sampleSet = new HashSet<String>();
  for (String id : mergedSamples) {
    if (sampleSet.contains(id)) {
      throw new IllegalArgumentException("Duplicate id found: " + id);
    } else {
      sampleSet.add(id);
    }
  }

  // Output header carries only a GT FORMAT line plus the contig lines of file 1.
  HashSet<VCFHeaderLine> meta = new HashSet<>();
  meta.add(new VCFFormatHeaderLine("GT", 1, VCFHeaderLineType.String, "GT"));
  meta.addAll(header1.getContigLines());
  VCFHeader mergedHeader = new VCFHeader(meta, mergedSamples);

  /*
   * Create encoder
   */
  VCFEncoder encoder = new VCFEncoder(mergedHeader, false, false);

  BufferedWriter writer = null;
  try {
    // Write block-compressed output when the target name ends in .gz, plain text otherwise.
    if (outfile.endsWith(".gz")) {
      BlockCompressedOutputStream outstream = new BlockCompressedOutputStream(new File(outfile));
      writer = new BufferedWriter(new OutputStreamWriter(outstream));
    } else {
      writer =
          Files.newBufferedWriter(
              Paths.get(outfile),
              Charset.defaultCharset(),
              StandardOpenOption.CREATE,
              StandardOpenOption.TRUNCATE_EXISTING);
    }

    /*
     * Write header
     */
    VCFHeaderWriter.writeHeader(writer, mergedHeader);
    logger.info("Wrote header");

    VariantContext previous1 = null;
    VariantContext previous2 = null;
    int count = 0;
    int countFile1 = 0;
    int countFile2 = 0;
    // usePreviousN means: the last record read from file N was ahead of the other file's
    // cursor and must be re-compared on the next iteration instead of reading a new one.
    boolean usePrevious1 = false;
    boolean usePrevious2 = false;

    while (iter1.hasNext() || iter2.hasNext()) {
      if ((iter1.hasNext() || usePrevious1) && (iter2.hasNext() || usePrevious2)) {
        VariantContext variant1 = null;
        VariantContext variant2 = null;
        // if(usePrevious1 == true && usePrevious2 == true &&
        //    comparator.compare(previous1,previous2) != 0) {
        //   //then skip both
        //   usePrevious1 = false;
        //   usePrevious2 = false;
        // }
        if (usePrevious1) {
          variant1 = previous1;
        } else {
          variant1 = iter1.next();
          countFile1++;
        }
        if (usePrevious2) {
          variant2 = previous2;
        } else {
          variant2 = iter2.next();
          countFile2++;
        }

        // check that variants are ordered correctly
        if (previous1 != null
            && previous1 != variant1
            && comparator.compare(previous1, variant1) > 0) {
          throw new IllegalStateException(
              previous1.getContig()
                  + ":"
                  + previous1.getStart()
                  + " > "
                  + variant1.getContig()
                  + ":"
                  + variant1.getStart());
        }
        if (previous2 != null
            && previous2 != variant2
            && comparator.compare(previous2, variant2) > 0) {
          throw new IllegalStateException(
              previous2.getContig()
                  + ":"
                  + previous2.getStart()
                  + " > "
                  + variant2.getContig()
                  + ":"
                  + variant2.getStart());
        }

        int cmp = comparator.compare(variant1, variant2);
        if (cmp < 0) {
          // variant1 is behind: advance file 1, keep variant2 for the next comparison.
          if (usePrevious1 == true && usePrevious2 == true) {
            // variant1 < variant2
            // we need to go to next variant in vcf1
            usePrevious1 = false;
          }
          usePrevious2 = true;
        } else if (cmp > 0) {
          // variant2 is behind: advance file 2, keep variant1 for the next comparison.
          if (usePrevious1 == true && usePrevious2 == true) {
            // variant1 > variant2
            // we need to go to next variant in vcf2
            usePrevious2 = false;
          }
          usePrevious1 = true;
        } else {
          // they equal position
          usePrevious1 = false;
          usePrevious2 = false;
          if (variant1.isBiallelic()
              && variant2.isBiallelic()
              && variant1.getReference().equals(variant2.getReference())
              && variant1.getAlternateAllele(0).equals(variant2.getAlternateAllele(0))) {
            // both variants are bialleleic and the reference and alternative alleles match
            count++;
            if (count % 10000 == 0) {
              logger.info(count + " mergeable variants found");
            }
            VariantContext merged = VariantContextMerger.merge(variant1, variant2);
            writer.write(encoder.encode(merged));
            writer.write("\n");
          } else {
            // skip if they do not equal (non-biallelic or allele mismatch at same position)
          }
        }
        previous1 = variant1;
        previous2 = variant2;
      } else if (iter1.hasNext()) {
        // just skip remaining variants (file 2 exhausted); still verify ordering.
        VariantContext current = iter1.next();
        countFile1++;
        if (previous1 != null && current != null && comparator.compare(previous1, current) > 0) {
          throw new IllegalStateException(
              previous1.getContig()
                  + ":"
                  + previous1.getStart()
                  + " > "
                  + current.getContig()
                  + ":"
                  + current.getStart());
        }
        previous1 = current;
      } else if (iter2.hasNext()) {
        // just skip remaining variants (file 1 exhausted); still verify ordering.
        // fixed bug/ was iter1 changed to iter2
        VariantContext current = iter2.next();
        countFile2++;
        if (previous2 != null && current != null && comparator.compare(previous2, current) > 0) {
          throw new IllegalStateException(
              previous2.getContig()
                  + ":"
                  + previous2.getStart()
                  + " > "
                  + current.getContig()
                  + ":"
                  + current.getStart());
        }
        previous2 = current;
      } else {
        throw new IllegalStateException("Error should not of reached this point");
      }
    }
    // NOTE(review): reader1/reader2 are closed only on the success path — an exception
    // above leaves them open (only `writer` is closed in the finally block); confirm.
    reader1.close();
    reader2.close();
    logger.info(count + " merged variants");
    logger.info(countFile1 + " variants in " + vcf1.getAbsolutePath());
    logger.info(countFile2 + " variants in " + vcf2.getAbsolutePath());
  } catch (Exception e) {
    // NOTE(review): all failures (including the IllegalStateExceptions thrown above) are
    // swallowed here and only printed, so the command still reports "finished" — confirm
    // whether errors should propagate instead.
    e.printStackTrace();
  } finally {
    if (writer != null) {
      try {
        logger.info("Flushing writer");
        writer.close();
      } catch (IOException e) {
        e.printStackTrace();
      }
    }
  }

  logger.info("finished merging vcfs");
}
/**
 * Annotates each input variant with selected INFO fields (and optionally the ID) copied from a
 * matching record in a tabix-indexed VCF ({@code this.TABIX}). A candidate record must share
 * start and end; exact REF+ALT matches are preferred (the candidate list collapses to that one
 * record). Optionally flags variants whose ALT alleles conflict with the tabix record.
 */
@Override
protected void doWork(VcfIterator r, VariantContextWriter w) throws IOException {
  AbstractVCFCodec codeIn3 = VCFUtils.createDefaultVCFCodec();
  String line;

  StringWriter sw = new StringWriter();
  LOG.info("opening tabix file: " + this.TABIX);
  TabixReader tabix = new TabixReader(this.TABIX);

  // Accumulate the "##"-prefixed header lines of the tabix VCF, then parse them into header3.
  while ((line = tabix.readLine()) != null) {
    if (!line.startsWith(VCFHeader.HEADER_INDICATOR)) {
      break;
    }
    sw.append(line).append("\n");
  }

  VCFHeader header3 =
      (VCFHeader)
          codeIn3.readActualHeader(
              new LineIteratorImpl(
                  LineReaderUtil.fromBufferedStream(
                      new ByteArrayInputStream(sw.toString().getBytes()))));
  VCFHeader header1 = r.getHeader();

  VCFHeader h2 =
      new VCFHeader(header1.getMetaDataInInputOrder(), header1.getSampleNamesInOrder());
  // Copy the declarations of the requested INFO fields from the tabix VCF into the output
  // header, warning when a field is absent there or already declared in the input.
  for (String infoId : this.INFO_IDS) {
    VCFInfoHeaderLine vihl = header3.getInfoHeaderLine(infoId);
    if (vihl == null) {
      LOG.warn("Not INFO=" + infoId + " in " + TABIX);
      continue;
    }
    if (h2.getInfoHeaderLine(infoId) != null) {
      LOG.warn("Input already contains INFO=" + vihl);
    }
    h2.addMetaDataLine(vihl);
  }

  if (ALT_CONFLICT_FLAG != null) {
    h2.addMetaDataLine(
        new VCFInfoHeaderLine(
            ALT_CONFLICT_FLAG,
            1,
            VCFHeaderLineType.Flag,
            "conflict ALT allele with " + this.TABIX));
  }

  w.writeHeader(h2);
  while (r.hasNext()) {
    VariantContext ctx1 = r.next();
    VariantContextBuilder vcb = new VariantContextBuilder(ctx1);
    String line2;
    String BEST_ID = null;
    boolean best_id_match_alt = false;

    List<VariantContext> variantsList = new ArrayList<VariantContext>();
    // Query the tabix index over the variant's span; parseReg returns {tid, beg, end}.
    int[] array =
        tabix.parseReg(ctx1.getChr() + ":" + (ctx1.getStart()) + "-" + (ctx1.getEnd()));
    TabixReader.Iterator iter = null;

    if (array != null && array.length == 3 && array[0] != -1 && array[1] >= 0 && array[2] >= 0) {
      iter = tabix.query(array[0], array[1], array[2]);
    } else {
      LOG.info("Cannot get " + ctx1.getChr() + ":" + (ctx1.getStart()) + "-" + (ctx1.getEnd()));
    }

    // Collect candidate records sharing start+end; an exact REF+ALT match wins outright
    // (the list is cleared and only that record kept).
    while (iter != null && (line2 = iter.next()) != null) {
      VariantContext ctx3 = codeIn3.decode(line2);
      if (ctx3.getStart() != ctx1.getStart()) continue;
      if (ctx3.getEnd() != ctx1.getEnd()) continue;

      if (ctx1.getReference().equals(ctx3.getReference())
          && ctx1.getAlternateAlleles().equals(ctx3.getAlternateAlleles())) {
        variantsList.clear();
        variantsList.add(ctx3);
        break;
      } else {
        variantsList.add(ctx3);
      }
    }

    for (VariantContext ctx3 : variantsList) {
      // Optional strict allele matching.
      if (this.REF_ALLELE_MATTERS && !ctx1.getReference().equals(ctx3.getReference())) {
        continue;
      }
      if (this.ALT_ALLELES_MATTERS
          && !ctx1.getAlternateAlleles().equals(ctx3.getAlternateAlleles())) {
        continue;
      }

      // Pick an ID for the output: once an ID from an ALT-matching record is found, keep it.
      if (ctx3.getID() != null && this.REPLACE_ID) {
        if (BEST_ID != null && best_id_match_alt) {
          // nothing
        } else {
          BEST_ID = ctx3.getID();
          best_id_match_alt = ctx1.getAlternateAlleles().equals(ctx3.getAlternateAlleles());
        }
      }

      // Copy the requested INFO values, honoring REPLACE_INFO_FIELD for existing attributes.
      for (String id : this.INFO_IDS) {
        Object info3 = ctx3.getAttribute(id);
        if (info3 == null) {
          continue;
        }
        Object info1 = ctx1.getAttribute(id);
        if (info1 != null && !this.REPLACE_INFO_FIELD) {
          continue;
        }
        vcb.attribute(id, info3);
      }

      if (ALT_CONFLICT_FLAG != null
          && !ctx1.getAlternateAlleles().equals(ctx3.getAlternateAlleles())) {
        vcb.attribute(ALT_CONFLICT_FLAG, true);
      }
    }
    if (BEST_ID != null) {
      vcb.id(BEST_ID);
    }
    w.add(vcb.make());
  }
  tabix.close();
}
/**
 * Converts a sequence of genotypes into a named R-style list vector: one named element per
 * sample, each itself a named list with "ID", "GT", and — when a header is available — one
 * entry per declared FORMAT field, converted according to that field's declared type.
 * Throws IllegalStateException for FORMAT types other than String/Float/Integer/Character.
 */
@Override
public ListVector convert(Iterable<Genotype> genotypes) {
  NamedBuilder fullBuilder = ListVector.newNamedBuilder();
  // add a named element in the build for each sample
  for (Genotype gt : genotypes) {
    NamedBuilder itemBuilder = ListVector.newNamedBuilder();
    // add id
    itemBuilder.add("ID", gt.getSampleName());
    if (header == null) {
      // No header: only the genotype string can be emitted.
      itemBuilder.add("GT", getGT(gt));
    } else {
      itemBuilder.add("GT", getGT(gt));
      Collection<VCFFormatHeaderLine> formatLines = header.getFormatHeaderLines();
      for (VCFFormatHeaderLine hl : formatLines) {
        String key = hl.getID();
        VCFHeaderLineType type = hl.getType();
        // GT was already added above.
        if (key.equals("GT")) {
          continue;
        }
        // NOTE(review): each case below handles the runtime shapes the attribute value may
        // take (List, array, scalar, or raw String). The generic List casts are unchecked —
        // assumes the attribute values match the header-declared type; TODO confirm.
        Object val = gt.getAnyAttribute(key);
        switch (type) {
          case String:
            {
              if (val instanceof List<?>) {
                List<String> items = (List<String>) val;
                StringArrayVector vec = new StringArrayVector(items);
                itemBuilder.add(key, vec);
              } else if (val instanceof String[]) {
                String[] items = (String[]) val;
                StringArrayVector vec = new StringArrayVector(items);
                itemBuilder.add(key, vec);
              } else if (val instanceof String) {
                String items = (String) val;
                StringArrayVector vec = new StringArrayVector(items);
                itemBuilder.add(key, vec);
              }
              break;
            }
          case Float:
            {
              if (val instanceof List<?>) {
                List<Double> items = (List<Double>) val;
                DoubleArrayVector vec = new DoubleArrayVector(items);
                itemBuilder.add(key, vec);
              } else if (val instanceof Double[]) {
                double[] items = ArrayUtils.toPrimitive((Double[]) val);
                DoubleArrayVector vec = new DoubleArrayVector(items);
                itemBuilder.add(key, vec);
              } else if (val instanceof Double) {
                Double items = (Double) val;
                DoubleArrayVector vec = new DoubleArrayVector(items);
                itemBuilder.add(key, vec);
              } else if (val instanceof String) {
                // Un-parsed attribute: convert the raw string to a number.
                Double items = Double.parseDouble((String) val);
                DoubleArrayVector vec = new DoubleArrayVector(items);
                itemBuilder.add(key, vec);
              }
              break;
            }
          case Integer:
            {
              if (val instanceof List<?>) {
                List<Integer> items = (List<Integer>) val;
                int[] arr = new int[items.size()];
                for (int i = 0; i < items.size(); i++) {
                  arr[i] = items.get(i);
                }
                IntArrayVector vec = new IntArrayVector(arr);
                itemBuilder.add(key, vec);
              } else if (val instanceof Integer[]) {
                int[] items = ArrayUtils.toPrimitive((Integer[]) val);
                IntArrayVector vec = new IntArrayVector(items);
                itemBuilder.add(key, vec);
              } else if (val instanceof Integer) {
                Integer items = (Integer) val;
                IntArrayVector vec = new IntArrayVector(items);
                itemBuilder.add(key, vec);
              } else if (val instanceof String) {
                Integer items = Integer.parseInt((String) val);
                IntArrayVector vec = new IntArrayVector(items);
                itemBuilder.add(key, vec);
              }
              break;
            }
          case Character:
            {
              // NOTE(review): unlike the other cases, this re-reads the attribute and casts
              // it straight to char — a non-Character value would throw ClassCastException;
              // confirm whether the List/String shapes can occur here too.
              char value = (char) gt.getAnyAttribute(key);
              itemBuilder.add(key, new String(new char[] {value}));
              break;
            }
          default:
            throw new IllegalStateException(type + " is not supported");
        }
      }
    }
    fullBuilder.add(gt.getSampleName(), itemBuilder.build());
  }
  return fullBuilder.build();
}
/**
 * Normalizes single-ALT indels by trimming bases shared between REF and ALT from both ends
 * (adjusting start/end accordingly) and records the original alleles in the INDELFIXED INFO
 * field. Variants that are multi-allelic, contain non-ACGT bases, or need no trimming are
 * passed through unchanged.
 */
@Override
protected void doWork(VcfIterator r, VariantContextWriter w) throws IOException {
  long nChanged = 0L;
  final String TAG = "INDELFIXED";
  VCFHeader header = r.getHeader();

  VCFHeader h2 = new VCFHeader(header.getMetaDataInInputOrder(), header.getSampleNamesInOrder());
  h2.addMetaDataLine(
      new VCFInfoHeaderLine(TAG, 1, VCFHeaderLineType.String, "Fix Indels for @SolenaLS."));
  w.writeHeader(h2);

  final Pattern dna = Pattern.compile("[ATGCatgc]+");
  while (r.hasNext()) {
    VariantContext ctx = r.next();
    VariantContextBuilder b = new VariantContextBuilder(ctx);
    List<Allele> alleles = ctx.getAlternateAlleles();
    // Only handle biallelic sites whose REF and single ALT are plain ACGT sequences.
    if (alleles.size() != 1
        || !dna.matcher(ctx.getReference().getBaseString()).matches()
        || !dna.matcher(alleles.get(0).getBaseString()).matches()) {
      w.add(ctx);
      continue;
    }

    StringBuffer ref = new StringBuffer(ctx.getReference().getBaseString().toUpperCase());
    StringBuffer alt = new StringBuffer(alleles.get(0).getBaseString().toUpperCase());

    int start = ctx.getStart();
    int end = ctx.getEnd();

    boolean changed = false;

    /* we trim on the right side */
    // Deletion case (ALT shorter than REF): drop shared trailing bases, shrinking `end`.
    // REF=TGCTGCGGGGGCCGCTGCGGGGG ALT=TGCTGCGGGGG
    while (alt.length() > 1
        && alt.length() < ref.length()
        && ref.charAt(ref.length() - 1) == alt.charAt(alt.length() - 1)) {
      changed = true;
      ref.setLength(ref.length() - 1);
      alt.deleteCharAt(alt.length() - 1);
      end--;
    }

    // Insertion case (ALT longer than REF): same right-side trim.
    // REF=TGCTGCGGGGG ALT= TGCTGCGGGGGCCGCTGCGGGGG
    while (ref.length() > 1
        && alt.length() > ref.length()
        && ref.charAt(ref.length() - 1) == alt.charAt(alt.length() - 1)) {
      changed = true;
      ref.setLength(ref.length() - 1);
      alt.deleteCharAt(alt.length() - 1);
      end--;
    }

    /* we trim on the left side */
    // Deletion case: drop shared leading bases, advancing `start`.
    // REF=TGCTGCGGGGGCCGCTGCGGGGG ALT=TGCTGCGGGGG
    while (alt.length() > 1 && alt.length() < ref.length() && ref.charAt(0) == alt.charAt(0)) {
      changed = true;
      ref.deleteCharAt(0);
      alt.deleteCharAt(0);
      start++;
    }

    // Insertion case: same left-side trim.
    // REF=TGCTGCGGGGG ALT= TGCTGCGGGGGCCGCTGCGGGGG
    while (ref.length() > 1 && alt.length() > ref.length() && ref.charAt(0) == alt.charAt(0)) {
      changed = true;
      ref.deleteCharAt(0);
      alt.deleteCharAt(0);
      start++;
    }

    if (!changed) {
      // Nothing was trimmed: emit the original record.
      w.add(ctx);
      continue;
    }

    Allele newRef = Allele.create(ref.toString(), true);
    Allele newAlt = Allele.create(alt.toString(), false);
    Allele newalleles[] = new Allele[] {newRef, newAlt};
    // Preserve the original REF|ALT|start so the change is traceable from the INFO field.
    b.attribute(
        TAG,
        ctx.getReference().getBaseString()
            + "|"
            + alleles.get(0).getBaseString()
            + "|"
            + ctx.getStart());
    b.start(start);
    b.stop(end);
    b.alleles(Arrays.asList(newalleles));
    nChanged++;
    VariantContext ctx2 = b.make();
    try {
      w.add(ctx2);
    } catch (TribbleException err) {
      // Fall back to the original record if the rebuilt context is rejected.
      error(err, "Cannot convert new context:" + ctx2 + " old context:" + ctx);
      w.add(ctx);
    }
  }
  info("indels changed:" + nChanged);
}