@Override protected void doWork(VcfIterator r, VariantContextWriter w) throws IOException { long nChanged = 0L; final String TAG = "INDELFIXED"; VCFHeader header = r.getHeader(); VCFHeader h2 = new VCFHeader(header.getMetaDataInInputOrder(), header.getSampleNamesInOrder()); h2.addMetaDataLine( new VCFInfoHeaderLine(TAG, 1, VCFHeaderLineType.String, "Fix Indels for @SolenaLS.")); w.writeHeader(h2); final Pattern dna = Pattern.compile("[ATGCatgc]+"); while (r.hasNext()) { VariantContext ctx = r.next(); VariantContextBuilder b = new VariantContextBuilder(ctx); List<Allele> alleles = ctx.getAlternateAlleles(); if (alleles.size() != 1 || !dna.matcher(ctx.getReference().getBaseString()).matches() || !dna.matcher(alleles.get(0).getBaseString()).matches()) { w.add(ctx); continue; } StringBuffer ref = new StringBuffer(ctx.getReference().getBaseString().toUpperCase()); StringBuffer alt = new StringBuffer(alleles.get(0).getBaseString().toUpperCase()); int start = ctx.getStart(); int end = ctx.getEnd(); boolean changed = false; /** ** we trim on the right side *** */ // REF=TGCTGCGGGGGCCGCTGCGGGGG ALT=TGCTGCGGGGG while (alt.length() > 1 && alt.length() < ref.length() && ref.charAt(ref.length() - 1) == alt.charAt(alt.length() - 1)) { changed = true; ref.setLength(ref.length() - 1); alt.deleteCharAt(alt.length() - 1); end--; } // REF=TGCTGCGGGGG ALT= TGCTGCGGGGGCCGCTGCGGGGG while (ref.length() > 1 && alt.length() > ref.length() && ref.charAt(ref.length() - 1) == alt.charAt(alt.length() - 1)) { changed = true; ref.setLength(ref.length() - 1); alt.deleteCharAt(alt.length() - 1); end--; } /** ** we trim on the left side *** */ // REF=TGCTGCGGGGGCCGCTGCGGGGG ALT=TGCTGCGGGGG while (alt.length() > 1 && alt.length() < ref.length() && ref.charAt(0) == alt.charAt(0)) { changed = true; ref.deleteCharAt(0); alt.deleteCharAt(0); start++; } // REF=TGCTGCGGGGG ALT= TGCTGCGGGGGCCGCTGCGGGGG while (ref.length() > 1 && alt.length() > ref.length() && ref.charAt(0) == alt.charAt(0)) { changed = true; ref.deleteCharAt(0); alt.deleteCharAt(0); start++; } if (!changed) { w.add(ctx); continue; } /* LOG.info(line); LOG.info("ctx.getStart() "+ctx.getStart()); LOG.info("ctx.getEnd() "+ ctx.getEnd()); LOG.info("start " + start); LOG.info("end "+end); LOG.info("ref " + ref.toString()); LOG.info("alt "+alt.toString()); */ Allele newRef = Allele.create(ref.toString(), true); Allele newAlt = Allele.create(alt.toString(), false); Allele newalleles[] = new Allele[] {newRef, newAlt}; b.attribute( TAG, ctx.getReference().getBaseString() + "|" + alleles.get(0).getBaseString() + "|" + ctx.getStart()); b.start(start); b.stop(end); b.alleles(Arrays.asList(newalleles)); nChanged++; VariantContext ctx2 = b.make(); try { w.add(ctx2); } catch (TribbleException err) { error(err, "Cannot convert new context:" + ctx2 + " old context:" + ctx); w.add(ctx); } } info("indels changed:" + nChanged); }