private VariantContext getDbsnp(String rsID) { if (dbsnpIterator == null) { if (dbsnp == null) throw new UserException.BadInput( "No dbSNP rod was provided, but one is needed to decipher the correct indel alleles from the HapMap records"); RMDTrackBuilder builder = new RMDTrackBuilder( getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(), getToolkit().getGenomeLocParser(), getToolkit().getArguments().unsafe, getToolkit().getArguments().disableAutoIndexCreationAndLockingWhenReadingRods, null); dbsnpIterator = builder .createInstanceOfTrack(VCFCodec.class, new File(dbsnp.dbsnp.getSource())) .getIterator(); // Note that we should really use some sort of seekable iterator here so that the search // doesn't take forever // (but it's complicated because the hapmap location doesn't match the dbsnp location, so we // don't know where to seek to) } while (dbsnpIterator.hasNext()) { GATKFeature feature = dbsnpIterator.next(); VariantContext vc = (VariantContext) feature.getUnderlyingObject(); if (vc.getID().equals(rsID)) return vc; } return null; }
/**
 * Copies every variant from {@code r} to {@code w}, adding an INFO attribute {@code TAG} built
 * from the BED records that overlap the variant.
 *
 * <p>The output header is a copy of the input header plus the {@code TAG} INFO declaration and
 * command-line / version metadata lines. Variants with no overlapping BED record (or whose
 * records all format to an empty string) are written unchanged.
 *
 * @param inputSource name of the input; not used by this method
 * @param r source of the header and the variants
 * @param w destination writer
 * @throws IOException declared by the overridden method
 */
@Override
protected void doWork(String inputSource, VcfIterator r, VariantContextWriter w)
    throws IOException {
  VCFHeader header = r.getHeader();
  VCFHeader h2 =
      new VCFHeader(header.getMetaDataInInputOrder(), header.getSampleNamesInOrder());
  h2.addMetaDataLine(
      new VCFInfoHeaderLine(
          TAG,
          VCFHeaderLineCount.UNBOUNDED,
          VCFHeaderLineType.String,
          "metadata added from " + TABIX + " . Format was " + FORMAT));
  h2.addMetaDataLine(
      new VCFHeaderLine(
          getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
  h2.addMetaDataLine(
      new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
  h2.addMetaDataLine(
      new VCFHeaderLine(
          getClass().getSimpleName() + "HtsJdkVersion", HtsjdkVersion.getVersion()));
  h2.addMetaDataLine(
      new VCFHeaderLine(getClass().getSimpleName() + "HtsJdkHome", HtsjdkVersion.getHome()));

  SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
  w.writeHeader(h2);

  while (r.hasNext()) {
    VariantContext ctx = progress.watch(r.next());
    Set<String> annotations = new HashSet<String>();

    // Query one base wider than the variant on each side; exact overlap is re-checked below.
    CloseableIterator<BedLine> iter =
        this.bedReader.iterator(ctx.getContig(), ctx.getStart() - 1, ctx.getEnd() + 1);
    try {
      while (iter.hasNext()) {
        BedLine bedLine = iter.next();
        if (!ctx.getContig().equals(bedLine.getContig())) continue;
        // NOTE(review): the two tests below treat the BED interval as 0-based, end-exclusive
        // and the variant as 1-based, inclusive -- confirm against BedLine.
        if (ctx.getStart() - 1 >= bedLine.getEnd()) continue;
        if (ctx.getEnd() - 1 < bedLine.getStart()) continue;
        String newannot = this.parsedFormat.toString(bedLine);
        if (!newannot.isEmpty()) annotations.add(VCFUtils.escapeInfoField(newannot));
      }
    } finally {
      // Release the iterator even when formatting/escaping a BED line throws.
      CloserUtil.close(iter);
    }

    if (annotations.isEmpty()) {
      w.add(ctx);
      continue;
    }

    VariantContextBuilder vcb = new VariantContextBuilder(ctx);
    vcb.attribute(TAG, annotations.toArray());
    w.add(vcb.make());
    incrVariantCount();
    if (checkOutputError()) break;
  }
  progress.finish();
}
@Override public int doWork(String[] args) { boolean compressed = false; int maxRecordsInRAM = 100000; long count = -1L; File fileout = null; com.github.lindenb.jvarkit.util.cli.GetOpt opt = new com.github.lindenb.jvarkit.util.cli.GetOpt(); int c; while ((c = opt.getopt(args, getGetOptDefault() + "o:n:N:T:b")) != -1) { switch (c) { case 'b': compressed = true; break; case 'N': maxRecordsInRAM = Integer.parseInt(opt.getOptArg()); break; case 'n': count = Long.parseLong(opt.getOptArg()); break; case 'o': fileout = new File(opt.getOptArg()); break; case 'T': this.addTmpDirectory(new File(opt.getOptArg())); break; default: { switch (handleOtherOptions(c, opt, null)) { case EXIT_FAILURE: return -1; case EXIT_SUCCESS: return 0; default: break; } } } } if (count < -1L) // -1 == infinite { error("Bad count:" + count); return -1; } SamReader samReader = null; SAMRecordIterator iter = null; SAMFileWriter samWriter = null; Random random = new Random(); CloseableIterator<RandSamRecord> iter2 = null; try { SamFileReaderFactory.setDefaultValidationStringency(ValidationStringency.SILENT); if (opt.getOptInd() == args.length) { info("Reading from stdin"); samReader = SamFileReaderFactory.mewInstance().openStdin(); } else if (opt.getOptInd() + 1 == args.length) { File filename = new File(args[opt.getOptInd()]); info("Reading from " + filename); samReader = SamFileReaderFactory.mewInstance().open(filename); } else { error("Illegal number of arguments."); return -1; } SAMFileHeader header = samReader.getFileHeader(); header = header.clone(); header.setSortOrder(SortOrder.unsorted); header.addComment("Processed with " + getProgramName() + " : " + getProgramCommandLine()); SAMFileWriterFactory sfw = new SAMFileWriterFactory(); sfw.setCreateIndex(false); sfw.setCreateMd5File(false); if (fileout == null) { if (compressed) { samWriter = sfw.makeBAMWriter(header, true, System.out); } else { samWriter = sfw.makeSAMWriter(header, true, System.out); } } else { samWriter = 
sfw.makeSAMOrBAMWriter(header, true, fileout); this.addTmpDirectory(fileout); } iter = samReader.iterator(); SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(samReader.getFileHeader().getSequenceDictionary()); SortingCollection<RandSamRecord> sorter = SortingCollection.newInstance( RandSamRecord.class, new RandSamRecordCodec(header), new RandSamRecordComparator(), maxRecordsInRAM, getTmpDirectories()); sorter.setDestructiveIteration(true); while (iter.hasNext()) { RandSamRecord r = new RandSamRecord(); r.rand_index = random.nextInt(); r.samRecord = progress.watch(iter.next()); sorter.add(r); } iter.close(); iter = null; sorter.doneAdding(); iter2 = sorter.iterator(); if (count == -1) { while (iter2.hasNext()) { samWriter.addAlignment(iter2.next().samRecord); } } else { while (iter2.hasNext() && count > 0) { samWriter.addAlignment(iter2.next().samRecord); count--; } } iter2.close(); iter2 = null; sorter.cleanup(); progress.finish(); } catch (Exception e) { error(e); return -1; } finally { CloserUtil.close(iter); CloserUtil.close(iter2); CloserUtil.close(samReader); CloserUtil.close(samWriter); } return 0; }
/**
 * Transposes a delimited text matrix read from {@code IN} (or stdin when {@code IN} is null)
 * and prints the result on stdout.
 *
 * <p>Every input cell is stored with its (row, column) position in a {@code SortingCollection}
 * ordered by column, then row, then content; iterating the sorted cells yields one output line
 * per input column.
 *
 * @return {@code 0} on success, {@code -1} when {@code DELIM} is not exactly one character or
 *     when any exception occurs
 */
@Override
protected int doWork() {
  if (DELIM.length() != 1) {
    LOG.error("DELIM must have length==1 . Got " + DELIM.length());
    return -1;
  }
  InputStream in = System.in;
  SortingCollection<Cell> sorter = null;
  CloseableIterator<Cell> iter = null;
  final Comparator<Cell> comparator =
      new Comparator<Biostar84786.Cell>() {
        @Override
        public int compare(final Cell o1, final Cell o2) {
          int i;
          i = (o1.col < o2.col ? -1 : o1.col > o2.col ? 1 : 0);
          if (i != 0) return i;
          i = (o1.row < o2.row ? -1 : o1.row > o2.row ? 1 : 0);
          if (i != 0) return i;
          return o1.content.compareTo(o2.content);
        }
      };
  try {
    final char delimiter = DELIM.charAt(0);
    sorter =
        SortingCollection.newInstance(
            Cell.class, new CellCodec(), comparator, super.MAX_RECORDS_IN_RAM);
    sorter.setDestructiveIteration(true);
    if (IN != null) {
      LOG.info("opening " + IN);
      in = IOUtils.openFileForReading(IN);
    }

    long row = 0L;
    long col = 0L;
    StringBuilder b = new StringBuilder();
    for (; ; ) {
      int c = in.read();
      if (c == '\n' || c == -1) {
        // At EOF, emit the pending cell only when the last line actually holds something.
        // Otherwise input ending with '\n' would add a phantom empty cell at (row, 0) and
        // a trailing delimiter on the first output line.
        if (c != -1 || col > 0L || b.length() > 0) {
          sorter.add(new Cell(row, col, b));
        }
        if (c == -1) break;
        row++;
        col = 0;
        b.setLength(0);
        if (row % 10000 == 0) LOG.info("row:" + row);
      } else if (c == delimiter) {
        sorter.add(new Cell(row, col, b));
        b.setLength(0);
        col++;
      } else {
        b.append((char) c);
      }
    }
    sorter.doneAdding();
    if (IN != null) in.close();
    in = null;

    iter = sorter.iterator();
    long curr_col = -1L;
    long x = 0L; // number of cells already printed on the current output line
    for (; ; ) {
      if (!iter.hasNext()) {
        System.out.println();
        break;
      }
      Cell c = iter.next();
      if (c.col != curr_col) {
        // A new input column starts a new output line.
        if (curr_col != -1L) System.out.println();
        x = 0L;
        curr_col = c.col;
      }
      if (x > 0L) System.out.print(DELIM);
      System.out.print(c.content);
      x++;
    }
    iter.close();
    iter = null;
    LOG.info("Done.");
  } catch (Exception e) {
    LOG.error(e, "BOUM");
    return -1;
  } finally {
    CloserUtil.close(iter);
    if (sorter != null) sorter.cleanup();
    // Only close streams this method opened; leave System.in alone.
    if (in != null && in != System.in) CloserUtil.close(in);
  }
  return 0;
}