예제 #1
0
  /**
   * Scans forward through the dbSNP track for the record whose ID equals {@code rsID}.
   *
   * <p>The track iterator is created on the first call and stored in {@code dbsnpIterator}. Every
   * later call resumes from wherever the previous call stopped, so records that were already
   * consumed are never looked at again.
   *
   * @param rsID the identifier to look for
   * @return the next remaining record whose ID equals {@code rsID}, or {@code null} when the
   *     iterator runs out without a match
   * @throws UserException.BadInput if the iterator has to be created but no dbSNP rod was provided
   */
  private VariantContext getDbsnp(String rsID) {
    // Lazily open the dbSNP track the first time a lookup is requested.
    if (dbsnpIterator == null) {
      if (dbsnp == null) {
        throw new UserException.BadInput(
            "No dbSNP rod was provided, but one is needed to decipher the correct indel alleles from the HapMap records");
      }

      RMDTrackBuilder trackBuilder =
          new RMDTrackBuilder(
              getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
              getToolkit().getGenomeLocParser(),
              getToolkit().getArguments().unsafe,
              getToolkit().getArguments().disableAutoIndexCreationAndLockingWhenReadingRods,
              null);
      File dbsnpFile = new File(dbsnp.dbsnp.getSource());
      // A seekable iterator would avoid the linear scan below, but the HapMap location does not
      // match the dbSNP location, so there is no known position to seek to.
      dbsnpIterator = trackBuilder.createInstanceOfTrack(VCFCodec.class, dbsnpFile).getIterator();
    }

    // Linear search from the iterator's current position; stop at the first matching ID.
    VariantContext match = null;
    while (match == null && dbsnpIterator.hasNext()) {
      VariantContext candidate = (VariantContext) dbsnpIterator.next().getUnderlyingObject();
      if (candidate.getID().equals(rsID)) {
        match = candidate;
      }
    }
    return match;
  }
예제 #2
0
  /**
   * Copies every variant from {@code r} to {@code w}, attaching a {@code TAG} INFO attribute built
   * from the BED lines returned by {@code bedReader} for the variant's region.
   *
   * <p>The output header is the input header plus the {@code TAG} INFO declaration and four
   * provenance lines (command line, version, htsjdk version and htsjdk home). For each variant the
   * BED reader is queried on {@code [start - 1, end + 1]} of the variant's contig; returned lines
   * are then filtered again (contig must match, and the two coordinate checks below must pass)
   * before being formatted with {@code parsedFormat}. Non-empty, escaped annotations are collected
   * in a set, so duplicates are written once.
   *
   * <p>Variants without any annotation are written unchanged. Note that {@code
   * incrVariantCount()} and {@code checkOutputError()} are only reached for variants that did
   * receive an annotation.
   *
   * @param inputSource name of the input; not used by this method
   * @param r source of the header and of the variants
   * @param w destination for the new header and the (possibly annotated) variants
   * @throws IOException declared by the overridden method
   */
  @Override
  protected void doWork(String inputSource, VcfIterator r, VariantContextWriter w)
      throws IOException {
    VCFHeader header = r.getHeader();

    VCFHeader h2 = new VCFHeader(header.getMetaDataInInputOrder(), header.getSampleNamesInOrder());
    h2.addMetaDataLine(
        new VCFInfoHeaderLine(
            TAG,
            VCFHeaderLineCount.UNBOUNDED,
            VCFHeaderLineType.String,
            "metadata added from " + TABIX + " . Format was " + FORMAT));
    h2.addMetaDataLine(
        new VCFHeaderLine(
            getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
    h2.addMetaDataLine(
        new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
    h2.addMetaDataLine(
        new VCFHeaderLine(
            getClass().getSimpleName() + "HtsJdkVersion", HtsjdkVersion.getVersion()));
    h2.addMetaDataLine(
        new VCFHeaderLine(getClass().getSimpleName() + "HtsJdkHome", HtsjdkVersion.getHome()));

    SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
    w.writeHeader(h2);
    while (r.hasNext()) {
      VariantContext ctx = progress.watch(r.next());
      Set<String> annotations = new HashSet<String>();

      CloseableIterator<BedLine> iter =
          this.bedReader.iterator(ctx.getContig(), ctx.getStart() - 1, ctx.getEnd() + 1);
      try {
        while (iter.hasNext()) {
          BedLine bedLine = iter.next();

          // Re-check the hits: the query window is one base wider than the variant on each side.
          if (!ctx.getContig().equals(bedLine.getContig())) continue;
          if (ctx.getStart() - 1 >= bedLine.getEnd()) continue;
          if (ctx.getEnd() - 1 < bedLine.getStart()) continue;

          String newannot = this.parsedFormat.toString(bedLine);
          if (!newannot.isEmpty()) annotations.add(VCFUtils.escapeInfoField(newannot));
        }
      } finally {
        // Always release the BED iterator, even when formatting or iteration fails mid-way.
        CloserUtil.close(iter);
      }

      if (annotations.isEmpty()) {
        w.add(ctx);
        continue;
      }
      VariantContextBuilder vcb = new VariantContextBuilder(ctx);
      vcb.attribute(TAG, annotations.toArray());
      w.add(vcb.make());
      incrVariantCount();
      if (checkOutputError()) break;
    }
    progress.finish();
  }
예제 #3
0
  /**
   * Writes the records of a SAM/BAM input in an order determined by a random key per record,
   * optionally keeping only the first {@code count} of them.
   *
   * <p>Options handled here:
   *
   * <ul>
   *   <li>{@code -b}: write BAM instead of SAM when the output goes to stdout
   *   <li>{@code -N n}: maximum number of records the sorting collection keeps in RAM
   *   <li>{@code -n n}: number of records to write; {@code -1} (the default) means all
   *   <li>{@code -o file}: output file; stdout when absent
   *   <li>{@code -T dir}: additional temporary directory
   * </ul>
   *
   * Any other option is delegated to {@code handleOtherOptions}. With no positional argument the
   * input is read from stdin; with exactly one it is read from that file.
   *
   * <p>Every record is paired with {@code random.nextInt()} and pushed into a {@code
   * SortingCollection} using {@code RandSamRecordComparator} (presumably ordering on that random
   * index -- confirm against the comparator). The output header is a clone of the input header
   * marked {@code unsorted} with a provenance comment added.
   *
   * @param args command-line arguments
   * @return {@code 0} on success, or when {@code handleOtherOptions} reports {@code EXIT_SUCCESS};
   *     {@code -1} on a bad count, a wrong number of arguments, or any exception during processing
   */
  @Override
  public int doWork(String[] args) {
    boolean compressed = false;
    int maxRecordsInRAM = 100000;
    long count = -1L;
    File fileout = null;
    com.github.lindenb.jvarkit.util.cli.GetOpt opt =
        new com.github.lindenb.jvarkit.util.cli.GetOpt();
    int c;
    while ((c = opt.getopt(args, getGetOptDefault() + "o:n:N:T:b")) != -1) {
      switch (c) {
        case 'b':
          compressed = true;
          break;
        case 'N':
          maxRecordsInRAM = Integer.parseInt(opt.getOptArg());
          break;
        case 'n':
          count = Long.parseLong(opt.getOptArg());
          break;
        case 'o':
          fileout = new File(opt.getOptArg());
          break;
        case 'T':
          this.addTmpDirectory(new File(opt.getOptArg()));
          break;
        default:
          {
            switch (handleOtherOptions(c, opt, null)) {
              case EXIT_FAILURE:
                return -1;
              case EXIT_SUCCESS:
                return 0;
              default:
                break;
            }
          }
      }
    }
    if (count < -1L) // -1 == infinite
    {
      error("Bad count:" + count);
      return -1;
    }
    SamReader samReader = null;
    SAMRecordIterator iter = null;
    SAMFileWriter samWriter = null;
    Random random = new Random();
    CloseableIterator<RandSamRecord> iter2 = null;
    // Declared outside the try block so the finally clause can remove its temporary files when
    // processing fails part-way through.
    SortingCollection<RandSamRecord> sorter = null;
    try {
      SamFileReaderFactory.setDefaultValidationStringency(ValidationStringency.SILENT);
      if (opt.getOptInd() == args.length) {
        info("Reading from stdin");
        samReader = SamFileReaderFactory.mewInstance().openStdin();
      } else if (opt.getOptInd() + 1 == args.length) {
        File filename = new File(args[opt.getOptInd()]);
        info("Reading from " + filename);
        samReader = SamFileReaderFactory.mewInstance().open(filename);
      } else {
        error("Illegal number of arguments.");
        return -1;
      }
      SAMFileHeader header = samReader.getFileHeader();

      // Work on a copy: the output is no longer sorted and gets a provenance comment.
      header = header.clone();
      header.setSortOrder(SortOrder.unsorted);
      header.addComment("Processed with " + getProgramName() + " : " + getProgramCommandLine());
      SAMFileWriterFactory sfw = new SAMFileWriterFactory();
      sfw.setCreateIndex(false);
      sfw.setCreateMd5File(false);
      if (fileout == null) {
        if (compressed) {
          samWriter = sfw.makeBAMWriter(header, true, System.out);
        } else {
          samWriter = sfw.makeSAMWriter(header, true, System.out);
        }
      } else {
        samWriter = sfw.makeSAMOrBAMWriter(header, true, fileout);
        this.addTmpDirectory(fileout);
      }
      iter = samReader.iterator();
      SAMSequenceDictionaryProgress progress =
          new SAMSequenceDictionaryProgress(samReader.getFileHeader().getSequenceDictionary());

      sorter =
          SortingCollection.newInstance(
              RandSamRecord.class,
              new RandSamRecordCodec(header),
              new RandSamRecordComparator(),
              maxRecordsInRAM,
              getTmpDirectories());
      sorter.setDestructiveIteration(true);
      while (iter.hasNext()) {
        RandSamRecord r = new RandSamRecord();
        r.rand_index = random.nextInt();
        r.samRecord = progress.watch(iter.next());

        sorter.add(r);
      }
      iter.close();
      iter = null;

      sorter.doneAdding();
      iter2 = sorter.iterator();
      if (count == -1) {
        while (iter2.hasNext()) {
          samWriter.addAlignment(iter2.next().samRecord);
        }
      } else {
        while (iter2.hasNext() && count > 0) {
          samWriter.addAlignment(iter2.next().samRecord);
          count--;
        }
      }
      iter2.close();
      iter2 = null;
      sorter.cleanup();
      // Already cleaned up; prevents a second cleanup() in the finally clause.
      sorter = null;
      progress.finish();
    } catch (Exception e) {
      error(e);
      return -1;
    } finally {
      CloserUtil.close(iter);
      CloserUtil.close(iter2);
      try {
        // Only reached with a non-null sorter when processing failed before the normal cleanup.
        if (sorter != null) sorter.cleanup();
      } finally {
        CloserUtil.close(samReader);
        CloserUtil.close(samWriter);
      }
    }
    return 0;
  }
예제 #4
0
  /**
   * Transposes a delimited text table: each input column becomes one output line.
   *
   * <p>The input ({@code IN} when set, otherwise stdin) is read one byte at a time and split into
   * cells on {@code DELIM} (which must be a single character) and on {@code '\n'}. Every cell is
   * stored with its row and column in a {@code SortingCollection} ordered by column, then row, then
   * content. The sorted cells are printed to stdout, joined with {@code DELIM}, with a line break
   * each time the column index changes and one after the last cell.
   *
   * <p>A newline that ends the input terminates the last row; it is not treated as the start of an
   * additional empty row.
   *
   * @return {@code 0} on success; {@code -1} if {@code DELIM} is not exactly one character long or
   *     if any exception occurs while reading, sorting or printing
   */
  @Override
  protected int doWork() {
    if (DELIM.length() != 1) {
      LOG.error("DELIM must have length==1 . Got " + DELIM.length());
      return -1;
    }
    InputStream in = System.in;
    SortingCollection<Cell> sorter = null;
    // Declared here so the finally clause can close it if printing fails.
    CloseableIterator<Cell> iter = null;
    final Comparator<Cell> comparator =
        new Comparator<Biostar84786.Cell>() {
          @Override
          public int compare(final Cell o1, final Cell o2) {
            // Column first so that all cells of one input column come out together.
            int i;
            i = (o1.col < o2.col ? -1 : o1.col > o2.col ? 1 : 0);
            if (i != 0) return i;
            i = (o1.row < o2.row ? -1 : o1.row > o2.row ? 1 : 0);
            if (i != 0) return i;
            return o1.content.compareTo(o2.content);
          }
        };
    try {
      final char delimiter = DELIM.charAt(0);
      sorter =
          SortingCollection.newInstance(
              Cell.class, new CellCodec(), comparator, super.MAX_RECORDS_IN_RAM);
      sorter.setDestructiveIteration(true);
      if (IN != null) {
        LOG.info("opening " + IN);
        in = IOUtils.openFileForReading(IN);
      }
      long row = 0L;
      long col = 0L;
      StringBuilder b = new StringBuilder();
      for (; ; ) {
        int c = in.read();
        if (c == '\n' || c == -1) {
          // End of input right after a newline: nothing is pending, so do not emit an empty cell
          // for a row that does not exist (it would add a trailing empty field to the output).
          if (c == -1 && row > 0L && col == 0L && b.length() == 0) break;
          sorter.add(new Cell(row, col, b));
          row++;
          col = 0;
          b.setLength(0);
          if (c == -1) break;
          if (row % 10000 == 0) LOG.info("row:" + row);
        } else if (c == delimiter) {
          sorter.add(new Cell(row, col, b));
          b.setLength(0);
          col++;
        } else {
          b.append((char) c);
        }
      }
      sorter.doneAdding();
      if (IN != null) in.close();
      in = null;
      iter = sorter.iterator();
      long curr_col = -1L;
      long x = 0L;
      for (; ; ) {

        if (!iter.hasNext()) {
          System.out.println();
          break;
        }
        Cell c = iter.next();
        if (c.col != curr_col) {
          // New input column: finish the current output line and restart the field counter.
          if (curr_col != -1L) System.out.println();
          x = 0L;
          curr_col = c.col;
        }
        if (x > 0L) System.out.print(DELIM);
        System.out.print(c.content);
        x++;
      }
      iter.close();
      iter = null;
      LOG.info("Done.");
    } catch (Exception e) {
      e.printStackTrace();
      LOG.error(e, "BOUM");
      return -1;
    } finally {
      // Non-null only when printing failed before the iterator was closed normally.
      if (iter != null) CloserUtil.close(iter);
      if (sorter != null) sorter.cleanup();
      if (in != null) CloserUtil.close(in);
    }
    return 0;
  }