/**
 * Both CollectTargetedPCRMetrics and CalculateHybridSelection metrics share virtually identical
 * program structures except for the name of their targeting mechanisms (e.g. bait set or amplicon
 * set). The shared behavior of these programs is encapsulated in CollectTargetedMetrics which is
 * then subclassed by CalculateHsMetrics and CollectTargetedPcrMetrics.
 *
 * <p>This program verifies the input parameters to TargetMetricsCollector and converts all files to
 * the format desired by TargetMetricsCollector. Then it instantiates a TargetMetricsCollector and
 * collects metric information for all reads in the INPUT sam file.
 */
public abstract class CollectTargetedMetrics extends CommandLineProgram {

  private static final Log log = Log.getInstance(CollectTargetedMetrics.class);

  /**
   * The interval file to be fed to TargetMetricsCollector
   *
   * @return An interval file that denotes the intervals of the regions targeted by the probes for
   *     this run that is passed to the TargetMetricsCollector produced by makeCollector
   */
  protected abstract File getProbeIntervals();

  /** @return The name of the probe set used in this run; getProbeIntervals().getName() is a reasonable default. */
  protected abstract String getProbeSetName();

  /**
   * A factory method for the TargetMetricsCollector to use this time. Examples of
   * TargetMetricsCollector: (TargetedPcrMetricsCollector, HsMetricsCalculator)
   *
   * @return A TargetMetricsCollector to which we will pass SAMRecords
   */
  protected abstract TargetMetricsCollector makeCollector(
      final Set<MetricAccumulationLevel> accumulationLevels,
      final List<SAMReadGroupRecord> samRgRecords,
      final ReferenceSequenceFile refFile,
      final File perTargetCoverage,
      final File targetIntervals,
      final File probeIntervals,
      final String probeSetName);

  @Option(
      shortName = "TI",
      doc = "An interval list file that contains the locations of the targets.")
  public File TARGET_INTERVALS;

  @Option(
      shortName = StandardOptionDefinitions.INPUT_SHORT_NAME,
      doc = "An aligned SAM or BAM file.")
  public File INPUT;

  @Option(
      shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME,
      doc = "The output file to write the metrics to.")
  public File OUTPUT;

  @Option(shortName = "LEVEL", doc = "The level(s) at which to accumulate metrics.  ")
  public Set<MetricAccumulationLevel> METRIC_ACCUMULATION_LEVEL =
      CollectionUtil.makeSet(MetricAccumulationLevel.ALL_READS);

  @Option(
      shortName = StandardOptionDefinitions.REFERENCE_SHORT_NAME,
      optional = true,
      doc = "The reference sequence aligned to.")
  public File REFERENCE_SEQUENCE;

  @Option(optional = true, doc = "An optional file to output per target coverage information to.")
  public File PER_TARGET_COVERAGE;

  /**
   * Asserts that files are readable and writable and then fires off an HsMetricsCalculator instance
   * to do the real work.
   */
  protected int doWork() {
    IoUtil.assertFileIsReadable(getProbeIntervals());
    IoUtil.assertFileIsReadable(TARGET_INTERVALS);
    IoUtil.assertFileIsReadable(INPUT);
    IoUtil.assertFileIsWritable(OUTPUT);
    if (PER_TARGET_COVERAGE != null) IoUtil.assertFileIsWritable(PER_TARGET_COVERAGE);

    final SAMFileReader samReader = new SAMFileReader(INPUT);

    final File probeIntervals = getProbeIntervals();

    // Validate that the targets and baits have the same references as the reads file
    SequenceUtil.assertSequenceDictionariesEqual(
        samReader.getFileHeader().getSequenceDictionary(),
        IntervalList.fromFile(TARGET_INTERVALS).getHeader().getSequenceDictionary(),
        INPUT,
        TARGET_INTERVALS);
    SequenceUtil.assertSequenceDictionariesEqual(
        samReader.getFileHeader().getSequenceDictionary(),
        IntervalList.fromFile(probeIntervals).getHeader().getSequenceDictionary(),
        INPUT,
        probeIntervals);

    ReferenceSequenceFile ref = null;
    if (REFERENCE_SEQUENCE != null) {
      IoUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
      ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE);
      SequenceUtil.assertSequenceDictionariesEqual(
          samReader.getFileHeader().getSequenceDictionary(),
          ref.getSequenceDictionary(),
          INPUT,
          REFERENCE_SEQUENCE);
    }

    final TargetMetricsCollector collector =
        makeCollector(
            METRIC_ACCUMULATION_LEVEL,
            samReader.getFileHeader().getReadGroups(),
            ref,
            PER_TARGET_COVERAGE,
            TARGET_INTERVALS,
            probeIntervals,
            getProbeSetName());

    // Add each record to the requested collectors
    final Iterator<SAMRecord> records = samReader.iterator();
    final ProgressLogger progress = new ProgressLogger(log);

    while (records.hasNext()) {
      final SAMRecord sam = records.next();
      collector.acceptRecord(sam, null);
      progress.record(sam);
    }

    // Write the output file
    final MetricsFile<HsMetrics, Integer> metrics = getMetricsFile();
    collector.finish();

    collector.addAllLevelsToFile(metrics);

    metrics.write(OUTPUT);

    return 0;
  }

  protected String[] customCommandLineValidation() {
    if (PER_TARGET_COVERAGE != null
        && (METRIC_ACCUMULATION_LEVEL.size() != 1
            || METRIC_ACCUMULATION_LEVEL.iterator().next() != MetricAccumulationLevel.ALL_READS)) {
      return new String[] {
        "PER_TARGET_COVERAGE can be specified only when METRIC_ACCUMULATION_LEVEL is set "
            + "to ALL_READS."
      };
    }

    if (PER_TARGET_COVERAGE != null && REFERENCE_SEQUENCE == null) {
      return new String[] {"Must supply REFERENCE_SEQUENCE when supplying PER_TARGET_COVERAGE"};
    }

    return super.customCommandLineValidation();
  }
}
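To make the template concrete, here is a minimal sketch of what a concrete subclass could look like. It is not Picard code: the class name and the PROBE_INTERVALS option are invented, and the TargetedPcrMetricsCollector constructor is assumed to mirror makeCollector's parameter list.

class CollectMyPanelMetricsSketch extends CollectTargetedMetrics {

  @Option(shortName = "PI", doc = "Interval list of probe locations.")
  public File PROBE_INTERVALS; // hypothetical option, analogous to BAIT_INTERVALS

  @Override
  protected File getProbeIntervals() {
    return PROBE_INTERVALS;
  }

  @Override
  protected String getProbeSetName() {
    // Fall back to the interval file's name, per the javadoc above.
    return getProbeIntervals().getName();
  }

  @Override
  protected TargetMetricsCollector makeCollector(
      final Set<MetricAccumulationLevel> accumulationLevels,
      final List<SAMReadGroupRecord> samRgRecords,
      final ReferenceSequenceFile refFile,
      final File perTargetCoverage,
      final File targetIntervals,
      final File probeIntervals,
      final String probeSetName) {
    // Assumption: the concrete collector takes the same arguments it is handed here.
    return new TargetedPcrMetricsCollector(
        accumulationLevels, samRgRecords, refFile,
        perTargetCoverage, targetIntervals, probeIntervals, probeSetName);
  }
}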
/**
 * Java port of UCSC liftOver. Only the most basic liftOver functionality is implemented. Internally
 * coordinates are 0-based, half-open. The API is standard Picard 1-based, inclusive.
 *
 * @author [email protected]
 */
public class LiftOver {
  private static final Log LOG = Log.getInstance(LiftOver.class);

  public static final double DEFAULT_LIFTOVER_MINMATCH = 0.95;

  private double liftOverMinMatch = DEFAULT_LIFTOVER_MINMATCH;
  private final OverlapDetector<Chain> chains;

  /** Load UCSC chain file in order to lift over Intervals. */
  public LiftOver(File chainFile) {
    IoUtil.assertFileIsReadable(chainFile);
    chains = Chain.loadChains(chainFile);
  }

  /**
   * Throw an exception if all the "to" sequence names in the chains are not found in the given
   * sequence dictionary.
   */
  public void validateToSequences(final SAMSequenceDictionary sequenceDictionary) {
    for (final Chain chain : chains.getAll()) {
      if (sequenceDictionary.getSequence(chain.toSequenceName) == null) {
        throw new PicardException(
            "Sequence "
                + chain.toSequenceName
                + " from chain file is not found in sequence dictionary.");
      }
    }
  }

  /**
   * Lift over the given interval to the new genome build using the liftOverMinMatch set for this
   * LiftOver object.
   *
   * @param interval Interval to be lifted over.
   * @return Interval in the output build coordinates, or null if it cannot be lifted over.
   */
  public Interval liftOver(final Interval interval) {
    return liftOver(interval, liftOverMinMatch);
  }

  /**
   * Lift over the given interval to the new genome build.
   *
   * @param interval Interval to be lifted over.
   * @param liftOverMinMatch Minimum fraction of bases that must remap.
   * @return Interval in the output build coordinates, or null if it cannot be lifted over.
   */
  public Interval liftOver(final Interval interval, final double liftOverMinMatch) {
    if (interval.length() == 0) {
      throw new IllegalArgumentException(
          "Zero-length interval cannot be lifted over.  Interval: " + interval.getName());
    }
    Chain chainHit = null;
    TargetIntersection targetIntersection = null;
    // Number of bases in interval that can be lifted over must be >= this.
    double minMatchSize = liftOverMinMatch * interval.length();

    // Find the appropriate Chain, and the part of the chain corresponding to the interval to be
    // lifted over.
    for (final Chain chain : chains.getOverlaps(interval)) {
      final TargetIntersection candidateIntersection = targetIntersection(chain, interval);
      if (candidateIntersection != null
          && candidateIntersection.intersectionLength >= minMatchSize) {
        if (chainHit != null) {
          // In basic liftOver, multiple hits are not allowed.
          return null;
        }
        chainHit = chain;
        targetIntersection = candidateIntersection;
      } else if (candidateIntersection != null) {
        LOG.info(
            "Interval "
                + interval.getName()
                + " failed to match chain "
                + chain.id
                + " because intersection length "
                + candidateIntersection.intersectionLength
                + " < minMatchSize "
                + minMatchSize
                + " ("
                + (candidateIntersection.intersectionLength / (float) interval.length())
                + " < "
                + liftOverMinMatch
                + ")");
      }
    }
    if (chainHit == null) {
      // Can't be lifted over.
      return null;
    }

    return createToInterval(interval.getName(), targetIntersection);
  }

  public List<PartialLiftover> diagnosticLiftover(final Interval interval) {
    final List<PartialLiftover> ret = new ArrayList<PartialLiftover>();
    if (interval.length() == 0) {
      throw new IllegalArgumentException(
          "Zero-length interval cannot be lifted over.  Interval: " + interval.getName());
    }
    for (final Chain chain : chains.getOverlaps(interval)) {
      Interval intersectingChain = interval.intersect(chain.interval);
      final TargetIntersection targetIntersection = targetIntersection(chain, intersectingChain);
      if (targetIntersection == null) {
        ret.add(new PartialLiftover(intersectingChain, chain.id));
      } else {
        Interval toInterval = createToInterval(interval.getName(), targetIntersection);
        float percentLiftedOver = targetIntersection.intersectionLength / (float) interval.length();
        ret.add(
            new PartialLiftover(
                intersectingChain, toInterval, targetIntersection.chain.id, percentLiftedOver));
      }
    }
    return ret;
  }

  private static Interval createToInterval(
      final String intervalName, final TargetIntersection targetIntersection) {
    // Compute the query interval given the offsets of the target interval start
    // and end into the first and last ContinuousBlocks.
    int toStart =
        targetIntersection.chain.getBlock(targetIntersection.firstBlockIndex).toStart
            + targetIntersection.startOffset;
    int toEnd =
        targetIntersection.chain.getBlock(targetIntersection.lastBlockIndex).getToEnd()
            - targetIntersection.offsetFromEnd;
    if (toEnd <= toStart || toStart < 0) {
      throw new PicardException("Something strange lifting over interval " + intervalName);
    }

    if (targetIntersection.chain.toNegativeStrand) {
      // Flip if query is negative.
      int negativeStart = targetIntersection.chain.toSequenceSize - toEnd;
      int negativeEnd = targetIntersection.chain.toSequenceSize - toStart;
      toStart = negativeStart;
      toEnd = negativeEnd;
    }
    // Convert to 1-based, inclusive.
    return new Interval(
        targetIntersection.chain.toSequenceName,
        toStart + 1,
        toEnd,
        targetIntersection.chain.toNegativeStrand,
        intervalName);
  }

  /**
   * Add up overlap btw the blocks in this chain and the given interval.
   *
   * @return Length of overlap, offsets into first and last ContinuousBlocks, and indices of first
   *     and last ContinuousBlocks.
   */
  private static TargetIntersection targetIntersection(final Chain chain, final Interval interval) {
    int intersectionLength = 0;
    // Convert interval to 0-based, half-open
    int start = interval.getStart() - 1;
    int end = interval.getEnd();
    int firstBlockIndex = -1;
    int lastBlockIndex = -1;
    int startOffset = -1;
    int offsetFromEnd = -1;
    List<Chain.ContinuousBlock> blockList = chain.getBlocks();
    for (int i = 0; i < blockList.size(); ++i) {
      final Chain.ContinuousBlock block = blockList.get(i);
      if (block.fromStart >= end) {
        break;
      } else if (block.getFromEnd() <= start) {
        continue;
      }
      if (firstBlockIndex == -1) {
        firstBlockIndex = i;
        if (start > block.fromStart) {
          startOffset = start - block.fromStart;
        } else {
          startOffset = 0;
        }
      }
      lastBlockIndex = i;
      if (block.getFromEnd() > end) {
        offsetFromEnd = block.getFromEnd() - end;
      } else {
        offsetFromEnd = 0;
      }
      int thisIntersection = Math.min(end, block.getFromEnd()) - Math.max(start, block.fromStart);
      if (thisIntersection <= 0) {
        throw new PicardException("Should have been some intersection.");
      }
      intersectionLength += thisIntersection;
    }
    if (intersectionLength == 0) {
      return null;
    }
    return new TargetIntersection(
        chain, intersectionLength, startOffset, offsetFromEnd, firstBlockIndex, lastBlockIndex);
  }

  /** Get minimum fraction of bases that must remap. */
  public double getLiftOverMinMatch() {
    return liftOverMinMatch;
  }

  /** Set minimum fraction of bases that must remap. */
  public void setLiftOverMinMatch(final double liftOverMinMatch) {
    this.liftOverMinMatch = liftOverMinMatch;
  }

  /** Value class returned by targetIntersection() */
  private static class TargetIntersection {
    /** Chain used for this intersection */
    final Chain chain;
    /** Total intersection length. */
    final int intersectionLength;
    /** Offset of target interval start in first block. */
    final int startOffset;
    /** Distance from target interval end to end of last block. */
    final int offsetFromEnd;
    /** Index of first ContinuousBlock matching interval. */
    final int firstBlockIndex;
    /** Index of last ContinuousBlock matching interval. */
    final int lastBlockIndex;

    TargetIntersection(
        final Chain chain,
        final int intersectionLength,
        final int startOffset,
        final int offsetFromEnd,
        final int firstBlockIndex,
        final int lastBlockIndex) {
      this.chain = chain;
      this.intersectionLength = intersectionLength;
      this.startOffset = startOffset;
      this.offsetFromEnd = offsetFromEnd;
      this.firstBlockIndex = firstBlockIndex;
      this.lastBlockIndex = lastBlockIndex;
    }
  }

  /** Represents a portion of a liftover operation, for use in diagnosing liftover failures. */
  public static class PartialLiftover {
    /** Intersection between "from" interval and "from" region of a chain. */
    final Interval fromInterval;
    /**
     * Result of lifting over fromInterval (with no percentage mapped requirement). This is null if
     * fromInterval falls entirely within a gap of the chain.
     */
    final Interval toInterval;
    /** id of chain used for this liftover */
    final int chainId;
    /**
     * Percentage of bases in fromInterval that lifted over. 0 if fromInterval is not covered by any
     * chain.
     */
    final float percentLiftedOver;

    PartialLiftover(
        final Interval fromInterval,
        final Interval toInterval,
        final int chainId,
        final float percentLiftedOver) {
      this.fromInterval = fromInterval;
      this.toInterval = toInterval;
      this.chainId = chainId;
      this.percentLiftedOver = percentLiftedOver;
    }

    PartialLiftover(final Interval fromInterval, final int chainId) {
      this.fromInterval = fromInterval;
      this.toInterval = null;
      this.chainId = chainId;
      this.percentLiftedOver = 0.0f;
    }

    @Override
    public String toString() {
      if (toInterval == null) {
        // Matched a chain, but entirely within a gap.
        return fromInterval.toString()
            + " (len "
            + fromInterval.length()
            + ")=>null using chain "
            + chainId;
      }
      final String strand = toInterval.isNegativeStrand() ? "-" : "+";
      return fromInterval.toString()
          + " (len "
          + fromInterval.length()
          + ")=>"
          + toInterval
          + "("
          + strand
          + ") using chain "
          + chainId
          + " ; pct matched "
          + percentLiftedOver;
    }
  }
}
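A minimal usage sketch for LiftOver, assuming the same net.sf.picard imports as the class above and a locally available UCSC chain file (the path and coordinates below are placeholders). Note that Interval is 1-based inclusive, per the class javadoc; the 0-based, half-open arithmetic stays internal.

class LiftOverSketch {
  public static void main(final String[] args) {
    // Placeholder chain file; any UCSC .over.chain file works here.
    final LiftOver liftOver = new LiftOver(new File("hg18ToHg19.over.chain"));
    final Interval source = new Interval("chr1", 1000000, 1000500);
    final Interval target = liftOver.liftOver(source);
    if (target == null) {
      // Fewer than liftOverMinMatch (default 0.95) of the bases remapped,
      // or the interval overlapped more than one matching chain.
      System.out.println("Cannot lift over " + source);
    } else {
      System.out.println(source + " -> " + target);
    }
  }
}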
/**
 * Replaces read groups in a BAM file
 *
 * @author mdepristo
 */
public class AddOrReplaceReadGroups extends CommandLineProgram {
  @Usage(programVersion = "1.0")
  public String USAGE =
      "Replaces all read groups in the INPUT file with a new read group and assigns "
          + "all reads to this read group in the OUTPUT BAM";

  @Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "Input file (bam or sam).")
  public File INPUT = null;

  @Option(
      shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME,
      doc = "Output file (bam or sam).")
  public File OUTPUT = null;

  @Option(
      shortName = StandardOptionDefinitions.SORT_ORDER_SHORT_NAME,
      optional = true,
      doc =
          "Optional sort order to output in. If not supplied OUTPUT is in the same order as INPUT.")
  public SortOrder SORT_ORDER;

  @Option(shortName = "ID", doc = "Read Group ID")
  public String RGID = "1";

  @Option(shortName = "LB", doc = "Read Group Library")
  public String RGLB;

  @Option(shortName = "PL", doc = "Read Group platform (e.g. illumina, solid)")
  public String RGPL;

  @Option(shortName = "PU", doc = "Read Group platform unit (eg. run barcode)")
  public String RGPU;

  @Option(shortName = "SM", doc = "Read Group sample name")
  public String RGSM;

  @Option(shortName = "CN", doc = "Read Group sequencing center name", optional = true)
  public String RGCN;

  @Option(shortName = "DS", doc = "Read Group description", optional = true)
  public String RGDS;

  @Option(shortName = "DT", doc = "Read Group run date", optional = true)
  public Iso8601Date RGDT;

  private final Log log = Log.getInstance(AddOrReplaceReadGroups.class);

  /** Required main method implementation. */
  public static void main(final String[] argv) {
    new AddOrReplaceReadGroups().instanceMainWithExit(argv);
  }

  protected int doWork() {
    IoUtil.assertFileIsReadable(INPUT);
    IoUtil.assertFileIsWritable(OUTPUT);

    final SAMFileReader in = new SAMFileReader(INPUT);

    // create the read group we'll be using
    final SAMReadGroupRecord rg = new SAMReadGroupRecord(RGID);
    rg.setLibrary(RGLB);
    rg.setPlatform(RGPL);
    rg.setSample(RGSM);
    rg.setPlatformUnit(RGPU);
    if (RGCN != null) rg.setSequencingCenter(RGCN);
    if (RGDS != null) rg.setDescription(RGDS);
    if (RGDT != null) rg.setRunDate(RGDT);

    log.info(
        String.format(
            "Created read group ID=%s PL=%s LB=%s SM=%s%n",
            rg.getId(), rg.getPlatform(), rg.getLibrary(), rg.getSample()));

    // create the new header and output file
    final SAMFileHeader inHeader = in.getFileHeader();
    final SAMFileHeader outHeader = inHeader.clone();
    outHeader.setReadGroups(Arrays.asList(rg));
    if (SORT_ORDER != null) outHeader.setSortOrder(SORT_ORDER);

    final SAMFileWriter outWriter =
        new SAMFileWriterFactory()
            .makeSAMOrBAMWriter(
                outHeader, outHeader.getSortOrder() == inHeader.getSortOrder(), OUTPUT);

    final ProgressLogger progress = new ProgressLogger(log);
    for (final SAMRecord read : in) {
      read.setAttribute(SAMTag.RG.name(), RGID);
      outWriter.addAlignment(read);
      progress.record(read);
    }

    // cleanup
    in.close();
    outWriter.close();
    return 0;
  }
}
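A hypothetical way to drive the tool above programmatically, calling instanceMain (which instanceMainWithExit wraps) directly; the paths and read-group values are placeholders, and the option names are the shortName values declared above.

class AddOrReplaceReadGroupsSketch {
  public static void main(final String[] args) {
    // Each element is one OPTION=value pair, exactly as on the command line.
    final int rc = new AddOrReplaceReadGroups().instanceMain(new String[] {
      "INPUT=in.bam",
      "OUTPUT=out.bam",
      "ID=run42.lane1",
      "LB=lib1",
      "PL=illumina",
      "PU=run42.lane1.ACGT",
      "SM=sampleA"
    });
    System.exit(rc);
  }
}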
/**
 * Command line program to read non-duplicate insert sizes, create a histogram and report
 * distribution statistics.
 *
 * @author Doug Voet (dvoet at broadinstitute dot org)
 */
public class CollectInsertSizeMetrics extends SinglePassSamProgram {
  private static final Log log = Log.getInstance(CollectInsertSizeMetrics.class);
  private static final String HISTOGRAM_R_SCRIPT = "net/sf/picard/analysis/insertSizeHistogram.R";
  // Usage and parameters
  @Usage
  public String USAGE =
      getStandardUsagePreamble()
          + "Reads a SAM or BAM file and writes a file containing metrics about "
          + "the statistical distribution of insert size (excluding duplicates) "
          + "and generates a histogram plot.\n";

  @Option(shortName = "H", doc = "File to write insert size histogram chart to.")
  public File HISTOGRAM_FILE;

  @Option(
      doc =
          "Generate mean, sd and plots by trimming the data down to MEDIAN + DEVIATIONS*MEDIAN_ABSOLUTE_DEVIATION. "
              + "This is done because insert size data typically includes enough anomalous values from chimeras and other "
              + "artifacts to make the mean and sd grossly misleading regarding the real distribution.")
  public double DEVIATIONS = 10;

  @Option(
      shortName = "W",
      doc =
          "Explicitly sets the histogram width, overriding automatic truncation of histogram tail. "
              + "Also, when calculating mean and standard deviation, only bins <= HISTOGRAM_WIDTH will be included.",
      optional = true)
  public Integer HISTOGRAM_WIDTH = null;

  @Option(
      shortName = "M",
      doc =
          "When generating the histogram, discard any data categories (out of FR, TANDEM, RF) that have fewer than this "
              + "percentage of overall reads. (Range: 0 to 1).")
  public float MINIMUM_PCT = 0.05f;

  @Option(shortName = "LEVEL", doc = "The level(s) at which to accumulate metrics.  ")
  public Set<MetricAccumulationLevel> METRIC_ACCUMULATION_LEVEL =
      CollectionUtil.makeSet(MetricAccumulationLevel.ALL_READS);

  // Calculates InsertSizeMetrics for all METRIC_ACCUMULATION_LEVELs provided
  private InsertSizeMetricsCollector multiCollector;

  /** Required main method implementation. */
  public static void main(final String[] argv) {
    new CollectInsertSizeMetrics().instanceMainWithExit(argv);
  }

  /**
   * Put any custom command-line validation in an override of this method. clp is initialized at
   * this point and can be used to print usage and access argv. Any options set by command-line
   * parser can be validated.
   *
   * @return null if command line is valid. If command line is invalid, returns an array of error
   *     message to be written to the appropriate place.
   */
  @Override
  protected String[] customCommandLineValidation() {
    if (MINIMUM_PCT < 0 || MINIMUM_PCT > 0.5) {
      return new String[] {
        "MINIMUM_PCT was set to "
            + MINIMUM_PCT
            + ". It must be between 0 and 0.5 so all data categories don't get discarded."
      };
    }

    return super.customCommandLineValidation();
  }

  @Override
  protected boolean usesNoRefReads() {
    return false;
  }

  @Override
  protected void setup(final SAMFileHeader header, final File samFile) {
    IoUtil.assertFileIsWritable(OUTPUT);
    IoUtil.assertFileIsWritable(HISTOGRAM_FILE);

    // Delegate actual collection to InsertSizeMetricCollector
    multiCollector =
        new InsertSizeMetricsCollector(
            METRIC_ACCUMULATION_LEVEL,
            header.getReadGroups(),
            MINIMUM_PCT,
            HISTOGRAM_WIDTH,
            DEVIATIONS);
  }

  @Override
  protected void acceptRead(final SAMRecord record, final ReferenceSequence ref) {
    multiCollector.acceptRecord(record, ref);
  }

  @Override
  protected void finish() {
    multiCollector.finish();

    final MetricsFile<InsertSizeMetrics, Integer> file = getMetricsFile();
    multiCollector.addAllLevelsToFile(file);

    if (file.getNumHistograms() == 0) {
      // can happen if user sets MINIMUM_PCT = 0.5, etc.
      log.warn(
          "All data categories were discarded because they contained < "
              + MINIMUM_PCT
              + " of the total aligned paired data.");
      final InsertSizeMetricsCollector.PerUnitInsertSizeMetricsCollector allReadsCollector =
          (InsertSizeMetricsCollector.PerUnitInsertSizeMetricsCollector)
              multiCollector.getAllReadsCollector();
      log.warn(
          "Total mapped pairs in all categories: "
              + (allReadsCollector == null
                  ? "null" // no ALL_READS collector was configured
                  : allReadsCollector.getTotalInserts()));
    } else {
      file.write(OUTPUT);

      final int rResult;
      if (HISTOGRAM_WIDTH == null) {
        rResult =
            RExecutor.executeFromClasspath(
                HISTOGRAM_R_SCRIPT,
                OUTPUT.getAbsolutePath(),
                HISTOGRAM_FILE.getAbsolutePath(),
                INPUT.getName());
      } else {
        rResult =
            RExecutor.executeFromClasspath(
                HISTOGRAM_R_SCRIPT,
                OUTPUT.getAbsolutePath(),
                HISTOGRAM_FILE.getAbsolutePath(),
                INPUT.getName(),
                String.valueOf(HISTOGRAM_WIDTH));
        // HISTOGRAM_WIDTH is passed because R automatically sets the histogram width
        // to the last bin that has data, which may be less than HISTOGRAM_WIDTH and
        // confuse the user.
      }

      if (rResult != 0) {
        throw new PicardException(
            "R script " + HISTOGRAM_R_SCRIPT + " failed with return code " + rResult);
      }
    }
  }
}
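The same pattern applies to CollectInsertSizeMetrics; a hedged sketch with made-up file names, where H and W are the shortNames of HISTOGRAM_FILE and HISTOGRAM_WIDTH declared above.

class CollectInsertSizeMetricsSketch {
  public static void main(final String[] args) {
    final int rc = new CollectInsertSizeMetrics().instanceMain(new String[] {
      "INPUT=in.bam",
      "OUTPUT=insert_size_metrics.txt",
      "H=insert_size_histogram.pdf", // HISTOGRAM_FILE
      "W=800"                        // HISTOGRAM_WIDTH, optional
    });
    System.exit(rc);
  }
}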
/**
 * This class reads an Illumina control file.
 *
 * @author Guoying Qi
 * @author Staffan Living
 */
public class ControlFileReader extends IlluminaFileReader {

  private final Log log = Log.getInstance(ControlFileReader.class);

  private final int EXPECTED_CONTROL_VERSION = 2;
  private int currentCluster = 0;
  private int totalClusters = 0;
  private int currentControlClusters = 0;

  /**
   * @param controlFileName control file name
   * @throws Exception
   */
  public ControlFileReader(String controlFileName) throws Exception {

    super(controlFileName);
    this.readFileHeader();
  }

  /** @throws Exception if the header cannot be read or the version is unexpected */
  private void readFileHeader() throws Exception {

    // The first four bytes should be zero in the new control-file format.
    // A non-zero value indicates an old-format file, kept for backward compatibility.
    int emptyBytes = this.readFourBytes(inputStream);
    if (emptyBytes != 0) {

      log.warn(
          "The first four bytes are not zero: "
              + emptyBytes
              + ". This is an old format control file.");
      this.totalClusters = emptyBytes;
      return;
    }

    // The next four bytes hold the version, which must match the expected value.
    int version = this.readFourBytes(inputStream);
    if (version != this.EXPECTED_CONTROL_VERSION) {
      log.error("Unexpected version byte: " + version);
      throw new Exception("Unexpected version number in control file");
    }

    // next four bytes should be the total number of clusters
    this.totalClusters = this.readFourBytes(inputStream);
    log.info("The total number of clusters: " + this.getTotalClusters());
  }

  @Override
  public boolean hasNext() {

    return this.getCurrentCluster() < this.getTotalClusters();
  }

  @Override
  public Object next() {

    try {
      // Note: readUnsignedShort() on a DataInputStream throws EOFException at end
      // of stream rather than returning -1, so the guard below is defensive only.
      int nextByte = this.inputStream.readUnsignedShort();

      if (nextByte == -1) {
        log.warn(
            "There is no more cluster in Control file after cluster "
                + this.getCurrentCluster()
                + " in file "
                + this.getFileName());
        return null;
      }

      this.currentCluster++;
      /*
      Bit0: always empty (0)
      Bit1: was the read identified as a control?
      Bit2: was the match ambiguous?
      Bit3: did the read match the phiX tag?
      Bit4: did the read align to match the phiX tag?
      Bit5: did the read match the control index sequence? (specified in controls.fasta, TGTCACA)
      Bits6,7: reserved for future use
      Bits8..15: the report key for the matched record in the controls.fasta file (specified by the REPORT_KEY metadata)
      */
      nextByte = nextByte & 0x2; // keep Bit1: was this cluster identified as a control?
      if (nextByte != 0) {
        this.currentControlClusters++;
      }

      return Integer.valueOf(nextByte);

    } catch (IOException ex) {
      log.error(ex, "Problem to read control file");
    }

    return null;
  }

  /** @return the currentCluster */
  public int getCurrentCluster() {
    return currentCluster;
  }

  /** @return the totalClusters */
  public int getTotalClusters() {
    return totalClusters;
  }

  /** @return the currentControlClusters */
  public int getCurrentControlClusters() {
    return currentControlClusters;
  }

  public static void main(String[] args) throws Exception {

    String controlFileName =
        "testdata/110323_HS13_06000_B_B039WABXX/Data/Intensities/BaseCalls/L001/s_1_1101.control";
    if (args.length > 0 && args[0] != null) {
      controlFileName = args[0];
    }

    ControlFileReader control = new ControlFileReader(controlFileName);

    int numberControlCluster = 0;
    while (control.hasNext()) {
      int nextCluster = (Integer) control.next();

      if (nextCluster != 0) {
        numberControlCluster++;
      }
    }
    System.out.println(numberControlCluster);
    System.out.println(control.getCurrentCluster());
    System.out.println(control.getCurrentControlClusters());

    // control.next();
  }
}
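A self-contained sketch of the bit arithmetic next() performs on each 16-bit control word. The sample values are invented, but the masks follow the bit layout documented in next(): bit 1 flags a control cluster and bits 8..15 carry the report key.

class ControlWordSketch {
  public static void main(final String[] args) {
    final int[] words = {0x0000, 0x0002, 0x0006, 0x0302};
    for (final int word : words) {
      final boolean isControl = (word & 0x2) != 0; // Bit1: control cluster?
      final int reportKey = (word >> 8) & 0xFF;    // Bits8..15: report key
      System.out.printf("word=0x%04X control=%b reportKey=%d%n",
          word, isControl, reportKey);
    }
  }
}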
  private void read(InputStream in, String filename) throws IOException {
    // Pattern comma=Pattern.compile("[,]");
    Pattern pipe = Pattern.compile("[\\|]");
    Pattern amp = Pattern.compile("&");

    out.println("insert into FILE" + SUFFIX + "(filename) values (" + quote(filename) + ");");
    VcfIterator r = new VcfIterator(in);

    VCFHeader header = r.getHeader();

    String csqColumns[] = null;
    VCFInfoHeaderLine infoHeader = header.getInfoHeaderLine("CSQ");
    if (infoHeader != null && this.USE_VEP) {
      LOG.info("parsing VEP " + infoHeader.getDescription());
      final String formatStr = "Format: ";
      int i = infoHeader.getDescription().indexOf(formatStr);
      if (i != -1) {
        csqColumns =
            pipe.split(infoHeader.getDescription().substring(i + formatStr.length()).trim());
        LOG.debug(Arrays.asList(csqColumns));
      } else {
        LOG.error("Cannot parse " + infoHeader.getDescription());
      }
    }
    String snpEffColumns[] = null;
    infoHeader = header.getInfoHeaderLine("EFF");
    if (infoHeader != null && this.USE_SNPEFF) {
      LOG.info("parsing EFF " + infoHeader.getDescription());

      final String formatStr = ".Format: '";
      final String desc = infoHeader.getDescription();
      int i = desc.indexOf(formatStr);
      if (i != -1) i = desc.indexOf('(', i + formatStr.length());
      int j = desc.lastIndexOf(')');
      if (i != -1 && j > i) {
        snpEffColumns =
            pipe.split(desc.substring(i + 1, j).replaceAll("[ \\[\\]()\\.]", "").trim());
        LOG.info(Arrays.asList(snpEffColumns));
      } else {
        LOG.error("Cannot parse " + infoHeader.getDescription());
      }
    }

    String nmdColumns[] = null;
    infoHeader = header.getInfoHeaderLine("NMD");
    if (infoHeader != null && this.USE_SNPEFF) {

      final String formatStr = " Format: '";
      final String desc = infoHeader.getDescription();
      int i = desc.indexOf(formatStr);
      int j = (i == -1 ? -1 : desc.lastIndexOf('\''));

      if (i != -1 && j > i) {
        nmdColumns =
            pipe.split(
                desc.substring(i + formatStr.length(), j).replaceAll("[ \\[\\]()\\.]", "").trim());
      } else {
        LOG.error("Cannot parse " + infoHeader.getDescription());
      }
    }

    String lofColumns[] = null;
    infoHeader = header.getInfoHeaderLine("LOF");
    if (infoHeader != null && this.USE_SNPEFF) {

      final String formatStr = " Format: '";
      final String desc = infoHeader.getDescription();
      int i = desc.indexOf(formatStr);
      int j = (i == -1 ? -1 : desc.lastIndexOf('\''));

      if (i != -1 && j > i) {
        lofColumns =
            pipe.split(
                desc.substring(i + formatStr.length(), j).replaceAll("[ \\[\\]()\\.]", "").trim());
      } else {
        LOG.error("Cannot parse " + infoHeader.getDescription());
      }
    }

    for (String S : header.getSampleNamesInOrder()) {
      // hsql upsert pattern: merge into SAMPLE using (values(...)) as vals(y)
      // on SAMPLE.name = vals.y when not matched then insert values (NULL, vals.y);
      switch (this.engine) {
        case hsql:
          out.println(
              "merge into SAMPLE"
                  + SUFFIX
                  + " using ( values("
                  + quote(S)
                  + ") ) "
                  + "AS vals(y) ON SAMPLE"
                  + SUFFIX
                  + ".name = vals.y "
                  + "WHEN NOT MATCHED THEN INSERT VALUES  (NULL,vals.y);");
          break;
        default:
          out.println(
              "insert or ignore into SAMPLE" + SUFFIX + "(name) values (" + quote(S) + ");");
          break;
      }
    }

    List<String> headers = new ArrayList<String>();

    for (VCFHeaderLine line : header.getMetaDataInSortedOrder()) {
      if (VCFHeaderVersion.isFormatString(line.getKey())) continue;
      headers.add(VCFHeader.METADATA_INDICATOR + line);
    }

    String chromLine = VCFHeader.HEADER_INDICATOR;
    for (VCFHeader.HEADER_FIELDS field : header.getHeaderFields()) {
      if (!VCFHeader.HEADER_INDICATOR.equals(chromLine))
        chromLine += (VCFConstants.FIELD_SEPARATOR);
      chromLine += (field);
    }

    if (header.hasGenotypingData()) {
      chromLine += VCFConstants.FIELD_SEPARATOR + "FORMAT";
      for (String sample : header.getGenotypeSamples()) {
        chromLine += VCFConstants.FIELD_SEPARATOR;
        chromLine += sample;
      }
    }
    headers.add(chromLine);

    for (String line : headers) {
      out.println(
          "insert into HEADER"
              + SUFFIX
              + "(file_id,header) values ("
              + "(select max(id) from FILE"
              + SUFFIX
              + "),"
              + quote(line)
              + ");");
    }

    while (r.hasNext()) {
      VariantContext var = r.next();

      if (var == null) {
        LOG.error("Cannot parse VCF");
        continue;
      }
      // "create table if not exists FILE(id,filename text)";
      // "create table if not exists VARIATION(id,file_id,chrom,pos,start0,end0,rs_id,ref,qual)";

      out.println(
          "insert into VARIATION"
              + SUFFIX
              + "(file_id,chrom,pos,START0,END0,rs_id,ref,qual) values ("
              + "(select max(id) from FILE"
              + SUFFIX
              + "),"
              + quote(var.getChr())
              + ","
              + var.getStart()
              + ","
              + (var.getStart() - 1)
              + ","
              + var.getEnd()
              + ","
              + (var.getID() == null || var.getID().equals(VCFConstants.EMPTY_ID_FIELD)
                  ? "NULL"
                  : quote(var.getID()))
              + ","
              + quote(var.getReference().getDisplayString())
              + ","
              + (var.getPhredScaledQual() < 0 ? "NULL" : var.getPhredScaledQual())
              + ");");
      // "create table if not exists ALT(id,var_id,alt)";

      for (Allele alt : var.getAlternateAlleles()) {
        out.println(
            "insert into ALT"
                + SUFFIX
                + "(var_id,alt) values ("
                + "(select max(id) from VARIATION"
                + SUFFIX
                + "),"
                + quote(alt.getDisplayString())
                + ");");
      }
      // "create table if not exists FILTER(id,var_id,filter)";

      for (String filter : var.getFilters()) {
        out.println(
            "insert into FILTER"
                + SUFFIX
                + "(var_id,filter) values ("
                + "(select max(id) from VARIATION"
                + SUFFIX
                + "),"
                + quote(filter)
                + ");");
      }
      CommonInfo infos = var.getCommonInfo();
      for (String key : infos.getAttributes().keySet()) {
        Object val = infos.getAttribute(key);
        // "create table if not exists INFO(id,var_id,k,v)";

        if (SPLIT4 && key.equals("DP4")) {
          String dp4[] = infotoString(val).split("[,]");
          insertIntoInfo(quote(key + "[refFor]"), quote(dp4[0]));
          insertIntoInfo(quote(key + "[refRev]"), quote(dp4[1]));
          insertIntoInfo(quote(key + "[altFor]"), quote(dp4[2]));
          insertIntoInfo(quote(key + "[altRev]"), quote(dp4[3]));
        } else {
          insertIntoInfo(quote(key), quote(infotoString(val)));
        }

        if (key.equals("CSQ") && csqColumns != null) {
          List as_array = castToStringArray(val);

          for (Object csqs : as_array) {
            if (csqs.toString().isEmpty()) continue;
            String tokens[] = pipe.split(csqs.toString());
            List<String> extraInfo = new ArrayList<String>();
            for (int t = 0; t < tokens.length && t < csqColumns.length; ++t) {
              if (tokens[t].isEmpty()) continue;
              if (csqColumns[t].equals("Consequence")) {
                for (String pred : amp.split(tokens[t])) {
                  if (pred.isEmpty()) continue;
                  extraInfo.add(csqColumns[t]);
                  extraInfo.add(pred);
                }

              } else {
                extraInfo.add(csqColumns[t]);
                extraInfo.add(tokens[t]);
              }
            }
            insertExtraInfos("CSQ", extraInfo);
          }
        }

        if (key.equals("EFF") && snpEffColumns != null) {
          for (Object item : castToStringArray(val)) {
            String snpeff = item.toString();
            if (snpeff.isEmpty()) continue;
            int opar = snpeff.indexOf('(');
            if (opar == -1) continue;
            int cpar = snpeff.lastIndexOf(')');
            if (cpar == -1) continue;
            String tokens[] = pipe.split(snpeff.substring(opar + 1, cpar));
            List<String> h = new ArrayList<String>();
            h.add("Effect");
            h.add(snpeff.substring(0, opar));
            for (int t = 0; t < tokens.length && t < snpEffColumns.length; ++t) {
              if (tokens[t].isEmpty()) continue;
              h.add(snpEffColumns[t]);
              h.add(tokens[t]);
            }
            insertExtraInfos(key, h);
          }
        }

        if (key.equals("NMD") && nmdColumns != null) {

          for (Object item : castToStringArray(val)) {
            String nmd = item.toString();
            if (nmd.isEmpty()) continue;
            String tokens[] = pipe.split(nmd);
            List<String> h = new ArrayList<String>(nmdColumns.length * 2);
            for (int t = 0; t < tokens.length && t < nmdColumns.length; ++t) {
              if (tokens[t].isEmpty()) continue;
              h.add(nmdColumns[t]);
              h.add(tokens[t]);
            }
            insertExtraInfos(key, h);
          }
        }

        if (key.equals("LOF") && lofColumns != null) {

          for (Object item : castToStringArray(val)) {
            String lof = item.toString();
            if (lof.isEmpty()) continue;
            String tokens[] = pipe.split(lof);
            List<String> h = new ArrayList<String>(lofColumns.length * 2);
            for (int t = 0; t < tokens.length && t < lofColumns.length; ++t) {
              if (tokens[t].isEmpty()) continue;
              h.add(lofColumns[t]);
              h.add(tokens[t]);
            }
            insertExtraInfos(key, h);
          }
        }
      }
      GenotypesContext genotypesCtx = var.getGenotypes();
      for (Genotype g : genotypesCtx) {
        // "create table if not exists GENOTYPE(id,var_id,k,v)";

        List<Allele> alleles = g.getAlleles();

        out.println(
            "insert into GENOTYPE"
                + SUFFIX
                + "(var_id,sample_id,A1,A2,dp,ad,gq,pl,"
                + "is_phased,is_hom,is_homref,is_homvar,is_mixed,"
                + "is_nocall,is_noninformative,is_available,is_called,is_filtered"
                + ") values ("
                + "(select max(id) from VARIATION"
                + SUFFIX
                + "),"
                + "(select id from SAMPLE"
                + SUFFIX
                + " where name="
                + quote(g.getSampleName())
                + "),"
                + (alleles.size() == 2 ? quote(alleles.get(0).getBaseString()) : "NULL")
                + ","
                + (alleles.size() == 2 ? quote(alleles.get(1).getBaseString()) : "NULL")
                + ","
                + (g.hasDP() ? g.getDP() : "NULL")
                + ","
                + (g.hasAD() ? quote(infotoString(g.getAD())) : "NULL")
                + ","
                + (g.hasGQ() ? g.getGQ() : "NULL")
                + ","
                + (g.hasPL() ? quote(infotoString(g.getPL())) : "NULL")
                + ","
                + (g.isPhased() ? 1 : 0)
                + ","
                + (g.isHom() ? 1 : 0)
                + ","
                + (g.isHomRef() ? 1 : 0)
                + ","
                + (g.isHomVar() ? 1 : 0)
                + ","
                + (g.isMixed() ? 1 : 0)
                + ","
                + (g.isNoCall() ? 1 : 0)
                + ","
                + (g.isNonInformative() ? 1 : 0)
                + ","
                + (g.isAvailable() ? 1 : 0)
                + ","
                + (g.isCalled() ? 1 : 0)
                + ","
                + (g.isFiltered() ? 1 : 0)
                + ");");

        for (String key : g.getExtendedAttributes().keySet()) {
          Object val = g.getExtendedAttribute(key);
          if (val == null) continue;
          out.println(
              "insert into GTPROP"
                  + SUFFIX
                  + "(genotype_id,k,v) values ("
                  + "(select max(id) from GENOTYPE"
                  + SUFFIX
                  + "),"
                  + quote(key)
                  + ","
                  + quote(infotoString(val))
                  + ");");
        }
      }
    }
    r.close();
  }
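quote() is used throughout read() but defined outside this excerpt; the sketch below is an assumption of what it presumably does (render null as SQL NULL and double embedded single quotes), not the author's code.

  private static String quote(final String s) {
    if (s == null) return "NULL"; // assumed null handling
    final StringBuilder b = new StringBuilder("'");
    for (int i = 0; i < s.length(); i++) {
      final char c = s.charAt(i);
      if (c == '\'') b.append("''"); // escape by doubling, per SQL
      else b.append(c);
    }
    return b.append('\'').toString();
  }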
@SuppressWarnings("rawtypes")
public class VcfToSql extends CommandLineProgram {

  @Usage(programVersion = "1.0")
  public String USAGE =
      getStandardUsagePreamble()
          + "Creates the code to insert one or more VCF into a SQL database. ";

  @Option(
      shortName = StandardOptionDefinitions.INPUT_SHORT_NAME,
      doc = "VCF files to process.",
      minElements = 0)
  public List<File> IN = new ArrayList<File>();

  @Option(shortName = "SFX", doc = "Table suffix", optional = true)
  public String SUFFIX = "";

  @Option(shortName = "VEP", doc = "Use  and explode VEP predictions", optional = true)
  public boolean USE_VEP = true;

  @Option(shortName = "SNPEFF", doc = "Use and explode SNPEFF predictions", optional = true)
  public boolean USE_SNPEFF = true;

  @Option(shortName = "SQLIDX", doc = "Create misc SQL Indexes.", optional = true)
  public boolean SQLINDEX = true;

  @Option(shortName = "EGN", doc = "sql engine [sqlite,hsql]", optional = true)
  public String ENGINE = SQLEngine.sqlite.name();

  @Option(shortName = "S4", doc = "Split DP4", optional = true)
  public boolean SPLIT4 = false;

  private SQLEngine engine = SQLEngine.sqlite;

  private enum SQLEngine {
    sqlite,
    hsql
  };

  private static Log LOG = Log.getInstance(VcfToSql.class);

  private PrintWriter out = new PrintWriter(System.out);

  @Override
  public String getVersion() {
    return "1.0";
  }

  private String columnId() {
    switch (this.engine) {
      case hsql:
        return "id INTEGER GENERATED ALWAYS AS IDENTITY(START WITH 1, INCREMENT BY 1) PRIMARY KEY,";
      default:
        return "id INTEGER PRIMARY KEY AUTOINCREMENT,";
    }
  }

  private String varchar(int length) {
    switch (this.engine) {
      case hsql:
        return "VARCHAR(" + length + ")";
      default:
        return "TEXT";
    }
  }

  private String text() {
    switch (this.engine) {
      case hsql:
        return "LONGVARCHAR";
      default:
        return "TEXT";
    }
  }

  @Override
  protected int doWork() {
    try {
      try {
        this.engine = SQLEngine.valueOf(this.ENGINE);
      } catch (Exception err) {
        LOG.error("BAD SQL ENGINE " + this.ENGINE);
        return -1;
      }
      out.println(
          "create table if not exists FILE"
              + SUFFIX
              + "("
              + columnId()
              + "filename "
              + varchar(255)
              + " NOT NULL"
              + ");");

      out.println(
          "create table if not exists HEADER"
              + SUFFIX
              + "("
              + columnId()
              + "file_id INT NOT NULL REFERENCES FILE"
              + SUFFIX
              + "(id) ON DELETE CASCADE,"
              + "header "
              + text()
              + ");");

      out.println(
          "create table if not exists SAMPLE"
              + SUFFIX
              + "("
              + columnId()
              + "name "
              + varchar(100)
              + " NOT NULL UNIQUE"
              + ");");
      out.println(
          "create table if not exists VARIATION"
              + SUFFIX
              + "("
              + columnId()
              + "file_id INT NOT NULL REFERENCES FILE"
              + SUFFIX
              + "(id) ON DELETE CASCADE,"
              + "CHROM VARCHAR(20) NOT NULL,"
              + "POS INT NOT NULL,"
              + "START0 INT NOT NULL,"
              + "END0 INT NOT NULL,"
              + "RS_ID VARCHAR(50),"
              + "REF "
              + text()
              + " NOT NULL,"
              + "QUAL FLOAT"
              + ");");

      out.println(
          "create table if not exists ALT"
              + SUFFIX
              + "("
              + columnId()
              + "var_id INT NOT NULL REFERENCES VARIATION"
              + SUFFIX
              + "(id) ON DELETE CASCADE,"
              + "ALT "
              + text()
              + ");");
      out.println(
          "create table if not exists FILTER"
              + SUFFIX
              + "("
              + columnId()
              + "var_id INT NOT NULL REFERENCES VARIATION"
              + SUFFIX
              + "(id) ON DELETE CASCADE,"
              + "FILTER varchar(50) not null"
              + ");");

      out.println(
          "create table if not exists INFO"
              + SUFFIX
              + "("
              + columnId()
              + "var_id INT NOT NULL REFERENCES VARIATION"
              + SUFFIX
              + "(id) ON DELETE CASCADE,"
              + "k varchar(50) not null,"
              + "v "
              + text()
              + " not null"
              + ");");

      out.println(
          "create table if not exists EXTRAINFO"
              + SUFFIX
              + "("
              + columnId()
              + "info_id INT NOT NULL REFERENCES INFO"
              + SUFFIX
              + "(id) ON DELETE CASCADE,"
              + "type varchar(50) not null"
              + ");");

      out.println(
          "create table if not exists EXTRAINFOPROP"
              + SUFFIX
              + "("
              + columnId()
              + "extrainfo_id INT NOT NULL REFERENCES EXTRAINFO"
              + SUFFIX
              + "(id) ON DELETE CASCADE,"
              + "k varchar(50) not null,"
              + "v "
              + text()
              + " not null"
              + ");");

      out.println(
          "create table if not exists GENOTYPE"
              + SUFFIX
              + "("
              + columnId()
              + "var_id INT NOT NULL REFERENCES VARIATION"
              + SUFFIX
              + "(id) ON DELETE CASCADE,"
              + "sample_id INT NOT NULL REFERENCES SAMPLE"
              + SUFFIX
              + "(id) ON DELETE CASCADE,"
              + "A1 "
              + text()
              + ", A2 "
              + text()
              + ", dp int, ad varchar(50), gq float,pl "
              + text()
              + ","
              + "is_phased SMALLINT not null,is_hom SMALLINT not null,is_homref  SMALLINT not null,is_homvar  SMALLINT not null,is_mixed  SMALLINT not null,"
              + "is_nocall SMALLINT not null,is_noninformative SMALLINT not null,is_available SMALLINT not null,is_called SMALLINT not null,is_filtered  SMALLINT not null"
              + ");");
      out.println(
          "create table if not exists GTPROP"
              + SUFFIX
              + "("
              + columnId()
              + "genotype_id INT NOT NULL REFERENCES GENOTYPE"
              + SUFFIX
              + "(id) ON DELETE CASCADE,"
              + "k varchar(50) not null,"
              + "v "
              + text()
              + " not null"
              + ");");
      switch (this.engine) {
        case sqlite:
          out.println("begin transaction;");
          break;
        default:
          break;
      }

      if (IN.isEmpty()) {
        LOG.info("reading from stdin");
        read(System.in, "<stdin>");
      } else {
        for (File input : IN) {
          LOG.info("opening " + input);
          InputStream in = IOUtils.openFileForReading(input);
          read(in, input.toString());
          in.close();
        }
      }
      if (SQLINDEX) {
        index("SAMPLE", "name");
        index("EXTRAINFO", "type");
        index("EXTRAINFOPROP", "k");
        index("EXTRAINFOPROP", "v");

        index("INFO", "var_id");
        index("INFO", "k");
        index("EXTRAINFO", "info_id");
        index("EXTRAINFOPROP", "extrainfo_id");
        index("GENOTYPE", "var_id");
        index("GENOTYPE", "sample_id");
      }
      switch (this.engine) {
        case sqlite:
          out.println("commit;");
          break;
        default:
          break;
      }

      out.flush();
    } catch (IOException err) {
      err.printStackTrace();
      return -1;
    }
    return 0;
  }
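  // Added commentary -- schema sketch recovered from the DDL above:
  //   FILE 1->n HEADER, FILE 1->n VARIATION,
  //   VARIATION 1->n ALT / FILTER / INFO / GENOTYPE,
  //   INFO 1->n EXTRAINFO 1->n EXTRAINFOPROP,
  //   GENOTYPE 1->n GTPROP, with SAMPLE referenced by GENOTYPE.sample_id.
  // Every child table declares ON DELETE CASCADE, so deleting a FILE row
  // removes everything loaded from that file.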

  /** Emits a CREATE INDEX statement for the given table/column pair. */
  private void index(String table, String column) {
    out.print("create index ");

    switch (this.engine) {
      case hsql:
        break;
      default:
        out.print(" if not exists ");
        break;
    }

    out.print(
        " "
            + (table + SUFFIX + "_" + column + "_IDX").toUpperCase()
            + " on "
            + table
            + SUFFIX
            + "("
            + column
            + ");");
  }
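  // Added commentary -- with an empty SUFFIX and a non-hsql engine,
  // index("SAMPLE", "name") prints roughly:
  //   create index if not exists SAMPLE_NAME_IDX on SAMPLE(name);
  // The hsql branch omits "if not exists", presumably because the targeted
  // HSQLDB version did not support that clause.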

  /** Reads one VCF stream and writes the corresponding INSERT statements to 'out'. */
  private void read(InputStream in, String filename) throws IOException {
    Pattern pipe = Pattern.compile("[\\|]");
    Pattern amp = Pattern.compile("&");

    out.println("insert into FILE" + SUFFIX + "(filename) values (" + quote(filename) + ");");
    VcfIterator r = new VcfIterator(in);

    VCFHeader header = r.getHeader();

    String[] csqColumns = null;
    VCFInfoHeaderLine infoHeader = header.getInfoHeaderLine("CSQ");
    if (infoHeader != null && this.USE_VEP) {
      LOG.info("parsing VEP " + infoHeader.getDescription());
      final String formatStr = "Format: ";
      int i = infoHeader.getDescription().indexOf(formatStr);
      if (i != -1) {
        csqColumns =
            pipe.split(infoHeader.getDescription().substring(i + formatStr.length()).trim());
        LOG.debug(Arrays.asList(csqColumns));
      } else {
        LOG.error("Cannot parse " + infoHeader.getDescription());
      }
    }
    String[] snpEffColumns = null;
    infoHeader = header.getInfoHeaderLine("EFF");
    if (infoHeader != null && this.USE_SNPEFF) {
      LOG.info("parsing EFF " + infoHeader.getDescription());

      final String formatStr = ".Format: '";
      final String desc = infoHeader.getDescription();
      int i = desc.indexOf(formatStr);
      if (i != -1) i = desc.indexOf('(', i + formatStr.length());
      int j = desc.lastIndexOf(')');
      if (i != -1 && j > i) {
        snpEffColumns =
            pipe.split(desc.substring(i + 1, j).replaceAll("[ \\[\\]()\\.]", "").trim());
        LOG.info(Arrays.asList(snpEffColumns));
      } else {
        LOG.error("Cannot parse " + infoHeader.getDescription());
      }
    }

    String[] nmdColumns = null;
    infoHeader = header.getInfoHeaderLine("NMD");
    if (infoHeader != null && this.USE_SNPEFF) {

      final String formatStr = " Format: '";
      final String desc = infoHeader.getDescription();
      int i = desc.indexOf(formatStr);
      int j = (i == -1 ? -1 : desc.lastIndexOf('\''));

      if (i != -1 && j > i) {
        nmdColumns =
            pipe.split(
                desc.substring(i + formatStr.length(), j).replaceAll("[ \\[\\]()\\.]", "").trim());
      } else {
        LOG.error("Cannot parse " + infoHeader.getDescription());
      }
    }

    String[] lofColumns = null;
    infoHeader = header.getInfoHeaderLine("LOF");
    if (infoHeader != null && this.USE_SNPEFF) {

      final String formatStr = " Format: '";
      final String desc = infoHeader.getDescription();
      int i = desc.indexOf(formatStr);
      int j = (i == -1 ? -1 : desc.lastIndexOf('\''));

      if (i != -1 && j > i) {
        lofColumns =
            pipe.split(
                desc.substring(i + formatStr.length(), j).replaceAll("[ \\[\\]()\\.]", "").trim());
      } else {
        LOG.error("Cannot parse " + infoHeader.getDescription());
      }
    }

    for (String S : header.getSampleNamesInOrder()) {
      // merge into SAMPLE using (select 1+MAX(id),'azdazd' from SAMPLE) as vals(x,y) on
      // SAMPLE.name=vals.y when  NOT MATCHED THEN INSERT VALUES vals.x,vals.y;
      switch (this.engine) {
        case hsql:
          out.println(
              "merge into SAMPLE"
                  + SUFFIX
                  + " using ( values("
                  + quote(S)
                  + ") ) "
                  + "AS vals(y) ON SAMPLE"
                  + SUFFIX
                  + ".name = vals.y "
                  + "WHEN NOT MATCHED THEN INSERT VALUES  (NULL,vals.y);");
          break;
        default:
          out.println(
              "insert or ignore into SAMPLE" + SUFFIX + "(name) values (" + quote(S) + ");");
          break;
      }
    }

    List<String> headers = new ArrayList<String>();

    for (VCFHeaderLine line : header.getMetaDataInSortedOrder()) {
      if (VCFHeaderVersion.isFormatString(line.getKey())) continue;
      headers.add(VCFHeader.METADATA_INDICATOR + line);
    }

    String chromLine = VCFHeader.HEADER_INDICATOR;
    for (VCFHeader.HEADER_FIELDS field : header.getHeaderFields()) {
      if (!VCFHeader.HEADER_INDICATOR.equals(chromLine))
        chromLine += (VCFConstants.FIELD_SEPARATOR);
      chromLine += (field);
    }

    if (header.hasGenotypingData()) {
      chromLine += VCFConstants.FIELD_SEPARATOR + "FORMAT";
      for (String sample : header.getGenotypeSamples()) {
        chromLine += VCFConstants.FIELD_SEPARATOR;
        chromLine += sample;
      }
    }
    headers.add(chromLine);

    for (String line : headers) {
      out.println(
          "insert into HEADER"
              + SUFFIX
              + "(file_id,header) values ("
              + "(select max(id) from FILE"
              + SUFFIX
              + "),"
              + quote(line)
              + ");");
    }

    while (r.hasNext()) {
      VariantContext var = r.next();

      if (var == null) {
        LOG.error("Cannot parse VCF");
        continue;
      }
      // "create table if not exists FILE(id,filename text)";
      // "create table if not exists VARIATION(id,file_id,chrom,pos,start0,end0,rs_id,ref,qual)";

      out.println(
          "insert into VARIATION"
              + SUFFIX
              + "(file_id,chrom,pos,START0,END0,rs_id,ref,qual) values ("
              + "(select max(id) from FILE"
              + SUFFIX
              + "),"
              + quote(var.getChr())
              + ","
              + var.getStart()
              + ","
              + (var.getStart() - 1)
              + ","
              + var.getEnd()
              + ","
              + (var.getID() == null || var.getID().equals(VCFConstants.EMPTY_ID_FIELD)
                  ? "NULL"
                  : quote(var.getID()))
              + ","
              + quote(var.getReference().getDisplayString())
              + ","
              + (var.getPhredScaledQual() < 0 ? "NULL" : var.getPhredScaledQual())
              + ");");
      // "create table if not exists ALT(id,var_id,alt)";

      for (Allele alt : var.getAlternateAlleles()) {
        out.println(
            "insert into ALT"
                + SUFFIX
                + "(var_id,alt) values ("
                + "(select max(id) from VARIATION"
                + SUFFIX
                + "),"
                + quote(alt.getDisplayString())
                + ");");
      }
      // "create table if not exists FILTER(id,var_id,filter)";

      for (String filter : var.getFilters()) {
        out.println(
            "insert into FILTER"
                + SUFFIX
                + "(var_id,filter) values ("
                + "(select max(id) from VARIATION"
                + SUFFIX
                + "),"
                + quote(filter)
                + ");");
      }
      CommonInfo infos = var.getCommonInfo();
      for (String key : infos.getAttributes().keySet()) {
        Object val = infos.getAttribute(key);
        // "create table if not exists INFO(id,var_id,k,v)";

        if (SPLIT4 && key.equals("DP4")) {
          // DP4 holds four comma-separated counts: ref-forward, ref-reverse,
          // alt-forward, alt-reverse. Guard against malformed values rather
          // than risking an ArrayIndexOutOfBoundsException.
          String[] dp4 = infotoString(val).split("[,]");
          if (dp4.length == 4) {
            insertIntoInfo(quote(key + "[refFor]"), quote(dp4[0]));
            insertIntoInfo(quote(key + "[refRev]"), quote(dp4[1]));
            insertIntoInfo(quote(key + "[altFor]"), quote(dp4[2]));
            insertIntoInfo(quote(key + "[altRev]"), quote(dp4[3]));
          } else {
            insertIntoInfo(quote(key), quote(infotoString(val)));
          }
        } else {
          insertIntoInfo(quote(key), quote(infotoString(val)));
        }

        if (key.equals("CSQ") && csqColumns != null) {
          List<?> asArray = castToStringArray(val);

          for (Object csqs : asArray) {
            if (csqs.toString().isEmpty()) continue;
            String[] tokens = pipe.split(csqs.toString());
            List<String> extraInfo = new ArrayList<String>();
            for (int t = 0; t < tokens.length && t < csqColumns.length; ++t) {
              if (tokens[t].isEmpty()) continue;
              if (csqColumns[t].equals("Consequence")) {
                for (String pred : amp.split(tokens[t])) {
                  if (pred.isEmpty()) continue;
                  extraInfo.add(csqColumns[t]);
                  extraInfo.add(pred);
                }

              } else {
                extraInfo.add(csqColumns[t]);
                extraInfo.add(tokens[t]);
              }
            }
            insertExtraInfos("CSQ", extraInfo);
          }
        }

        if (key.equals("EFF") && snpEffColumns != null) {
          for (Object item : castToStringArray(val)) {
            String snpeff = item.toString();
            if (snpeff.isEmpty()) continue;
            int opar = snpeff.indexOf('(');
            if (opar == -1) continue;
            int cpar = snpeff.lastIndexOf(')');
            if (cpar == -1) continue;
            String[] tokens = pipe.split(snpeff.substring(opar + 1, cpar));
            List<String> h = new ArrayList<String>();
            h.add("Effect");
            h.add(snpeff.substring(0, opar));
            for (int t = 0; t < tokens.length && t < snpEffColumns.length; ++t) {
              if (tokens[t].isEmpty()) continue;
              h.add(snpEffColumns[t]);
              h.add(tokens[t]);
            }
            insertExtraInfos(key, h);
          }
        }

        if (key.equals("NMD") && nmdColumns != null) {

          for (Object item : castToStringArray(val)) {
            String nmd = item.toString();
            if (nmd.isEmpty()) continue;
            String[] tokens = pipe.split(nmd);
            List<String> h = new ArrayList<String>(nmdColumns.length * 2);
            for (int t = 0; t < tokens.length && t < nmdColumns.length; ++t) {
              if (tokens[t].isEmpty()) continue;
              h.add(nmdColumns[t]);
              h.add(tokens[t]);
            }
            insertExtraInfos(key, h);
          }
        }

        if (key.equals("LOF") && lofColumns != null) {

          for (Object item : castToStringArray(val)) {
            String lof = item.toString();
            if (lof.isEmpty()) continue;
            String[] tokens = pipe.split(lof);
            List<String> h = new ArrayList<String>(lofColumns.length * 2);
            for (int t = 0; t < tokens.length && t < lofColumns.length; ++t) {
              if (tokens[t].isEmpty()) continue;
              h.add(lofColumns[t]);
              h.add(tokens[t]);
            }
            insertExtraInfos(key, h);
          }
        }
      }
      GenotypesContext genotypesCtx = var.getGenotypes();
      for (Genotype g : genotypesCtx) {
        // "create table if not exists GENOTYPE(id,var_id,k,v)";

        List<Allele> alleles = g.getAlleles();

        out.println(
            "insert into GENOTYPE"
                + SUFFIX
                + "(var_id,sample_id,A1,A2,dp,ad,gq,pl,"
                + "is_phased,is_hom,is_homref,is_homvar,is_mixed,"
                + "is_nocall,is_noninformative,is_available,is_called,is_filtered"
                + ") values ("
                + "(select max(id) from VARIATION"
                + SUFFIX
                + "),"
                + "(select id from SAMPLE"
                + SUFFIX
                + " where name="
                + quote(g.getSampleName())
                + "),"
                // only diploid calls are stored; other ploidies yield NULL alleles
                + (alleles.size() == 2 ? quote(alleles.get(0).getBaseString()) : "NULL")
                + ","
                + (alleles.size() == 2 ? quote(alleles.get(1).getBaseString()) : "NULL")
                + ","
                + (g.hasDP() ? g.getDP() : "NULL")
                + ","
                + (g.hasAD() ? quote(infotoString(g.getAD())) : "NULL")
                + ","
                + (g.hasGQ() ? g.getGQ() : "NULL")
                + ","
                + (g.hasPL() ? quote(infotoString(g.getPL())) : "NULL")
                + ","
                + (g.isPhased() ? 1 : 0)
                + ","
                + (g.isHom() ? 1 : 0)
                + ","
                + (g.isHomRef() ? 1 : 0)
                + ","
                + (g.isHomVar() ? 1 : 0)
                + ","
                + (g.isMixed() ? 1 : 0)
                + ","
                + (g.isNoCall() ? 1 : 0)
                + ","
                + (g.isNonInformative() ? 1 : 0)
                + ","
                + (g.isAvailable() ? 1 : 0)
                + ","
                + (g.isCalled() ? 1 : 0)
                + ","
                + (g.isFiltered() ? 1 : 0)
                + ");");

        for (String key : g.getExtendedAttributes().keySet()) {
          Object val = g.getExtendedAttribute(key);
          if (val == null) continue;
          out.println(
              "insert into GTPROP"
                  + SUFFIX
                  + "(genotype_id,k,v) values ("
                  + "(select max(id) from GENOTYPE"
                  + SUFFIX
                  + "),"
                  + quote(key)
                  + ","
                  + quote(infotoString(val))
                  + ");");
        }
      }
    }
    r.close();
  }
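  // Added commentary -- every child INSERT above links to its parent with
  // "(select max(id) from PARENT)". This works only because the script is
  // generated, and meant to be executed, strictly sequentially: the most
  // recently inserted parent row is always the one the child belongs to.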

  /** Escapes a string as a single-quoted SQL literal, doubling any embedded quote. */
  private String quote(String s) {
    if (s == null) return "NULL";
    StringBuilder b = new StringBuilder();
    b.append("\'");
    for (int i = 0; i < s.length(); ++i) {
      char c = s.charAt(i);
      switch (c) {
        case '\'':
          b.append("''");
          break;
        default:
          b.append(c);
          break;
      }
    }
    b.append("\'");
    return b.toString();
  }
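  // Added commentary -- quote("it's") returns 'it''s' and quote(null) returns
  // the bare keyword NULL, following the SQL convention of doubling embedded
  // single quotes.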

  /** Writes one EXTRAINFO row of the given type, then one EXTRAINFOPROP row per (key,value) pair in the flat list h. */
  private void insertExtraInfos(String type, List<String> h) {
    boolean first = true;
    for (int i = 0; i + 1 < h.size(); i += 2) {
      if (h.get(i + 1).isEmpty()) continue;
      if (first) {

        out.println(
            "insert into EXTRAINFO"
                + SUFFIX
                + "(info_id,type) values ("
                + "(select max(id) from INFO"
                + SUFFIX
                + "),"
                + quote(type)
                + ");");
        first = false;
      }

      out.println(
          "insert into EXTRAINFOPROP"
              + SUFFIX
              + "(extrainfo_id,k,v) values ("
              + "(select max(id) from EXTRAINFO"
              + SUFFIX
              + "),"
              + quote(h.get(i))
              + ","
              + quote(h.get(i + 1))
              + ");");
    }
  }
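  // Added commentary -- a call such as (hypothetical values)
  //   insertExtraInfos("CSQ", Arrays.asList("Gene", "BRCA1", "Consequence", "missense_variant"))
  // emits one EXTRAINFO row typed 'CSQ' plus one EXTRAINFOPROP row per
  // non-empty key/value pair; the EXTRAINFO row is skipped entirely when
  // every value is empty.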

  /** Returns the value as a List; a scalar INFO value is wrapped in a one-element list. */
  @SuppressWarnings("unchecked")
  private List castToStringArray(Object val) {
    if (val instanceof List) {
      return (List) val;
    } else {
      return new ArrayList(Collections.singleton(val.toString()));
    }
  }
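  // Added commentary -- an INFO attribute that htsjdk already parsed as a List
  // is returned as-is; a scalar such as Integer 42 becomes the one-element
  // list ["42"].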

  /** Converts an INFO/FORMAT value to a comma-separated string, recursing into arrays and lists. */
  private String infotoString(Object o) {
    if (o instanceof int[]) {
      int[] array = (int[]) o;
      StringBuilder b = new StringBuilder();
      for (int i = 0; i < array.length; ++i) {
        if (i > 0) b.append(",");
        b.append(infotoString(array[i]));
      }
      return b.toString();
    }
    if (o instanceof List) {
      List<?> list = List.class.cast(o);
      StringBuilder b = new StringBuilder();
      for (int i = 0; i < list.size(); ++i) {
        if (i > 0) b.append(",");
        b.append(infotoString(list.get(i)));
      }
      return b.toString();
    }
    return o.toString();
  }
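  // Added commentary -- infotoString(new int[] {1, 2, 3}) and
  // infotoString(Arrays.asList(1, 2, 3)) both yield "1,2,3"; any other type
  // falls through to toString().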

  /** Writes an INFO row for the most recently inserted VARIATION; key and val must already be SQL-quoted. */
  private void insertIntoInfo(String key, String val) {
    out.println(
        "insert into INFO"
            + SUFFIX
            + "(var_id,k,v) values ("
            + "(select max(id) from VARIATION"
            + SUFFIX
            + "),"
            + key
            + ","
            + val
            + ");");
  }
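  // Added commentary -- with an empty SUFFIX, insertIntoInfo(quote("DP"), quote("42"))
  // prints:
  //   insert into INFO(var_id,k,v) values ((select max(id) from VARIATION),'DP','42');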

  public static void main(String[] args) {
    new VcfToSql().instanceMainWithExit(args);
  }
}
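
// Added commentary -- a minimal usage sketch. The option names below (IN,
// ENGINE, SQLINDEX) mirror the fields used in doWork() and follow the
// Picard-style "KEY=value" convention this tool family uses; the jar name is
// hypothetical and not confirmed by this listing:
//
//   java -jar vcf2sql.jar IN=input.vcf ENGINE=sqlite SQLINDEX=true > load.sql
//   sqlite3 variants.db < load.sql
//
// The generated script is plain SQL text, so it can be piped straight into
// the target database client.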