Esempio n. 1
  /**
   * Asserts that every entry read from a VCF file carries the expected value in the {@code
   * SnpSiftCmdVarType.VARTYPE} INFO field.
   *
   * @param file VCF file handed to {@code VcfFileIterator}
   * @param varTypeExpected value each entry's VARTYPE INFO field is compared against
   */
  void checkVarTypeField(String file, String varTypeExpected) {
    // NOTE(review): 'varType' is created but never used in the visible code.
    SnpSiftCmdVarType varType = new SnpSiftCmdVarType(null);

    VcfFileIterator vcf = new VcfFileIterator(file);
    for (VcfEntry ve : vcf) {
      // Annotate
      // NOTE(review): no statement follows the "Annotate" step; presumably a call that uses
      // 'varType' on 've' was lost from this snippet -- confirm against the original source.

      // Check that all variants are the ones expected
      String varTypeAnnotated = ve.getInfo(SnpSiftCmdVarType.VARTYPE);
      Assert.assertEquals(varTypeExpected, varTypeAnnotated);
      // NOTE(review): the closing braces of the loop and of the method are not present here.
Esempio n. 2
  /**
   * Add ID information, making sure we are not repeating IDs.
   *
   * @param vcfEntry entry whose current ID is read through {@code getId()}
   * @param idSet candidate IDs; when empty, nothing is done
   * @return {@code false} when {@code idSet} is empty; otherwise whether {@code uniqueIds}
   *     produced a non-empty string
   */
  protected boolean annotateIds(VcfEntry vcfEntry, Set<String> idSet) {
    if (idSet.isEmpty()) return false;

    // Add IDs, making sure we are not repeating them.
    // uniqueIds() receives the candidate set and the entry's current ID; presumably it returns
    // only the IDs not already present in the entry -- verify against its definition.
    boolean annotated = false;
    String id = uniqueIds(idSet, vcfEntry.getId());
    if (!id.isEmpty()) { // Skip if no new ids found
      annotated = true;

      // Add ID: keep the entry's existing ID in front, separated by ';'
      if (!vcfEntry.getId().isEmpty()) id = vcfEntry.getId() + ";" + id;
      // NOTE(review): the combined 'id' is never written back to 'vcfEntry' in the visible code,
      // and the closing braces of this 'if' and of the method are absent -- lines appear to have
      // been lost from this snippet; confirm against the original source.

    return annotated;
Esempio n. 3
  /** Find matching entries in the database */
  public List<VcfEntry> find(VcfEntry vcfEntry) {
    List<Variant> vars = vcfEntry.variants();
    List<VcfEntry> ves = new LinkedList<>();

    for (Variant var : vars) ves.addAll(find(var));

    return ves;
Esempio n. 4
  /** Check info field Note: We report the first error we find */
  String checkInfo(String infoName) {
    if (infoName.isEmpty()) return "";

    VcfHeaderInfo vcfInfo = getVcfInfo(infoName);
    if (vcfInfo == null) return "Cannot find header for INFO field '" + infoName + "'";

    // Split INFO value and match it to allele
    String valsStr = getInfo(infoName);
    if (valsStr == null) return ""; // INFO field not present, nothing to do

    // Check values
    String values[] = valsStr.split(",");
    for (String val : values)
      if (!VcfEntry.isValidInfoValue(val))
        return "INFO filed '"
            + infoName
            + "' has an invalid value '"
            + val
            + "' (no spaces, tabs, '=' or ';' are allowed)";

    // Check number of INFO elements
    if (vcfInfo.isNumberNumber() && vcfInfo.getNumber() != values.length) {
      VcfInfoType type = vcfInfo.getVcfInfoType();
      if (type == VcfInfoType.Flag && values.length == 1)
        ; // OK, flags must have one or zero values
        return "INFO filed '"
            + infoName
            + "' has 'Number="
            + vcfInfo.getNumber()
            + "' in header, but it contains '"
            + values.length
            + "' elements.";
    if (vcfInfo.isNumberAllAlleles() && values.length != (alts.length + 1))
      return "INFO filed '"
          + infoName
          + "' has 'Number=R' in header, but it contains '"
          + values.length
          + "' elements when there are '"
          + alts.length
          + "' alleles (it should have '"
          + (alts.length + 1)
          + "' elements).";
    if (vcfInfo.isNumberAllAlleles() && values.length != alts.length)
      return "INFO filed '"
          + infoName
          + "' has 'Number=A' in header, but it contains '"
          + values.length
          + "' elements when there are '"
          + alts.length
          + "' alleles.";

    return "";
Esempio n. 5
  /** Annotate a VCF entry */
  public boolean annotate(VcfEntry vcfEntry) throws IOException {
    boolean annotated = false;
    Set<String> idSet = new HashSet<>();
    Map<String, String> infos = new HashMap<>();
    boolean exists = false;

    // ---
    // Find all matching database entries
    // Note that QueryResult.variantVcfEntry can be 'null'
    // ---
    List<QueryResult> queryResults = new LinkedList<>();
    Set<VcfEntry> uniqueVcfEntries = new HashSet<>();
    for (Variant var : vcfEntry.variants()) {
      // Skip huge structural variants
      if (var.isStructuralHuge()) continue;

      // Query database
      Collection<VariantVcfEntry> results = query(var);

      // Make sure we add all found VcfEntries
      for (VariantVcfEntry dbEntry : results) uniqueVcfEntries.add(dbEntry.getVcfEntry());

      // Add query and result
      QueryResult qr = new QueryResult(var, results);
      if (debug) Gpr.debug("Adding QueryResult: " + qr);

    // Try to find INFO fields that we might have not seen before
    if (useAllInfoFields) {
      for (VcfEntry ve : uniqueVcfEntries) discoverInfoFields(ve);

    // Add INFO fields using 'REF' data
    findDbInfoRef(infos, uniqueVcfEntries);

    // ---
    // Annotate all fields
    // ---
    for (QueryResult qr : queryResults) {
      if (debug) Gpr.debug("Processing QueryResult: " + qr);

      if (useId) findDbId(idSet, qr);
      if (existsInfoField != null) exists |= findDbExists(qr);
      if (useInfoFields) findDbInfo(infos, qr);

    // Annotate input vcfEntry
    annotated |= annotateIds(vcfEntry, idSet);
    annotated |= annotateInfo(vcfEntry, infos);
    if (exists) annotateExists(vcfEntry);

    return annotated;
Esempio n. 6
  /**
   * Check that all variants in a file belong to a given type
   *
   * @param file VCF file to read
   * @param varTypeExpected name of the INFO flag every entry is expected to carry
   */
  void checkAllVarType(String file, String varTypeExpected) {
    // NOTE(review): 'varType' is created but never used in the visible code.
    SnpSiftCmdVarType varType = new SnpSiftCmdVarType(null);

    // Iterate over every entry of the VCF file
    VcfFileIterator vcf = new VcfFileIterator(file);
    for (VcfEntry ve : vcf) {
      // Annotate
      // NOTE(review): no statement follows the "Annotate" step; presumably a call that uses
      // 'varType' on 've' was lost from this snippet -- confirm against the original source.

      // Check that all variants are the ones expected
      // NOTE(review): the expression under 'if (verbose)' has no enclosing call and no
      // terminating ';' -- the surrounding logging call appears to have been lost; confirm
      // against the original source.
      if (verbose)
            ve //
                + "\n\tvarTypeExpected: "
                + varTypeExpected //
                + "\n\tINFO flag      : "
                + ve.getInfoFlag(varTypeExpected) //
      // Print the offending entry to stderr before the assertion below fails
      if (!ve.getInfoFlag(varTypeExpected))
        System.err.println("Eror in file '" + file + "':\n" + ve);
      Assert.assertEquals(true, ve.getInfoFlag(varTypeExpected));
      // NOTE(review): the closing braces of the loop and of the method are not present here.
Esempio n. 7
  /** Add INFO fields. */
  protected boolean annotateInfo(VcfEntry vcfEntry, Map<String, String> info) {
    if (info == null || info.isEmpty()) return false;

    // Sort keys alphabetically
    ArrayList<String> keys = new ArrayList<String>();

    // Add keys sorted alphabetically
    for (String key : keys) {
      String value = info.get(key);

      // Skip empty fields?
      if (!annotateEmpty && VcfEntry.isEmpty(value)) continue;

      // Add INFO entry
      if (prependInfoFieldName != null) key = prependInfoName(key);
      vcfEntry.addInfo(key, value);

    return true;
Esempio n. 8
  /** Fill values for INFO fields requiring 'REF' value */
  protected void findDbInfoRef(Map<String, String> info, Set<VcfEntry> uniqueVcfEntries) {
    if (!useInfoFields || !hasVcfInfoPerAlleleRef) return; // Nothing to do

    for (String infoFieldName : infoFields) {
      // Does this field require 'REF' annotation?
      if (!isVcfInfoPerAlleleRef(infoFieldName)) continue;

      // Try to find 'REF' information in any entry
      String val = null;
      for (VcfEntry dbVcfEntry : uniqueVcfEntries) {
        val = dbVcfEntry.getInfo(infoFieldName, dbVcfEntry.getRef());

        if (VcfEntry.isEmpty(val)) val = null; // Only add non-empty
        else break; // We need only one value

      // Nothing found? Use 'MISSING' value
      if (val == null) val = VcfFileIterator.MISSING;

      // Store value
      info.put(infoFieldName, val);
Esempio n. 9
  /** Query VCF entries intersecting 'marker' at node 'idx' */
  protected void queryIntersects(Interval queryMarker, int idx, Markers results) {
    if (intersectFilePosStart[idx] == null) return;
    if (debug) Gpr.debug("queryIntersects\tidx: " + idx);

    // Read entries from disk
    List<VcfEntry> vcfEntries = readEntries(idx);

    // Find matching entries
    for (VcfEntry ve : vcfEntries) {
      // If any variant within the vcfEntry intersects the query
      // marker, we store this VCF entry as a result
      for (Variant var : ve.variants()) {
        if (var.intersects(queryMarker)) {
          if (debug) Gpr.debug("\tAdding matching result: " + ve);
          break; // Store this entry only once

      // Past query's end coordinate? We don't need to look any further
      if (queryMarker.getEnd() < ve.getStart()) return;
Esempio n. 10
  /** Find all non-empty INFO fields 'infoFieldName' in results */
  protected String findDbInfo(String infoFieldName, QueryResult qr) {
    if (debug) Gpr.debug("Finding DB data for INFO field: " + infoFieldName);
    StringBuilder sb = new StringBuilder();

    for (VariantVcfEntry varVe : qr.results) {
      if (varVe != null) {
        String val = varVe.getVcfEntry().getInfo(infoFieldName);
        if (!VcfEntry.isEmpty(val)) {
          if (debug) Gpr.debug("\tFound: " + val);
          if (sb.length() > 0) sb.append(',');

    return sb.length() <= 0 ? null : sb.toString();
Esempio n. 11
  /** Find the first non-empty INFO field 'infoFieldName' in results Note: ALT must match */
  protected String findDbInfoAlt(String infoFieldName, QueryResult qr) {
    for (VariantVcfEntry varVe : qr.results) {
      if (varVe != null) {
        // IMPORTANT: When a variant is parse, the original 'ALT' entry is stored in
        //            the 'Variant.genotype' whereas 'variant.alt' contains
        //            a 'minimal ALT'. E.g. if we have
        //                vcfEntry.ref = 'AC'
        //                vcfEntry.alt = 'A'
        //            Then
        //                variant.ref = 'C'
        //                variant.alt = ''
        //                variant.genotype = 'A'   <-- This is the 'original' ALT field from
        // vcfEntry
        //            That's why we use 'var.getGenotype()' in the following 'getInfo()' method.
        String vcfAlt = qr.variant.getGenotype();

        String val = varVe.getVcfEntry().getInfo(infoFieldName, vcfAlt);
        if (!VcfEntry.isEmpty(val)) return val;

    return VcfFileIterator.MISSING;
Esempio n. 12
 /**
  * If 'ALL' info fields are being used, we can try to discover new fields that have not already
  * been added to the annotation list (e.g. implicit fields not mentioned in the VCF header)
  */
 protected void discoverInfoFields(VcfEntry dbVcfEntry) {
   // Make sure all fields are added
   for (String info : dbVcfEntry.getInfoKeys()) if (!info.isEmpty()) infoFields.add(info);
Esempio n. 13
 /**
  * Add 'exists' flag to INFO fields.
  *
  * @param vcfEntry entry that receives the INFO key named by {@code existsInfoField}
  */
 protected void annotateExists(VcfEntry vcfEntry) {
   // The key is added with a null value
   vcfEntry.addInfo(existsInfoField, null);