Beispiel #1
0
  /**
   * Annotates every variant read from {@code r} with data taken from the tabix-indexed VCF
   * {@code this.TABIX} and writes the result to {@code w}.
   *
   * <p>Processing steps:
   *
   * <ol>
   *   <li>The leading lines of the tabix file that start with {@link VCFHeader#HEADER_INDICATOR}
   *       are collected and parsed into a header.
   *   <li>The output header is the input header plus the definition of each INFO id listed in
   *       {@code INFO_IDS} that is declared in the tabix header, plus (when
   *       {@code ALT_CONFLICT_FLAG} is non-null) a Flag INFO line for ALT conflicts.
   *   <li>For each input variant the tabix file is queried on the variant's interval. Only records
   *       having the same start and end are kept; an exact REF+ALT match replaces every other
   *       candidate and stops the search.
   *   <li>Each kept candidate (subject to {@code REF_ALLELE_MATTERS} / {@code ALT_ALLELES_MATTERS})
   *       may supply the ID (when {@code REPLACE_ID}) and the INFO attributes listed in
   *       {@code INFO_IDS}. An attribute already present on the input variant is overwritten only
   *       when {@code REPLACE_INFO_FIELD} is set.
   * </ol>
   *
   * @param r source of the variants to annotate
   * @param w destination of the annotated variants
   * @throws IOException if reading the tabix file fails
   */
  @Override
  protected void doWork(VcfIterator r, VariantContextWriter w) throws IOException {
    AbstractVCFCodec codeIn3 = VCFUtils.createDefaultVCFCodec();

    LOG.info("opening tabix file: " + this.TABIX);
    TabixReader tabix = new TabixReader(this.TABIX);
    // try/finally: the reader must be released even when decoding or writing fails.
    try {
      // Collect the header lines of the tabix file. The first non-header line read here is
      // dropped; records are later fetched through indexed queries, not sequential reads.
      StringWriter sw = new StringWriter();
      String line;
      while ((line = tabix.readLine()) != null) {
        if (!line.startsWith(VCFHeader.HEADER_INDICATOR)) {
          break;
        }
        sw.append(line).append("\n");
      }
      VCFHeader header3 =
          (VCFHeader)
              codeIn3.readActualHeader(
                  new LineIteratorImpl(
                      LineReaderUtil.fromBufferedStream(
                          new ByteArrayInputStream(sw.toString().getBytes()))));
      VCFHeader header1 = r.getHeader();

      // Output header: a copy of the input header extended with the transferred INFO definitions.
      VCFHeader h2 =
          new VCFHeader(header1.getMetaDataInInputOrder(), header1.getSampleNamesInOrder());
      for (String infoId : this.INFO_IDS) {
        VCFInfoHeaderLine vihl = header3.getInfoHeaderLine(infoId);
        if (vihl == null) {
          LOG.warn("Not INFO=" + infoId + " in " + TABIX);
          continue;
        }
        if (h2.getInfoHeaderLine(infoId) != null) {
          LOG.warn("Input already contains INFO=" + vihl);
        }
        h2.addMetaDataLine(vihl);
      }

      if (ALT_CONFLICT_FLAG != null) {
        // A Flag carries no value, so its declared count is 0 (the VCF spec requires Number=0).
        h2.addMetaDataLine(
            new VCFInfoHeaderLine(
                ALT_CONFLICT_FLAG,
                0,
                VCFHeaderLineType.Flag,
                "conflict ALT allele with " + this.TABIX));
      }

      w.writeHeader(h2);
      while (r.hasNext()) {
        VariantContext ctx1 = r.next();

        VariantContextBuilder vcb = new VariantContextBuilder(ctx1);
        String bestId = null;
        boolean bestIdMatchesAlt = false;

        List<VariantContext> variantsList = new ArrayList<VariantContext>();

        String region = ctx1.getChr() + ":" + (ctx1.getStart()) + "-" + (ctx1.getEnd());
        int[] array = tabix.parseReg(region);
        TabixReader.Iterator iter = null;

        if (array != null
            && array.length == 3
            && array[0] != -1
            && array[1] >= 0
            && array[2] >= 0) {
          iter = tabix.query(array[0], array[1], array[2]);
        } else {
          LOG.info("Cannot get " + region);
        }

        if (iter != null) {
          String line2;
          while ((line2 = iter.next()) != null) {
            VariantContext ctx3 = codeIn3.decode(line2);
            // The query returns overlapping records; keep only those on the very same interval.
            if (ctx3.getStart() != ctx1.getStart()) {
              continue;
            }
            if (ctx3.getEnd() != ctx1.getEnd()) {
              continue;
            }

            if (ctx1.getReference().equals(ctx3.getReference())
                && ctx1.getAlternateAlleles().equals(ctx3.getAlternateAlleles())) {
              // Exact REF+ALT match: it supersedes every other candidate.
              variantsList.clear();
              variantsList.add(ctx3);
              break;
            }
            variantsList.add(ctx3);
          }
        }

        for (VariantContext ctx3 : variantsList) {

          if (this.REF_ALLELE_MATTERS && !ctx1.getReference().equals(ctx3.getReference())) {
            continue;
          }
          boolean sameAlts = ctx1.getAlternateAlleles().equals(ctx3.getAlternateAlleles());
          if (this.ALT_ALLELES_MATTERS && !sameAlts) {
            continue;
          }

          // hasID() rather than a null check: a record without an ID is expected to report the
          // missing-value placeholder instead of null, and that placeholder must not replace an
          // existing ID.
          if (this.REPLACE_ID && ctx3.hasID()) {
            // Keep an ID that came from an ALT-matching record; otherwise take the newest one.
            if (bestId == null || !bestIdMatchesAlt) {
              bestId = ctx3.getID();
              bestIdMatchesAlt = sameAlts;
            }
          }

          for (String id : this.INFO_IDS) {
            Object info3 = ctx3.getAttribute(id);
            if (info3 == null) {
              continue;
            }
            Object info1 = ctx1.getAttribute(id);
            if (info1 != null && !this.REPLACE_INFO_FIELD) {
              continue;
            }

            vcb.attribute(id, info3);
          }

          if (ALT_CONFLICT_FLAG != null && !sameAlts) {
            vcb.attribute(ALT_CONFLICT_FLAG, true);
          }
        }
        if (bestId != null) {
          vcb.id(bestId);
        }
        w.add(vcb.make());
      }
    } finally {
      tabix.close();
    }
  }