Пример #1
0
  /**
   * Compares the direction of allele-specific expression (ASE) effects with eQTL effects.
   *
   * <p>All eQTLs from a hard-coded result file are indexed by {@code chr:pos}. The hard-coded ASE
   * result file is then read line by line (the first line is skipped as a header). For every ASE
   * line whose position has eQTLs, each eQTL whose probe is listed in the ASE genes column is
   * counted once per probe, and its Z-score sign is compared with the ASE estimate relative to
   * 0.5. Summary counts and fractions are printed to standard output.
   *
   * @param args the command line arguments (not used)
   * @throws Exception if an input file cannot be read or an ASE estimate cannot be parsed
   */
  @SuppressWarnings("ManualArrayToCollectionCopy")
  public static void main(String[] args) throws Exception {

    // Alternative eQTL inputs used for other runs:
    // "D:\\UMCG\\Genetica\\Projects\\RnaSeqEqtl\\batch9_eQTLmapping\\result_non-geuvadis_maf0.05_call0.5_pcs100_normalizedPCA_meta\\notInGeuvadis.txt"
    // "D:\\UMCG\\Genetica\\Projects\\RnaSeqEqtl\\batch9_eQTLmapping\\result_all_maf0.05_call0.5_pcs100_normalizedPCA_meta_specialPermutation_fix\\eQTLsFDR0.05-ProbeLevel.txt"
    // NOTE(review): eQTLsTextFile is never closed in this method; if QTLTextFile holds an open
    // file handle it should be closed as well - confirm against its API.
    QTLTextFile eQTLsTextFile =
        new QTLTextFile(
            "D:\\UMCG\\Genetica\\Projects\\RnaSeqEqtl\\batch9_eQTLmapping\\result_geuvadis_maf0.05_call0.5_pcs100_normalizedPCA_meta_fix\\eQTLsFDR0.05-ProbeLevel.txt",
            false);

    BufferedReader aseReader =
        new BufferedReader(
            new FileReader(
                "D:\\UMCG\\Genetica\\Projects\\RnaSeqEqtl\\Ase\\geuvadis_maskAll4_r20_a10_p2_s5_rq17_m1_gatkGenoGq30\\ase_bh.txt"));

    int aseTotal = 0;
    int aseWithEQtl = 0;
    int sameDirection = 0;
    int oppositeDirection = 0;

    // The reader is closed in the finally block so the file handle is released even when a line
    // fails to parse.
    try {

      // Index every eQTL by "chr:pos" so ASE lines can be matched by position.
      HashMap<String, ArrayList<EQTL>> eQtls = new HashMap<String, ArrayList<EQTL>>();

      for (Iterator<EQTL> eQtlIt = eQTLsTextFile.getEQtlIterator(); eQtlIt.hasNext(); ) {
        EQTL eQtl = eQtlIt.next();
        String eQtlKey = eQtl.getRsChr() + ":" + eQtl.getRsChrPos();
        ArrayList<EQTL> posEqtls = eQtls.get(eQtlKey);
        if (posEqtls == null) {
          posEqtls = new ArrayList<EQTL>(1);
          eQtls.put(eQtlKey, posEqtls);
        }
        posEqtls.add(eQtl);
      }

      // Probes that already contributed to the counts; each probe is counted at most once.
      HashSet<String> countedGenes = new HashSet<String>();

      aseReader.readLine(); // header
      String line;
      String[] elements;
      while ((line = aseReader.readLine()) != null) {

        elements = TAB_PATTERN.split(line);

        HashSet<String> aseGenes = new HashSet<String>();
        for (String gene : COMMA_PATTERN.split(elements[ASE_GENES_COLUMN])) {
          aseGenes.add(gene);
        }

        ++aseTotal;

        ArrayList<EQTL> posEqtls =
            eQtls.get(elements[ASE_CHR_COLUMN] + ":" + elements[ASE_POS_COLUMN]);
        if (posEqtls == null) {
          continue;
        }

        for (EQTL eQtl : posEqtls) {
          if (eQtl == null || !aseGenes.contains(eQtl.getProbe())) {
            continue;
          }

          // Set.add returns false when the probe was already counted on an earlier line.
          if (!countedGenes.add(eQtl.getProbe())) {
            continue;
          }

          // Optional filter that was used to exclude a region on chromosome 6:
          // if(eQtl.getRsChr() == 6 && eQtl.getRsChrPos() > 20000000 && eQtl.getRsChrPos() <
          // 40000000) { continue; }

          ++aseWithEQtl;

          double aseEstimate = Double.parseDouble(elements[ASE_ESTIMATE_COLUMN]);

          // Express the eQTL Z-score relative to the ASE A1 allele: flip the sign when the eQTL
          // was assessed on a different allele.
          double eQtlZ =
              elements[ASE_A1_COLUMN].equals(eQtl.getAlleleAssessed())
                  ? eQtl.getZscore()
                  : eQtl.getZscore() * -1;

          // Anything that is not strictly concordant (including an estimate of exactly 0.5 or a
          // Z-score of 0) falls into the "opposite" bucket.
          if (aseEstimate > 0.5 && eQtlZ > 0 || aseEstimate < 0.5 && eQtlZ < 0) {
            ++sameDirection;
          } else {
            ++oppositeDirection;
          }
        }
      }
    } finally {
      aseReader.close();
    }

    NumberFormat numberFormat = NumberFormat.getInstance();
    numberFormat.setMinimumFractionDigits(2);
    numberFormat.setMaximumFractionDigits(2);
    System.out.println("Ase total: " + aseTotal);
    System.out.println(
        "Ase SNP with eQTL effect: "
            + aseWithEQtl
            + " ("
            + numberFormat.format(aseWithEQtl / (double) aseTotal)
            + ")");
    System.out.println(
        " - Same direction: "
            + sameDirection
            + " ("
            + numberFormat.format(sameDirection / (double) aseWithEQtl)
            + ")");
    System.out.println(
        " - Opposite direction: "
            + oppositeDirection
            + " ("
            + numberFormat.format(oppositeDirection / (double) aseWithEQtl)
            + ")");
  }
  /**
   * Finds variants in LD with each eQTL SNP.
   *
   * <p>For every eQTL whose SNP is present in {@code genotypeData} and is biallelic, all biallelic
   * variants within {@code windowSize} bases on either side of the eQTL position are tested. Pairs
   * with an r2 of at least {@code r2CutOff} are collected.
   *
   * <p>If an LD calculation fails, the error is printed and the JVM is terminated with exit code 1.
   *
   * @param eqtlData the eQTLs to find LD variants for; only the map values are used
   * @param genotypeData the genotype data used to look up the eQTL SNP and surrounding variants
   * @param windowSize number of bases to search on each side of the eQTL position
   * @param r2CutOff minimum r2 (inclusive) for an LD pair to be reported
   * @return LD results keyed by the sequence name of the second variant of each pair, then by
   *     that variant's start position
   * @throws IOException declared by the signature; NOTE(review): presumably raised by the
   *     genotype data access - confirm
   */
  public HashMap<String, TreeMap<Integer, ArrayList<Ld>>> calculateLd(
      HashMap<String, EQTL> eqtlData,
      RandomAccessGenotypeData genotypeData,
      int windowSize,
      double r2CutOff)
      throws IOException {
    HashMap<String, TreeMap<Integer, ArrayList<Ld>>> ldResults =
        new HashMap<String, TreeMap<Integer, ArrayList<Ld>>>();

    for (EQTL eqtl : eqtlData.values()) {
      String chr = eqtl.getRsChr().toString();
      int eqtlPos = eqtl.getRsChrPos();

      GeneticVariant eQtlSnp = genotypeData.getSnpVariantByPos(chr, eqtlPos);

      // LD is only calculated between biallelic variants, so a missing or non-biallelic eQTL SNP
      // can never produce a result.
      if (eQtlSnp == null || !eQtlSnp.isBiallelic()) {
        continue;
      }

      // Window centred on the eQTL: [pos - windowSize, pos + windowSize], computed in long and
      // clamped so that a very large window cannot overflow int.
      // NOTE(review): whether the range end is inclusive depends on getVariantsByRange - confirm.
      int windowStart = (int) Math.max(0L, (long) eqtlPos - windowSize);
      int windowEnd = (int) Math.min((long) Integer.MAX_VALUE, (long) eqtlPos + windowSize);

      for (GeneticVariant gv : genotypeData.getVariantsByRange(chr, windowStart, windowEnd)) {

        if (!gv.isBiallelic()) {
          continue;
        }

        Ld ld = null;
        try {
          ld = eQtlSnp.calculateLd(gv);
        } catch (LdCalculatorException ex) {
          System.out.println("Error in LD calculation: " + ex.getMessage());
          System.exit(1);
        }

        if (ld.getR2() >= r2CutOff) {

          // Place results in a convenient structure for later: sequence name -> start position
          // -> LD pairs, keyed on the second variant of the pair.
          GeneticVariant variant2 = ld.getVariant2();
          String sequenceName = variant2.getSequenceName();
          Integer startPos = variant2.getStartPos();

          TreeMap<Integer, ArrayList<Ld>> ldByPos = ldResults.get(sequenceName);
          if (ldByPos == null) {
            ldByPos = new TreeMap<Integer, ArrayList<Ld>>();
            ldResults.put(sequenceName, ldByPos);
          }

          ArrayList<Ld> ldList = ldByPos.get(startPos);
          if (ldList == null) {
            ldList = new ArrayList<Ld>();
            ldByPos.put(startPos, ldList);
          }
          ldList.add(ld);
        }
      }
    }
    return ldResults;
  }