/**
 * Runs the cis/trans mediation (IV) analysis for every dataset in {@code m_gg}.
 *
 * <p>For each dataset one output file is written for the real data (round 0) and one for each of
 * the {@code m_settings.nrPermutationsFDR} permutation rounds; sample labels are permuted before
 * every permutation round. Each output file gets one row per (SNP, cis probe, trans probe) triple
 * in {@code snpProbeCombos} whose SNP and probes are all present in the dataset. Triples that
 * cannot be resolved are skipped silently.
 *
 * <p>Per triple, genotype, cis and trans values are restricted to samples with a called genotype,
 * normalized, and regressed pairwise. The trans values are then corrected for the cis probe
 * (residuals of the cis-to-trans regression) and the regressions are repeated on the residuals.
 *
 * @throws IOException if loading genotypes or writing an output file fails
 */
@Override
  public void run() throws IOException {
    // The column header is identical for every output file, so build it once.
    final String header =
        "SNP\tSNP Chr\tSNP ChrPos\t"
            + "Alleles\tDirectionAllele\t"
            + "N\t"
            + "CisArrayAddress\tCisProbe Chr\tCisProbe ChrPos\t"
            + "CisGeneName\t"
            + "TransArrayAddress\tTransProbe Chr\tTransProbe ChrPos\t"
            + "TransGeneName\t"
            + "CisTrans-Correlation\t"
            + "Cis-eQTL-Beta\t"
            + "Cis-eQTL-SE\t"
            + "CisTrans-Beta\t"
            + "CisTrans-SE\t"
            + "Trans-eQTL-Beta\t"
            + "Trans-eQTL-SE\t"
            + "CisTrans-Residual-Correlation\t"
            + "CisTrans-Residual-Beta\t"
            + "CisTrans-Residual-SE\t"
            + "Trans-eQTL-Residual-Beta\t"
            + "Trans-eQTL-Residual-SE\t"
            + "Beta-Ratio";

    for (int d = 0; d < m_gg.length; d++) {
      SNPLoader snpLoader = m_gg[d].getGenotypeData().createSNPLoader();
      try {
        // NOTE(review): this array is fetched once, before any permutation. That is only correct
        // if permuteSampleLables() shuffles this same array in place - TODO confirm.
        int[] indWGA = m_gg[d].getExpressionToGenotypeIdArray();

        for (int perm = 0; perm < m_settings.nrPermutationsFDR + 1; perm++) {
          String outfile;
          if (perm == 0) {
            outfile = outDir + m_gg[d].getSettings().name + "_IVAnalysis-RealData.txt";
          } else {
            outfile =
                outDir
                    + m_gg[d].getSettings().name
                    + "_IVAnalysis-PermutationRound-"
                    + perm
                    + ".txt";
            m_gg[d].permuteSampleLables();
          }

          TextFile out = new TextFile(outfile, TextFile.W);
          try {
            ProgressBar pb =
                new ProgressBar(
                    snpProbeCombos.size(), "Running Mediation Analysis - Permutation " + perm);
            out.writeln(header);

            // Test all triples. Checking hasNext() first keeps an empty triple set from failing.
            Iterator<Triple<String, String, String>> it = snpProbeCombos.iterator();
            while (it.hasNext()) {
              Triple<String, String, String> next = it.next();
              String snp = next.getLeft();
              String cisprobe = next.getMiddle();
              String transprobe = next.getRight();

              Integer snpId = m_gg[d].getGenotypeData().getSnpToSNPId().get(snp);
              Integer cisProbeId = m_gg[d].getExpressionData().getProbeToId().get(cisprobe);
              Integer transProbeId = m_gg[d].getExpressionData().getProbeToId().get(transprobe);

              // A missing SNP is reported as -9 (or null if the lookup is a plain map); a missing
              // probe is reported as null. Such triples produce no output row.
              if (snpId == null || snpId == -9 || cisProbeId == null || transProbeId == null) {
                pb.iterate();
                continue;
              }

              SNP snpObj = m_gg[d].getGenotypeData().getSNPObject(snpId);
              snpLoader.loadGenotypes(snpObj);
              if (snpLoader.hasDosageInformation()) {
                snpLoader.loadDosage(snpObj);
              }
              // Dosages are only loaded conditionally above, so fall back to the called genotype
              // when none are available instead of dereferencing a null array.
              // NOTE(review): assumes getDosageValues() returns null when no dosage was loaded.
              boolean dosageLoaded = snpObj.getDosageValues() != null;

              double[] origCisVals = m_gg[d].getExpressionData().getMatrix()[cisProbeId];
              double[] origTransVals = m_gg[d].getExpressionData().getMatrix()[transProbeId];
              int numIndividuals = m_gg[d].getExpressionData().getIndividuals().length;

              // First pass: count the samples that are linked to a genotype sample and have a
              // called genotype. The mapping id is checked BEFORE it is used as an index,
              // because unlinked samples carry a negative id.
              int calledGenotypes = 0;
              for (int i = 0; i < numIndividuals; i++) {
                int genotypeId = indWGA[i];
                if (genotypeId > -1 && snpObj.getGenotypes()[genotypeId] > -1) {
                  calledGenotypes++;
                }
              }

              double[] genotypes = new double[calledGenotypes];
              double[] cisvals = new double[calledGenotypes];
              double[] transvals = new double[calledGenotypes];

              // Second pass: collect the values for exactly those samples.
              int filled = 0;
              for (int i = 0; i < numIndividuals; i++) {
                int genotypeId = indWGA[i];
                if (genotypeId < 0) {
                  continue;
                }
                short gt = snpObj.getGenotypes()[genotypeId];
                if (gt > -1) {
                  genotypes[filled] = dosageLoaded ? snpObj.getDosageValues()[genotypeId] : gt;
                  cisvals[filled] = origCisVals[i];
                  transvals[filled] = origTransVals[i];
                  filled++;
                }
              }

              // normalize genotype and cis + trans to get beta's equal to the correlation
              // coefficient
              genotypes = normalize(genotypes);
              cisvals = normalize(cisvals);
              transvals = normalize(transvals);

              // Correlations are reported next to the betas for code validation.
              double corrCisTrans = JSci.maths.ArrayMath.correlation(cisvals, transvals);
              // Each regression result is indexed as: beta, alpha, se, t.
              double[] cisTransRCs =
                  Regression.getLinearRegressionCoefficients(cisvals, transvals);
              double[] snpCisRCs = Regression.getLinearRegressionCoefficients(genotypes, cisvals);
              double[] snpTransRCs =
                  Regression.getLinearRegressionCoefficients(genotypes, transvals);

              // Remove the cis-probe effect from the trans probe: keep the residuals of the
              // cis -> trans regression.
              double[] resTransVals = new double[cisvals.length];
              for (int i = 0; i < resTransVals.length; i++) {
                resTransVals[i] = transvals[i] - cisTransRCs[0] * cisvals[i];
              }
              resTransVals = normalize(resTransVals);

              double[] cisResTransRCs =
                  Regression.getLinearRegressionCoefficients(cisvals, resTransVals);
              double[] snpResTransRCs =
                  Regression.getLinearRegressionCoefficients(genotypes, resTransVals);
              double rescorr = JSci.maths.ArrayMath.correlation(cisvals, resTransVals);

              out.writeln(
                  snp
                      + "\t"
                      + snpObj.getChr()
                      + "\t"
                      + snpObj.getChrPos()
                      + "\t"
                      + BaseAnnot.toString(snpObj.getAlleles()[0])
                      + "/"
                      + BaseAnnot.toString(snpObj.getAlleles()[1])
                      + "\t"
                      + BaseAnnot.toString(snpObj.getAlleles()[0])
                      + "\t"
                      + transvals.length
                      + "\t"
                      + cisprobe
                      + "\t"
                      + m_gg[d].getExpressionData().getChr()[cisProbeId]
                      + "\t"
                      + m_gg[d].getExpressionData().getChrStart()[cisProbeId]
                      + ":"
                      + m_gg[d].getExpressionData().getChrStop()[cisProbeId]
                      + "\t"
                      + m_gg[d].getExpressionData().getAnnotation()[cisProbeId]
                      + "\t"
                      + transprobe
                      + "\t"
                      + m_gg[d].getExpressionData().getChr()[transProbeId]
                      + "\t"
                      + m_gg[d].getExpressionData().getChrStart()[transProbeId]
                      + ":"
                      + m_gg[d].getExpressionData().getChrStop()[transProbeId]
                      + "\t"
                      + m_gg[d].getExpressionData().getAnnotation()[transProbeId]
                      + "\t"
                      + corrCisTrans
                      + "\t"
                      + snpCisRCs[0]
                      + "\t"
                      + snpCisRCs[2]
                      + "\t"
                      + cisTransRCs[0]
                      + "\t"
                      + cisTransRCs[2]
                      + "\t"
                      + snpTransRCs[0]
                      + "\t"
                      + snpTransRCs[2]
                      + "\t"
                      + rescorr
                      + "\t"
                      + cisResTransRCs[0]
                      + "\t"
                      + cisResTransRCs[2]
                      + "\t"
                      + snpResTransRCs[0]
                      + "\t"
                      + snpResTransRCs[2]
                      + "\t"
                      + (snpResTransRCs[0] / snpTransRCs[0]));
              snpObj.clearGenotypes();

              pb.iterate();
            }
            pb.close();
          } finally {
            // Close the output file even when a triple fails half-way through.
            out.close();
          }
        }
      } finally {
        // Release the genotype loader even when a permutation round fails.
        snpLoader.close();
      }
    }
  }
Пример #2
0
  /**
   * Tests the SNPs of one work package against expression probes in every dataset, converts the
   * per-dataset results to p-values, optionally plots significant effects, and finally puts the
   * work package on {@code m_result_queue}.
   *
   * <p>Which probes are tested depends on the mode:
   *
   * <ul>
   *   <li>{@code cisOnly}: only the probes listed in the work package, with covariates when
   *       {@code m_covariates} is set;
   *   <li>{@code transOnly}: all {@code m_numProbes} probes except those listed in the work
   *       package;
   *   <li>otherwise: all {@code m_numProbes} probes.
   * </ul>
   *
   * <p>A dataset's SNP is dropped (set to {@code null} in the work package's SNP array) when its
   * selected genotypes have zero variance. Every probe/dataset combination that is not tested
   * gets {@code NaN} as correlation and z-score.
   *
   * @param wp the work package to analyze; its SNP array and flip-allele array may be modified,
   *     and its number of performed tests is updated
   */
  private void analyze(WorkPackage wp) {
    testsPerformed = 0;
    currentWP = wp;
    wp.setNumTested(0);

    SNP[] snps = wp.getSnps();
    int[] probes = wp.getProbes();

    double[] snpvariances = new double[m_numDatasets];
    double[][] snpmeancorrectedgenotypes = new double[m_numDatasets][0];
    double[][] originalgenotypes = new double[m_numDatasets][0];
    boolean[][] includeExpressionSample = new boolean[m_numDatasets][0];

    // Step 1: per dataset, prepare mean-centered genotypes, their variance, and a mask of the
    // expression samples that have a called genotype.
    for (int d = 0; d < m_numDatasets; d++) {
      SNP dSNP = snps[d];
      if (dSNP == null) {
        continue;
      }

      double[] x = dSNP.selectGenotypes(m_expressionToGenotypeIds[d], false, true);
      originalgenotypes[d] = dSNP.selectGenotypes(m_expressionToGenotypeIds[d], false, false);

      double meanX = JSci.maths.ArrayMath.mean(x);
      snpmeancorrectedgenotypes[d] = new double[x.length];
      for (int i = 0; i < x.length; i++) {
        snpmeancorrectedgenotypes[d][i] = x[i] - meanX;
      }

      double varianceX = JSci.maths.ArrayMath.variance(x);
      if (varianceX == 0) {
        // No genotype variation in this dataset: nothing can be tested, so drop the SNP here.
        dSNP.clearGenotypes();
        wp.getFlipSNPAlleles()[d] = null;
        snps[d] = null;
        continue;
      }
      snpvariances[d] = varianceX;

      int[] inds = m_expressionToGenotypeIds[d];
      includeExpressionSample[d] = new boolean[inds.length];
      byte[] genotypes = dSNP.getGenotypes();
      for (int s = 0; s < inds.length; s++) {
        int ind = inds[s];
        // A genotype of -1 marks a missing call. A negative mapping id is treated the same way
        // rather than being used as an array index.
        // NOTE(review): assumes a negative id means "no linked genotype sample" - confirm.
        includeExpressionSample[d][s] = ind > -1 && genotypes[ind] != -1;
      }
    }

    // Step 2: run the tests.
    Result dsResults;
    if (cisOnly) {
      dsResults = new Result(m_numDatasets, probes.length, wp.getId());
      for (int d = 0; d < m_numDatasets; d++) {
        if (snps[d] == null) {
          for (int p = 0; p < probes.length; p++) {
            markUntested(dsResults, d, p);
          }
          continue;
        }

        dsResults.numSamples[d] = snpmeancorrectedgenotypes[d].length;
        double[][] rawData = m_expressiondata[d].getMatrix();
        double[] varY = m_expressiondata[d].getProbeVariance();
        double[] meanY = m_expressiondata[d].getProbeMean();
        int samplecount = m_expressiondata[d].getIndividuals().length;
        double[][] covariates = (m_covariates != null) ? m_covariates[d].rawData : null;

        for (int p = 0; p < probes.length; p++) {
          // -9 means the probe is not present in this dataset.
          Integer probeId = m_probeTranslation.get(d, probes[p]);
          if (probeId != -9) {
            test(
                d,
                p,
                probeId,
                snpmeancorrectedgenotypes[d],
                originalgenotypes[d],
                snpvariances[d],
                varY[probeId],
                meanY[probeId],
                includeExpressionSample[d],
                samplecount,
                rawData,
                covariates,
                dsResults,
                this.currentWP,
                this.metaAnalyseModelCorrelationYHat,
                this.metaAnalyseInteractionTerms,
                this.determinefoldchange);
          } else {
            markUntested(dsResults, d, p);
          }
        }
      }
    } else {
      // Trans-only and the default mode differ only in that trans-only skips the probes listed
      // in the work package.
      HashSet<Integer> probestoExclude = null;
      if (transOnly && probes != null) {
        probestoExclude = new HashSet<Integer>();
        for (int p = 0; p < probes.length; p++) {
          probestoExclude.add(probes[p]);
        }
      }

      dsResults = new Result(m_numDatasets, m_numProbes, wp.getId());
      for (int d = 0; d < m_numDatasets; d++) {
        // In these modes the sample count is recorded even when the SNP was dropped.
        dsResults.numSamples[d] = snpmeancorrectedgenotypes[d].length;
        if (snps[d] == null) {
          for (int p = 0; p < m_numProbes; p++) {
            markUntested(dsResults, d, p);
          }
          continue;
        }

        double[][] rawData = m_expressiondata[d].getMatrix();
        double[] varY = m_expressiondata[d].getProbeVariance();
        double[] meanY = m_expressiondata[d].getProbeMean();
        int samplecount = m_expressiondata[d].getIndividuals().length;

        for (int pid = 0; pid < m_numProbes; pid++) {
          if (probestoExclude != null && probestoExclude.contains(pid)) {
            markUntested(dsResults, d, pid);
            continue;
          }
          // -9 means the probe is not present in this dataset.
          Integer probeId = m_probeTranslation.get(d, pid);
          if (probeId != -9) {
            test(
                d,
                pid,
                probeId,
                snpmeancorrectedgenotypes[d],
                originalgenotypes[d],
                snpvariances[d],
                varY[probeId],
                meanY[probeId],
                includeExpressionSample[d],
                samplecount,
                rawData,
                null,
                dsResults,
                this.currentWP,
                this.metaAnalyseModelCorrelationYHat,
                this.metaAnalyseInteractionTerms,
                this.determinefoldchange);
          } else {
            markUntested(dsResults, d, pid);
          }
        }
      }
    }

    // Step 3: derive p-values and plot the effects that pass the plot threshold.
    convertResultsToPValues(wp, dsResults);

    if (m_eQTLPlotter != null) {
      for (int p = 0; p < dsResults.pvalues.length; p++) {
        double pval = dsResults.pvalues[p];
        if (!Double.isNaN(pval) && pval < m_pvaluePlotThreshold) {
          ploteQTL(wp, p);
        }
      }
    }

    // Step 4: genotypes are no longer needed once testing and plotting are done.
    snps = wp.getSnps();
    if (snps != null) {
      for (SNP snp : snps) {
        if (snp != null) {
          snp.clearGenotypes();
        }
      }
    }

    // Step 5: hand the work package over to the result queue.
    try {
      wp.setNumTested(testsPerformed);
      m_result_queue.put(wp);
    } catch (InterruptedException e) {
      // Restore the interrupt flag so code further up the call stack can still see that this
      // thread was asked to stop; catching the exception clears it.
      Thread.currentThread().interrupt();
      e.printStackTrace();
    }
  }

  /** Stores NaN as correlation and z-score for a dataset/probe slot that was not tested. */
  private void markUntested(Result results, int d, int p) {
    results.correlations[d][p] = Double.NaN;
    results.zscores[d][p] = Double.NaN;
  }