@Override public void run() throws IOException { for (int d = 0; d < m_gg.length; d++) { // now test all triples SNPLoader snpLoader = m_gg[d].getGenotypeData().createSNPLoader(); int[] indWGA = m_gg[d].getExpressionToGenotypeIdArray(); for (int perm = 0; perm < m_settings.nrPermutationsFDR + 1; perm++) { String outfile = null; if (perm == 0) { outfile = outDir + m_gg[d].getSettings().name + "_IVAnalysis-RealData.txt"; } else { outfile = outDir + m_gg[d].getSettings().name + "_IVAnalysis-PermutationRound-" + perm + ".txt"; m_gg[d].permuteSampleLables(); } TextFile out = new TextFile(outfile, TextFile.W); Iterator<Triple<String, String, String>> it = snpProbeCombos.iterator(); Triple<String, String, String> next = it.next(); ProgressBar pb = new ProgressBar( snpProbeCombos.size(), "Running Mediation Analysis - Permutation " + perm); out.writeln( "SNP\tSNP Chr\tSNP ChrPos\t" + "Alleles\tDirectionAllele\t" + "N\t" + "CisArrayAddress\tCisProbe Chr\tCisProbe ChrPos\t" + "CisGeneName\t" + "TransArrayAddress\tTransProbe Chr\tTransProbe ChrPos\t" + "TransGeneName\t" + "CisTrans-Correlation\t" + "Cis-eQTL-Beta\t" + "Cis-eQTL-SE\t" + "CisTrans-Beta\t" + "CisTrans-SE\t" + "Trans-eQTL-Beta\t" + "Trans-eQTL-SE\t" + "CisTrans-Residual-Correlation\t" + "CisTrans-Residual-Beta\t" + "CisTrans-Residual-SE\t" + "Trans-eQTL-Residual-Beta\t" + "Trans-eQTL-Residual-SE\t" + "Beta-Ratio"); while (next != null) { String snp = next.getLeft(); String cisprobe = next.getMiddle(); String transprobe = next.getRight(); Integer snpId = m_gg[d].getGenotypeData().getSnpToSNPId().get(snp); Integer cisProbeId = m_gg[d].getExpressionData().getProbeToId().get(cisprobe); Integer transProbeId = m_gg[d].getExpressionData().getProbeToId().get(transprobe); if (snpId == -9 || cisProbeId == null || transProbeId == null) { // out.writeln(snp + "\t" + snpId + "\t" + cisprobe + "\t" + // cisProbeId + "\t" + null + "\t" + transprobe + "\t" + transProbeId + "\t" + null + // "\t" + null + "\t" + null + "\t" + null + "\t" + null + // "\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA"); } else { SNP snpObj = m_gg[d].getGenotypeData().getSNPObject(snpId); snpLoader.loadGenotypes(snpObj); if (snpLoader.hasDosageInformation()) { snpLoader.loadDosage(snpObj); } double[] origCisVals = m_gg[d].getExpressionData().getMatrix()[cisProbeId]; double[] origTransVals = m_gg[d].getExpressionData().getMatrix()[transProbeId]; int calledGenotypes = 0; for (int i = 0; i < m_gg[d].getExpressionData().getIndividuals().length; i++) { int genotypeId = indWGA[i]; short gt = snpObj.getGenotypes()[genotypeId]; if (genotypeId > -1 && gt > -1) { calledGenotypes++; } } double[] genotypes = new double[calledGenotypes]; double[] cisvals = new double[calledGenotypes]; double[] transvals = new double[calledGenotypes]; calledGenotypes = 0; for (int i = 0; i < m_gg[d].getExpressionData().getIndividuals().length; i++) { int genotypeId = indWGA[i]; short gt = snpObj.getGenotypes()[genotypeId]; if (genotypeId > -1 && gt > -1) { genotypes[calledGenotypes] = snpObj.getDosageValues()[genotypeId]; cisvals[calledGenotypes] = origCisVals[i]; transvals[calledGenotypes] = origTransVals[i]; calledGenotypes++; } } // normalize genotype and cis + trans to get beta's equal to the correlation coefficient genotypes = normalize(genotypes); cisvals = normalize(cisvals); transvals = normalize(transvals); double corrCisTrans = JSci.maths.ArrayMath.correlation(cisvals, transvals); // for code validation double[] cisTransRCs = Regression.getLinearRegressionCoefficients( cisvals, transvals); // returns beta, alpha, se, t double[] snpCisRCs = Regression.getLinearRegressionCoefficients( genotypes, cisvals); // returns beta, alpha, se, t double[] snpTransRCs = Regression.getLinearRegressionCoefficients(genotypes, transvals); // remove correlation between cis and trans probe // double[] resCis = new double[cisvals.length]; double[] resTransVals = new double[cisvals.length]; for (int i = 0; i < resTransVals.length; i++) { // resCis[i] = cisvals[i] - snpCisRCs[0] * genotypes[i]; resTransVals[i] = transvals[i] - cisTransRCs[0] * cisvals[i]; } resTransVals = normalize(resTransVals); double[] cisResTransRCs = Regression.getLinearRegressionCoefficients( cisvals, resTransVals); // returns beta, alpha, se, t double[] snpResTransRCs = Regression.getLinearRegressionCoefficients(genotypes, resTransVals); double rescorr = JSci.maths.ArrayMath.correlation(cisvals, resTransVals); // for code validation out.writeln( snp + "\t" + snpObj.getChr() + "\t" + snpObj.getChrPos() + "\t" + BaseAnnot.toString(snpObj.getAlleles()[0]) + "/" + BaseAnnot.toString(snpObj.getAlleles()[1]) + "\t" + BaseAnnot.toString(snpObj.getAlleles()[0]) + "\t" + transvals.length + "\t" + cisprobe + "\t" + m_gg[d].getExpressionData().getChr()[cisProbeId] + "\t" + m_gg[d].getExpressionData().getChrStart()[cisProbeId] + ":" + m_gg[d].getExpressionData().getChrStop()[cisProbeId] + "\t" + m_gg[d].getExpressionData().getAnnotation()[cisProbeId] + "\t" + transprobe + "\t" + m_gg[d].getExpressionData().getChr()[transProbeId] + "\t" + m_gg[d].getExpressionData().getChrStart()[transProbeId] + ":" + m_gg[d].getExpressionData().getChrStop()[transProbeId] + "\t" + m_gg[d].getExpressionData().getAnnotation()[transProbeId] + "\t" + corrCisTrans + "\t" + snpCisRCs[0] + "\t" + snpCisRCs[2] + "\t" + cisTransRCs[0] + "\t" + cisTransRCs[2] + "\t" + snpTransRCs[0] + "\t" + snpTransRCs[2] + "\t" + rescorr + "\t" + cisResTransRCs[0] + "\t" + cisResTransRCs[2] + "\t" + snpResTransRCs[0] + "\t" + snpResTransRCs[2] + "\t" + (snpResTransRCs[0] / snpTransRCs[0])); snpObj.clearGenotypes(); } if (it.hasNext()) { next = it.next(); } else { next = null; } pb.iterate(); } pb.close(); out.close(); } snpLoader.close(); } }
private void analyze(WorkPackage wp) { testsPerformed = 0; currentWP = wp; wp.setNumTested(0); // RunTimer t1 = new RunTimer(); // load SNP genotypes SNP[] snps = wp.getSnps(); int[] probes = wp.getProbes(); Result dsResults = null; double[] snpvariances = new double[m_numDatasets]; double[][] snpmeancorrectedgenotypes = new double[m_numDatasets][0]; double[][] originalgenotypes = new double[m_numDatasets][0]; boolean[][] includeExpressionSample = new boolean[m_numDatasets][0]; for (int d = 0; d < m_numDatasets; d++) { SNP dSNP = snps[d]; if (dSNP != null) { double[] x = dSNP.selectGenotypes(m_expressionToGenotypeIds[d], false, true); originalgenotypes[d] = dSNP.selectGenotypes(m_expressionToGenotypeIds[d], false, false); int xLen = x.length; double meanX = JSci.maths.ArrayMath.mean(x); snpmeancorrectedgenotypes[d] = new double[xLen]; for (int i = 0; i < xLen; i++) { snpmeancorrectedgenotypes[d][i] = x[i] - meanX; } double varianceX = JSci.maths.ArrayMath.variance(x); if (varianceX != 0) { snpvariances[d] = varianceX; int inds[] = m_expressionToGenotypeIds[d]; int sampleCount = m_expressionToGenotypeIds[d].length; includeExpressionSample[d] = new boolean[sampleCount]; byte[] genotypes = dSNP.getGenotypes(); for (int s = 0; s < sampleCount; s++) { int ind = inds[s]; double valX = genotypes[ind]; // loadedSNPGenotype[ind]; if (valX != -1) { includeExpressionSample[d][s] = true; } else { includeExpressionSample[d][s] = false; } } } else { dSNP.clearGenotypes(); dSNP = null; wp.getFlipSNPAlleles()[d] = null; snps[d] = null; } } } if (cisOnly) { dsResults = new Result(m_numDatasets, wp.getProbes().length, wp.getId()); for (int d = 0; d < m_numDatasets; d++) { SNP dSNP = snps[d]; if (dSNP != null) { dsResults.numSamples[d] = snpmeancorrectedgenotypes[d].length; double[][] rawData = m_expressiondata[d].getMatrix(); double[] varY = m_expressiondata[d].getProbeVariance(); double[] meanY = m_expressiondata[d].getProbeMean(); int samplecount = m_expressiondata[d].getIndividuals().length; double[][] covariates = null; if (m_covariates != null) { DoubleMatrixDataset<String, String> covariateData = m_covariates[d]; covariates = covariateData.rawData; } for (int p = 0; p < probes.length; p++) { int pid = probes[p]; Integer probeId = m_probeTranslation.get(d, pid); if (probeId != -9) { test( d, p, probeId, snpmeancorrectedgenotypes[d], originalgenotypes[d], snpvariances[d], varY[probeId], meanY[probeId], includeExpressionSample[d], samplecount, rawData, covariates, dsResults, this.currentWP, this.metaAnalyseModelCorrelationYHat, this.metaAnalyseInteractionTerms, this.determinefoldchange); } else { dsResults.correlations[d][p] = Double.NaN; dsResults.zscores[d][p] = Double.NaN; } } } else { for (int p = 0; p < probes.length; p++) { dsResults.correlations[d][p] = Double.NaN; dsResults.zscores[d][p] = Double.NaN; } } } } else if (transOnly) { HashSet<Integer> probestoExclude = null; if (probes != null) { probestoExclude = new HashSet<Integer>(); for (int p = 0; p < probes.length; p++) { probestoExclude.add(probes[p]); } } dsResults = new Result(m_numDatasets, m_numProbes, wp.getId()); for (int d = 0; d < m_numDatasets; d++) { SNP dSNP = snps[d]; dsResults.numSamples[d] = snpmeancorrectedgenotypes[d].length; double[][] rawData = m_expressiondata[d].getMatrix(); double[] varY = m_expressiondata[d].getProbeVariance(); double[] meanY = m_expressiondata[d].getProbeMean(); int samplecount = m_expressiondata[d].getIndividuals().length; if (dSNP != null) { dsResults.numSamples[d] = snpmeancorrectedgenotypes[d].length; for (int pid = 0; pid < m_numProbes; pid++) { if (probestoExclude == null || !probestoExclude.contains(pid)) { Integer probeId = m_probeTranslation.get(d, pid); if (probeId != -9) { test( d, pid, probeId, snpmeancorrectedgenotypes[d], originalgenotypes[d], snpvariances[d], varY[probeId], meanY[probeId], includeExpressionSample[d], samplecount, rawData, null, dsResults, this.currentWP, this.metaAnalyseModelCorrelationYHat, this.metaAnalyseInteractionTerms, this.determinefoldchange); } else { dsResults.correlations[d][pid] = Double.NaN; dsResults.zscores[d][pid] = Double.NaN; } } else { dsResults.correlations[d][pid] = Double.NaN; dsResults.zscores[d][pid] = Double.NaN; } } } else { for (int p = 0; p < m_numProbes; p++) { dsResults.correlations[d][p] = Double.NaN; dsResults.zscores[d][p] = Double.NaN; } } } } else { dsResults = new Result(m_numDatasets, m_numProbes, wp.getId()); for (int d = 0; d < m_numDatasets; d++) { SNP dSNP = snps[d]; dsResults.numSamples[d] = snpmeancorrectedgenotypes[d].length; double[][] rawData = m_expressiondata[d].getMatrix(); double[] varY = m_expressiondata[d].getProbeVariance(); double[] meanY = m_expressiondata[d].getProbeMean(); int samplecount = m_expressiondata[d].getIndividuals().length; if (dSNP != null) { dsResults.numSamples[d] = snpmeancorrectedgenotypes[d].length; // RunTimer t2 = new RunTimer(); for (int pid = 0; pid < m_numProbes; pid++) { Integer probeId = m_probeTranslation.get(d, pid); if (probeId != -9) { test( d, pid, probeId, snpmeancorrectedgenotypes[d], originalgenotypes[d], snpvariances[d], varY[probeId], meanY[probeId], includeExpressionSample[d], samplecount, rawData, null, dsResults, this.currentWP, this.metaAnalyseModelCorrelationYHat, this.metaAnalyseInteractionTerms, this.determinefoldchange); } else { dsResults.correlations[d][pid] = Double.NaN; dsResults.zscores[d][pid] = Double.NaN; } } // System.out.println("Test: "+t2.getTimeDesc()); } else { for (int p = 0; p < m_numProbes; p++) { dsResults.correlations[d][p] = Double.NaN; dsResults.zscores[d][p] = Double.NaN; } } } } convertResultsToPValues(wp, dsResults); if (m_eQTLPlotter != null) { for (int p = 0; p < dsResults.pvalues.length; p++) { double pval = dsResults.pvalues[p]; if (!Double.isNaN(pval)) { if (pval < m_pvaluePlotThreshold) { ploteQTL(wp, p); } } } } snps = wp.getSnps(); if (snps != null) { for (SNP snp : snps) { if (snp != null) { snp.clearGenotypes(); } } } // if result output is binary, convert to bytes and deflate the set of bytes. // if (m_binaryoutput) { // deflateResults(wp); // } // now push the results in the queue.. try { wp.setNumTested(testsPerformed); m_result_queue.put(wp); } catch (InterruptedException e) { e.printStackTrace(); } // System.out.println("Analyze: "+t1.getTimeDesc()); }