/* * perform single tail F test! * The larger variance should always be placed in the numerator */ public static boolean isEqualVariance( double variance1, double variance2, double degOfFreedom1, double degOfFreedom2, double[] stats) { /* * The hypothesis that the two variances are equal is rejected if F>Falpha,N1-1,N2-1 for an upper one-tailed test F<F1-alpha,N1-1,N2-1 for a lower one-tailed test */ if (stats == null || stats.length < 4) { throw new IllegalStateException(); } double alpha = 0.05; double numVar; double denVar; double numDf; double denDf; if (variance1 > variance2) { numVar = variance1; numDf = degOfFreedom1; denVar = variance2; denDf = degOfFreedom2; } else { numVar = variance2; numDf = degOfFreedom2; denVar = variance1; denDf = degOfFreedom1; } FDistribution fd = new FDistribution(numDf, denDf); double F = numVar / denVar; double pOneTail = 1 - fd.cumulativeProbability(F); // good double fCritical = fd.inverseCumulativeProbability(1 - alpha); stats[0] = F; stats[1] = fCritical; if (display_stats) { System.out.println("F: " + F + "\tfCritical: " + fCritical); } if (F < fCritical) { /* * If F<Fcrit then automatically p>0.05. Correspondingly, if F>=Fcrit then automatically p<=0.05. */ if (!(pOneTail > 0.05)) { System.out.println("StatAnalyzer.java -- isEqualVariance() -- something's wrong!!"); System.exit(0); } return true; } else { return false; } }
protected static void test( int d, int p, Integer probeId, double[] x, double[] originalGenotypes, double varianceX, double varianceY, double meanY, boolean[] includeExpressionSample, int sampleCount, double[][] rawData, double[][] covariateRawData, Result r, WorkPackage wp, boolean metaAnalyseModelCorrelationYHat, boolean metaAnalyseInteractionTerms, boolean determinefoldchange) { final double[] y; double[][] covariates = covariateRawData; if (x.length != sampleCount) { y = new double[x.length]; int itr = 0; double sum = 0; double[] tmpY = rawData[probeId]; // recalculate mean and variance for (int s = 0; s < sampleCount; s++) { if (includeExpressionSample[s]) { y[itr] = tmpY[s]; sum += y[itr]; itr++; } } meanY = sum / itr; if (meanY != 0) { for (int i = 0; i < y.length; ++i) { y[i] = y[i] - meanY; } meanY = 0; } varianceY = Descriptives.variance(y, meanY); if (covariates != null) { int covariateitr = 0; covariates = new double[covariateRawData.length][0]; // take only the first covariate for now.. for (int covariate = 0; covariate < covariateRawData.length; covariate++) { covariates[covariate] = new double[x.length]; for (int s = 0; s < sampleCount; s++) { if (includeExpressionSample[s]) { covariates[covariate][covariateitr] = covariateRawData[covariate][s]; covariateitr++; } } } } } else { y = new double[x.length]; System.arraycopy(rawData[probeId], 0, y, 0, x.length); } double meanX = JSci.maths.ArrayMath.mean(x); if (meanY > 0.000000001d || meanY < -0.00000001d || meanX > 0.000000001d || meanX < -0.00000001d) { double res = 0; for (double y2 : y) { res += y2; } res /= y.length; double res2 = 0; for (double x2 : x) { res2 += x2; } res2 /= x.length; throw new RuntimeException( "Error in eQTL calculation, mean of X or Y was not 0, specified mean y: " + meanY + " and really is: " + res + ", specifief mean x: " + meanX + " and really is: " + res2); } if (varianceY == 0) { r.zscores[d][p] = Double.NaN; r.correlations[d][p] = Double.NaN; } else if (covariates != null) { // TODO: what to do when there are multiple covariates involved? double[][] olsXFullWithInteraction = new double[x.length] [3]; // With interaction term, linear model: y ~ a * SNP + b * CellCount + c + d * SNP // * CellCount for (int i = 0; i < x.length; i++) { double xi = x[i]; double covi = covariates[0][i]; olsXFullWithInteraction[i][0] = xi; olsXFullWithInteraction[i][1] = covi; olsXFullWithInteraction[i][2] = covi * xi; } OLSMultipleLinearRegression regressionFullWithInteraction = new OLSMultipleLinearRegression(); regressionFullWithInteraction.newSampleData(y, olsXFullWithInteraction); if (metaAnalyseModelCorrelationYHat) { double[] regressionParameters = regressionFullWithInteraction.estimateRegressionParameters(); double[] yInferred = new double[y.length]; for (int s = 0; s < y.length; s++) { yInferred[s] = regressionParameters[0]; for (int a = 0; a < 3; a++) { yInferred[s] += regressionParameters[a + 1] * olsXFullWithInteraction[s][a]; } } SpearmansCorrelation spearman = new SpearmansCorrelation(); double correlationspearman = spearman.correlation(y, yInferred); double zScore = Correlation.convertCorrelationToZScore(y.length, correlationspearman); r.zscores[d][p] = zScore; r.correlations[d][p] = correlationspearman; r.beta[d][p] = regressionFullWithInteraction.calculateRSquared(); } else if (metaAnalyseInteractionTerms) { double[] regressionParameters = regressionFullWithInteraction.estimateRegressionParameters(); double[] regressionStandardErrors = regressionFullWithInteraction.estimateRegressionParametersStandardErrors(); double betaInteraction = regressionParameters[3]; double seInteraction = regressionStandardErrors[3]; double tInteraction = betaInteraction / seInteraction; double pValueInteraction; double zScoreInteraction; StudentT tDistColt = new cern.jet.random.tdouble.StudentT(x.length - 4, randomEngine); if (tInteraction < 0) { pValueInteraction = tDistColt.cdf(tInteraction); if (pValueInteraction < 2.0E-323) { pValueInteraction = 2.0E-323; } zScoreInteraction = cern.jet.stat.tdouble.Probability.normalInverse(pValueInteraction); } else { pValueInteraction = tDistColt.cdf(-tInteraction); if (pValueInteraction < 2.0E-323) { pValueInteraction = 2.0E-323; } zScoreInteraction = -cern.jet.stat.tdouble.Probability.normalInverse(pValueInteraction); } pValueInteraction *= 2; r.zscores[d][p] = zScoreInteraction; r.correlations[d][p] = regressionFullWithInteraction.calculateRSquared(); r.se[d][p] = seInteraction; r.beta[d][p] = betaInteraction; // if (rConnection != null) { // try { // if (rConnection.isConnected()) { // rConnection.assign("y", y); // rConnection.assign("x", x); // rConnection.assign("z", covariates[0]); // rConnection.voidEval("interaction <- x*z"); // rConnection.voidEval("m <- lm(y ~ x + z + interaction)"); // rConnection.voidEval("modelsummary <- summary(m)"); // double betaInteractionR = // rConnection.eval("modelsummary$coefficients[4,1]").asDouble(); // rConnection.voidEval("m2 <- sqrt(diag(vcovHC(m, type = // 'HC0')))"); // double seInteractionRCorrected = // rConnection.eval("as.numeric(m2[4])").asDouble(); // double tInteraction = betaInteractionR / // seInteractionRCorrected; // double pValueInteraction = 1; // double zScoreInteraction = 0; // DRand randomEngine = new // cern.jet.random.tdouble.engine.DRand(); // StudentT tDistColt = new // cern.jet.random.tdouble.StudentT(x.length - 4, randomEngine); // if (tInteraction < 0) { // pValueInteraction = tDistColt.cdf(tInteraction); // if (pValueInteraction < 2.0E-323) { // pValueInteraction = 2.0E-323; // } // zScoreInteraction = // cern.jet.stat.tdouble.Probability.normalInverse(pValueInteraction); // } else { // pValueInteraction = tDistColt.cdf(-tInteraction); // if (pValueInteraction < 2.0E-323) { // pValueInteraction = 2.0E-323; // } // zScoreInteraction = // -cern.jet.stat.tdouble.Probability.normalInverse(pValueInteraction); // } // randomNumberGenerator.zscores[d][p] = zScoreInteraction; // randomNumberGenerator.correlations[d][p] = betaInteractionR; // // // int dfresiduals = // rConnection.eval("m$df.residual").asInteger(); //// double[] coeff = // rConnection.eval("m$coefficients").asDoubles(); // intercept: 0, x: 1, z: 2, zx: 3 //// double fstat = // rConnection.eval("as.numeric(modelsummary$fstatistic['value'])").asDouble(); //// double fstatdf = // rConnection.eval("as.numeric(modelsummary$fstatistic['numdf'])").asDouble(); //// double fstatdferr = // rConnection.eval("as.numeric(modelsummary$fstatistic['dendf'])").asDouble(); //// double rsquared = // rConnection.eval("as.numeric(modelsummary$r.squared)").asDouble(); //// double seInteractionR = // rConnection.eval("modelsummary$coefficients[4,2]").asDouble(); // } else { // System.err.println("ERROR: R is not connected."); // } // } catch (REngineException ex) { // // Logger.getLogger(CalculationThread.class.getName()).log(Level.SEVERE, null, ex); // } catch (REXPMismatchException ex) { // // Logger.getLogger(CalculationThread.class.getName()).log(Level.SEVERE, null, ex); // } // } } else { double residualSS = regressionFullWithInteraction.calculateResidualSumOfSquares(); double r2 = regressionFullWithInteraction.calculateRSquared(); // calculate F statistic for the significance of the model double totalSS = regressionFullWithInteraction.calculateTotalSumOfSquares(); double modelSS = totalSS - residualSS; double dfmodel = olsXFullWithInteraction[0].length; double dferror = x.length - dfmodel - 1; double msm = modelSS / dfmodel; double mse = residualSS / dferror; double f = msm / mse; FDistribution fDist = new org.apache.commons.math3.distribution.FDistribution(dfmodel, dferror); double pvalmodel = 1 - fDist.cumulativeProbability(f); double zscore = 0; if (pvalmodel == 1d) { zscore = 0; } else if (pvalmodel == 0d) { pvalmodel = 1e-16; } try { zscore = ZScores.pToZ(pvalmodel); } catch (IllegalArgumentException e) { System.out.println(f + "\t" + pvalmodel + "\t" + zscore); for (int i = 0; i < x.length; i++) { System.out.println(i + "\t" + x[i] + "\t" + y[i] + "\t" + covariates[0][i]); } System.exit(-1); } r.zscores[d][p] = zscore; r.correlations[d][p] = r2; } } else { // Calculate correlation coefficient: double stdevy = Math.sqrt(varianceY); double stdevx = Math.sqrt(varianceX); // double stdevy = JSci.maths.ArrayMath.standardDeviation(y); // double stdevx = JSci.maths.ArrayMath.standardDeviation(x); double correlation = Correlation.correlateMeanCenteredData(x, y, (stdevy * stdevx)); if (correlation >= -1 && correlation <= 1) { double zScore = Correlation.convertCorrelationToZScore(x.length, correlation); double[] xcopy = new double[x.length]; // double meany = JSci.maths.ArrayMath.mean(y); for (int i = 0; i < y.length; i++) { y[i] /= stdevy; xcopy[i] = x[i] / stdevx; } // meany = JSci.maths.ArrayMath.mean(y); // double meanxCopy = JSci.maths.ArrayMath.mean(xcopy); // calculateRegressionCoefficients(xcopy, meanxCopy, y, meany, // randomNumberGenerator, d, p); calculateRegressionCoefficients(xcopy, y, r, d, p); if (determinefoldchange) { determineFoldchange(originalGenotypes, y, r, d, p, wp); } r.zscores[d][p] = zScore; r.correlations[d][p] = correlation; } else { // Ususally if the genotype variance is very low System.err.println( "Error! correlation invalid: " + correlation + "; genotype variance = " + varianceX + "; expression variance = " + varianceY); r.zscores[d][p] = Double.NaN; r.correlations[d][p] = Double.NaN; // System.exit(-1); } } }