示例#1
0
  /**
   * Performs a single-tailed F test on two variances at a fixed significance level of 0.05.
   *
   * <p>The larger variance (together with its degrees of freedom) is always placed in the
   * numerator. The hypothesis that the two variances are equal is rejected when the F statistic
   * is not below the upper critical value of the F distribution.
   *
   * @param variance1 first variance
   * @param variance2 second variance
   * @param degOfFreedom1 degrees of freedom belonging to {@code variance1}
   * @param degOfFreedom2 degrees of freedom belonging to {@code variance2}
   * @param stats output array with at least four elements; {@code stats[0]} receives the F
   *     statistic and {@code stats[1]} the critical value, the other elements are not written
   * @return {@code true} when F is below the critical value (equality is not rejected), {@code
   *     false} otherwise
   * @throws IllegalStateException if {@code stats} is {@code null} or shorter than four elements,
   *     or if the one-tailed p-value contradicts the critical-value decision
   */
  public static boolean isEqualVariance(
      double variance1,
      double variance2,
      double degOfFreedom1,
      double degOfFreedom2,
      double[] stats) {
    if (stats == null || stats.length < 4) {
      throw new IllegalStateException(
          "stats must be a non-null array with at least 4 elements");
    }
    double alpha = 0.05;
    double numVar;
    double denVar;
    double numDf;
    double denDf;
    // Put the larger variance in the numerator.
    if (variance1 > variance2) {
      numVar = variance1;
      numDf = degOfFreedom1;
      denVar = variance2;
      denDf = degOfFreedom2;
    } else {
      numVar = variance2;
      numDf = degOfFreedom2;
      denVar = variance1;
      denDf = degOfFreedom1;
    }

    FDistribution fd = new FDistribution(numDf, denDf);
    double fStatistic = numVar / denVar;
    double pOneTail = 1 - fd.cumulativeProbability(fStatistic);
    double fCritical = fd.inverseCumulativeProbability(1 - alpha);
    stats[0] = fStatistic;
    stats[1] = fCritical;
    if (display_stats) {
      System.out.println("F: " + fStatistic + "\tfCritical: " + fCritical);
    }
    if (F_BELOW(fStatistic, fCritical)) {
      // If F < Fcrit then p must be > alpha; anything else means the distribution calls
      // disagree with each other. Report that as an exception rather than terminating the JVM
      // (the previous System.exit(0) even signalled success to the calling process).
      if (!(pOneTail > alpha)) {
        throw new IllegalStateException(
            "StatAnalyzer.java -- isEqualVariance() -- something's wrong!!"
                + " F="
                + fStatistic
                + ", fCritical="
                + fCritical
                + ", pOneTail="
                + pOneTail);
      }
      return true;
    }
    return false;
  }

  /** Returns whether the F statistic lies strictly below the critical value. */
  private static boolean F_BELOW(double fStatistic, double fCritical) {
    return fStatistic < fCritical;
  }
  /**
   * Tests the association between the predictor vector {@code x} and one row of {@code rawData},
   * and stores the outcome in {@code r} at position {@code [d][p]}.
   *
   * <p>When {@code x.length != sampleCount}, the response values and the covariates are first
   * restricted to the samples flagged in {@code includeExpressionSample}; the response is then
   * re-centered on zero and its variance recomputed. Afterwards one of the following is done:
   *
   * <ul>
   *   <li>response variance is zero: z-score and correlation are set to {@code NaN};
   *   <li>covariates are present: an OLS model with regressors {@code x}, the first covariate and
   *       their product is fitted, and depending on the flags the result is (a) the Spearman
   *       correlation between observed and fitted response, (b) the test of the interaction
   *       coefficient, or (c) the F test of the whole model;
   *   <li>no covariates: the correlation of the mean-centered data is computed and converted to
   *       a z-score; regression coefficients (and optionally the fold change) are derived from
   *       the standardized data.
   * </ul>
   *
   * @param d first index into the result matrices of {@code r}
   * @param p second index into the result matrices of {@code r}
   * @param probeId row of {@code rawData} that supplies the response values
   * @param x predictor values; must have mean zero (within tolerance)
   * @param originalGenotypes passed through to {@code determineFoldchange}
   * @param varianceX variance of {@code x}
   * @param varianceY variance of the response; recomputed when samples are filtered
   * @param meanY mean of the response; recomputed when samples are filtered
   * @param includeExpressionSample per-sample inclusion flags, read when samples are filtered
   * @param sampleCount number of samples (columns) in {@code rawData}
   * @param rawData response matrix indexed as {@code [probe][sample]}
   * @param covariateRawData covariate matrix indexed as {@code [covariate][sample]}, or {@code
   *     null} when no covariates are used
   * @param r receives the results
   * @param wp passed through to {@code determineFoldchange}
   * @param metaAnalyseModelCorrelationYHat with covariates: report the Spearman correlation
   *     between observed and fitted response
   * @param metaAnalyseInteractionTerms with covariates (and the previous flag unset): report the
   *     interaction coefficient test
   * @param determinefoldchange without covariates: additionally call {@code determineFoldchange}
   * @throws RuntimeException if the mean of {@code x} or of the response is not (close to) zero
   */
  protected static void test(
      int d,
      int p,
      Integer probeId,
      double[] x,
      double[] originalGenotypes,
      double varianceX,
      double varianceY,
      double meanY,
      boolean[] includeExpressionSample,
      int sampleCount,
      double[][] rawData,
      double[][] covariateRawData,
      Result r,
      WorkPackage wp,
      boolean metaAnalyseModelCorrelationYHat,
      boolean metaAnalyseInteractionTerms,
      boolean determinefoldchange) {
    final double[] y;
    double[][] covariates = covariateRawData;
    if (x.length != sampleCount) {
      // Only a subset of the samples is used: copy the included values and
      // recalculate mean and variance.
      y = new double[x.length];
      int itr = 0;
      double sum = 0;
      double[] tmpY = rawData[probeId];

      for (int s = 0; s < sampleCount; s++) {
        if (includeExpressionSample[s]) {
          y[itr] = tmpY[s];
          sum += y[itr];
          itr++;
        }
      }
      meanY = sum / itr;

      // Center the response on zero.
      if (meanY != 0) {
        for (int i = 0; i < y.length; ++i) {
          y[i] = y[i] - meanY;
        }
        meanY = 0;
      }

      varianceY = Descriptives.variance(y, meanY);

      if (covariates != null) {
        covariates = new double[covariateRawData.length][0];
        for (int covariate = 0; covariate < covariateRawData.length; covariate++) {
          covariates[covariate] = new double[x.length];
          // The write position must restart for every covariate row; a counter shared across
          // rows runs past x.length as soon as a second covariate is present.
          int covariateitr = 0;
          for (int s = 0; s < sampleCount; s++) {
            if (includeExpressionSample[s]) {
              covariates[covariate][covariateitr] = covariateRawData[covariate][s];
              covariateitr++;
            }
          }
        }
      }

    } else {
      // All samples are used: work on a copy so that rawData is never modified.
      y = new double[x.length];
      System.arraycopy(rawData[probeId], 0, y, 0, x.length);
    }
    double meanX = JSci.maths.ArrayMath.mean(x);
    // NOTE(review): the tolerance is asymmetric (1e-9 above zero, 1e-8 below zero) — confirm
    // whether that is intended.
    if (meanY > 0.000000001d
        || meanY < -0.00000001d
        || meanX > 0.000000001d
        || meanX < -0.00000001d) {

      // Recompute both means from the data so the message shows specified vs. actual values.
      double res = 0;
      for (double y2 : y) {
        res += y2;
      }
      res /= y.length;

      double res2 = 0;
      for (double x2 : x) {
        res2 += x2;
      }
      res2 /= x.length;

      throw new RuntimeException(
          "Error in eQTL calculation, mean of X or Y was not 0, specified mean y: "
              + meanY
              + " and really is: "
              + res
              + ", specifief mean x: "
              + meanX
              + " and really is: "
              + res2);
    }

    if (varianceY == 0) {
      r.zscores[d][p] = Double.NaN;
      r.correlations[d][p] = Double.NaN;
    } else if (covariates != null) {

      // TODO: what to do when there are multiple covariates involved?
      // Only covariates[0] is used. Design matrix columns: x, covariate, x * covariate
      // (the intercept is handled by the regression itself).
      double[][] olsXFullWithInteraction = new double[x.length][3];
      for (int i = 0; i < x.length; i++) {
        double xi = x[i];
        double covi = covariates[0][i];
        olsXFullWithInteraction[i][0] = xi;
        olsXFullWithInteraction[i][1] = covi;
        olsXFullWithInteraction[i][2] = covi * xi;
      }

      OLSMultipleLinearRegression regressionFullWithInteraction = new OLSMultipleLinearRegression();
      regressionFullWithInteraction.newSampleData(y, olsXFullWithInteraction);

      if (metaAnalyseModelCorrelationYHat) {

        // Reconstruct the fitted response: parameter 0 is the intercept, parameters 1..3
        // belong to the three design matrix columns.
        double[] regressionParameters =
            regressionFullWithInteraction.estimateRegressionParameters();
        double[] yInferred = new double[y.length];
        for (int s = 0; s < y.length; s++) {
          yInferred[s] = regressionParameters[0];
          for (int a = 0; a < 3; a++) {
            yInferred[s] += regressionParameters[a + 1] * olsXFullWithInteraction[s][a];
          }
        }

        SpearmansCorrelation spearman = new SpearmansCorrelation();
        double correlationspearman = spearman.correlation(y, yInferred);
        double zScore = Correlation.convertCorrelationToZScore(y.length, correlationspearman);
        r.zscores[d][p] = zScore;
        r.correlations[d][p] = correlationspearman;
        // In this mode the beta slot carries the model R-squared.
        r.beta[d][p] = regressionFullWithInteraction.calculateRSquared();
      } else if (metaAnalyseInteractionTerms) {

        double[] regressionParameters =
            regressionFullWithInteraction.estimateRegressionParameters();
        double[] regressionStandardErrors =
            regressionFullWithInteraction.estimateRegressionParametersStandardErrors();

        // Index 3 is the x * covariate column (index 0 is the intercept).
        double betaInteraction = regressionParameters[3];
        double seInteraction = regressionStandardErrors[3];
        double tInteraction = betaInteraction / seInteraction;
        double pValueInteraction;
        double zScoreInteraction;

        StudentT tDistColt = new cern.jet.random.tdouble.StudentT(x.length - 4, randomEngine);
        // Always evaluate the cdf at -|t|, floor the tail probability at 2.0E-323 before
        // inverting it, and negate the resulting z-score when t is not negative.
        if (tInteraction < 0) {
          pValueInteraction = tDistColt.cdf(tInteraction);
          if (pValueInteraction < 2.0E-323) {
            pValueInteraction = 2.0E-323;
          }
          zScoreInteraction = cern.jet.stat.tdouble.Probability.normalInverse(pValueInteraction);
        } else {
          pValueInteraction = tDistColt.cdf(-tInteraction);
          if (pValueInteraction < 2.0E-323) {
            pValueInteraction = 2.0E-323;
          }
          zScoreInteraction = -cern.jet.stat.tdouble.Probability.normalInverse(pValueInteraction);
        }
        // Two-sided p-value; currently not stored anywhere.
        pValueInteraction *= 2;
        r.zscores[d][p] = zScoreInteraction;
        r.correlations[d][p] = regressionFullWithInteraction.calculateRSquared();
        r.se[d][p] = seInteraction;
        r.beta[d][p] = betaInteraction;

        // An earlier, disabled variant of this branch fitted lm(y ~ x + z + interaction) through
        // an R connection and used vcovHC(m, type = 'HC0') standard errors for the interaction
        // term; see version control history if it is needed again.

      } else {
        double residualSS = regressionFullWithInteraction.calculateResidualSumOfSquares();
        double r2 = regressionFullWithInteraction.calculateRSquared();
        // calculate F statistic for the significance of the model
        double totalSS = regressionFullWithInteraction.calculateTotalSumOfSquares();
        double modelSS = totalSS - residualSS;
        double dfmodel = olsXFullWithInteraction[0].length;
        double dferror = x.length - dfmodel - 1;
        double msm = modelSS / dfmodel;
        double mse = residualSS / dferror;
        double f = msm / mse;
        FDistribution fDist =
            new org.apache.commons.math3.distribution.FDistribution(dfmodel, dferror);
        double pvalmodel = 1 - fDist.cumulativeProbability(f);
        double zscore = 0;
        // NOTE(review): the zscore = 0 assignment for p == 1 is overwritten by the pToZ call
        // below — confirm whether p == 1 was meant to bypass the conversion.
        if (pvalmodel == 1d) {
          zscore = 0;
        } else if (pvalmodel == 0d) {
          // Replace an exact zero p-value by 1e-16 before converting it.
          pvalmodel = 1e-16;
        }
        try {
          zscore = ZScores.pToZ(pvalmodel);
        } catch (IllegalArgumentException e) {
          // Dump the inputs that led to the unconvertible p-value, then abort.
          System.out.println(f + "\t" + pvalmodel + "\t" + zscore);
          for (int i = 0; i < x.length; i++) {
            System.out.println(i + "\t" + x[i] + "\t" + y[i] + "\t" + covariates[0][i]);
          }
          System.exit(-1);
        }

        r.zscores[d][p] = zscore;
        r.correlations[d][p] = r2;
      }
    } else {
      // Calculate correlation coefficient:
      double stdevy = Math.sqrt(varianceY);
      double stdevx = Math.sqrt(varianceX);

      double correlation = Correlation.correlateMeanCenteredData(x, y, (stdevy * stdevx));

      if (correlation >= -1 && correlation <= 1) {
        double zScore = Correlation.convertCorrelationToZScore(x.length, correlation);
        // Scale both vectors by their standard deviation; x is copied so the caller's array
        // stays untouched, y is already a private copy.
        double[] xcopy = new double[x.length];
        for (int i = 0; i < y.length; i++) {
          y[i] /= stdevy;
          xcopy[i] = x[i] / stdevx;
        }

        calculateRegressionCoefficients(xcopy, y, r, d, p);
        if (determinefoldchange) {
          determineFoldchange(originalGenotypes, y, r, d, p, wp);
        }
        r.zscores[d][p] = zScore;
        r.correlations[d][p] = correlation;
      } else {
        // Usually happens if the genotype variance is very low
        System.err.println(
            "Error! correlation invalid: "
                + correlation
                + "; genotype variance = "
                + varianceX
                + "; expression variance = "
                + varianceY);
        r.zscores[d][p] = Double.NaN;
        r.correlations[d][p] = Double.NaN;
      }
    }
  }
 /** Draws one value by delegating to the wrapped {@code distribution}. */
 @Override
 public Double sample() {
   final Double drawn = distribution.sample();
   return drawn;
 }