/** * @param method * @param survivalInfoList * @param useStrata * @return */ public static double[] process( CoxMethod method, ArrayList<SurvivalInfo> survivalInfoList, boolean useStrata) { int i, j; int lastone; int n = survivalInfoList.size(); double deaths, denom = 0, e_denom = 0; double hazard; double temp, wtsum; double downwt; double[] time = new double[n]; double[] status = new double[n]; double[] strata = new double[n]; double[] wt = new double[n]; double[] score = new double[n]; double[] expect = new double[survivalInfoList.size()]; for (int p = 0; p < n; p++) { SurvivalInfo si = survivalInfoList.get(p); time[p] = si.getTime(); status[p] = si.getStatus(); if (useStrata) { strata[p] = si.getStrata(); } else { strata[p] = 0; } wt[p] = si.getWeight(); score[p] = si.getScore(); } strata[n - 1] = 1; /*failsafe */ /* Pass 1-- store the risk denominator in 'expect' */ for (i = n - 1; i >= 0; i--) { // Error because of no bounds checking in C it is an error on the get i - 1 // SurvivalInfo si = survivalInfoList.get(i); if (strata[i] == 1) { denom = 0; // strata[i] } denom += score[i] * wt[i]; // score[i]*wt[i]; if (i == 0 || strata[i - 1] == 1 || time[i - 1] != time[i]) // strata[i-1]==1 || time[i-1]!=time[i] { // si.setResidual(denom); expect[i] = denom; } else { // si.setResidual(0); //expect[i] =0; expect[i] = 0; } } /* Pass 2-- now do the work */ deaths = 0; wtsum = 0; e_denom = 0; hazard = 0; lastone = 0; for (i = 0; i < n; i++) { // SurvivalInfo si = survivalInfoList.get(i); // SurvivalInfo sip1 = null; // if (i + 1 < n) { // sip1 = survivalInfoList.get(i + 1); // } // if (si.getResidual() != 0) { // denom = si.getResidual(); // } if (expect[i] != 0) denom = expect[i]; // si.setResidual(status[i]);//expect[i] = status[i]; expect[i] = status[i]; deaths += status[i]; // status[i]; wtsum += status[i] * wt[i]; // status[i]*wt[i]; e_denom += score[i] * status[i] * wt[i]; // score[i]*status[i] *wt[i]; if (strata[i] == 1 || time[i + 1] != time[i]) { // strata[i]==1 || time[i+1]!=time[i] /*last subject of a set of tied times */ if (deaths < 2 || method == CoxMethod.Breslow) { // *method==0 hazard += wtsum / denom; for (j = lastone; j <= i; j++) { // SurvivalInfo sj = survivalInfoList.get(j); // double res = sj.getResidual() - score[j] * hazard; expect[j] -= score[j] * hazard; // sj.setResidual(res); //expect[j] -= score[j]*hazard; } } else { temp = hazard; wtsum /= deaths; for (j = 0; j < deaths; j++) { downwt = j / deaths; hazard += wtsum / (denom - e_denom * downwt); temp += wtsum * (1 - downwt) / (denom - e_denom * downwt); } for (j = lastone; j <= i; j++) { // SurvivalInfo sj = survivalInfoList.get(j); if (status[j] == 0) { // this appears to be an error for = - versus -= // double res = -score[j] * hazard; expect[j] = -score[j] * hazard; // sj.setResidual(res);//expect[j] = -score[j]*hazard; This appears to be an error // of -score vs -= } else { // double res = sj.getResidual() - score[j] * temp; expect[j] -= score[j] * temp; // expect[j] -= score[j]* temp; } } } lastone = i + 1; deaths = 0; wtsum = 0; e_denom = 0; } if (strata[i] == 1) { hazard = 0; } } for (j = lastone; j < n; j++) { expect[j] -= score[j] * hazard; } return expect; }
/** * @param variables * @param data * @param maxiter * @param method * @param eps * @param toler * @param beta * @param doscale * @param useStrata * @param useWeighted * @param robust * @param cluster * @return * @throws Exception */ public CoxInfo process( ArrayList<String> variables, ArrayList<SurvivalInfo> data, int maxiter, CoxMethod method, double eps, double toler, double[] beta, int doscale, boolean useStrata, boolean useWeighted, boolean robust, boolean cluster) throws Exception { // make sure data is converted to numbers if labels are used SurvivalInfoHelper.categorizeData(data); // create variables if testing for interaction for (String variable : variables) { if (variable.indexOf(":") != -1) { String[] d = variable.split(":"); SurvivalInfoHelper.addInteraction(d[0], d[1], data); } } Collections.sort(data); // Collections.reverse(data); CoxInfo coxInfo = new CoxInfo(); coxInfo.setSurvivalInfoList(data); int i, j, k, person; boolean gotofinish = false; double cmat[][], imat[][]; /*ragged arrays covar[][], */ double wtave; double a[], newbeta[]; double a2[], cmat2[][]; double scale[]; double denom = 0, zbeta, risk; double temp, temp2; int ndead; /* actually, the sum of their weights */ double newlk = 0; double dtime, d2; double deadwt; /*sum of case weights for the deaths*/ double efronwt; /* sum of weighted risk scores for the deaths*/ int halving; /*are we doing step halving at the moment? */ int nrisk = 0; /* number of subjects in the current risk set */ /* copies of scalar input arguments */ int nused, nvar; /* vector inputs */ // double *time, *weights, *offset; // int *status, *strata; /* returned objects */ // double imat2[][]; double u[], loglik[], means[]; double sctest; int flag = 0; int iter = 0; // SEXP rlist, rlistnames; // int nprotect; /* number of protect calls I have issued */ /* get local copies of some input args */ nused = data.size(); // LENGTH(offset2); nvar = variables.size(); // ncols(covar2); // imat2 = new double[nvar][nvar]; // nprotect++; imat = new double[nvar][nvar]; // dmatrix(REAL(imat2), nvar, nvar); a = new double[nvar]; // (double *) R_alloc(2*nvar*nvar + 4*nvar, sizeof(double)); newbeta = new double[nvar]; // a + nvar; a2 = new double[nvar]; // newbeta + nvar; scale = new double[nvar]; // a2 + nvar; cmat = new double[nvar][nvar]; // dmatrix(scale + nvar, nvar, nvar); cmat2 = new double[nvar][nvar]; // dmatrix(scale + nvar +nvar*nvar, nvar, nvar); /* ** create output variables */ // PROTECT(beta2 = duplicate(ibeta)); // beta = REAL(beta2); // beta = new double[nvar]; // beta = beta2; // PROTECT(means2 = allocVector(REALSXP, nvar)); // means = REAL(means2); means = new double[nvar]; double[] sd = new double[nvar]; double[] se = new double[nvar]; // means = means2; // PROTECT(u2 = allocVector(REALSXP, nvar)); // u = REAL(u2); u = new double[nvar]; // u = u2; // PROTECT(loglik2 = allocVector(REALSXP, 2)); // loglik = REAL(loglik2); loglik = new double[2]; // loglik = loglik2; // PROTECT(sctest2 = allocVector(REALSXP, 1)); // sctest = REAL(sctest2); // sctest = new double[1]; // sctest = sctest2; // PROTECT(flag2 = allocVector(INTSXP, 1)); // flag = INTEGER(flag2); // flag = new int[1]; // flag = flag2; // PROTECT(iter2 = allocVector(INTSXP, 1)); // iterations = INTEGER(iter2); // iterations = new int[1]; // iterations = iter2; // nprotect += 7; /* ** Subtract the mean from each covar, as this makes the regression ** much more stable. */ double[] time = new double[nused]; int[] status = new int[nused]; double[] offset = new double[nused]; double[] weights = new double[nused]; int[] strata = new int[nused]; double[][] covar = new double[nvar][nused]; ArrayList<String> clusterList = null; if (cluster) { clusterList = new ArrayList<String>(); } // copy data over to local arrays to minimuze changing code for (person = 0; person < nused; person++) { SurvivalInfo si = data.get(person); time[person] = si.getTime(); status[person] = si.getStatus(); offset[person] = si.getOffset(); if (cluster) { if (si.getClusterValue() == null && si.getClusterValue().length() == 0) { throw new Exception("Cluster value is not valid for " + si.toString()); } clusterList.add(si.getClusterValue()); } if (useWeighted) { weights[person] = si.getWeight(); } else { weights[person] = 1.0; } if (useStrata) { strata[person] = si.getStrata(); } else { strata[person] = 0; } for (i = 0; i < variables.size(); i++) { String variable = variables.get(i); covar[i][person] = si.getVariable(variable); } } double tempsd = 0; i = 0; for (i = 0; i < nvar; i++) { temp = 0; tempsd = 0; // calculate the mean sd for (person = 0; person < nused; person++) { temp += covar[i][person]; // * weights[person]; tempsd += (covar[i][person]) * (covar[i][person]); // *weights[person] * weights[person] } temp /= nused; // temp /= weightCount; means[i] = temp; tempsd /= nused; // tempsd /= weightCount; tempsd = Math.sqrt(tempsd - temp * temp); sd[i] = tempsd; // standard deviation // subtract the mean for (person = 0; person < nused; person++) { covar[i][person] -= temp; } if (doscale == 1) { /* and also scale it */ temp = 0; for (person = 0; person < nused; person++) { temp += Math.abs(covar[i][person]); // fabs } if (temp > 0) { temp = nused / temp; /* scaling */ } else { temp = 1.0; /* rare case of a constant covariate */ } scale[i] = temp; for (person = 0; person < nused; person++) { covar[i][person] *= temp; } } } if (doscale == 1) { for (i = 0; i < nvar; i++) { beta[i] /= scale[i]; /*rescale initial betas */ } } else { for (i = 0; i < nvar; i++) { scale[i] = 1.0; } } /* ** do the initial iteration step */ strata[nused - 1] = 1; loglik[1] = 0; for (i = 0; i < nvar; i++) { u[i] = 0; // u = s1 a2[i] = 0; // a2 = a for (j = 0; j < nvar; j++) { imat[i][j] = 0; // s2 cmat2[i][j] = 0; // a } } for (person = nused - 1; person >= 0; ) { if (strata[person] == 1) { nrisk = 0; denom = 0; for (i = 0; i < nvar; i++) { a[i] = 0; for (j = 0; j < nvar; j++) { cmat[i][j] = 0; } } } dtime = time[person]; ndead = 0; /*number of deaths at this time point */ deadwt = 0; /* sum of weights for the deaths */ efronwt = 0; /* sum of weighted risks for the deaths */ while (person >= 0 && time[person] == dtime) { /* walk through the this set of tied times */ nrisk++; zbeta = offset[person]; /* form the term beta*z (vector mult) */ for (i = 0; i < nvar; i++) { zbeta += beta[i] * covar[i][person]; // x } zbeta = coxsafe(zbeta); risk = Math.exp(zbeta) * weights[person]; // risk = v denom += risk; /* a is the vector of weighted sums of x, cmat sums of squares */ for (i = 0; i < nvar; i++) { a[i] += risk * covar[i][person]; // a = s1 for (j = 0; j <= i; j++) { cmat[i][j] += risk * covar[i][person] * covar[j][person]; // cmat = s2; } } if (status[person] == 1) { ndead++; deadwt += weights[person]; efronwt += risk; loglik[1] += weights[person] * zbeta; for (i = 0; i < nvar; i++) { u[i] += weights[person] * covar[i][person]; } if (method == CoxMethod.Efron) { /* Efron */ for (i = 0; i < nvar; i++) { a2[i] += risk * covar[i][person]; for (j = 0; j <= i; j++) { cmat2[i][j] += risk * covar[i][person] * covar[j][person]; } } } } person--; if (person >= 0 && strata[person] == 1) { // added catch of person = 0 and person-- = -1 break; /*ties don't cross strata */ } } if (ndead > 0) { /* we need to add to the main terms */ if (method == CoxMethod.Breslow) { /* Breslow */ loglik[1] -= deadwt * Math.log(denom); for (i = 0; i < nvar; i++) { temp2 = a[i] / denom; /* mean */ u[i] -= deadwt * temp2; for (j = 0; j <= i; j++) { imat[j][i] += deadwt * (cmat[i][j] - temp2 * a[j]) / denom; } } } else { /* Efron */ /* ** If there are 3 deaths we have 3 terms: in the first the ** three deaths are all in, in the second they are 2/3 ** in the sums, and in the last 1/3 in the sum. Let k go ** from 0 to (ndead -1), then we will sequentially use ** denom - (k/ndead)*efronwt as the denominator ** a - (k/ndead)*a2 as the "a" term ** cmat - (k/ndead)*cmat2 as the "cmat" term ** and reprise the equations just above. */ for (k = 0; k < ndead; k++) { temp = (double) k / ndead; wtave = deadwt / ndead; d2 = denom - temp * efronwt; loglik[1] -= wtave * Math.log(d2); for (i = 0; i < nvar; i++) { temp2 = (a[i] - temp * a2[i]) / d2; u[i] -= wtave * temp2; for (j = 0; j <= i; j++) { imat[j][i] += (wtave / d2) * ((cmat[i][j] - temp * cmat2[i][j]) - temp2 * (a[j] - temp * a2[j])); } } } for (i = 0; i < nvar; i++) { a2[i] = 0; for (j = 0; j < nvar; j++) { cmat2[i][j] = 0; } } } } } /* end of accumulation loop */ loglik[0] = loglik[1]; /* save the loglik for iterations 0 */ /* am I done? ** update the betas and test for convergence */ for (i = 0; i < nvar; i++) /*use 'a' as a temp to save u0, for the score test*/ { a[i] = u[i]; } flag = Cholesky2.process(imat, nvar, toler); chsolve2(imat, nvar, a); /* a replaced by a *inverse(i) */ temp = 0; for (i = 0; i < nvar; i++) { temp += u[i] * a[i]; } sctest = temp; /* score test */ /* ** Never, never complain about convergence on the first step. That way, ** if someone HAS to they can force one iterations at a time. */ for (i = 0; i < nvar; i++) { newbeta[i] = beta[i] + a[i]; } if (maxiter == 0) { chinv2(imat, nvar); for (i = 0; i < nvar; i++) { beta[i] *= scale[i]; /*return to original scale */ u[i] /= scale[i]; imat[i][i] *= scale[i] * scale[i]; for (j = 0; j < i; j++) { imat[j][i] *= scale[i] * scale[j]; imat[i][j] = imat[j][i]; } } // goto finish; gotofinish = true; } /* ** here is the main loop */ if (gotofinish == false) { halving = 0; /* =1 when in the midst of "step halving" */ for (iter = 1; iter <= maxiter; iter++) { newlk = 0; for (i = 0; i < nvar; i++) { u[i] = 0; for (j = 0; j < nvar; j++) { imat[i][j] = 0; } } /* ** The data is sorted from smallest time to largest ** Start at the largest time, accumulating the risk set 1 by 1 */ for (person = nused - 1; person >= 0; ) { if (strata[person] == 1) { /* rezero temps for each strata */ denom = 0; nrisk = 0; for (i = 0; i < nvar; i++) { a[i] = 0; for (j = 0; j < nvar; j++) { cmat[i][j] = 0; } } } dtime = time[person]; deadwt = 0; ndead = 0; efronwt = 0; while (person >= 0 && time[person] == dtime) { nrisk++; zbeta = offset[person]; for (i = 0; i < nvar; i++) { zbeta += newbeta[i] * covar[i][person]; } zbeta = coxsafe(zbeta); risk = Math.exp(zbeta) * weights[person]; denom += risk; for (i = 0; i < nvar; i++) { a[i] += risk * covar[i][person]; for (j = 0; j <= i; j++) { cmat[i][j] += risk * covar[i][person] * covar[j][person]; } } if (status[person] == 1) { ndead++; deadwt += weights[person]; newlk += weights[person] * zbeta; for (i = 0; i < nvar; i++) { u[i] += weights[person] * covar[i][person]; } if (method == CoxMethod.Efron) { /* Efron */ efronwt += risk; for (i = 0; i < nvar; i++) { a2[i] += risk * covar[i][person]; for (j = 0; j <= i; j++) { cmat2[i][j] += risk * covar[i][person] * covar[j][person]; } } } } person--; if (person >= 0 && strata[person] == 1) { // added catch of person = 0 and person-- = -1 break; /*ties don't cross strata */ } } if (ndead > 0) { /* add up terms*/ if (method == CoxMethod.Breslow) { /* Breslow */ newlk -= deadwt * Math.log(denom); for (i = 0; i < nvar; i++) { temp2 = a[i] / denom; /* mean */ u[i] -= deadwt * temp2; for (j = 0; j <= i; j++) { imat[j][i] += (deadwt / denom) * (cmat[i][j] - temp2 * a[j]); } } } else { /* Efron */ for (k = 0; k < ndead; k++) { temp = (double) k / ndead; wtave = deadwt / ndead; d2 = denom - temp * efronwt; newlk -= wtave * Math.log(d2); for (i = 0; i < nvar; i++) { temp2 = (a[i] - temp * a2[i]) / d2; u[i] -= wtave * temp2; for (j = 0; j <= i; j++) { imat[j][i] += (wtave / d2) * ((cmat[i][j] - temp * cmat2[i][j]) - temp2 * (a[j] - temp * a2[j])); } } } for (i = 0; i < nvar; i++) { /*in anticipation */ a2[i] = 0; for (j = 0; j < nvar; j++) { cmat2[i][j] = 0; } } } } } /* end of accumulation loop */ /* am I done? ** update the betas and test for convergence */ flag = Cholesky2.process(imat, nvar, toler); if (Math.abs(1 - (loglik[1] / newlk)) <= eps && halving == 0) { /* all done */ loglik[1] = newlk; chinv2(imat, nvar); /* invert the information matrix */ for (i = 0; i < nvar; i++) { beta[i] = newbeta[i] * scale[i]; u[i] /= scale[i]; imat[i][i] *= scale[i] * scale[i]; for (j = 0; j < i; j++) { imat[j][i] *= scale[i] * scale[j]; imat[i][j] = imat[j][i]; } } // goto finish; gotofinish = true; break; } if (iter == maxiter) { break; /*skip the step halving calc*/ } if (newlk < loglik[1]) { /*it is not converging ! */ halving = 1; for (i = 0; i < nvar; i++) { newbeta[i] = (newbeta[i] + beta[i]) / 2; /*half of old increment */ } } else { halving = 0; loglik[1] = newlk; chsolve2(imat, nvar, u); j = 0; for (i = 0; i < nvar; i++) { beta[i] = newbeta[i]; newbeta[i] = newbeta[i] + u[i]; } } } /* return for another iteration */ } if (gotofinish == false) { /* ** We end up here only if we ran out of iterations */ loglik[1] = newlk; chinv2(imat, nvar); for (i = 0; i < nvar; i++) { beta[i] = newbeta[i] * scale[i]; u[i] /= scale[i]; imat[i][i] *= scale[i] * scale[i]; for (j = 0; j < i; j++) { imat[j][i] *= scale[i] * scale[j]; imat[i][j] = imat[j][i]; } } flag = 1000; } // finish: /* for (j = 0; j < numCovariates; j++) { b[j] = b[j] / SD[j]; * ix = j * (numCovariates + 1) + j SE[j] = Math.sqrt(a[ix(j, j, numCovariates + 1)]) / SD[j]; // o = o + (" " + variables.get(j) + " " + Fmt(b[j]) + Fmt(SE[j]) + Fmt(Math.exp(b[j])) + Fmt(Norm(Math.abs(b[j] / SE[j]))) + Fmt(Math.exp(b[j] - 1.95 * SE[j])) + Fmt(Math.exp(b[j] + 1.95 * SE[j])) + NL); CoxCoefficient coe = coxInfo.getCoefficient(variables.get(j)); coe.coeff = b[j]; coe.stdError = SE[j]; coe.hazardRatio = Math.exp(b[j]); coe.pvalue = Norm(Math.abs(b[j] / SE[j])); coe.hazardRatioLoCI = Math.exp(b[j] - 1.95 * SE[j]); coe.hazardRatioHiCI = Math.exp(b[j] + 1.95 * SE[j]); } */ coxInfo.setScoreLogrankTest(sctest); coxInfo.setDegreeFreedom(beta.length); coxInfo.setScoreLogrankTestpvalue(ChiSq.chiSq(coxInfo.getScoreLogrankTest(), beta.length)); coxInfo.setVariance(imat); coxInfo.u = u; // for (int n = 0; n < beta.length; n++) { // se[n] = Math.sqrt(imat[n][n]); // / sd[n]; // } // System.out.println("coef,se, means,u"); for (int n = 0; n < beta.length; n++) { CoxCoefficient coe = new CoxCoefficient(); coe.name = variables.get(n); coe.mean = means[n]; coe.standardDeviation = sd[n]; coe.coeff = beta[n]; coe.stdError = Math.sqrt(imat[n][n]); coe.hazardRatio = Math.exp(coe.getCoeff()); coe.z = coe.getCoeff() / coe.getStdError(); coe.pvalue = ChiSq.norm(Math.abs(coe.getCoeff() / coe.getStdError())); double z = 1.959964; coe.hazardRatioLoCI = Math.exp(coe.getCoeff() - z * coe.getStdError()); coe.hazardRatioHiCI = Math.exp(coe.getCoeff() + z * coe.getStdError()); coxInfo.setCoefficient(coe.getName(), coe); // System.out.println(beta[n] + "," + se[n] + "," + means[n] + "," + sd[n] + "," + u[n]); //+ // "," + imat[n] "," + loglik[n] + "," + sctest[n] + "," + iterations[n] + "," + flag[n] } coxInfo.maxIterations = maxiter; coxInfo.eps = eps; coxInfo.toler = toler; coxInfo.iterations = iter; coxInfo.flag = flag; coxInfo.loglikInit = loglik[0]; coxInfo.loglikFinal = loglik[1]; coxInfo.method = method; // System.out.println("loglik[0]=" + loglik[0]); // System.out.println("loglik[1]=" + loglik[1]); // System.out.println("chisq? sctest[0]=" + sctest[0]); // System.out.println("?overall model p-value=" + chiSq(sctest[0], beta.length)); // System.out.println(); // for (int n = 0; n < covar[0].length; n++) { // System.out.print(n); // for (int variable = 0; variable < covar.length; variable++) { // System.out.print("\t" + covar[variable][n]); // } // System.out.println(); // } // for (SurvivalInfo si : data) { // System.out.println(si.order + " " + si.getScore()); // } // coxInfo.dump(); coxphfitSCleanup(coxInfo, useWeighted, robust, clusterList); return coxInfo; }