@Override
protected SBPrintStream toJavaInit(SBPrintStream sb, CodeGeneratorPipeline fileCtx) {
  sb.nl();
  sb.ip("public boolean isSupervised() { return true; }").nl();
  sb.ip("public int nfeatures() { return " + _output.nfeatures() + "; }").nl();
  sb.ip("public int nclasses() { return " + _output.nclasses() + "; }").nl();
  return sb;
}
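// Illustrative sketch (not part of the generator): for a hypothetical binomial model with
// 10 predictors, the three sb.ip(...) calls above would emit POJO source of the form
//
//   public boolean isSupervised() { return true; }
//   public int nfeatures() { return 10; }
//   public int nclasses() { return 2; }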
@Override
protected SBPrintStream toJavaInit(SBPrintStream sb, CodeGeneratorPipeline fileCtx) {
  sb = super.toJavaInit(sb, fileCtx);
  // Model-metadata accessors emitted into the POJO.
  sb.ip("public boolean isSupervised() { return " + isSupervised() + "; }").nl();
  sb.ip("public int nfeatures() { return " + _output.nfeatures() + "; }").nl();
  sb.ip("public int nclasses() { return " + _output.nclasses() + "; }").nl();
  // Static model constants used by the generated scoring code.
  JCodeGen.toStaticVar(sb, "RESCNT", _output._rescnt, "Count of categorical levels in response.");
  JCodeGen.toStaticVar(sb, "APRIORI", _output._apriori_raw, "Apriori class distribution of the response.");
  JCodeGen.toStaticVar(sb, "PCOND", _output._pcond_raw, "Conditional probability of predictors.");
  // Number of levels per categorical predictor (null when there are no categoricals).
  double[] dlen = null;
  if (_output._ncats > 0) {
    dlen = new double[_output._ncats];
    for (int i = 0; i < _output._ncats; i++) dlen[i] = _output._domains[i].length;
  }
  JCodeGen.toStaticVar(sb, "DOMLEN", dlen, "Number of unique levels for each categorical predictor.");
  return sb;
}
@Override
protected SBPrintStream toJavaInit(SBPrintStream sb, CodeGeneratorPipeline fileCtx) {
  sb = super.toJavaInit(sb, fileCtx);
  sb.ip("public boolean isSupervised() { return " + isSupervised() + "; }").nl();
  sb.ip("public int nfeatures() { return " + _output.nfeatures() + "; }").nl();
  sb.ip("public int nclasses() { return " + _parms._nv + "; }").nl();
  if (_output._nnums > 0) {
    JCodeGen.toStaticVar(sb, "NORMMUL", _output._normMul, "Standardization/Normalization scaling factor for numerical variables.");
    JCodeGen.toStaticVar(sb, "NORMSUB", _output._normSub, "Standardization/Normalization offset for numerical variables.");
  }
  JCodeGen.toStaticVar(sb, "CATOFFS", _output._catOffsets, "Categorical column offsets.");
  JCodeGen.toStaticVar(sb, "PERMUTE", _output._permutation, "Permutation index vector.");
  JCodeGen.toStaticVar(sb, "EIGVECS", _output._v, "Eigenvector matrix.");
  return sb;
}
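// Illustrative sketch: each JCodeGen.toStaticVar call above materializes one model constant as
// a static array field of the generated POJO (the exact emitted formatting is up to JCodeGen).
// For a hypothetical dataset with three categorical columns it might come out roughly as
//
//   // Categorical column offsets.
//   public static final int[] CATOFFS = {0, 4, 7, 9};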
@Override
protected void toJavaPredictBody(
    SBPrintStream body,
    CodeGeneratorPipeline classCtx,
    CodeGeneratorPipeline fileCtx,
    final boolean verboseCode) {
  // Generate static fields
  classCtx.add(
      new CodeGenerator() {
        @Override
        public void generate(JCodeSB out) {
          JCodeGen.toClassWithArray(out, "static", "BETA", beta_internal()); // "The Coefficients"
          JCodeGen.toClassWithArray(out, "static", "NUM_MEANS", _output._dinfo._numMeans, "Imputed numeric values");
          JCodeGen.toClassWithArray(out, "static", "CAT_MODES", _output._dinfo._catModes, "Imputed categorical values.");
          JCodeGen.toStaticVar(out, "CATOFFS", dinfo()._catOffsets, "Categorical Offsets");
        }
      });
  body.ip("final double [] b = BETA.VALUES;").nl();
  // Impute missing categorical and numeric values before scoring, if requested.
  if (_parms._missing_values_handling == MissingValuesHandling.MeanImputation) {
    body.ip("for(int i = 0; i < " + _output._dinfo._cats + "; ++i) if(Double.isNaN(data[i])) data[i] = CAT_MODES.VALUES[i];").nl();
    body.ip("for(int i = 0; i < " + _output._dinfo._nums + "; ++i) if(Double.isNaN(data[i + " + _output._dinfo._cats + "])) data[i+" + _output._dinfo._cats + "] = NUM_MEANS.VALUES[i];").nl();
  }
  if (_parms._family != Family.multinomial) {
    body.ip("double eta = 0.0;").nl();
    if (!_parms._use_all_factor_levels) { // skip level 0 of all factors
      body.ip("for(int i = 0; i < CATOFFS.length-1; ++i) if(data[i] != 0) {").nl();
      body.ip(" int ival = (int)data[i] - 1;").nl();
      body.ip(" if(ival != data[i] - 1) throw new IllegalArgumentException(\"categorical value out of range\");").nl();
      body.ip(" ival += CATOFFS[i];").nl();
      body.ip(" if(ival < CATOFFS[i + 1])").nl();
      body.ip("  eta += b[ival];").nl();
    } else { // do not skip any levels
      body.ip("for(int i = 0; i < CATOFFS.length-1; ++i) {").nl();
      body.ip(" int ival = (int)data[i];").nl();
      body.ip(" if(ival != data[i]) throw new IllegalArgumentException(\"categorical value out of range\");").nl();
      body.ip(" ival += CATOFFS[i];").nl();
      body.ip(" if(ival < CATOFFS[i + 1])").nl();
      body.ip("  eta += b[ival];").nl();
    }
    body.ip("}").nl();
    final int noff = dinfo().numStart() - dinfo()._cats;
    body.ip("for(int i = ").p(dinfo()._cats).p("; i < b.length-1-").p(noff).p("; ++i)").nl();
    body.ip(" eta += b[").p(noff).p("+i]*data[i];").nl();
    body.ip("eta += b[b.length-1]; // reduce intercept").nl();
    if (_parms._family != Family.tweedie)
      body.ip("double mu = hex.genmodel.GenModel.GLM_").p(_parms._link.toString()).p("Inv(eta");
    else
      body.ip("double mu = hex.genmodel.GenModel.GLM_tweedieInv(eta," + _parms._tweedie_link_power);
    body.p(");").nl();
    if (_parms._family == Family.binomial) {
      body.ip("preds[0] = (mu > ").p(defaultThreshold()).p(") ? 1 : 0").p("; // threshold given by ROC").nl();
      body.ip("preds[1] = 1.0 - mu; // class 0").nl();
      body.ip("preds[2] = mu; // class 1").nl();
    } else {
      body.ip("preds[0] = mu;").nl();
    }
  } else {
    int P = _output._global_beta_multinomial[0].length;
    body.ip("preds[0] = 0;").nl();
    body.ip("for(int c = 0; c < " + _output._nclasses + "; ++c){").nl();
    body.ip(" preds[c+1] = 0;").nl();
    if (dinfo()._cats > 0) {
      if (!_parms._use_all_factor_levels) { // skip level 0 of all factors
        body.ip(" for(int i = 0; i < CATOFFS.length-1; ++i) if(data[i] != 0) {").nl();
        body.ip("  int ival = (int)data[i] - 1;").nl();
        body.ip("  if(ival != data[i] - 1) throw new IllegalArgumentException(\"categorical value out of range\");").nl();
        body.ip("  ival += CATOFFS[i];").nl();
        body.ip("  if(ival < CATOFFS[i + 1])").nl();
        body.ip("   preds[c+1] += b[ival+c*" + P + "];").nl();
      } else { // do not skip any levels
        body.ip(" for(int i = 0; i < CATOFFS.length-1; ++i) {").nl();
        body.ip("  int ival = (int)data[i];").nl();
        body.ip("  if(ival != data[i]) throw new IllegalArgumentException(\"categorical value out of range\");").nl();
        body.ip("  ival += CATOFFS[i];").nl();
        body.ip("  if(ival < CATOFFS[i + 1])").nl();
        body.ip("   preds[c+1] += b[ival+c*" + P + "];").nl();
      }
      body.ip(" }").nl();
    }
    final int noff = dinfo().numStart();
    body.ip(" for(int i = 0; i < " + dinfo()._nums + "; ++i)").nl();
    body.ip("  preds[c+1] += b[" + noff + "+i + c*" + P + "]*data[i];").nl();
    body.ip(" preds[c+1] += b[" + (P - 1) + " + c*" + P + "]; // reduce intercept").nl();
    body.ip("}").nl();
    // Stable softmax over the per-class linear predictors, then argmax for the predicted label.
    body.ip("double max_row = 0;").nl();
    body.ip("for(int c = 1; c < preds.length; ++c) if(preds[c] > max_row) max_row = preds[c];").nl();
    body.ip("double sum_exp = 0;").nl();
    body.ip("for(int c = 1; c < preds.length; ++c) { sum_exp += (preds[c] = Math.exp(preds[c]-max_row));}").nl();
    body.ip("sum_exp = 1/sum_exp;").nl();
    body.ip("double max_p = 0;").nl();
    body.ip("for(int c = 1; c < preds.length; ++c) if((preds[c] *= sum_exp) > max_p){ max_p = preds[c]; preds[0] = c-1;};").nl();
  }
}
@Override
protected void toJavaPredictBody(
    SBPrintStream bodySb,
    CodeGeneratorPipeline classCtx,
    CodeGeneratorPipeline fileCtx,
    final boolean verboseCode) {
  bodySb.i().p("java.util.Arrays.fill(preds,0);").nl();
  bodySb.i().p("double mean, sdev, prob;").nl();
  bodySb.i().p("double[] nums = new double[" + _output._levels.length + "];").nl();
  // Per-class log-likelihood: log prior plus categorical and Gaussian numeric conditionals.
  bodySb.i().p("for(int i = 0; i < " + _output._levels.length + "; i++) {").nl();
  bodySb.i(1).p("nums[i] = Math.log(APRIORI[i]);").nl();
  bodySb.i(1).p("for(int j = 0; j < " + _output._ncats + "; j++) {").nl();
  bodySb.i(2).p("if(Double.isNaN(data[j])) continue;").nl();
  bodySb.i(2).p("int level = (int)data[j];").nl();
  bodySb.i(2).p("prob = level < " + _output._pcond_raw.length + " ? PCOND[j][i][level] : "
      + (_parms._laplace == 0 ? 0 : _parms._laplace + "/(RESCNT[i] + " + _parms._laplace + "*DOMLEN[j])")).p(";").nl();
  bodySb.i(2).p("nums[i] += Math.log(prob <= " + _parms._eps_prob + " ? " + _parms._min_prob + " : prob);").nl();
  bodySb.i(1).p("}").nl();
  bodySb.i(1).p("for(int j = " + _output._ncats + "; j < data.length; j++) {").nl();
  bodySb.i(2).p("if(Double.isNaN(data[j])) continue;").nl();
  bodySb.i(2).p("mean = Double.isNaN(PCOND[j][i][0]) ? 0 : PCOND[j][i][0];").nl();
  bodySb.i(2).p("sdev = Double.isNaN(PCOND[j][i][1]) ? 1 : (PCOND[j][i][1] <= " + _parms._eps_sdev + " ? " + _parms._min_sdev + " : PCOND[j][i][1]);").nl();
  bodySb.i(2).p("prob = Math.exp(-((data[j]-mean)*(data[j]-mean))/(2.*sdev*sdev)) / (sdev*Math.sqrt(2.*Math.PI));").nl();
  bodySb.i(2).p("nums[i] += Math.log(prob <= " + _parms._eps_prob + " ? " + _parms._min_prob + " : prob);").nl();
  bodySb.i(1).p("}").nl();
  bodySb.i().p("}").nl();
  // Normalize the per-class log-likelihoods into posterior probabilities.
  bodySb.i().p("double sum;").nl();
  bodySb.i().p("for(int i = 0; i < nums.length; i++) {").nl();
  bodySb.i(1).p("sum = 0;").nl();
  bodySb.i(1).p("for(int j = 0; j < nums.length; j++) {").nl();
  bodySb.i(2).p("sum += Math.exp(nums[j]-nums[i]);").nl();
  bodySb.i(1).p("}").nl();
  bodySb.i(1).p("preds[i+1] = 1/sum;").nl();
  bodySb.i().p("}").nl();
  bodySb.i().p("preds[0] = hex.genmodel.GenModel.getPrediction(preds, PRIOR_CLASS_DISTRIB, data, " + defaultThreshold() + ");").nl();
}
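// For reference, a minimal standalone sketch of the posterior normalization the generated Naive
// Bayes code performs: given per-class log-likelihoods nums[i] = log P(class i) +
// sum_j log P(x_j | class i), it computes preds[i+1] = 1 / sum_j exp(nums[j] - nums[i]), which
// equals exp(nums[i]) / sum_j exp(nums[j]) without exponentiating large magnitudes directly.
// The method name and standalone form are illustrative only, not part of the generated code.
static void logLikelihoodsToPosteriors(double[] nums, double[] preds) {
  for (int i = 0; i < nums.length; i++) {
    double sum = 0;
    for (int j = 0; j < nums.length; j++) sum += Math.exp(nums[j] - nums[i]);
    preds[i + 1] = 1 / sum;
  }
}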
@Override
protected void toJavaPredictBody(
    SBPrintStream bodySb,
    CodeGeneratorPipeline classCtx,
    CodeGeneratorPipeline fileCtx,
    final boolean verboseCode) {
  bodySb.i().p("java.util.Arrays.fill(preds,0);").nl();
  final int cats = _output._ncats;
  final int nums = _output._nnums;
  bodySb.i().p("final int nstart = CATOFFS[CATOFFS.length-1];").nl();
  bodySb.i().p("for(int i = 0; i < ").p(_parms._nv).p("; i++) {").nl();
  // Categorical columns
  bodySb.i(1).p("for(int j = 0; j < ").p(cats).p("; j++) {").nl();
  bodySb.i(2).p("double d = data[PERMUTE[j]];").nl();
  bodySb.i(2).p("int last = CATOFFS[j+1]-CATOFFS[j]-1;").nl();
  bodySb.i(2).p("int c = Double.isNaN(d) ? last : (int)d").p(_parms._use_all_factor_levels ? ";" : "-1;").nl();
  bodySb.i(2).p("if(c < 0 || c > last) continue;").nl();
  bodySb.i(2).p("preds[i] += EIGVECS[CATOFFS[j]+c][i];").nl();
  bodySb.i(1).p("}").nl();
  // Numeric columns
  bodySb.i(1).p("for(int j = 0; j < ").p(nums).p("; j++) {").nl();
  bodySb.i(2).p("preds[i] += (data[PERMUTE[j" + (cats > 0 ? "+" + cats : "") + "]]-NORMSUB[j])*NORMMUL[j]*EIGVECS[j" + (cats > 0 ? "+ nstart" : "") + "][i];").nl();
  bodySb.i(1).p("}").nl();
  bodySb.i().p("}").nl();
}
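// For reference: the generated loop above projects a row onto each of the _nv components as
//   preds[i] = sum_over_cats EIGVECS[CATOFFS[j] + level_j][i]
//            + sum_over_nums (data[PERMUTE[cats + j]] - NORMSUB[j]) * NORMMUL[j] * EIGVECS[nstart + j][i]
// i.e. the one-hot-expanded, standardized row multiplied by the eigenvector matrix, with NaN
// categoricals mapped to the last level and out-of-range levels skipped.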