Пример #1
0
 /**
  * Writes the generated model's metadata accessors into {@code sb}.
  *
  * <p>Emits a blank line followed by three getters: {@code isSupervised()} (hard-coded to
  * {@code true}), {@code nfeatures()} and {@code nclasses()}; the latter two embed the values
  * reported by {@code _output} at generation time.
  *
  * @param sb stream receiving the generated source
  * @param fileCtx file-level generator pipeline (not used by this method)
  * @return the same {@code sb} instance that was passed in
  */
 @Override
 protected SBPrintStream toJavaInit(SBPrintStream sb, CodeGeneratorPipeline fileCtx) {
   sb.nl();
   sb.ip("public boolean isSupervised() { return true; }").nl();

   final String featureCountGetter =
       "public int nfeatures() { return " + _output.nfeatures() + "; }";
   sb.ip(featureCountGetter).nl();

   final String classCountGetter =
       "public int nclasses() { return " + _output.nclasses() + "; }";
   sb.ip(classCountGetter).nl();

   return sb;
 }
Пример #2
0
  /**
   * Writes the generated model's metadata accessors and static lookup tables into {@code sb}.
   *
   * <p>After delegating to the superclass, emits the {@code isSupervised()}, {@code nfeatures()}
   * and {@code nclasses()} getters and then the static variables {@code RESCNT}, {@code APRIORI},
   * {@code PCOND} and {@code DOMLEN}. {@code DOMLEN} holds the length of each of the first
   * {@code _output._ncats} entries of {@code _output._domains}; when there are no categorical
   * columns a {@code null} array is handed to {@code JCodeGen.toStaticVar}.
   *
   * @param sb stream receiving the generated source
   * @param fileCtx file-level generator pipeline, forwarded to the superclass
   * @return the stream returned by the superclass call, after this method's additions
   */
  @Override
  protected SBPrintStream toJavaInit(SBPrintStream sb, CodeGeneratorPipeline fileCtx) {
    sb = super.toJavaInit(sb, fileCtx);

    final String supervisedGetter =
        "public boolean isSupervised() { return " + isSupervised() + "; }";
    sb.ip(supervisedGetter).nl();
    final String featureCountGetter =
        "public int nfeatures() { return " + _output.nfeatures() + "; }";
    sb.ip(featureCountGetter).nl();
    final String classCountGetter =
        "public int nclasses() { return " + _output.nclasses() + "; }";
    sb.ip(classCountGetter).nl();

    JCodeGen.toStaticVar(sb, "RESCNT", _output._rescnt, "Count of categorical levels in response.");
    JCodeGen.toStaticVar(
        sb, "APRIORI", _output._apriori_raw, "Apriori class distribution of the response.");
    JCodeGen.toStaticVar(sb, "PCOND", _output._pcond_raw, "Conditional probability of predictors.");

    // Stays null when there are no categorical predictors; otherwise one entry per predictor.
    final int categoricalCount = _output._ncats;
    final double[] levelCounts = categoricalCount > 0 ? new double[categoricalCount] : null;
    for (int col = 0; col < categoricalCount; col++) {
      levelCounts[col] = _output._domains[col].length;
    }
    JCodeGen.toStaticVar(
        sb, "DOMLEN", levelCounts, "Number of unique levels for each categorical predictor.");

    return sb;
  }
Пример #3
0
  /**
   * Writes the generated model's metadata accessors and static lookup tables into {@code sb}.
   *
   * <p>After delegating to the superclass, emits the {@code isSupervised()} and
   * {@code nfeatures()} getters plus an {@code nclasses()} getter that returns
   * {@code _parms._nv}. {@code NORMMUL} and {@code NORMSUB} are emitted only when
   * {@code _output._nnums > 0}; {@code CATOFFS}, {@code PERMUTE} and {@code EIGVECS} are always
   * emitted.
   *
   * @param sb stream receiving the generated source
   * @param fileCtx file-level generator pipeline, forwarded to the superclass
   * @return the stream returned by the superclass call, after this method's additions
   */
  @Override
  protected SBPrintStream toJavaInit(SBPrintStream sb, CodeGeneratorPipeline fileCtx) {
    sb = super.toJavaInit(sb, fileCtx);

    final String supervisedGetter =
        "public boolean isSupervised() { return " + isSupervised() + "; }";
    sb.ip(supervisedGetter).nl();
    final String featureCountGetter =
        "public int nfeatures() { return " + _output.nfeatures() + "; }";
    sb.ip(featureCountGetter).nl();
    final String classCountGetter = "public int nclasses() { return " + _parms._nv + "; }";
    sb.ip(classCountGetter).nl();

    // The normalization tables are only written when numeric columns exist.
    final boolean hasNumericColumns = _output._nnums > 0;
    if (hasNumericColumns) {
      final String scaleDescription =
          "Standardization/Normalization scaling factor for numerical variables.";
      final String offsetDescription =
          "Standardization/Normalization offset for numerical variables.";
      JCodeGen.toStaticVar(sb, "NORMMUL", _output._normMul, scaleDescription);
      JCodeGen.toStaticVar(sb, "NORMSUB", _output._normSub, offsetDescription);
    }

    JCodeGen.toStaticVar(sb, "CATOFFS", _output._catOffsets, "Categorical column offsets.");
    JCodeGen.toStaticVar(sb, "PERMUTE", _output._permutation, "Permutation index vector.");
    JCodeGen.toStaticVar(sb, "EIGVECS", _output._v, "Eigenvector matrix.");

    return sb;
  }
Пример #4
0
 /**
  * Writes the body of the generated scoring method into {@code body} and registers the static
  * tables it relies on with {@code classCtx}.
  *
  * <p>The generated code:
  *
  * <ul>
  *   <li>optionally replaces NaN inputs with {@code CAT_MODES}/{@code NUM_MEANS} values when
  *       mean imputation is configured;
  *   <li>for non-multinomial families, accumulates {@code eta} from the categorical and numeric
  *       coefficients plus the last coefficient, applies the inverse link and fills
  *       {@code preds} (label plus two class probabilities for binomial, a single value
  *       otherwise);
  *   <li>for the multinomial family, accumulates one score per class in {@code preds[c+1]},
  *       normalizes them with an exponential/sum step and stores the index of the largest
  *       probability in {@code preds[0]}.
  * </ul>
  *
  * <p>In the input row the categorical columns come first and the numeric columns follow at
  * offset {@code dinfo()._cats}; every generated numeric access must apply that offset.
  *
  * @param body stream receiving the generated method body
  * @param classCtx class-level generator pipeline; receives the static field generator
  * @param fileCtx file-level generator pipeline (not used by this method)
  * @param verboseCode verbosity flag (not used by this method)
  */
 @Override
 protected void toJavaPredictBody(
     SBPrintStream body,
     CodeGeneratorPipeline classCtx,
     CodeGeneratorPipeline fileCtx,
     final boolean verboseCode) {
   // Generate static fields
   classCtx.add(
       new CodeGenerator() {
         @Override
         public void generate(JCodeSB out) {
           JCodeGen.toClassWithArray(out, "static", "BETA", beta_internal()); // "The Coefficients"
           JCodeGen.toClassWithArray(
               out, "static", "NUM_MEANS", _output._dinfo._numMeans, "Imputed numeric values");
           JCodeGen.toClassWithArray(
               out,
               "static",
               "CAT_MODES",
               _output._dinfo._catModes,
               "Imputed categorical values.");
           JCodeGen.toStaticVar(out, "CATOFFS", dinfo()._catOffsets, "Categorical Offsets");
         }
       });
   body.ip("final double [] b = BETA.VALUES;").nl();
   // Mean imputation: categorical columns occupy data[0.._cats), numerics follow them.
   if (_parms._missing_values_handling == MissingValuesHandling.MeanImputation) {
     body.ip(
             "for(int i = 0; i < "
                 + _output._dinfo._cats
                 + "; ++i) if(Double.isNaN(data[i])) data[i] = CAT_MODES.VALUES[i];")
         .nl();
     body.ip(
             "for(int i = 0; i < "
                 + _output._dinfo._nums
                 + "; ++i) if(Double.isNaN(data[i + "
                 + _output._dinfo._cats
                 + "])) data[i+"
                 + _output._dinfo._cats
                 + "] = NUM_MEANS.VALUES[i];")
         .nl();
   }
   if (_parms._family != Family.multinomial) {
     body.ip("double eta = 0.0;").nl();
     if (!_parms._use_all_factor_levels) { // skip level 0 of all factors
       body.ip("for(int i = 0; i < CATOFFS.length-1; ++i) if(data[i] != 0) {").nl();
       body.ip("  int ival = (int)data[i] - 1;").nl();
       body.ip(
               "  if(ival != data[i] - 1) throw new IllegalArgumentException(\"categorical value out of range\");")
           .nl();
       body.ip("  ival += CATOFFS[i];").nl();
       body.ip("  if(ival < CATOFFS[i + 1])").nl();
       body.ip("    eta += b[ival];").nl();
     } else { // do not skip any levels
       body.ip("for(int i = 0; i < CATOFFS.length-1; ++i) {").nl();
       body.ip("  int ival = (int)data[i];").nl();
       body.ip(
               "  if(ival != data[i]) throw new IllegalArgumentException(\"categorical value out of range\");")
           .nl();
       body.ip("  ival += CATOFFS[i];").nl();
       body.ip("  if(ival < CATOFFS[i + 1])").nl();
       body.ip("    eta += b[ival];").nl();
     }
     // Closes the categorical loop opened in either branch above.
     body.ip("}").nl();
     // Here i indexes the data row directly (starting at _cats); noff maps it to the coefficient.
     final int noff = dinfo().numStart() - dinfo()._cats;
     body.ip("for(int i = ").p(dinfo()._cats).p("; i < b.length-1-").p(noff).p("; ++i)").nl();
     body.ip("  eta += b[").p(noff).p("+i]*data[i];").nl();
     body.ip("eta += b[b.length-1]; // reduce intercept").nl();
     // The statement is opened here and terminated by the shared ");" below.
     if (_parms._family != Family.tweedie)
       body.ip("double mu = hex.genmodel.GenModel.GLM_").p(_parms._link.toString()).p("Inv(eta");
     else
       body.ip(
           "double mu = hex.genmodel.GenModel.GLM_tweedieInv(eta," + _parms._tweedie_link_power);
     body.p(");").nl();
     if (_parms._family == Family.binomial) {
       body.ip("preds[0] = (mu > ")
           .p(defaultThreshold())
           .p(") ? 1 : 0")
           .p("; // threshold given by ROC")
           .nl();
       body.ip("preds[1] = 1.0 - mu; // class 0").nl();
       body.ip("preds[2] =       mu; // class 1").nl();
     } else {
       body.ip("preds[0] = mu;").nl();
     }
   } else {
     // P is the number of coefficients per class; class c's block starts at b[c*P].
     int P = _output._global_beta_multinomial[0].length;
     body.ip("preds[0] = 0;").nl();
     body.ip("for(int c = 0; c < " + _output._nclasses + "; ++c){").nl();
     body.ip("  preds[c+1] = 0;").nl();
     if (dinfo()._cats > 0) {
       if (!_parms._use_all_factor_levels) { // skip level 0 of all factors
         body.ip("  for(int i = 0; i < CATOFFS.length-1; ++i) if(data[i] != 0) {").nl();
         body.ip("    int ival = (int)data[i] - 1;").nl();
         body.ip(
                 "    if(ival != data[i] - 1) throw new IllegalArgumentException(\"categorical value out of range\");")
             .nl();
         body.ip("    ival += CATOFFS[i];").nl();
         body.ip("    if(ival < CATOFFS[i + 1])").nl();
         body.ip("      preds[c+1] += b[ival+c*" + P + "];").nl();
       } else { // do not skip any levels
         body.ip("  for(int i = 0; i < CATOFFS.length-1; ++i) {").nl();
         body.ip("    int ival = (int)data[i];").nl();
         body.ip(
                 "    if(ival != data[i]) throw new IllegalArgumentException(\"categorical value out of range\");")
             .nl();
         body.ip("    ival += CATOFFS[i];").nl();
         body.ip("    if(ival < CATOFFS[i + 1])").nl();
         body.ip("      preds[c+1] += b[ival+c*" + P + "];").nl();
       }
       body.ip("  }").nl();
     }
     final int noff = dinfo().numStart();
     body.ip("  for(int i = 0; i < " + dinfo()._nums + "; ++i)").nl();
     // Here i counts numeric columns from 0, so the data index must be shifted past the
     // categorical columns; without the shift the numeric coefficients would be multiplied by
     // categorical codes whenever the model has categorical predictors.
     body.ip("    preds[c+1] += b[" + noff + "+i + c*" + P + "]*data[i+" + dinfo()._cats + "];")
         .nl();
     body.ip("  preds[c+1] += b[" + (P - 1) + " + c*" + P + "]; // reduce intercept").nl();
     body.ip("}").nl();
     // Subtract the row maximum before exponentiating, then normalize so the values sum to 1.
     body.ip("double max_row = 0;").nl();
     body.ip("for(int c = 1; c < preds.length; ++c) if(preds[c] > max_row) max_row = preds[c];")
         .nl();
     body.ip("double sum_exp = 0;").nl();
     body.ip(
             "for(int c = 1; c < preds.length; ++c) { sum_exp += (preds[c] = Math.exp(preds[c]-max_row));}")
         .nl();
     body.ip("sum_exp = 1/sum_exp;").nl();
     body.ip("double max_p = 0;").nl();
     body.ip(
             "for(int c = 1; c < preds.length; ++c) if((preds[c] *= sum_exp) > max_p){ max_p = preds[c]; preds[0] = c-1;};")
         .nl();
   }
 }
Пример #5
0
  /**
   * Writes the body of the generated scoring method into {@code bodySb}.
   *
   * <p>The generated code builds one log-score per response level in {@code nums}: it starts
   * from {@code Math.log(APRIORI[i])}, adds the log of the conditional probability of every
   * non-NaN categorical column (table lookup in {@code PCOND[j][i][level]}, with a
   * Laplace-based fallback for levels outside the table) and the log of a Gaussian density for
   * every non-NaN remaining column (mean and standard deviation taken from
   * {@code PCOND[j][i][0]} and {@code PCOND[j][i][1]}). Probabilities at or below
   * {@code _parms._eps_prob} are replaced by {@code _parms._min_prob}. The log-scores are then
   * turned into normalized probabilities in {@code preds[1..]} and the predicted label is
   * obtained from {@code hex.genmodel.GenModel.getPrediction}.
   *
   * @param bodySb stream receiving the generated method body
   * @param classCtx class-level generator pipeline (not used by this method)
   * @param fileCtx file-level generator pipeline (not used by this method)
   * @param verboseCode verbosity flag (not used by this method)
   */
  @Override
  protected void toJavaPredictBody(
      SBPrintStream bodySb,
      CodeGeneratorPipeline classCtx,
      CodeGeneratorPipeline fileCtx,
      final boolean verboseCode) {
    bodySb.i().p("java.util.Arrays.fill(preds,0);").nl();
    bodySb.i().p("double mean, sdev, prob;").nl();
    bodySb.i().p("double[] nums = new double[" + _output._levels.length + "];").nl();

    // Outer loop over response levels.
    bodySb.i().p("for(int i = 0; i < " + _output._levels.length + "; i++) {").nl();
    bodySb.i(1).p("nums[i] = Math.log(APRIORI[i]);").nl();
    // Categorical predictors occupy the first _ncats positions of the data row.
    bodySb.i(1).p("for(int j = 0; j < " + _output._ncats + "; j++) {").nl();
    bodySb.i(2).p("if(Double.isNaN(data[j])) continue;").nl();
    bodySb.i(2).p("int level = (int)data[j];").nl();
    // The guard must bound the index actually used, i.e. the innermost dimension of
    // PCOND[j][i]; comparing against the outermost dimension (the predictor axis) would send
    // valid levels to the fallback or allow an out-of-bounds read.
    bodySb
        .i(2)
        .p(
            "prob = level < PCOND[j][i].length ? PCOND[j][i][level] : "
                + (_parms._laplace == 0
                    ? 0
                    : _parms._laplace + "/(RESCNT[i] + " + _parms._laplace + "*DOMLEN[j])"))
        .p(";")
        .nl();
    bodySb
        .i(2)
        .p(
            "nums[i] += Math.log(prob <= "
                + _parms._eps_prob
                + " ? "
                + _parms._min_prob
                + " : prob);")
        .nl();
    bodySb.i(1).p("}").nl();

    // Remaining columns are scored with a Gaussian density.
    bodySb.i(1).p("for(int j = " + _output._ncats + "; j < data.length; j++) {").nl();
    bodySb.i(2).p("if(Double.isNaN(data[j])) continue;").nl();
    bodySb.i(2).p("mean = Double.isNaN(PCOND[j][i][0]) ? 0 : PCOND[j][i][0];").nl();
    bodySb
        .i(2)
        .p(
            "sdev = Double.isNaN(PCOND[j][i][1]) ? 1 : (PCOND[j][i][1] <= "
                + _parms._eps_sdev
                + " ? "
                + _parms._min_sdev
                + " : PCOND[j][i][1]);")
        .nl();
    bodySb
        .i(2)
        .p(
            "prob = Math.exp(-((data[j]-mean)*(data[j]-mean))/(2.*sdev*sdev)) / (sdev*Math.sqrt(2.*Math.PI));")
        .nl();
    bodySb
        .i(2)
        .p(
            "nums[i] += Math.log(prob <= "
                + _parms._eps_prob
                + " ? "
                + _parms._min_prob
                + " : prob);")
        .nl();
    bodySb.i(1).p("}").nl();
    bodySb.i().p("}").nl();

    // Normalize in log space: preds[i+1] = 1 / sum_j exp(nums[j] - nums[i]).
    bodySb.i().p("double sum;").nl();
    bodySb.i().p("for(int i = 0; i < nums.length; i++) {").nl();
    bodySb.i(1).p("sum = 0;").nl();
    bodySb.i(1).p("for(int j = 0; j < nums.length; j++) {").nl();
    bodySb.i(2).p("sum += Math.exp(nums[j]-nums[i]);").nl();
    bodySb.i(1).p("}").nl();
    bodySb.i(1).p("preds[i+1] = 1/sum;").nl();
    bodySb.i().p("}").nl();

    bodySb
        .i()
        .p(
            "preds[0] = hex.genmodel.GenModel.getPrediction(preds, PRIOR_CLASS_DISTRIB, data, "
                + defaultThreshold()
                + ");")
        .nl();
  }
Пример #6
0
  /**
   * Writes the body of the generated scoring method into {@code bodySb}.
   *
   * <p>The generated code zeroes {@code preds} and then, for each of the {@code _parms._nv}
   * outputs, sums contributions from the categorical columns (one {@code EIGVECS} row selected
   * per column via {@code CATOFFS}, with NaN mapped to the last level of the column) and from
   * the numeric columns (value shifted by {@code NORMSUB}, scaled by {@code NORMMUL} and
   * multiplied by the matching {@code EIGVECS} entry). Input columns are read through the
   * {@code PERMUTE} index vector.
   *
   * @param bodySb stream receiving the generated method body
   * @param classCtx class-level generator pipeline (not used by this method)
   * @param fileCtx file-level generator pipeline (not used by this method)
   * @param verboseCode verbosity flag (not used by this method)
   */
  @Override
  protected void toJavaPredictBody(
      SBPrintStream bodySb,
      CodeGeneratorPipeline classCtx,
      CodeGeneratorPipeline fileCtx,
      final boolean verboseCode) {
    bodySb.i().p("java.util.Arrays.fill(preds,0);").nl();

    final int categoricalCount = _output._ncats;
    final int numericCount = _output._nnums;
    final boolean hasCategoricals = categoricalCount > 0;

    bodySb.i().p("final int nstart = CATOFFS[CATOFFS.length-1];").nl();
    bodySb.i().p("for(int i = 0; i < ").p(_parms._nv).p("; i++) {").nl();

    // Categorical columns: when level 0 is not kept, the emitted level index is shifted by one.
    final String levelShift = _parms._use_all_factor_levels ? ";" : "-1;";
    bodySb.i(1).p("for(int j = 0; j < ").p(categoricalCount).p("; j++) {").nl();
    bodySb.i(2).p("double d = data[PERMUTE[j]];").nl();
    bodySb.i(2).p("int last = CATOFFS[j+1]-CATOFFS[j]-1;").nl();
    bodySb.i(2).p("int c = Double.isNaN(d) ? last : (int)d" + levelShift).nl();
    bodySb.i(2).p("if(c < 0 || c > last) continue;").nl();
    bodySb.i(2).p("preds[i] += EIGVECS[CATOFFS[j]+c][i];").nl();
    bodySb.i(1).p("}").nl();

    // Numeric columns: both indices are offset only when categorical columns precede them.
    final String permuteIndex = hasCategoricals ? "j+" + categoricalCount : "j";
    final String eigenRowIndex = hasCategoricals ? "j+ nstart" : "j";
    final String numericTerm =
        "preds[i] += (data[PERMUTE["
            + permuteIndex
            + "]]-NORMSUB[j])*NORMMUL[j]*EIGVECS["
            + eigenRowIndex
            + "][i];";
    bodySb.i(1).p("for(int j = 0; j < ").p(numericCount).p("; j++) {").nl();
    bodySb.i(2).p(numericTerm).nl();
    bodySb.i(1).p("}").nl();

    bodySb.i().p("}").nl();
  }