Example #1
  @Override
  protected void checkMemoryFootPrint() {
    if (_model._output._ntrees == 0) return;
    int trees_so_far = _model._output._ntrees; // existing trees
    long model_mem_size =
        new ComputeModelSize(trees_so_far, _model._output._treeKeys).doAllNodes()._model_mem_size;
    _model._output._treeStats._byte_size = model_mem_size;
    double avg_tree_mem_size = (double) model_mem_size / trees_so_far;
    Log.debug(
        "Average tree size (for all classes): " + PrettyPrint.bytes((long) avg_tree_mem_size));

    // all the compressed trees are stored on the driver node
    long max_mem = H2O.SELF.get_max_mem();
    if (_parms._ntrees * avg_tree_mem_size > max_mem) {
      String msg =
          "The tree model will not fit in the driver node's memory ("
              + PrettyPrint.bytes((long) avg_tree_mem_size)
              + " per tree x "
              + _parms._ntrees
              + " > "
              + PrettyPrint.bytes(max_mem)
              + ") - try decreasing ntrees and/or max_depth or increasing min_rows!";
      error("_ntrees", msg);
      cancel(msg);
    }
  }
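For intuition, a hedged back-of-envelope version of the projection above (the per-tree size, tree count and heap size are hypothetical, not taken from the source):

  // Hypothetical sizing sketch mirroring the check in checkMemoryFootPrint.
  long avgTreeBytes = 2L * 1024 * 1024;                   // observed ~2 MB per tree so far
  long requestedTrees = 5_000;                            // requested _parms._ntrees
  long projected = requestedTrees * avgTreeBytes;         // ~10 GB projected final model size
  boolean fits = projected <= 8L * 1024 * 1024 * 1024;    // false on an 8 GB driver heap -> error + cancel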
Example #2
 // Compute a compressed integer buffer
 private byte[] bufX(long bias, int scale, int off, int log) {
   byte[] bs = new byte[(_len << log) + off];
   int j = 0;
   for (int i = 0; i < _len; i++) {
     long le = -bias;
     if (_id == null || _id.length == 0 || (j < _id.length && _id[j] == i)) {
       if (isNA2(j)) {
         le = NAS[log];
       } else {
         int x = (_xs[j] == Integer.MIN_VALUE + 1 ? 0 : _xs[j]) - scale;
         le += x >= 0 ? _ls[j] * PrettyPrint.pow10i(x) : _ls[j] / PrettyPrint.pow10i(-x);
       }
       ++j;
     }
     switch (log) {
       case 0:
         bs[i + off] = (byte) le;
         break;
       case 1:
         UnsafeUtils.set2(bs, (i << 1) + off, (short) le);
         break;
       case 2:
         UnsafeUtils.set4(bs, (i << 2) + off, (int) le);
         break;
       case 3:
         UnsafeUtils.set8(bs, (i << 3) + off, le);
         break;
       default:
         throw H2O.fail();
     }
   }
   assert j == sparseLen()
       : "j = " + j + ", len = " + sparseLen() + ", len2 = " + _len + ", id[j] = " + _id[j];
   return bs;
 }
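To make the bias/scale/log parameters concrete, a short hedged reading of bufX (the cell value and parameters are hypothetical): a cell holding 1.25 is kept during appends as mantissa _ls[j] = 125 with decimal exponent _xs[j] = -2. Called with scale = -2, bias = 0 and log = 1 (2-byte output), the loop computes x = -2 - (-2) = 0 and le = -0 + 125 * 10^0 = 125, and UnsafeUtils.set2 writes 125 as a short at offset (i << 1) + off; an NA instead short-circuits to the sentinel NAS[log].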
Example #3
 protected void checkMemoryFootPrint() {
   long mem_usage = 8 /*doubles*/ * _parms._k * _train.numCols() * (_parms._standardize ? 2 : 1);
   long max_mem = H2O.SELF._heartbeat.get_free_mem();
   if (mem_usage > max_mem) {
     String msg =
         "Centroids won't fit in the driver node's memory ("
             + PrettyPrint.bytes(mem_usage)
             + " > "
             + PrettyPrint.bytes(max_mem)
             + ") - try reducing the number of columns and/or the number of categorical factors.";
     error("_train", msg);
     cancel(msg);
   }
 }
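A quick hedged instantiation of the estimate above (sizes hypothetical): with _parms._k = 100 centroids, 1,000 training columns and standardization enabled, mem_usage = 8 * 100 * 1,000 * 2 = 1,600,000 bytes, roughly 1.5 MB, comfortably below a typical driver heap; the error path is really aimed at very wide frames, e.g. after heavy categorical expansion.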
Example #4
 // Compute a compressed double buffer
 private Chunk chunkD() {
   HashMap<Long, Byte> hs = new HashMap<>(CUDChunk.MAX_UNIQUES);
   Byte dummy = 0;
   final byte[] bs = MemoryManager.malloc1(_len * 8, true);
   int j = 0;
   boolean fitsInUnique = true;
   for (int i = 0; i < _len; ++i) {
     double d = 0;
     if (_id == null || _id.length == 0 || (j < _id.length && _id[j] == i)) {
       d =
           _ds != null
               ? _ds[j]
               : (isNA2(j) || isCategorical(j)) ? Double.NaN : _ls[j] * PrettyPrint.pow10(_xs[j]);
       ++j;
     }
     if (fitsInUnique) {
       if (hs.size() < CUDChunk.MAX_UNIQUES) // still got space
          hs.put(
             Double.doubleToLongBits(d),
             dummy); // store doubles as longs to avoid NaN comparison issues during extraction
       else
         fitsInUnique =
             (hs.size() == CUDChunk.MAX_UNIQUES)
                 && // full, but might not need more space because of repeats
                 hs.containsKey(Double.doubleToLongBits(d));
     }
     UnsafeUtils.set8d(bs, 8 * i, d);
   }
   assert j == sparseLen() : "j = " + j + ", _len = " + sparseLen();
   if (fitsInUnique && CUDChunk.computeByteSize(hs.size(), len()) < 0.8 * bs.length)
     return new CUDChunk(bs, hs, len());
   else return new C8DChunk(bs);
 }
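In short, chunkD always fills a dense 8-bytes-per-row buffer while counting distinct double bit patterns (doubles are keyed by Double.doubleToLongBits, so NaNs compare reliably). A hedged micro-example: a 10,000-row chunk that only ever contains 0, 1 and NaN has three uniques, so if CUDChunk.computeByteSize(3, 10000) comes out below 80% of the 80,000-byte dense buffer, the dictionary-style CUDChunk is returned; a chunk that sees a genuinely new value after the map is already full falls back to the plain C8DChunk.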
Example #5
 // Compute a sparse floating-point (float or double) buffer
 private byte[] bufD(final int valsz) {
   int log = 0;
   while ((1 << log) < valsz) ++log;
   assert (1 << log) == valsz;
   final int ridsz = _len >= 65535 ? 4 : 2;
   final int elmsz = ridsz + valsz;
   int off = CXDChunk._OFF;
   byte[] buf = MemoryManager.malloc1(off + sparseLen() * elmsz, true);
   for (int i = 0; i < sparseLen(); i++, off += elmsz) {
     if (ridsz == 2) UnsafeUtils.set2(buf, off, (short) _id[i]);
     else UnsafeUtils.set4(buf, off, _id[i]);
     final double dval =
         _ds == null ? isNA2(i) ? Double.NaN : _ls[i] * PrettyPrint.pow10(_xs[i]) : _ds[i];
     switch (valsz) {
       case 4:
         UnsafeUtils.set4f(buf, off + ridsz, (float) dval);
         break;
       case 8:
         UnsafeUtils.set8d(buf, off + ridsz, dval);
         break;
       default:
         throw H2O.fail();
     }
   }
   assert off == buf.length;
   return buf;
 }
Example #6
 protected void switch_to_doubles() {
   assert _ds == null;
   double[] ds = MemoryManager.malloc8d(sparseLen());
   for (int i = 0; i < sparseLen(); ++i)
     if (isNA2(i) || isCategorical2(i)) ds[i] = Double.NaN;
     else ds[i] = _ls[i] * PrettyPrint.pow10(_xs[i]);
   _ls = null;
   _xs = null;
   _ds = ds;
 }
Example #7
 @Override
 public long at8_impl(int i) {
   if (_len != sparseLen()) {
     int idx = Arrays.binarySearch(_id, 0, sparseLen(), i);
     if (idx >= 0) i = idx;
     else return 0;
   }
   if (isNA2(i)) throw new RuntimeException("Attempting to access NA as integer value.");
   if (_ls == null) return (long) _ds[i];
   return _ls[i] * PrettyPrint.pow10i(_xs[i]);
 }
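A small hedged illustration of the sparse lookup (array contents hypothetical): suppose the chunk has _len = 100 rows but only three stored entries, _id = {3, 7, 42} and _ls = {5, 9, 2} with _xs all zero. at8_impl(7) binary-searches _id, finds slot 1 and returns 9; at8_impl(5) gets a negative insertion point and returns 0, the implicit value of an unstored sparse row; calling it on a stored NA throws, because an NA has no long representation.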
Example #8
 @Override
 public NewChunk inflate_impl(NewChunk nc) {
   double dx = Math.log10(_scale);
   assert water.util.PrettyPrint.fitsIntoInt(dx);
   nc.set_sparseLen(0);
   nc.set_len(0);
   final int len = _len;
   for (int i = 0; i < len; i++) {
     int res = 0xFF & _mem[i + _OFF];
     if (res == C1Chunk._NA) nc.addNA();
     else nc.addNum((res + _bias), (int) dx);
   }
   return nc;
 }
Example #9
 public void addNum(long val, int exp) {
   if (isUUID() || isString()) addNA();
   else if (_ds != null) {
     assert _ls == null;
     addNum(val * PrettyPrint.pow10(exp));
   } else {
     if (val == 0) exp = 0; // Canonicalize zero
     long t; // Remove extra scaling
     while (exp < 0 && exp > -9999999 && (t = val / 10) * 10 == val) {
       val = t;
       exp++;
     }
     append2(val, exp);
   }
 }
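A hedged standalone mirror of the canonicalization loop (the helper name canonicalize is hypothetical, not part of NewChunk):

  // Strip trailing powers of ten from the mantissa, exactly as addNum does before append2.
  static long[] canonicalize(long val, int exp) {
    if (val == 0) exp = 0;                                   // canonicalize zero
    long t;
    while (exp < 0 && exp > -9999999 && (t = val / 10) * 10 == val) {
      val = t;
      exp++;
    }
    return new long[] {val, exp};                            // e.g. (1300, -2) -> (13, 0); both mean 13
  }

So addNum(1300, -2) ends up appending the canonical pair (13, 0), and addNum(0, 5) appends (0, 0).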
Example #10
 @Override
 protected void checkMemoryFootPrint() {
   HeartBeat hb = H2O.SELF._heartbeat;
   double p = _train.degreesOfFreedom();
   long mem_usage =
       (long)
           (hb._cpus_allowed
               * p
               * p
               * 8 /*doubles*/
               * Math.log((double) _train.lastVec().nChunks())
               / Math.log(2.)); // one gram per core
   long max_mem = hb.get_max_mem();
   if (mem_usage > max_mem) {
     String msg =
         "Gram matrices (one per thread) won't fit in the driver node's memory ("
             + PrettyPrint.bytes(mem_usage)
             + " > "
             + PrettyPrint.bytes(max_mem)
             + ") - try reducing the number of columns and/or the number of categorical factors.";
     error("_train", msg);
     cancel(msg);
   }
 }
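Plugging hypothetical numbers into the estimate above: with 8 allowed CPUs, p = 1,000 degrees of freedom and 16 chunks in the last Vec, mem_usage ≈ 8 * 1,000 * 1,000 * 8 * log2(16) = 8 * 1,000,000 * 8 * 4 = 256,000,000 bytes, i.e. roughly 244 MB of per-thread Gram matrices, which already gets uncomfortable on a small driver heap.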
Example #11
    private TwoDimTable createScoringHistoryTable(KMeansModel.KMeansOutput output) {
      List<String> colHeaders = new ArrayList<>();
      List<String> colTypes = new ArrayList<>();
      List<String> colFormat = new ArrayList<>();
      colHeaders.add("Timestamp");
      colTypes.add("string");
      colFormat.add("%s");
      colHeaders.add("Duration");
      colTypes.add("string");
      colFormat.add("%s");
      colHeaders.add("Iteration");
      colTypes.add("long");
      colFormat.add("%d");
      colHeaders.add("Avg. Change of Std. Centroids");
      colTypes.add("double");
      colFormat.add("%.5f");
      colHeaders.add("Within Cluster Sum Of Squares");
      colTypes.add("double");
      colFormat.add("%.5f");

      final int rows = output._avg_centroids_chg.length;
      TwoDimTable table =
          new TwoDimTable(
              "Scoring History",
              null,
              new String[rows],
              colHeaders.toArray(new String[0]),
              colTypes.toArray(new String[0]),
              colFormat.toArray(new String[0]),
              "");
      int row = 0;
      for (int i = 0; i < rows; i++) {
        int col = 0;
        assert (row < table.getRowDim());
        assert (col < table.getColDim());
        DateTimeFormatter fmt = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss");
        table.set(row, col++, fmt.print(output._training_time_ms[i]));
        table.set(row, col++, PrettyPrint.msecs(output._training_time_ms[i] - _start_time, true));
        table.set(row, col++, i);
        table.set(row, col++, output._avg_centroids_chg[i]);
        table.set(row, col++, output._history_withinss[i]);
        row++;
      }
      return table;
    }
Example #12
 // Compute a sparse integer buffer
 private byte[] bufS(final int valsz) {
   int log = 0;
   while ((1 << log) < valsz) ++log;
   assert valsz == 0 || (1 << log) == valsz;
   final int ridsz = _len >= 65535 ? 4 : 2;
   final int elmsz = ridsz + valsz;
   int off = CXIChunk._OFF;
   byte[] buf = MemoryManager.malloc1(off + sparseLen() * elmsz, true);
   for (int i = 0; i < sparseLen(); i++, off += elmsz) {
     if (ridsz == 2) UnsafeUtils.set2(buf, off, (short) _id[i]);
     else UnsafeUtils.set4(buf, off, _id[i]);
     if (valsz == 0) {
       assert _xs[i] == 0 && _ls[i] == 1;
       continue;
     }
     assert _xs[i] == Integer.MIN_VALUE || _xs[i] >= 0
         : "unexpected exponent " + _xs[i]; // assert we have int or NA
     final long lval =
         _xs[i] == Integer.MIN_VALUE ? NAS[log] : _ls[i] * PrettyPrint.pow10i(_xs[i]);
     switch (valsz) {
       case 1:
         buf[off + ridsz] = (byte) lval;
         break;
       case 2:
         short sval = (short) lval;
         UnsafeUtils.set2(buf, off + ridsz, sval);
         break;
       case 4:
         int ival = (int) lval;
         UnsafeUtils.set4(buf, off + ridsz, ival);
         break;
       case 8:
         UnsafeUtils.set8(buf, off + ridsz, lval);
         break;
       default:
         throw H2O.fail();
     }
   }
   assert off == buf.length;
   return buf;
 }
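For orientation, a hedged sketch of the layout bufS produces: the buffer opens with a CXIChunk._OFF-byte header, followed by one element per stored (non-zero) row, each element being the row id in ridsz bytes (2 while _len < 65535, otherwise 4) followed by the value in valsz bytes; with three stored rows and valsz = 4 the payload is 3 * (2 + 4) = 18 bytes after the header. valsz = 0 is the pure-bitvector case: only row ids are written and the value is implicitly 1, which is what the assert on _ls[i] == 1 guards.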
Example #13
   public Job<Frame> execImpl() {
    if (integer_fraction + binary_fraction + categorical_fraction > 1)
      throw new IllegalArgumentException(
          "Integer, binary and categorical fractions must add up to <= 1.");
    if (Math.abs(missing_fraction) > 1)
      throw new IllegalArgumentException("Missing fraction must be between 0 and 1.");
    if (Math.abs(integer_fraction) > 1)
      throw new IllegalArgumentException("Integer fraction must be between 0 and 1.");
    if (Math.abs(binary_fraction) > 1)
      throw new IllegalArgumentException("Binary fraction must be between 0 and 1.");
    if (Math.abs(binary_ones_fraction) > 1)
      throw new IllegalArgumentException("Binary ones fraction must be between 0 and 1.");
    if (Math.abs(categorical_fraction) > 1)
      throw new IllegalArgumentException("Categorical fraction must be between 0 and 1.");
    if (categorical_fraction > 0 && factors <= 1)
      throw new IllegalArgumentException("Factors must be larger than 2 for categorical data.");
    if (response_factors < 1)
      throw new IllegalArgumentException(
          "Response factors must be either 1 (real-valued response), or >=2 (factor levels).");
    if (response_factors > 1024)
      throw new IllegalArgumentException("Response factors must be <= 1024.");
    if (factors > 1000000)
      throw new IllegalArgumentException("Number of factors must be <= 1,000,000).");
    if (cols <= 0 || rows <= 0)
      throw new IllegalArgumentException("Must have number of rows > 0 and columns > 1.");

    // estimate byte size of the frame
    double byte_estimate =
        randomize
            ? rows
                    * cols
                    * (binary_fraction * 1. / 8 // bits
                        + categorical_fraction * (factors < 128 ? 1 : factors < 32768 ? 2 : 4)
                        + integer_fraction
                            * (integer_range < 128
                                ? 1
                                 : integer_range < 32768 ? 2 : integer_range < (1L << 31) ? 4 : 8)
                        + (1 - integer_fraction - binary_fraction - categorical_fraction)
                            * 8) // reals
                + rows * 1 // response is
            : 0; // all constants - should be small

    if (byte_estimate > H2O.CLOUD._memary[0].get_max_mem() * H2O.CLOUD.size())
      throw new IllegalArgumentException(
          "Frame is expected to require "
              + PrettyPrint.bytes((long) byte_estimate)
              + ", won't fit into H2O's memory.");

    if (!randomize) {
      if (integer_fraction != 0 || categorical_fraction != 0)
        throw new IllegalArgumentException(
            "Cannot have integer or categorical fractions > 0 unless randomize=true.");
    } else {
      if (value != 0)
        throw new IllegalArgumentException(
            "Cannot set data to a constant value if randomize=true.");
    }
    if (_dest == null) throw new IllegalArgumentException("Destination key cannot be null.");

    FrameCreator fc = new FrameCreator(this, this._key);
    start(fc, fc.nChunks() * 5);
    return this;
  }
Example #14
  @Ignore
  @Test
  public void matrixVecTest() {
    int rows = 2048;
    int cols = 8192;
    int loops = 5;
    int warmup_loops = 5;
    long seed = 0x533D;
    float nnz_ratio_vec = 0.01f; // fraction of non-zeroes for vector
    float nnz_ratio_mat = 0.1f; // fraction of non-zeroes for matrix

    float[] a = new float[rows * cols];
    float[] x = new float[cols];
    float[] y = new float[rows];
    float[] res = new float[rows];
    byte[] bits = new byte[rows];

    for (int row = 0; row < rows; ++row) {
      y[row] = 0;
      res[row] = 0;
      bits[row] = (byte) ("abcdefghijklmnopqrstuvwxyz".toCharArray()[row % 26]);
    }
    Random rng = new Random(seed);
    for (int col = 0; col < cols; ++col)
      if (rng.nextFloat() < nnz_ratio_vec) x[col] = ((float) col) / cols;

    for (int row = 0; row < rows; ++row) {
      int off = row * cols;
      for (int col = 0; col < cols; ++col) {
        if (rng.nextFloat() < nnz_ratio_mat) a[off + col] = ((float) (row + col)) / cols;
      }
    }
    Storage.DenseRowMatrix dra = new Storage.DenseRowMatrix(a, rows, cols);
    Storage.DenseColMatrix dca = new Storage.DenseColMatrix(dra, rows, cols);
    Storage.SparseRowMatrix sra = new Storage.SparseRowMatrix(dra, rows, cols);
    Storage.SparseColMatrix sca = new Storage.SparseColMatrix(dca, rows, cols);
    Storage.DenseVector dx = new Storage.DenseVector(x);
    Storage.DenseVector dy = new Storage.DenseVector(y);
    Storage.DenseVector dres = new Storage.DenseVector(res);
    Storage.SparseVector sx = new Storage.SparseVector(x);

    /* warmup */
    System.out.println("warming up.");
    float sum = 0;
    for (int l = 0; l < warmup_loops; ++l) {
      gemv_naive(res, a, x, y, bits);
      sum += res[rows / 2];
    }
    for (int l = 0; l < warmup_loops; ++l) {
      gemv_naive(dres, dra, dx, dy, bits);
      sum += res[rows / 2];
    }
    for (int l = 0; l < warmup_loops; ++l) {
      gemv_row_optimized(res, a, x, y, bits);
      sum += res[rows / 2];
    }
    for (int l = 0; l < warmup_loops; ++l) {
      gemv(dres, dca, dx, dy, bits);
      sum += res[rows / 2];
    }
    for (int l = 0; l < warmup_loops; ++l) {
      gemv(dres, dra, sx, dy, bits);
      sum += res[rows / 2];
    }
    for (int l = 0; l < warmup_loops; ++l) {
      gemv(dres, dca, sx, dy, bits);
      sum += res[rows / 2];
    }
    for (int l = 0; l < warmup_loops; ++l) {
      gemv(dres, sra, sx, dy, bits);
      sum += res[rows / 2];
    }
    for (int l = 0; l < warmup_loops; ++l) {
      gemv(dres, sca, sx, dy, bits);
      sum += res[rows / 2];
    }

    /* naive version */
    System.out.println("\nstarting naive.");
    sum = 0;
    long start = System.currentTimeMillis();
    for (int l = 0; l < loops; ++l) {
      gemv_naive(res, a, x, y, bits);
      sum += res[rows / 2]; // do something useful
    }
    System.out.println("result: " + sum + " and " + ArrayUtils.sum(res));
    System.out.println(
        "naive time: " + PrettyPrint.msecs(System.currentTimeMillis() - start, true));

    System.out.println("\nstarting dense row * dense.");
    sum = 0;
    start = System.currentTimeMillis();
    for (int l = 0; l < loops; ++l) {
      gemv_naive(dres, dra, dx, dy, bits);
      sum += res[rows / 2]; // do something useful
    }
    System.out.println("result: " + sum + " and " + ArrayUtils.sum(res));
    System.out.println(
        "dense row * dense time: " + PrettyPrint.msecs(System.currentTimeMillis() - start, true));

    System.out.println("\nstarting optimized dense row * dense.");
    sum = 0;
    start = System.currentTimeMillis();
    for (int l = 0; l < loops; ++l) {
      gemv_row_optimized(res, a, x, y, bits);
      sum += res[rows / 2]; // do something useful
    }
    System.out.println("result: " + sum + " and " + ArrayUtils.sum(res));
    System.out.println(
        "optimized dense row * dense time: "
            + PrettyPrint.msecs(System.currentTimeMillis() - start, true));

    System.out.println("\nstarting dense col * dense.");
    sum = 0;
    start = System.currentTimeMillis();
    for (int l = 0; l < loops; ++l) {
      gemv(dres, dca, dx, dy, bits);
      sum += res[rows / 2]; // do something useful
    }
    System.out.println("result: " + sum + " and " + ArrayUtils.sum(res));
    System.out.println(
        "dense col * dense time: " + PrettyPrint.msecs(System.currentTimeMillis() - start, true));

    System.out.println("\nstarting dense row * sparse.");
    sum = 0;
    start = System.currentTimeMillis();
    for (int l = 0; l < loops; ++l) {
      gemv(dres, dra, sx, dy, bits);
      sum += res[rows / 2]; // do something useful
    }
    System.out.println("result: " + sum + " and " + ArrayUtils.sum(res));
    System.out.println(
        "dense row * sparse time: " + PrettyPrint.msecs(System.currentTimeMillis() - start, true));

    System.out.println("\nstarting dense col * sparse.");
    sum = 0;
    start = System.currentTimeMillis();
    for (int l = 0; l < loops; ++l) {
      gemv(dres, dca, sx, dy, bits);
      sum += res[rows / 2]; // do something useful
    }
    System.out.println("result: " + sum + " and " + ArrayUtils.sum(res));
    System.out.println(
        "dense col * sparse time: " + PrettyPrint.msecs(System.currentTimeMillis() - start, true));

    System.out.println("\nstarting sparse row * sparse.");
    sum = 0;
    start = System.currentTimeMillis();
    for (int l = 0; l < loops; ++l) {
      gemv(dres, sra, sx, dy, bits);
      sum += res[rows / 2]; // do something useful
    }
    System.out.println("result: " + sum + " and " + ArrayUtils.sum(res));
    System.out.println(
        "sparse row * sparse time: " + PrettyPrint.msecs(System.currentTimeMillis() - start, true));

    System.out.println("\nstarting sparse col * sparse.");
    sum = 0;
    start = System.currentTimeMillis();
    for (int l = 0; l < loops; ++l) {
      gemv(dres, sca, sx, dy, bits);
      sum += res[rows / 2]; // do something useful
    }
    System.out.println("result: " + sum + " and " + ArrayUtils.sum(res));
    System.out.println(
        "sparse col * sparse time: " + PrettyPrint.msecs(System.currentTimeMillis() - start, true));
  }
Example #15
    /**
     * Compute the actual train_samples_per_iteration size from the user-given parameter
     *
     * @param mp Model parameter (DeepLearning object)
     * @param numRows number of training rows
     * @param model DL model
     * @return The total number of training rows to be processed per iteration (summed over on all
     *     nodes)
     */
    private long computeTrainSamplesPerIteration(
        final DeepLearningParameters mp, final long numRows, final DeepLearningModel model) {
      long tspi = mp._train_samples_per_iteration;
      assert (tspi == 0 || tspi == -1 || tspi == -2 || tspi >= 1);
      if (tspi == 0 || (!mp._replicate_training_data && tspi == -1)) {
        tspi = numRows;
        if (!mp._quiet_mode)
          Log.info(
              "Setting train_samples_per_iteration ("
                  + mp._train_samples_per_iteration
                  + ") to one epoch: #rows ("
                  + tspi
                  + ").");
      } else if (tspi == -1) {
        tspi = (mp._single_node_mode ? 1 : H2O.CLOUD.size()) * numRows;
        if (!mp._quiet_mode)
          Log.info(
              "Setting train_samples_per_iteration ("
                  + mp._train_samples_per_iteration
                  + ") to #nodes x #rows ("
                  + tspi
                  + ").");
      } else if (tspi == -2) {
        // automatic tuning based on CPU speed, network speed and model size

        // measure cpu speed
        double total_gflops = 0;
        for (H2ONode h2o : H2O.CLOUD._memary) {
          HeartBeat hb = h2o._heartbeat;
          total_gflops += hb._gflops; // can be NaN if not yet run
        }
        if (mp._single_node_mode) total_gflops /= H2O.CLOUD.size();
        if (Double.isNaN(total_gflops)) {
          total_gflops =
              Linpack.run(H2O.SELF._heartbeat._cpus_allowed)
                  * (mp._single_node_mode ? 1 : H2O.CLOUD.size());
        }
        assert (!Double.isNaN(total_gflops));

        final long model_size = model.model_info().size();
        int[] msg_sizes =
            new int[] {
              1,
              (int) (model_size * 4) == (model_size * 4)
                  ? (int) (model_size * 4)
                  : Integer.MAX_VALUE
            };
        double[] microseconds_collective = new double[msg_sizes.length];
        NetworkTest.NetworkTester nt =
            new NetworkTest.NetworkTester(
                msg_sizes,
                null,
                microseconds_collective,
                model_size > 1e6 ? 1 : 5 /*repeats*/,
                false,
                true /*only collectives*/);
        nt.compute2();

        // length of the network traffic queue based on log-tree rollup (2 log(nodes))
        int network_queue_length =
            mp._single_node_mode || H2O.CLOUD.size() == 1
                ? 1
                : 2 * (int) Math.floor(Math.log(H2O.CLOUD.size()) / Math.log(2));

        // heuristics
        double flops_overhead_per_row = 50;
        if (mp._activation == DeepLearningParameters.Activation.Maxout
            || mp._activation == DeepLearningParameters.Activation.MaxoutWithDropout) {
          flops_overhead_per_row *= 8;
        } else if (mp._activation == DeepLearningParameters.Activation.Tanh
            || mp._activation == DeepLearningParameters.Activation.TanhWithDropout) {
          flops_overhead_per_row *= 5;
        }

        // target fraction of comm vs cpu time: 5%
        double fraction =
            mp._single_node_mode || H2O.CLOUD.size() == 1
                ? 1e-3
                : mp._target_ratio_comm_to_comp; // in single-node mode, there's no model averaging
                                                 // effect, so less need to shorten the M/R
                                                 // iteration

        // estimate the time for communication (network) and training (compute)
        model.time_for_communication_us =
            (H2O.CLOUD.size() == 1
                    ? 1e4 /* add 10ms for single-node */
                    : 1e5 /* add 100ms for multi-node MR overhead */)
                + network_queue_length * microseconds_collective[1];
        double time_per_row_us =
            (flops_overhead_per_row * model_size + 10000 * model.model_info().units[0])
                / (total_gflops * 1e9)
                / H2O.SELF._heartbeat._cpus_allowed
                * 1e6;
        assert (!Double.isNaN(time_per_row_us));

        // compute the optimal number of training rows per iteration
        // fraction := time_comm_us / (time_comm_us + tspi * time_per_row_us)  ==>  tspi =
        // (time_comm_us/fraction - time_comm_us)/time_per_row_us
        tspi =
            (long)
                ((model.time_for_communication_us / fraction - model.time_for_communication_us)
                    / time_per_row_us);

        tspi =
            Math.min(
                tspi,
                (mp._single_node_mode ? 1 : H2O.CLOUD.size())
                    * numRows
                    * 10); // not more than 10x of what train_samples_per_iteration=-1 would do

        // If the number is close to a multiple of epochs, use that -> prettier scoring
        if (tspi > numRows && Math.abs(tspi % numRows) / (double) numRows < 0.2)
          tspi -= tspi % numRows;
        tspi =
            Math.min(
                tspi,
                (long)
                    (mp._epochs
                        * numRows
                        / 10)); // limit to number of epochs desired, but at least 10 iterations
                                // total
        if (H2O.CLOUD.size() == 1 || mp._single_node_mode) {
          tspi =
              Math.min(
                  tspi,
                  10
                      * (int)
                          (1e6
                              / time_per_row_us)); // in single-node mode, only run for at most 10
                                                   // seconds
        }
        tspi = Math.max(1, tspi); // at least 1 row
        tspi =
            Math.min(
                100000 * H2O.CLOUD.size(),
                tspi); // at most 100k rows per node for initial guess - can always relax later on

        if (!mp._quiet_mode) {
          Log.info("Auto-tuning parameter 'train_samples_per_iteration':");
          Log.info("Estimated compute power : " + Math.round(total_gflops * 100) / 100 + " GFlops");
          Log.info(
              "Estimated time for comm : "
                  + PrettyPrint.usecs((long) model.time_for_communication_us));
          Log.info(
              "Estimated time per row  : "
                  + ((long) time_per_row_us > 0
                      ? PrettyPrint.usecs((long) time_per_row_us)
                      : time_per_row_us + " usecs"));
          Log.info("Estimated training speed: " + (int) (1e6 / time_per_row_us) + " rows/sec");
          Log.info(
              "Setting train_samples_per_iteration ("
                  + mp._train_samples_per_iteration
                  + ") to auto-tuned value: "
                  + tspi);
        }

      } else {
        // limit user-given value to number of epochs desired
        tspi = Math.max(1, Math.min(tspi, (long) (mp._epochs * numRows)));
      }
      assert (tspi != 0 && tspi != -1 && tspi != -2 && tspi >= 1);
      model.tspiGuess = tspi;
      return tspi;
    }
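A hedged numeric pass through the -2 (auto-tuning) branch, with all inputs hypothetical: targeting a comm fraction of 0.05, an estimated time_for_communication_us of 100,000 µs and a time_per_row_us of 10 µs gives tspi = (100,000 / 0.05 - 100,000) / 10 = 190,000 rows per iteration, before the subsequent clamps (at most 10x the -1 setting, at most epochs * rows / 10, at least 1 row, at most 100,000 * cloud size) are applied.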
Example #16
    /**
     * Train a Deep Learning neural net model
     *
     * @param model Input model (e.g., from initModel(), or from a previous training run)
     * @return Trained model
     */
    public final DeepLearningModel trainModel(DeepLearningModel model) {
      Frame validScoreFrame = null;
      Frame train, trainScoreFrame;
      try {
        //      if (checkpoint == null && !quiet_mode) logStart(); //if checkpoint is given, some
        // Job's params might be uninitialized (but the restarted model's parameters are correct)
        if (model == null) {
          model = DKV.get(dest()).get();
        }
        Log.info(
            "Model category: "
                + (_parms._autoencoder
                    ? "Auto-Encoder"
                    : isClassifier() ? "Classification" : "Regression"));
        final long model_size = model.model_info().size();
        Log.info(
            "Number of model parameters (weights/biases): " + String.format("%,d", model_size));
        model.write_lock(_job);
        _job.update(0, "Setting up training data...");
        final DeepLearningParameters mp = model.model_info().get_params();

        // temporary frames of the same "name" as the orig _train/_valid (asking the parameter's
        // Key, not the actual frame)
        // Note: don't put into DKV or they would overwrite the _train/_valid frames!
        Frame tra_fr = new Frame(mp._train, _train.names(), _train.vecs());
        Frame val_fr = _valid != null ? new Frame(mp._valid, _valid.names(), _valid.vecs()) : null;

        train = tra_fr;
        if (model._output.isClassifier() && mp._balance_classes) {
          _job.update(0, "Balancing class distribution of training data...");
          float[] trainSamplingFactors =
              new float
                  [train
                      .lastVec()
                      .domain()
                      .length]; // leave initialized to 0 -> will be filled up below
          if (mp._class_sampling_factors != null) {
            if (mp._class_sampling_factors.length != train.lastVec().domain().length)
              throw new IllegalArgumentException(
                  "class_sampling_factors must have "
                      + train.lastVec().domain().length
                      + " elements");
            trainSamplingFactors =
                mp._class_sampling_factors.clone(); // clone: don't modify the original
          }
          train =
              sampleFrameStratified(
                  train,
                  train.lastVec(),
                  train.vec(model._output.weightsName()),
                  trainSamplingFactors,
                  (long) (mp._max_after_balance_size * train.numRows()),
                  mp._seed,
                  true,
                  false);
          Vec l = train.lastVec();
          Vec w = train.vec(model._output.weightsName());
          MRUtils.ClassDist cd = new MRUtils.ClassDist(l);
          model._output._modelClassDist =
              _weights != null ? cd.doAll(l, w).rel_dist() : cd.doAll(l).rel_dist();
        }
        model.training_rows = train.numRows();
        if (_weights != null && _weights.min() == 0 && _weights.max() == 1 && _weights.isInt()) {
          model.training_rows = Math.round(train.numRows() * _weights.mean());
          Log.warn(
              "Not counting "
                  + (train.numRows() - model.training_rows)
                  + " rows with weight=0 towards an epoch.");
        }
        Log.info("One epoch corresponds to " + model.training_rows + " training data rows.");
        trainScoreFrame =
            sampleFrame(
                train,
                mp._score_training_samples,
                mp._seed); // training scoring dataset is always sampled uniformly from the training
                           // dataset
        if (trainScoreFrame != train) Scope.track(trainScoreFrame);

        if (!_parms._quiet_mode)
          Log.info("Number of chunks of the training data: " + train.anyVec().nChunks());
        if (val_fr != null) {
          model.validation_rows = val_fr.numRows();
          // validation scoring dataset can be sampled in multiple ways from the given validation
          // dataset
          if (model._output.isClassifier()
              && mp._balance_classes
              && mp._score_validation_sampling
                  == DeepLearningParameters.ClassSamplingMethod.Stratified) {
            _job.update(0, "Sampling validation data (stratified)...");
            validScoreFrame =
                sampleFrameStratified(
                    val_fr,
                    val_fr.lastVec(),
                    val_fr.vec(model._output.weightsName()),
                    null,
                    mp._score_validation_samples > 0
                        ? mp._score_validation_samples
                        : val_fr.numRows(),
                    mp._seed + 1,
                    false /* no oversampling */,
                    false);
          } else {
            _job.update(0, "Sampling validation data...");
            validScoreFrame = sampleFrame(val_fr, mp._score_validation_samples, mp._seed + 1);
            if (validScoreFrame != val_fr) Scope.track(validScoreFrame);
          }
          if (!_parms._quiet_mode)
            Log.info(
                "Number of chunks of the validation data: " + validScoreFrame.anyVec().nChunks());
        }

        // Set train_samples_per_iteration size (cannot be done earlier since this depends on
        // whether stratified sampling is done)
        model.actual_train_samples_per_iteration =
            computeTrainSamplesPerIteration(mp, model.training_rows, model);
        // Determine whether shuffling is enforced
        if (mp._replicate_training_data
            && (model.actual_train_samples_per_iteration
                == model.training_rows * (mp._single_node_mode ? 1 : H2O.CLOUD.size()))
            && !mp._shuffle_training_data
            && H2O.CLOUD.size() > 1
            && !mp._reproducible) {
          if (!mp._quiet_mode)
            Log.info(
                "Enabling training data shuffling, because all nodes train on the full dataset (replicated training data).");
          mp._shuffle_training_data = true;
        }
        if (!mp._shuffle_training_data
            && model.actual_train_samples_per_iteration == model.training_rows
            && train.anyVec().nChunks() == 1) {
          if (!mp._quiet_mode)
            Log.info(
                "Enabling training data shuffling to avoid training rows in the same order over and over (no Hogwild since there's only 1 chunk).");
          mp._shuffle_training_data = true;
        }

        //        if (!mp._quiet_mode) Log.info("Initial model:\n" + model.model_info());
        long now = System.currentTimeMillis();
        model._timeLastIterationEnter = now;
        if (_parms._autoencoder) {
          _job.update(0, "Scoring null model of autoencoder...");
          if (!mp._quiet_mode) Log.info("Scoring the null model of the autoencoder.");
          model.doScoring(
              trainScoreFrame,
              validScoreFrame,
              _job._key,
              0,
              false); // get the null model reconstruction error
        }
        // put the initial version of the model into DKV
        model.update(_job);
        model.total_setup_time_ms += now - _job.start_time();
        Log.info("Total setup time: " + PrettyPrint.msecs(model.total_setup_time_ms, true));
        Log.info("Starting to train the Deep Learning model.");
        _job.update(0, "Training...");

        // main loop
        for (; ; ) {
          model.iterations++;
          model.set_model_info(
              mp._epochs == 0
                  ? model.model_info()
                  : H2O.CLOUD.size() > 1 && mp._replicate_training_data
                      ? (mp._single_node_mode
                          ? new DeepLearningTask2(
                                  _job._key,
                                  train,
                                  model.model_info(),
                                  rowFraction(train, mp, model),
                                  model.iterations)
                              .doAll(Key.make(H2O.SELF))
                              .model_info()
                          : // replicated data + single node mode
                          new DeepLearningTask2(
                                  _job._key,
                                  train,
                                  model.model_info(),
                                  rowFraction(train, mp, model),
                                  model.iterations)
                              .doAllNodes()
                              .model_info())
                      : // replicated data + multi-node mode
                      new DeepLearningTask(
                              _job._key,
                              model.model_info(),
                              rowFraction(train, mp, model),
                              model.iterations)
                          .doAll(train)
                          .model_info()); // distributed data (always in multi-node mode)
          if (stop_requested() && !timeout()) break; // cancellation
          if (!model.doScoring(
              trainScoreFrame, validScoreFrame, _job._key, model.iterations, false))
            break; // finished training (or early stopping or convergence)
          if (timeout()) break; // stop after scoring
        }

        // replace the model with the best model so far (if it's better)
        if (!stop_requested()
            && _parms._overwrite_with_best_model
            && model.actual_best_model_key != null
            && _parms._nfolds == 0) {
          DeepLearningModel best_model = DKV.getGet(model.actual_best_model_key);
          if (best_model != null
              && best_model.loss() < model.loss()
              && Arrays.equals(best_model.model_info().units, model.model_info().units)) {
            if (!_parms._quiet_mode)
              Log.info("Setting the model to be the best model so far (based on scoring history).");
            DeepLearningModelInfo mi = best_model.model_info().deep_clone();
            // Don't cheat - count full amount of training samples, since that's the amount of
            // training it took to train (without finding anything better)
            mi.set_processed_global(model.model_info().get_processed_global());
            mi.set_processed_local(model.model_info().get_processed_local());
            model.set_model_info(mi);
            model.update(_job);
            model.doScoring(trainScoreFrame, validScoreFrame, _job._key, model.iterations, true);
            assert (best_model.loss() == model.loss());
          }
        }
        // store coefficient names for future use
        // possibly change
        model.model_info().data_info().coefNames();
        if (!_parms._quiet_mode) {
          Log.info(
              "==============================================================================================================================================================================");
          if (stop_requested()) {
            Log.info("Deep Learning model training was interrupted.");
          } else {
            Log.info("Finished training the Deep Learning model.");
            Log.info(model);
          }
          Log.info(
              "==============================================================================================================================================================================");
        }
      } finally {
        if (model != null) {
          model.deleteElasticAverageModels();
          model.unlock(_job);
          if (model.actual_best_model_key != null) {
            assert (model.actual_best_model_key != model._key);
            DKV.remove(model.actual_best_model_key);
          }
        }
      }
      return model;
    }
Example #17
  private Chunk compress2() {
    // Check for basic mode info: all missing or all strings or mixed stuff
    byte mode = type();
    if (mode == Vec.T_BAD) // ALL NAs, nothing to do
      return new C0DChunk(Double.NaN, sparseLen());
    if (mode == Vec.T_STR) return new CStrChunk(_sslen, _ss, sparseLen(), _len, _is, _isAllASCII);
    boolean rerun = false;
    if (mode == Vec.T_CAT) {
      for (int i = 0; i < sparseLen(); i++)
        if (isCategorical2(i)) _xs[i] = 0;
        else if (!isNA2(i)) {
          setNA_impl2(i);
          ++_naCnt;
        }
      // Smack any mismatched string/numbers
    } else if (mode == Vec.T_NUM) {
      for (int i = 0; i < sparseLen(); i++)
        if (isCategorical2(i)) {
          setNA_impl2(i);
          rerun = true;
        }
    }
    if (rerun) {
      _naCnt = -1;
      type();
    } // Re-run rollups after dropping all numbers/categoricals

    boolean sparse = false;
    // sparse? treat as sparse iff we have at least MIN_SPARSE_RATIO times more zeros than nonzeros
    if (_sparseRatio * (_naCnt + _nzCnt) < _len) {
      set_sparse(_naCnt + _nzCnt);
      sparse = true;
    } else if (sparseLen() != _len) cancel_sparse();

    // If the data is UUIDs there's not much compression going on
    if (_ds != null && _ls != null) return chunkUUID();
    // cut out the easy all NaNs case
    if (_naCnt == _len) return new C0DChunk(Double.NaN, _len);
    // If the data was set8 as doubles, we do a quick check to see if it's
    // plain longs.  If not, we give up and use doubles.
    if (_ds != null) {
      int i; // check if we can flip to ints
      for (i = 0; i < sparseLen(); ++i)
        if (!Double.isNaN(_ds[i]) && (double) (long) _ds[i] != _ds[i]) break;
      boolean isInteger = i == sparseLen();
      boolean isConstant = !sparse || sparseLen() == 0;
      double constVal = 0;
      if (!sparse) { // check the values, sparse with some nonzeros can not be constant - has 0s and
        // (at least 1) nonzero
        constVal = _ds[0];
        for (int j = 1; j < _len; ++j)
          if (_ds[j] != constVal) {
            isConstant = false;
            break;
          }
      }
      if (isConstant)
        return isInteger ? new C0LChunk((long) constVal, _len) : new C0DChunk(constVal, _len);
      if (!isInteger) return sparse ? new CXDChunk(_len, sparseLen(), 8, bufD(8)) : chunkD();
      // Else flip to longs
      _ls = new long[_ds.length];
      _xs = new int[_ds.length];
      double[] ds = _ds;
      _ds = null;
      final int naCnt = _naCnt;
      for (i = 0; i < sparseLen(); i++) // Inject all doubles into longs
        if (Double.isNaN(ds[i])) setNA_impl2(i);
        else _ls[i] = (long) ds[i];
      // setNA_impl2 will set _naCnt to -1!
      // we already know what the naCnt is (it did not change!) so set it back to correct value
      _naCnt = naCnt;
    }

    // IF (_len > _sparseLen) THEN Sparse
    // Check for compressed *during appends*.  Here we know:
    // - No specials; _xs[]==0.
    // - No floats; _ds==null
    // - NZ length in _sparseLen, actual length in _len.
    // - Huge ratio between _len and _sparseLen, and we do NOT want to inflate to
    //   the larger size; we need to keep it all small all the time.
    // - Rows in _xs

    // Data in some fixed-point format, not doubles
    // See if we can sanely normalize all the data to the same fixed-point.
    int xmin = Integer.MAX_VALUE; // min exponent found
    boolean floatOverflow = false;
    double min = Double.POSITIVE_INFINITY;
    double max = Double.NEGATIVE_INFINITY;
    int p10iLength = PrettyPrint.powers10i.length;
    long llo = Long.MAX_VALUE, lhi = Long.MIN_VALUE;
    int xlo = Integer.MAX_VALUE, xhi = Integer.MIN_VALUE;

    for (int i = 0; i < sparseLen(); i++) {
      if (isNA2(i)) continue;
      long l = _ls[i];
      int x = _xs[i];
      assert x != Integer.MIN_VALUE : "l = " + l + ", x = " + x;
      if (x == Integer.MIN_VALUE + 1) x = 0; // Replace categorical flag with no scaling
      assert l != 0 || x == 0
          : "l == 0 while x = "
              + x
              + " ls = "
              + Arrays.toString(_ls); // Exponent of zero is always zero
      long t; // Remove extra scaling
      while (l != 0 && (t = l / 10) * 10 == l) {
        l = t;
        x++;
      }
      // Compute per-chunk min/max
      double d = l * PrettyPrint.pow10(x);
      if (d < min) {
        min = d;
        llo = l;
        xlo = x;
      }
      if (d > max) {
        max = d;
        lhi = l;
        xhi = x;
      }
      floatOverflow = l < Integer.MIN_VALUE + 1 || l > Integer.MAX_VALUE;
      xmin = Math.min(xmin, x);
    }
    if (sparse) { // sparse?  then compare vs implied 0s
      if (min > 0) {
        min = 0;
        llo = 0;
        xlo = 0;
      }
      if (max < 0) {
        max = 0;
        lhi = 0;
        xhi = 0;
      }
      xmin = Math.min(xmin, 0);
    }
    // Constant column?
    if (_naCnt == 0 && (min == max)) {
      if (llo == lhi && xlo == 0 && xhi == 0) return new C0LChunk(llo, _len);
      else if ((long) min == min) return new C0LChunk((long) min, _len);
      else return new C0DChunk(min, _len);
    }

    // Compute min & max, as scaled integers in the xmin scale.
    // Check for overflow along the way
    boolean overflow = ((xhi - xmin) >= p10iLength) || ((xlo - xmin) >= p10iLength);
    long lemax = 0, lemin = 0;
    if (!overflow) { // Can at least get the power-of-10 without overflow
      long pow10 = PrettyPrint.pow10i(xhi - xmin);
      lemax = lhi * pow10;
      // Hacker's Delight, Section 2-13, checking overflow.
      // Note that the power-10 is always positive, so the test devolves this:
      if ((lemax / pow10) != lhi) overflow = true;
      // Note that xlo might be > xmin; e.g. { 101e-49 , 1e-48}.
      long pow10lo = PrettyPrint.pow10i(xlo - xmin);
      lemin = llo * pow10lo;
      if ((lemin / pow10lo) != llo) overflow = true;
    }

    // Boolean column?
    if (max == 1 && min == 0 && xmin == 0 && !overflow) {
      if (sparse) { // Very sparse?
        return _naCnt == 0
            ? new CX0Chunk(_len, sparseLen(), bufS(0)) // No NAs, can store as sparse bitvector
            : new CXIChunk(_len, sparseLen(), 1, bufS(1)); // have NAs, store as sparse 1byte values
      }

      int bpv = _catCnt + _naCnt > 0 ? 2 : 1; // Bit-vector
      byte[] cbuf = bufB(bpv);
      return new CBSChunk(cbuf, cbuf[0], cbuf[1]);
    }

    final boolean fpoint = xmin < 0 || min < Long.MIN_VALUE || max > Long.MAX_VALUE;

    if (sparse) {
      if (fpoint) return new CXDChunk(_len, sparseLen(), 8, bufD(8));
      int sz = 8;
      if (Short.MIN_VALUE <= min && max <= Short.MAX_VALUE) sz = 2;
      else if (Integer.MIN_VALUE <= min && max <= Integer.MAX_VALUE) sz = 4;
      return new CXIChunk(_len, sparseLen(), sz, bufS(sz));
    }
    // Exponent scaling: replacing numbers like 1.3 with 13e-1.  '13' fits in a
    // byte and we scale the column by 0.1.  A set of numbers like
    // {1.2,23,0.34} then is normalized to always be represented with 2 digits
    // to the right: {1.20,23.00,0.34} and we scale by 100: {120,2300,34}.
    // This set fits in a 2-byte short.

    // We use exponent-scaling for bytes & shorts only; it's uncommon (and not
    // worth it) for larger numbers.  We need to get the exponents to be
    // uniform, so we scale up the largest lmax by the largest scale we need
    // and if that fits in a byte/short - then it's worth compressing.  Other
    // wise we just flip to a float or double representation.
    if (overflow || (fpoint && floatOverflow) || -35 > xmin || xmin > 35) return chunkD();
    final long leRange = leRange(lemin, lemax);
    if (fpoint) {
      if ((int) lemin == lemin && (int) lemax == lemax) {
        if (leRange < 255) // Fits in scaled biased byte?
          return new C1SChunk(bufX(lemin, xmin, C1SChunk._OFF, 0), lemin, PrettyPrint.pow10(xmin));
        if (leRange < 65535) { // we use signed 2B short, add -32k to the bias!
          long bias = 32767 + lemin;
          return new C2SChunk(bufX(bias, xmin, C2SChunk._OFF, 1), bias, PrettyPrint.pow10(xmin));
        }
      }
      if (leRange < 4294967295L) {
        long bias = 2147483647L + lemin;
        return new C4SChunk(bufX(bias, xmin, C4SChunk._OFF, 2), bias, PrettyPrint.pow10(xmin));
      }
      return chunkD();
    } // else an integer column

    // Compress column into a byte
    if (xmin == 0 && 0 <= lemin && lemax <= 255 && ((_naCnt + _catCnt) == 0))
      return new C1NChunk(bufX(0, 0, C1NChunk._OFF, 0));
    if (lemin < Integer.MIN_VALUE) return new C8Chunk(bufX(0, 0, 0, 3));
    if (leRange < 255) { // Span fits in a byte?
      if (0 <= min && max < 255) // Span fits in an unbiased byte?
        return new C1Chunk(bufX(0, 0, C1Chunk._OFF, 0));
      return new C1SChunk(bufX(lemin, xmin, C1SChunk._OFF, 0), lemin, PrettyPrint.pow10i(xmin));
    }

    // Compress column into a short
    if (leRange < 65535) { // Span fits in a biased short?
      if (xmin == 0
          && Short.MIN_VALUE < lemin
          && lemax <= Short.MAX_VALUE) // Span fits in an unbiased short?
        return new C2Chunk(bufX(0, 0, C2Chunk._OFF, 1));
      long bias = (lemin - (Short.MIN_VALUE + 1));
      return new C2SChunk(bufX(bias, xmin, C2SChunk._OFF, 1), bias, PrettyPrint.pow10i(xmin));
    }
    // Compress column into ints
    if (Integer.MIN_VALUE < min && max <= Integer.MAX_VALUE) return new C4Chunk(bufX(0, 0, 0, 2));
    return new C8Chunk(bufX(0, 0, 0, 3));
  }
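Following the code through the example in the comment above, {1.2, 23, 0.34}, as a hedged trace (and assuming leRange(lemin, lemax) is simply lemax - lemin): after trailing-zero stripping the pairs are (12, -1), (23, 0) and (34, -2), so xmin = -2, lemin = 34 and lemax = 23 * 10^(0 - (-2)) = 2,300. Because xmin < 0 the column is treated as floating point; the scaled range 2,266 misses the byte case but fits 16 bits, so compress2 returns a C2SChunk whose shorts carry a bias of 32,767 + 34 and a decode scale of 10^-2, matching the 2-byte-short outcome the comment promises.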
Example #18
  private TwoDimTable createScoringHistoryTable(SharedTreeModel.SharedTreeOutput _output) {
    List<String> colHeaders = new ArrayList<>();
    List<String> colTypes = new ArrayList<>();
    List<String> colFormat = new ArrayList<>();
    colHeaders.add("Timestamp");
    colTypes.add("string");
    colFormat.add("%s");
    colHeaders.add("Duration");
    colTypes.add("string");
    colFormat.add("%s");
    colHeaders.add("Number of Trees");
    colTypes.add("long");
    colFormat.add("%d");
    colHeaders.add("Training MSE");
    colTypes.add("double");
    colFormat.add("%.5f");
    if (_output.isClassifier()) {
      colHeaders.add("Training LogLoss");
      colTypes.add("double");
      colFormat.add("%.5f");
    }
    if (_output.getModelCategory() == ModelCategory.Binomial) {
      colHeaders.add("Training AUC");
      colTypes.add("double");
      colFormat.add("%.5f");
    }
    if (_output.getModelCategory() == ModelCategory.Binomial
        || _output.getModelCategory() == ModelCategory.Multinomial) {
      colHeaders.add("Training Classification Error");
      colTypes.add("double");
      colFormat.add("%.5f");
    }

    if (valid() != null) {
      colHeaders.add("Validation MSE");
      colTypes.add("double");
      colFormat.add("%.5f");
      if (_output.isClassifier()) {
        colHeaders.add("Validation LogLoss");
        colTypes.add("double");
        colFormat.add("%.5f");
      }
      if (_output.getModelCategory() == ModelCategory.Binomial) {
        colHeaders.add("Validation AUC");
        colTypes.add("double");
        colFormat.add("%.5f");
      }
      if (_output.isClassifier()) {
        colHeaders.add("Validation Classification Error");
        colTypes.add("double");
        colFormat.add("%.5f");
      }
    }

    int rows = 0;
    for (int i = 1; i < _output._scored_train.length; i++) {
      if (!Double.isNaN(_output._scored_train[i]._mse)) ++rows;
    }
    TwoDimTable table =
        new TwoDimTable(
            "Scoring History",
            null,
            new String[rows],
            colHeaders.toArray(new String[0]),
            colTypes.toArray(new String[0]),
            colFormat.toArray(new String[0]),
            "");
    int row = 0;
    for (int i = 1; i < _output._scored_train.length; i++) {
      if (Double.isNaN(_output._scored_train[i]._mse)) continue;
      int col = 0;
      assert (row < table.getRowDim());
      assert (col < table.getColDim());
      DateTimeFormatter fmt = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss");
      table.set(row, col++, fmt.print(_output._training_time_ms[i]));
      table.set(row, col++, PrettyPrint.msecs(_output._training_time_ms[i] - _start_time, true));
      table.set(row, col++, i);
      ScoreKeeper st = _output._scored_train[i];
      table.set(row, col++, st._mse);
      if (_output.isClassifier()) table.set(row, col++, st._logloss);
      if (_output.getModelCategory() == ModelCategory.Binomial) table.set(row, col++, st._AUC);
      if (_output.isClassifier()) table.set(row, col++, st._classError);

      if (_valid != null) {
        st = _output._scored_valid[i];
        table.set(row, col++, st._mse);
        if (_output.isClassifier()) table.set(row, col++, st._logloss);
        if (_output.getModelCategory() == ModelCategory.Binomial) table.set(row, col++, st._AUC);
        if (_output.isClassifier()) table.set(row, col++, st._classError);
      }
      row++;
    }
    return table;
  }
Example #19
  /**
   * Create a summary table
   *
   * @return a TwoDimTable summarizing the status of the neuron layers
   */
  TwoDimTable createSummaryTable() {
    Neurons[] neurons = DeepLearningTask.makeNeuronsForTesting(this);
    long byte_size = new AutoBuffer().put(this).buf().length;
    TwoDimTable table =
        new TwoDimTable(
            "Status of Neuron Layers",
            (get_params()._diagnostics ? "" : "diagnostics disabled, ")
                + (!get_params()._autoencoder ? ("predicting " + _train.lastVecName() + ", ") : "")
                + (get_params()._autoencoder
                    ? "auto-encoder"
                    : _classification
                        ? (units[units.length - 1] + "-class classification")
                        : "regression")
                + ", "
                + get_params()._distribution
                + " distribution, "
                + get_params()._loss
                + " loss, "
                + String.format("%,d", size())
                + " weights/biases, "
                + PrettyPrint.bytes(byte_size)
                + ", "
                + String.format("%,d", get_processed_global())
                + " training samples, "
                + "mini-batch size "
                + String.format("%,d", get_params()._mini_batch_size),
            new String[neurons.length],
            new String[] {
              "Layer",
              "Units",
              "Type",
              "Dropout",
              "L1",
              "L2",
              "Mean Rate",
              "Rate RMS",
              "Momentum",
              "Mean Weight",
              "Weight RMS",
              "Mean Bias",
              "Bias RMS"
            },
            new String[] {
              "int", "int", "string", "double", "double", "double", "double", "double", "double",
              "double", "double", "double", "double"
            },
            new String[] {
              "%d",
              "%d",
              "%s",
              "%2.2f %%",
              "%5f",
              "%5f",
              "%5f",
              "%5f",
              "%5f",
              "%5f",
              "%5f",
              "%5f",
              "%5f"
            },
            "");

    for (int i = 0; i < neurons.length; ++i) {
      table.set(i, 0, i + 1);
      table.set(i, 1, neurons[i].units);
      table.set(i, 2, neurons[i].getClass().getSimpleName());

      if (i == 0) {
        table.set(i, 3, neurons[i].params._input_dropout_ratio * 100);
        continue;
      } else if (i < neurons.length - 1) {
        if (neurons[i].params._hidden_dropout_ratios == null) {
          table.set(i, 3, 0);
        } else {
          table.set(i, 3, neurons[i].params._hidden_dropout_ratios[i - 1] * 100);
        }
      }
      table.set(i, 4, neurons[i].params._l1);
      table.set(i, 5, neurons[i].params._l2);
      table.set(
          i,
          6,
          (get_params()._adaptive_rate ? mean_rate[i] : neurons[i].rate(get_processed_total())));
      table.set(i, 7, (get_params()._adaptive_rate ? rms_rate[i] : 0));
      table.set(i, 8, get_params()._adaptive_rate ? 0 : neurons[i].momentum(get_processed_total()));
      table.set(i, 9, mean_weight[i]);
      table.set(i, 10, rms_weight[i]);
      table.set(i, 11, mean_bias[i]);
      table.set(i, 12, rms_bias[i]);
    }
    summaryTable = table;
    return summaryTable;
  }