// compute model size (number of model parameters required for making predictions)
 // momenta are not counted here, but they are needed for model building
 /**
  * Returns the number of model parameters required for making predictions.
  *
  * <p>The total is the sum of {@code size()} over every non-null entry of {@code
  * dense_row_weights} and {@code dense_col_weights}, plus {@code size()} of every entry of {@code
  * biases}. Momenta are not included.
  *
  * @return the accumulated size of all weight matrices and bias vectors
  */
 public long size() {
   long total = 0;

   // Null weight entries are skipped rather than counted.
   for (Storage.Matrix rowWeights : dense_row_weights) {
     if (rowWeights == null) {
       continue;
     }
     total += rowWeights.size();
   }
   for (Storage.Matrix colWeights : dense_col_weights) {
     if (colWeights == null) {
       continue;
     }
     total += colWeights.size();
   }

   // Bias entries are not null-checked; every one contributes to the total.
   for (Storage.Vector biasVector : biases) {
     total += biasVector.size();
   }

   return total;
 }
 /**
  * Divides all weights and biases by a real-valued number.
  *
  * <p>The same divisor is also applied to the average activations (when {@code avg_activations}
  * is non-null), to the weight and bias momenta (when {@link #has_momenta()} is true), and to the
  * AdaDelta state returned by {@code get_ada_dx_g} (when {@link #adaDelta()} is true). The results
  * of the {@code ArrayUtils.div} calls are not captured, so the division is expected to update the
  * backing {@code raw()} arrays in place.
  *
  * @param N the divisor applied to every value; it is not checked against zero
  */
 protected void div(float N) {
   // Weights: one entry per index of dense_row_weights, fetched through get_weights(idx).
   final int weightCount = dense_row_weights.length;
   for (int idx = 0; idx < weightCount; ++idx) {
     ArrayUtils.div(get_weights(idx).raw(), N);
   }

   for (Storage.Vector biasVector : biases) {
     ArrayUtils.div(biasVector.raw(), N);
   }

   // Average activations are optional and only scaled when present.
   if (avg_activations != null) {
     for (Storage.Vector avgActivation : avg_activations) {
       ArrayUtils.div(avgActivation.raw(), N);
     }
   }

   if (has_momenta()) {
     final int momentaCount = dense_row_weights_momenta.length;
     for (int idx = 0; idx < momentaCount; ++idx) {
       ArrayUtils.div(get_weights_momenta(idx).raw(), N);
     }
     for (Storage.Vector biasMomentum : biases_momenta) {
       ArrayUtils.div(biasMomentum.raw(), N);
     }
   }

   if (adaDelta()) {
     final int adaCount = dense_row_ada_dx_g.length;
     for (int idx = 0; idx < adaCount; ++idx) {
       ArrayUtils.div(get_ada_dx_g(idx).raw(), N);
     }
   }
 }