Example #1
  /**
   * Initialize the ModelBuilder, validating all arguments and preparing the training frame. This
   * call is expected to be overridden in the subclasses and each subclass will start with
   * "super.init();". This call is made by the front-end whenever the GUI is clicked, and needs to
   * be fast; heavy-weight prep needs to wait for the trainModel() call.
   *
   * <p>Validate the requested ntrees; precompute actual ntrees. Validate the number of classes to
   * predict on; validate a checkpoint.
   */
  @Override
  public void init(boolean expensive) {
    super.init(expensive);
    if (H2O.ARGS.client && _parms._build_tree_one_node)
      error("_build_tree_one_node", "Cannot run on a single node in client mode");
    if (_vresponse != null) _vresponse_key = _vresponse._key;
    if (_response != null) _response_key = _response._key;
    if (_nclass > SharedTreeModel.SharedTreeParameters.MAX_SUPPORTED_LEVELS)
      error("_nclass", "Too many levels in response column!");

    if (_parms._min_rows <= 0) error("_min_rows", "Requested min_rows must be greater than 0");

    if (_parms._ntrees < 1 || _parms._ntrees > 100000)
      error("_ntrees", "Requested ntrees must be between 1 and 100000");
    _ntrees = _parms._ntrees; // Total trees in final model
    if (_parms._checkpoint) { // Asking to continue from checkpoint?
      Value cv = DKV.get(_parms._model_id);
      if (cv != null) { // Look for prior model
        M checkpointModel = cv.get();
        if (_parms._ntrees < checkpointModel._output._ntrees + 1)
          error(
              "_ntrees",
              "Requested ntrees must be between "
                  + (checkpointModel._output._ntrees + 1)
                  + " and 100000");
        _ntrees = _parms._ntrees - checkpointModel._output._ntrees; // Needed trees
      }
    }
    if (_parms._nbins <= 1) error("_nbins", "_nbins must be > 1.");
    if (_parms._nbins >= 1 << 16) error("_nbins", "_nbins must be < " + (1 << 16));
    if (_parms._nbins_cats <= 1) error("_nbins_cats", "_nbins_cats must be > 1.");
    if (_parms._nbins_cats >= 1 << 16) error("_nbins_cats", "_nbins_cats must be < " + (1 << 16));
    if (_parms._max_depth <= 0) error("_max_depth", "_max_depth must be > 0.");
    if (_parms._min_rows <= 0) error("_min_rows", "_min_rows must be > 0.");
    if (_parms._distribution == Distributions.Family.tweedie) {
      _parms._distribution.tweedie.p = _parms._tweedie_power;
    }
    if (_train != null) {
      double sumWeights =
          _train.numRows() * (hasWeightCol() ? _train.vec(_parms._weights_column).mean() : 1);
      // Need at least 2*min_rows weighted rows to split even once
      if (sumWeights < 2 * _parms._min_rows)
        error(
            "_min_rows",
            "The dataset size is too small to split for min_rows="
                + _parms._min_rows
                + ": must have at least "
                + 2 * _parms._min_rows
                + " (weighted) rows, but have only "
                + sumWeights
                + ".");
    }
    if (_train != null) _ncols = _train.numCols() - 1 - numSpecialCols();
  }
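The Javadoc contract above (subclasses override init() and begin by delegating to super.init()) is easiest to see as a skeleton. A minimal sketch follows; the _my_rate parameter is invented for illustration and is not part of the code above.

  // Hypothetical subclass override of the documented init() contract.
  // _my_rate is an invented parameter, shown only to illustrate the pattern.
  @Override
  public void init(boolean expensive) {
    super.init(expensive); // Delegate first: shared argument checks and frame prep
    // Cheap validation only; heavy-weight prep waits for trainModel()
    if (_parms._my_rate <= 0 || _parms._my_rate > 1)
      error("_my_rate", "_my_rate must be in (0, 1].");
  }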
Example #2
  /**
   * Initialize the ModelBuilder, validating all arguments and preparing the training frame. This
   * call is expected to be overridden in the subclasses and each subclass will start with
   * "super.init();". This call is made by the front-end whenever the GUI is clicked, and needs to
   * be fast; heavy-weight prep needs to wait for the trainModel() call.
   *
   * <p>Validate the requested ntrees; precompute actual ntrees. Validate the number of classes to
   * predict on; validate a checkpoint.
   */
  @Override
  public void init(boolean expensive) {
    super.init(expensive);
    if (H2O.ARGS.client && _parms._build_tree_one_node)
      error("_build_tree_one_node", "Cannot run on a single node in client mode");
    if (_vresponse != null) _vresponse_key = _vresponse._key;
    if (_response != null) _response_key = _response._key;

    if (_parms._min_rows <= 0) error("_min_rows", "Requested min_rows must be greater than 0");

    if (_parms._ntrees < 1 || _parms._ntrees > MAX_NTREES)
      error("_ntrees", "Requested ntrees must be between 1 and " + MAX_NTREES);
    _ntrees = _parms._ntrees; // Total trees in final model
    if (_parms.hasCheckpoint()) { // Asking to continue from checkpoint?
      Value cv = DKV.get(_parms._checkpoint);
      if (cv != null) { // Look for prior model
        M checkpointModel = cv.get();
        try {
          _parms.validateWithCheckpoint(checkpointModel._parms);
        } catch (H2OIllegalArgumentException e) {
          error(e.values.get("argument").toString(), e.values.get("value").toString());
        }
        if (_parms._ntrees < checkpointModel._output._ntrees + 1)
          error(
              "_ntrees",
              "If checkpoint is specified then requested ntrees must be higher than "
                  + (checkpointModel._output._ntrees + 1));

        // Compute number of trees to build for this checkpoint
        _ntrees = _parms._ntrees - checkpointModel._output._ntrees; // Needed trees
      }
    }
    if (_parms._nbins <= 1) error("_nbins", "_nbins must be > 1.");
    if (_parms._nbins >= 1 << 16) error("_nbins", "_nbins must be < " + (1 << 16));
    if (_parms._nbins_cats <= 1) error("_nbins_cats", "_nbins_cats must be > 1.");
    if (_parms._nbins_cats >= 1 << 16) error("_nbins_cats", "_nbins_cats must be < " + (1 << 16));
    if (_parms._max_depth <= 0) error("_max_depth", "_max_depth must be > 0.");
    if (_parms._min_rows <= 0) error("_min_rows", "_min_rows must be > 0.");
    if (_train != null) {
      double sumWeights =
          _train.numRows() * (hasWeightCol() ? _train.vec(_parms._weights_column).mean() : 1);
      // Need at least 2*min_rows weighted rows to split even once
      if (sumWeights < 2 * _parms._min_rows)
        error(
            "_min_rows",
            "The dataset size is too small to split for min_rows="
                + _parms._min_rows
                + ": must have at least "
                + 2 * _parms._min_rows
                + " (weighted) rows, but have only "
                + sumWeights
                + ".");
    }
    if (_train != null) _ncols = _train.numCols() - 1 - numSpecialCols();
  }
  // TODO: almost identical to ModelsHandler; refactor
  public static ModelMetrics getFromDKV(Key key) {
    if (null == key) throw new IllegalArgumentException("Got null key.");

    Value v = DKV.get(key);
    if (null == v) throw new IllegalArgumentException("Did not find key: " + key.toString());

    Iced ice = v.get();
    if (!(ice instanceof ModelMetrics))
      throw new IllegalArgumentException(
          "Expected a Model for key: " + key.toString() + "; got a: " + ice.getClass());

    return (ModelMetrics) ice;
  }
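A typical call site treats all three failure modes (null key, missing key, value of the wrong type) as one IllegalArgumentException. A small usage sketch, with the key name invented for illustration:

  // Hypothetical usage of getFromDKV; "example_metrics" is an invented key name.
  try {
    ModelMetrics mm = getFromDKV(Key.make("example_metrics"));
    System.out.println("Fetched metrics: " + mm);
  } catch (IllegalArgumentException iae) {
    System.err.println("Lookup failed: " + iae.getMessage());
  }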
Example #4
  public static String store2Hdfs(Key srcKey) {
    assert srcKey._kb[0] != Key.ARRAYLET_CHUNK;
    assert PersistHdfs.getPathForKey(srcKey) != null; // Validate key name
    Value v = DKV.get(srcKey);
    if (v == null) return "Key " + srcKey + " not found";
    if (!v.isArray()) { // Simple chunk?
      v.setHdfs(); // Set to HDFS and be done
      return null; // Success
    }

    // For ValueArrays, make the .hex header
    ValueArray ary = v.get();
    String err = PersistHdfs.freeze(srcKey, ary);
    if (err != null) return err;

    // The task that manages which chunks to write next,
    // stored under a known key
    TaskStore2HDFS ts = new TaskStore2HDFS(srcKey);
    Key selfKey = ts.selfKey();
    UKV.put(selfKey, ts);

    // Then start writing chunks in-order, beginning with the zero chunk
    RPC.call(ts.chunkHome(), ts);

    // Watch the progress key until it gets removed or an error appears
    long idx = 0;
    while ((ts = UKV.get(selfKey, TaskStore2HDFS.class)) != null) {
      if (ts._indexFrom != idx) {
        System.out.print(" " + idx + "/" + ary.chunks());
        idx = ts._indexFrom;
      }
      if (ts._err != null) { // Found an error?
        UKV.remove(selfKey); // Cleanup & report
        return ts._err;
      }
      try {
        Thread.sleep(100);
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt(); // Restore the interrupt status instead of swallowing it
      }
    }
    System.out.println(" " + ary.chunks() + "/" + ary.chunks());

    // PersistHdfs.refreshHDFSKeys();
    return null;
  }
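Callers use the String return as an error channel: null means the value reached HDFS, anything else is the error text. A minimal usage sketch, with an invented key name:

  // Hypothetical caller; "bytes.hex" is an invented key name.
  String err = store2Hdfs(Key.make("bytes.hex"));
  if (err != null) System.err.println("HDFS store failed: " + err);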
Example #5
  @SuppressWarnings("unused") // called through reflection by RequestServer
  public JobsV3 fetch(int version, JobsV3 s) {
    Key key = s.job_id.key();
    Value val = DKV.get(key);
    if (null == val) throw new IllegalArgumentException("Job is missing");
    Iced ice = val.get();
    if (!(ice instanceof Job))
      throw new IllegalArgumentException("Must be a Job not a " + ice.getClass());

    Job j = (Job) ice;
    s.jobs = new JobV3[1];
    // s.fillFromImpl(jobs);
    try {
      s.jobs[0] = (JobV3) Schema.schema(version, j).fillFromImpl(j);
    }
    // no special schema for this job subclass, so fall back to JobV3
    catch (H2ONotFoundArgumentException e) {
      s.jobs[0] = new JobV3().fillFromImpl(j);
    }
    return s;
  }
Example #6
 static boolean checkSaneFrame_impl() {
   for (Key k : H2O.localKeySet()) {
     Value val = H2O.raw_get(k);
     if (val.isFrame()) {
       Frame fr = val.get();
       Vec[] vecs = fr.vecs();
       for (int i = 0; i < vecs.length; i++) {
         Vec v = vecs[i];
         if (DKV.get(v._key) == null) {
           System.err.println(
               "Frame "
                   + fr._key
                   + " in the DKV, is missing Vec "
                   + v._key
                   + ", name="
                   + fr._names[i]);
           return false;
         }
       }
     }
   }
   return true;
 }
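A scan like this is usually run only under assertions so it costs nothing in production. One plausible wrapper, suggested by the _impl suffix but assumed here rather than taken from the source:

 // Assumed wrapper: the scan runs only when the JVM is started with -ea.
 static boolean checkSaneFrame() {
   assert checkSaneFrame_impl();
   return true;
 }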
Example #7
  /**
   * Copy properties "of the same name" from one POJO to the other. If the fields are named
   * consistently (both sides have fields named "_foo" and/or "bar") this acts like Apache Commons
   * BeanUtils.copyProperties(). If one side has leading underscores and the other does not, the
   * names are conformed according to the field_naming parameter.
   *
   * @param dest Destination POJO
   * @param origin Origin POJO
   * @param field_naming Are the fields named consistently, or does one side have underscores?
   * @param skip_fields Array of origin or destination field names to skip
   * @param only_fields Array of origin or destination field names to include; ones not in this list
   *     will be skipped
   */
  public static void copyProperties(
      Object dest,
      Object origin,
      FieldNaming field_naming,
      String[] skip_fields,
      String[] only_fields) {
    if (null == dest || null == origin) return;

    Field[] dest_fields = Weaver.getWovenFields(dest.getClass());
    Field[] orig_fields = Weaver.getWovenFields(origin.getClass());

    for (Field orig_field : orig_fields) {
      String origin_name = orig_field.getName();

      if (skip_fields != null && ArrayUtils.contains(skip_fields, origin_name)) continue;

      if (only_fields != null && !ArrayUtils.contains(only_fields, origin_name)) continue;

      String dest_name = null;
      if (field_naming == FieldNaming.CONSISTENT) {
        dest_name = origin_name;
      } else if (field_naming == FieldNaming.DEST_HAS_UNDERSCORES) {
        dest_name = "_" + origin_name;
      } else if (field_naming == FieldNaming.ORIGIN_HAS_UNDERSCORES) {
        dest_name = origin_name.substring(1);
      }

      if (skip_fields != null && ArrayUtils.contains(skip_fields, dest_name)) continue;

      if (only_fields != null && !ArrayUtils.contains(only_fields, dest_name)) continue;

      try {
        Field dest_field = null;
        for (Field fd : dest_fields) {
          if (fd.getName().equals(dest_name)) {
            dest_field = fd;
            break;
          }
        }

        if (dest_field != null) {
          dest_field.setAccessible(true);
          orig_field.setAccessible(true);
          // Log.info("PojoUtils.copyProperties, origin field: " + orig_field + "; destination
          // field: " + dest_field);
          if (null == orig_field.get(origin)) {
            //
            // Assigning null to dest.
            //
            dest_field.set(dest, null);
          } else if (dest_field.getType().isArray()
              && orig_field.getType().isArray()
              && (dest_field.getType().getComponentType()
                  != orig_field.getType().getComponentType())) {
            //
            // Assigning an array to another array.
            //
            // You can't use reflection to set an int[] with an Integer[].  Argh.
            // TODO: other types of arrays. . .
            if (dest_field.getType().getComponentType() == double.class
                && orig_field.getType().getComponentType() == Double.class) {
              //
              // Assigning a Double[] to a double[]: unbox element by element,
              // since reflection cannot cast a Double[] directly to a double[].
              //
              Double[] from = (Double[]) orig_field.get(origin);
              double[] copy = new double[from.length];
              for (int j = 0; j < from.length; j++) copy[j] = from[j];
              dest_field.set(dest, copy);
            } else if (dest_field.getType().getComponentType() == Double.class
                && orig_field.getType().getComponentType() == double.class) {
              //
              // Assigning a double[] to a Double[]: box element by element.
              //
              double[] from = (double[]) orig_field.get(origin);
              Double[] copy = new Double[from.length];
              for (int j = 0; j < from.length; j++) copy[j] = from[j];
              dest_field.set(dest, copy);
            } else if (dest_field.getType().getComponentType() == int.class
                && orig_field.getType().getComponentType() == Integer.class) {
              //
              // Assigning an Integer[] to an int[]: unbox element by element.
              //
              Integer[] from = (Integer[]) orig_field.get(origin);
              int[] copy = new int[from.length];
              for (int j = 0; j < from.length; j++) copy[j] = from[j];
              dest_field.set(dest, copy);
            } else if (dest_field.getType().getComponentType() == Integer.class
                && orig_field.getType().getComponentType() == int.class) {
              //
              // Assigning an int[] to an Integer[]: box element by element.
              //
              int[] from = (int[]) orig_field.get(origin);
              Integer[] copy = new Integer[from.length];
              for (int j = 0; j < from.length; j++) copy[j] = from[j];
              dest_field.set(dest, copy);
            } else if (Schema.class.isAssignableFrom(dest_field.getType().getComponentType())
                && (Schema.getImplClass(
                        (Class<? extends Schema>) dest_field.getType().getComponentType()))
                    .isAssignableFrom(orig_field.getType().getComponentType())) {
              //
              // Assigning an array of impl fields to an array of schema fields, e.g. a
              // DeepLearningParameters[] into a DeepLearningParametersV2[]
              //
              Class dest_component_class = dest_field.getType().getComponentType();
              Schema[] translation =
                  (Schema[])
                      Array.newInstance(
                          dest_component_class, Array.getLength(orig_field.get(origin)));
              int i = 0;
              int version = ((Schema) dest).getSchemaVersion();

              // Look up the schema for each element of the array; if not found fall back to the
              // schema for the base class.
              for (Iced impl : ((Iced[]) orig_field.get(origin))) {
                if (null == impl) {
                  translation[i++] = null;
                } else {
                  Schema s = null;
                  try {
                    s = Schema.schema(version, impl);
                  } catch (H2ONotFoundArgumentException e) {
                    s = ((Schema) dest_field.getType().getComponentType().newInstance());
                  }
                  translation[i++] = s.fillFromImpl(impl);
                }
              }
              dest_field.set(dest, translation);
            } else if (Schema.class.isAssignableFrom(orig_field.getType().getComponentType())
                && Iced.class.isAssignableFrom(dest_field.getType().getComponentType())) {
              //
              // Assigning an array of schema fields to an array of impl fields, e.g. a
              // DeepLearningParametersV2[] into a DeepLearningParameters[]
              //
              // We can't check against the actual impl class I, because we can't instantiate the
              // schema base classes to get the impl class from an instance:
              // dest_field.getType().getComponentType().isAssignableFrom(((Schema)f.getType().getComponentType().newInstance()).getImplClass())) {
              Class dest_component_class = dest_field.getType().getComponentType();
              Iced[] translation =
                  (Iced[])
                      Array.newInstance(
                          dest_component_class, Array.getLength(orig_field.get(origin)));
              int i = 0;
              for (Schema s : ((Schema[]) orig_field.get(origin))) {
                translation[i++] = s.createImpl();
              }
              dest_field.set(dest, translation);
            } else {
              throw H2O.fail(
                  "Don't know how to cast an array of: "
                      + orig_field.getType().getComponentType()
                      + " to an array of: "
                      + dest_field.getType().getComponentType());
            }
            // end of array handling
          } else if (dest_field.getType() == Key.class
              && Keyed.class.isAssignableFrom(orig_field.getType())) {
            //
            // Assigning a Keyed (e.g., a Frame or Model) to a Key.
            //
            dest_field.set(dest, ((Keyed) orig_field.get(origin))._key);
          } else if (orig_field.getType() == Key.class
              && Keyed.class.isAssignableFrom(dest_field.getType())) {
            //
            // Assigning a Key (for e.g., a Frame or Model) to a Keyed (e.g., a Frame or Model).
            //
            Value v = DKV.get((Key) orig_field.get(origin));
            dest_field.set(dest, (null == v ? null : v.get()));
          } else if (KeyV3.class.isAssignableFrom(dest_field.getType())
              && Keyed.class.isAssignableFrom(orig_field.getType())) {
            //
            // Assigning a Keyed (e.g., a Frame or Model) to a KeyV3.
            //
            dest_field.set(
                dest,
                KeyV3.make(
                    ((Class<? extends KeyV3>) dest_field.getType()),
                    ((Keyed) orig_field.get(origin))._key));
          } else if (KeyV3.class.isAssignableFrom(orig_field.getType())
              && Keyed.class.isAssignableFrom(dest_field.getType())) {
            //
            // Assigning a KeyV3 (e.g., for a Frame or Model) to a Keyed (e.g., a Frame or Model).
            //
            KeyV3 k = (KeyV3) orig_field.get(origin);
            Value v = DKV.get(Key.make(k.name));
            dest_field.set(dest, (null == v ? null : v.get()));
          } else if (KeyV3.class.isAssignableFrom(dest_field.getType())
              && Key.class.isAssignableFrom(orig_field.getType())) {
            //
            // Assigning a Key to a KeyV3.
            //
            dest_field.set(
                dest,
                KeyV3.make(
                    ((Class<? extends KeyV3>) dest_field.getType()), (Key) orig_field.get(origin)));
          } else if (KeyV3.class.isAssignableFrom(orig_field.getType())
              && Key.class.isAssignableFrom(dest_field.getType())) {
            //
            // Assigning a KeyV3 to a Key.
            //
            KeyV3 k = (KeyV3) orig_field.get(origin);
            dest_field.set(dest, (null == k.name ? null : Key.make(k.name)));
          } else if (dest_field.getType() == Pattern.class
              && String.class.isAssignableFrom(orig_field.getType())) {
            //
            // Assigning a String to a Pattern.
            //
            dest_field.set(dest, Pattern.compile((String) orig_field.get(origin)));
          } else if (orig_field.getType() == Pattern.class
              && String.class.isAssignableFrom(dest_field.getType())) {
            //
            // We are assigning a Pattern to a String.
            //
            dest_field.set(dest, orig_field.get(origin).toString());
          } else if (dest_field.getType() == FrameV3.ColSpecifierV3.class
              && String.class.isAssignableFrom(orig_field.getType())) {
            //
            // Assigning a String to a ColSpecifier.  Note that we currently support only the
            // colname, not a frame name too.
            //
            dest_field.set(dest, new FrameV3.ColSpecifierV3((String) orig_field.get(origin)));
          } else if (orig_field.getType() == FrameV3.ColSpecifierV3.class
              && String.class.isAssignableFrom(dest_field.getType())) {
            //
            // We are assigning a ColSpecifierV3 to a String.  The column_name gets copied.
            //
            dest_field.set(dest, ((FrameV3.ColSpecifierV3) orig_field.get(origin)).column_name);
          } else if (Enum.class.isAssignableFrom(dest_field.getType())
              && String.class.isAssignableFrom(orig_field.getType())) {
            //
            // Assigning a String into an enum field.
            //
            Class<Enum> dest_class = (Class<Enum>) dest_field.getType();
            dest_field.set(dest, Enum.valueOf(dest_class, (String) orig_field.get(origin)));
          } else if (Enum.class.isAssignableFrom(orig_field.getType())
              && String.class.isAssignableFrom(dest_field.getType())) {
            //
            // Assigning an enum field into a String.
            //
            Object o = orig_field.get(origin);
            dest_field.set(dest, (o == null ? null : o.toString()));
          } else if (Schema.class.isAssignableFrom(dest_field.getType())
              && Schema.getImplClass((Class<? extends Schema>) dest_field.getType())
                  .isAssignableFrom(orig_field.getType())) {
            //
            // Assigning an impl field into a schema field, e.g. a DeepLearningParameters into a
            // DeepLearningParametersV2.
            //
            dest_field.set(
                dest,
                Schema.schema(
                        /* ((Schema)dest).getSchemaVersion() TODO: remove HACK!! */ 3,
                        (Class<? extends Iced>) orig_field.get(origin).getClass())
                    .fillFromImpl((Iced) orig_field.get(origin)));
          } else if (Schema.class.isAssignableFrom(orig_field.getType())
              && Schema.getImplClass((Class<? extends Schema>) orig_field.getType())
                  .isAssignableFrom(dest_field.getType())) {
            //
            // Assigning a schema field into an impl field, e.g. a DeepLearningParametersV2 into a
            // DeepLearningParameters.
            //
            Schema s = ((Schema) orig_field.get(origin));
            dest_field.set(dest, s.fillImpl(s.createImpl()));
          } else if ((Schema.class.isAssignableFrom(dest_field.getType())
              && Key.class.isAssignableFrom(orig_field.getType()))) {
            //
            // Assigning an impl field fetched via a Key into a schema field, e.g. a
            // DeepLearningParameters into a DeepLearningParametersV2.
            // Note that unlike the cases above we don't know the type of the impl class until we
            // fetch in the body of the if.
            //
            Key origin_key = (Key) orig_field.get(origin);
            Value v = DKV.get(origin_key);
            if (null == v || null == v.get()) {
              dest_field.set(dest, null);
            } else {
              if (((Schema) dest_field.get(dest))
                  .getImplClass()
                  .isAssignableFrom(v.get().getClass())) {
                Schema s = ((Schema) dest_field.get(dest));
                dest_field.set(
                    dest,
                    Schema.schema(s.getSchemaVersion(), s.getImplClass()).fillFromImpl(v.get()));
              } else {
                Log.err(
                    "Can't fill Schema of type: "
                        + dest_field.getType()
                        + " with value of type: "
                        + v.getClass()
                        + " fetched from Key: "
                        + origin_key);
                dest_field.set(dest, null);
              }
            }
          } else if (Schema.class.isAssignableFrom(orig_field.getType())
              && Key.class.isAssignableFrom(dest_field.getType())) {
            //
            // Assigning a schema field into a Key field, e.g. a DeepLearningV2 into a
            // (DeepLearningParameters) key.
            //
            Schema s = ((Schema) orig_field.get(origin));
            dest_field.set(dest, ((Keyed) s.fillImpl(s.createImpl()))._key);
          } else {
            //
            // Normal case: not doing any type conversion.
            //
            dest_field.set(dest, orig_field.get(origin));
          }
        }
      } catch (IllegalAccessException e) {
        Log.err(
            "Illegal access exception trying to copy field: "
                + origin_name
                + " of class: "
                + origin.getClass()
                + " to field: "
                + dest_name
                + " of class: "
                + dest.getClass());
      } catch (InstantiationException e) {
        Log.err(
            "Instantiation exception trying to copy field: "
                + origin_name
                + " of class: "
                + origin.getClass()
                + " to field: "
                + dest_name
                + " of class: "
                + dest.getClass());
      }
    }
  }
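As a usage note, the field_naming parameter is what lets a schema-side POJO with bare field names fill an impl-side POJO whose fields carry leading underscores. A hedged sketch, assuming Weaver can reflect both classes; the two POJOs are invented stand-ins, not real H2O types:

  // Hypothetical POJO pair, invented for illustration.
  public static class ExampleSchema { public int ntrees; }
  public static class ExampleImpl { public int _ntrees; }

  ExampleSchema schema = new ExampleSchema();
  schema.ntrees = 50;
  ExampleImpl impl = new ExampleImpl();
  // "ntrees" fills "_ntrees" because the destination side has the underscores;
  // null skip/only filters mean every matching field is copied.
  copyProperties(impl, schema, FieldNaming.DEST_HAS_UNDERSCORES, null, null);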