Example #1
  @Override
  protected Response serve() {
    try {
      // pull everything local
      Log.info("ExportFiles processing (" + path + ")");
      Value v = DKV.get(src_key); // fetch once; null means the key is absent
      if (v == null)
        throw new IllegalArgumentException(src_key.toString() + " not found.");
      Object value = v.get();
      // create a stream to read the entire VA or Frame
      if (!(value instanceof ValueArray) && !(value instanceof Frame))
        throw new UnsupportedOperationException("Can only export Frames or ValueArrays.");
      InputStream csv =
          value instanceof ValueArray
              ? new ValueArray.CsvVAStream((ValueArray) value, null)
              : ((Frame) value).toCSV(true);
      String p2 = path.toLowerCase();
      if (p2.startsWith("hdfs://") || p2.startsWith("s3n://")) serveHdfs(csv);
      else serveLocalDisk(csv);

      return RequestBuilders.Response.done(this);
    } catch (Throwable t) {
      return RequestBuilders.Response.error(t);
    }
  }
Example #2
 static Frame exec_str(String str, String id) {
   Val val = Exec.exec(str);
   switch (val.type()) {
     case Val.FRM:
       Frame fr = val.getFrame();
       Key k = Key.make(id);
       // Smart delete any prior top-level result
       Iced i = DKV.getGet(k);
       if (i instanceof Lockable) ((Lockable) i).delete();
       else if (i instanceof Keyed) ((Keyed) i).remove();
       else if (i != null)
         throw new IllegalArgumentException("Attempting to overright an unexpected key");
       DKV.put(fr = new Frame(k, fr._names, fr.vecs()));
       System.out.println(fr);
       checkSaneFrame();
       return fr;
     case Val.NUM:
       System.out.println("num= " + val.getNum());
       assert id == null;
       checkSaneFrame();
       return null;
     case Val.STR:
       System.out.println("str= " + val.getStr());
       assert id == null;
       checkSaneFrame();
       return null;
     default:
       throw water.H2O.fail();
   }
 }
Example #3
  /**
   * Score a frame with the given model and return just the metrics.
   *
   * <p>NOTE: ModelMetrics are now always created by model.score().
   */
  @SuppressWarnings("unused") // called through reflection by RequestServer
  public ModelMetricsListSchemaV3 score(int version, ModelMetricsListSchemaV3 s) {
    // parameters checking:
    if (null == s.model) throw new H2OIllegalArgumentException("model", "predict", s.model);
    if (null == DKV.get(s.model.name))
      throw new H2OKeyNotFoundArgumentException("model", "predict", s.model.name);

    if (null == s.frame) throw new H2OIllegalArgumentException("frame", "predict", s.frame);
    if (null == DKV.get(s.frame.name))
      throw new H2OKeyNotFoundArgumentException("frame", "predict", s.frame.name);

    ModelMetricsList parms = s.createAndFillImpl();
    parms
        ._model
        .score(parms._frame, parms._predictions_name)
        .remove(); // throw away predictions, keep metrics as a side-effect
    ModelMetricsListSchemaV3 mm = this.fetch(version, s);

    // TODO: for now only binary predictors write an MM object.
    // For the others cons one up here to return the predictions frame.
    if (null == mm) mm = new ModelMetricsListSchemaV3();

    if (null == mm.model_metrics || 0 == mm.model_metrics.length) {
      Log.warn(
          "Score() did not return a ModelMetrics for model: " + s.model + " on frame: " + s.frame);
    }

    return mm;
  }
Example #4
 public GLMModelV3 make_model(int version, MakeGLMModelV3 args) {
   GLMModel model = DKV.getGet(args.model.key());
   if (model == null) throw new IllegalArgumentException("missing source model " + args.model);
   String[] names = model._output.coefficientNames();
   Map<String, Double> coefs = model.coefficients();
   for (int i = 0; i < args.names.length; ++i) coefs.put(args.names[i], args.beta[i]);
   double[] beta = model.beta().clone();
   for (int i = 0; i < beta.length; ++i) beta[i] = coefs.get(names[i]);
   GLMModel m =
       new GLMModel(
           args.dest != null ? args.dest.key() : Key.make(),
           model._parms,
           null,
           new double[] {.5},
           Double.NaN,
           Double.NaN,
           -1);
   DataInfo dinfo = model.dinfo();
   dinfo.setPredictorTransform(TransformType.NONE);
   // GLMOutput(DataInfo dinfo, String[] column_names, String[][] domains, String[]
   // coefficient_names, boolean binomial) {
   m._output =
       new GLMOutput(
           model.dinfo(),
           model._output._names,
           model._output._domains,
           model._output.coefficientNames(),
           model._output._binomial,
           beta);
   DKV.put(m._key, m);
   GLMModelV3 res = new GLMModelV3();
   res.fillFromImpl(m);
   return res;
 }
Example #5
 @Override
 public float progress() {
   if (DKV.get(dest()) == null) return 0;
   GLMModel m = DKV.get(dest()).get();
   float progress = (float) m.iteration() / (float) max_iter; // TODO, do something smarter here
   return progress;
 }
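One hedged way to address the TODO above (illustrative only; Math.min/Math.max are plain java.lang.Math): clamp the ratio so callers never see a progress value outside [0, 1], e.g. if iteration() momentarily overshoots max_iter.

    // Illustrative sketch, not part of the original source:
    float clamped = Math.max(0f, Math.min(1f, (float) m.iteration() / (float) max_iter));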
Example #6
  /** Score a frame with the given model and return the metrics AND the prediction frame. */
  @SuppressWarnings("unused") // called through reflection by RequestServer
  public JobV3 predict2(int version, final ModelMetricsListSchemaV3 s) {
    // parameters checking:
    if (null == s.model) throw new H2OIllegalArgumentException("model", "predict", s.model);
    if (null == DKV.get(s.model.name))
      throw new H2OKeyNotFoundArgumentException("model", "predict", s.model.name);

    if (null == s.frame) throw new H2OIllegalArgumentException("frame", "predict", s.frame);
    if (null == DKV.get(s.frame.name))
      throw new H2OKeyNotFoundArgumentException("frame", "predict", s.frame.name);

    final ModelMetricsList parms = s.createAndFillImpl();

    // predict2 does not return modelmetrics, so cannot handle deeplearning: reconstruction_error
    // (anomaly) or GLRM: reconstruct and archetypes
    // predict2 can handle deeplearning: deepfeatures and predict

     if (null == parms._predictions_name) {
       String prefix = s.deep_features_hidden_layer > 0 ? "deep_features" : "predictions";
       parms._predictions_name =
           prefix
               + Key.make().toString().substring(0, 5)
               + "_"
               + parms._model._key.toString()
               + "_on_"
               + parms._frame._key.toString();
     }

    final Job<Frame> j =
        new Job(Key.make(parms._predictions_name), Frame.class.getName(), "prediction");

    H2O.H2OCountedCompleter work =
        new H2O.H2OCountedCompleter() {
          @Override
          public void compute2() {
            if (s.deep_features_hidden_layer < 0) {
              parms._model.score(parms._frame, parms._predictions_name, j);
            } else {
              Frame predictions =
                  ((Model.DeepFeatures) parms._model)
                      .scoreDeepFeatures(parms._frame, s.deep_features_hidden_layer, j);
              predictions =
                  new Frame(
                      Key.make(parms._predictions_name), predictions.names(), predictions.vecs());
              DKV.put(predictions._key, predictions);
            }
            tryComplete();
          }
        };
    j.start(work, parms._frame.anyVec().nChunks());
    return new JobV3().fillFromImpl(j);
  }
Example #7
 public Vec replace(int col, Vec nv) {
   assert col < _names.length;
   Vec rv = vecs()[col];
   assert rv.group().equals(nv.group());
   _vecs[col] = nv;
   _keys[col] = nv._key;
    if (DKV.get(nv._key) == null) // If not already in KV, put it there
      DKV.put(nv._key, nv);
   return rv;
 }
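A hedged caller-side sketch (assuming the Frame/Vec helpers used in the other examples here, e.g. vec(int), toCategoricalVec(), and Vec.remove()): the displaced Vec returned by replace() belongs to the caller, who should delete it and then republish the Frame header so the change is globally visible.

  // Illustrative only, not part of the original source.
  void replaceWithCategorical(Frame fr, int col) {
    Vec newV = fr.vec(col).toCategoricalVec(); // build the replacement column
    Vec oldV = fr.replace(col, newV);          // swap it in; get the displaced Vec back
    oldV.remove();                             // caller cleans up the old Vec
    DKV.put(fr._key, fr);                      // publish the updated Frame header
  }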
Example #8
 public static Response redirect(JsonObject fromPageResponse, Key rfModelKey) {
   RFModel rfModel = DKV.get(rfModelKey).get();
   ValueArray data = DKV.get(rfModel._dataKey).get();
   return redirect(
       fromPageResponse,
       null,
       rfModelKey,
       rfModel._dataKey,
       rfModel._totalTrees,
       data.numCols() - 1,
       null,
       true,
       false);
 }
Example #9
 /**
  * Creates the value header based on the calculated columns.
  *
  * <p>Also stores the header to its appropriate key. This will be the VA header of the parsed
  * dataset.
  */
 private void createValueArrayHeader() {
   assert (_phase == Pass.TWO);
   Column[] cols = new Column[_ncolumns];
   int off = 0;
   for (int i = 0; i < cols.length; ++i) {
     cols[i] = new Column();
     cols[i]._n = _numRows - _invalidValues[i];
     cols[i]._base = _bases[i];
     assert (char) pow10i(-_scale[i]) == pow10i(-_scale[i])
         : "scale out of bounds!, col = " + i + ", scale = " + _scale[i];
     cols[i]._scale = (char) pow10i(-_scale[i]);
     cols[i]._off = (char) off;
     cols[i]._size = (byte) COL_SIZES[_colTypes[i]];
     cols[i]._domain = _colDomains[i];
     cols[i]._max = _max[i];
     cols[i]._min = _min[i];
     cols[i]._mean = _mean[i];
     cols[i]._sigma = _sigma[i];
     cols[i]._name = _colNames[i];
     off += Math.abs(cols[i]._size);
   }
   // let any pending progress reports finish
   DKV.write_barrier();
   // finally make the value array header
   ValueArray ary = new ValueArray(_resultKey, _numRows, off, cols);
   UKV.put(_resultKey, ary.value());
 }
Example #10
 // ------------------------------------------------------------------------
 // Zipped file; no parallel decompression; decompress into local chunks,
 // parse local chunks; distribute chunks later.
 ParseWriter streamParseZip(final InputStream is, final StreamParseWriter dout, InputStream bvs)
     throws IOException {
   // All output into a fresh pile of NewChunks, one per column
   if (!_setup._parse_type._parallelParseSupported) throw H2O.unimpl();
   StreamData din = new StreamData(is);
   int cidx = 0;
   StreamParseWriter nextChunk = dout;
   int zidx = bvs.read(null, 0, 0); // Back-channel read of chunk index
   assert zidx == 1;
   while (is.available() > 0) {
     int xidx = bvs.read(null, 0, 0); // Back-channel read of chunk index
     if (xidx > zidx) { // Advanced chunk index of underlying ByteVec stream?
       zidx = xidx; // Record advancing of chunk
       nextChunk.close(); // Match output chunks to input zipfile chunks
       if (dout != nextChunk) {
         dout.reduce(nextChunk);
         if (_jobKey != null && ((Job) DKV.getGet(_jobKey)).isCancelledOrCrashed()) break;
       }
       nextChunk = nextChunk.nextChunk();
     }
     parseChunk(cidx++, din, nextChunk);
   }
   parseChunk(cidx, din, nextChunk); // Parse the remaining partial 32K buffer
   nextChunk.close();
   if (dout != nextChunk) dout.reduce(nextChunk);
   return dout;
 }
Example #11
  @Override
  protected Frame predictScoreImpl(Frame orig, Frame adaptedFr, String destination_key) {
    Frame adaptFrm = new Frame(adaptedFr);
    for (int i = 0; i < _parms._k; i++)
      adaptFrm.add("PC" + String.valueOf(i + 1), adaptFrm.anyVec().makeZero());

    new MRTask() {
      @Override
      public void map(Chunk chks[]) {
        double tmp[] = new double[_output._names.length];
        double preds[] = new double[_parms._k];
        for (int row = 0; row < chks[0]._len; row++) {
          double p[] = score0(chks, row, tmp, preds);
          for (int c = 0; c < preds.length; c++) chks[_output._names.length + c].set(row, p[c]);
        }
      }
    }.doAll(adaptFrm);

    // Return the projection into principal component space
    int x = _output._names.length, y = adaptFrm.numCols();
     // this will call vec_impl() and we cannot call the delete() below just yet
     Frame f = adaptFrm.extractFrame(x, y);

    f =
        new Frame(
            (null == destination_key ? Key.make() : Key.make(destination_key)),
            f.names(),
            f.vecs());
    DKV.put(f);
    makeMetricBuilder(null).makeModelMetrics(this, orig);
    return f;
  }
Example #12
    // Delete the metrics that match model and/or frame
    ModelMetricsList delete() {
      ModelMetricsList matches = fetch();

      for (ModelMetrics mm : matches._model_metrics) DKV.remove(mm._key);

      return matches;
    }
Example #13
    // Fetch all metrics that match model and/or frame
    ModelMetricsList fetch() {
      final Key[] modelMetricsKeys =
          KeySnapshot.globalSnapshot()
              .filter(
                  new KeySnapshot.KVFilter() {
                    @Override
                    public boolean filter(KeySnapshot.KeyInfo k) {
                      try {
                        if (!Value.isSubclassOf(k._type, ModelMetrics.class))
                          return false; // Fast-path cutout
                        ModelMetrics mm = DKV.getGet(k._key);
                        // If we're filtering by model filter by Model.  :-)
                        if (_model != null && !mm.isForModel((Model) DKV.getGet(_model._key)))
                          return false;
                        // If we're filtering by frame filter by Frame.  :-)
                        if (_frame != null && !mm.isForFrame((Frame) DKV.getGet(_frame._key)))
                          return false;
                      } catch (NullPointerException | ClassCastException ex) {
                        return false; // Handle all kinds of broken racey key updates
                      }
                      return true;
                    }
                  })
              .keys();

      _model_metrics = new ModelMetrics[modelMetricsKeys.length];
      for (int i = 0; i < modelMetricsKeys.length; i++)
        _model_metrics[i] = DKV.getGet(modelMetricsKeys[i]);
      return this; // Flow coding
    }
Example #14
 // Convert a chunk# into a chunk - does lazy-chunk creation. As chunks are
 // asked-for the first time, we make the Key and an empty backing DVec.
 // Touching the DVec will force the file load.
 @Override
 public Value chunkIdx(int cidx) {
   final long nchk = nChunks();
   assert 0 <= cidx && cidx < nchk;
   Key dkey = chunkKey(cidx);
   Value val1 = DKV.get(dkey); // Check for an existing one... will fetch data as needed
   if (val1 != null) return val1; // Found an existing one?
   // Lazily create a DVec for this chunk
   int len = (int) (cidx < nchk - 1 ? ValueArray.CHUNK_SZ : (_len - chunk2StartElem(cidx)));
   // DVec is just the raw file data with a null-compression scheme
   Value val2 = new Value(dkey, len, null, TypeMap.C1CHUNK, Value.NFS);
   val2.setdsk(); // It is already on disk.
   // Atomically insert: fails on a race, but then return the old version
   Value val3 = DKV.DputIfMatch(dkey, val2, null, null);
   return val3 == null ? val2 : val3;
 }
Example #15
  @Override
  public void modifyParmsForCrossValidationMainModel(ModelBuilder[] cvModelBuilders) {
    _parms._overwrite_with_best_model = false;

    if (_parms._stopping_rounds == 0 && _parms._max_runtime_secs == 0)
      return; // No exciting changes to stopping conditions
    // Extract stopping conditions from each CV model, and compute the best stopping answer
    _parms._stopping_rounds = 0;
    _parms._max_runtime_secs = 0;
    double sum = 0;
    for (ModelBuilder cvmb : cvModelBuilders)
      sum += ((DeepLearningModel) DKV.getGet(cvmb.dest())).last_scored().epoch_counter;
    _parms._epochs = sum / cvModelBuilders.length;
    if (!_parms._quiet_mode) {
      warn(
          "_epochs",
          "Setting optimal _epochs to "
              + _parms._epochs
              + " for cross-validation main model based on early stopping of cross-validation models.");
      warn(
          "_stopping_rounds",
          "Disabling convergence-based early stopping for cross-validation main model.");
      warn(
          "_max_runtime_secs",
          "Disabling maximum allowed runtime for cross-validation main model.");
    }
  }
Example #16
  @Override
  public Response serve() {
    Value v = DKV.get(data_key.value()); // guard against a missing key before dereferencing
    if (v == null) return RequestServer._http404.serve();
    Frame fr = v.get();
    // Build a frame with the selected Vecs
    Frame fr2 = new Frame(new String[0], new Vec[0]);
    int[] idxs = vecs.value();
    for (int idx : idxs) // The selected frame columns
      fr2.add(fr._names[idx], fr._vecs[idx]);
    // Add the class-vec last
    Vec cvec = class_vec.value();
    fr2.add(fr._names[class_vec._colIdx.get()], cvec);
    domain = cvec.domain(); // Class/enum/factor names
    mtrys = features.value() == null ? (int) (Math.sqrt(idxs.length) + 0.5) : features.value();

    DRF drf =
        DRF.start(
            DRF.makeKey(),
            fr2,
            depth.value(),
            ntrees.value(),
            mtrys,
            sample_rate.value(),
            seed.value());

    drf.get(); // Block for result
    cm = drf.cm(); // Get CM result

    return new Response(Response.Status.done, this, -1, -1, null);
  }
Example #17
 @Test
 public void testDomains() {
   Frame frame = parse_test_file("smalldata/junit/weather.csv");
   for (String s : new String[] {"MaxWindSpeed", "RelHumid9am", "Cloud9am"}) {
     Vec v = frame.vec(s);
     Vec newV = v.toCategoricalVec();
     frame.remove(s);
     frame.add(s, newV);
     v.remove();
   }
   DKV.put(frame);
   AggregatorModel.AggregatorParameters parms = new AggregatorModel.AggregatorParameters();
   parms._train = frame._key;
   parms._radius_scale = 10;
   AggregatorModel agg = new Aggregator(parms).trainModel().get();
   Frame output = agg._output._output_frame.get();
   Assert.assertTrue(output.numRows() < 0.5 * frame.numRows());
   boolean same = true;
   for (int i = 0; i < frame.numCols(); ++i) {
     if (frame.vec(i).isCategorical()) {
       same = (frame.domains()[i].length == output.domains()[i].length);
       if (!same) break;
     }
   }
   frame.remove();
   output.remove();
   agg.remove();
   Assert.assertFalse(same);
 }
Example #18
 public Frame(String[] names, Vec[] vecs) {
    // assert names == null || names.length == vecs.length
    //     : "Number of columns does not match the number of column names.";
   _names = names;
   _vecs = vecs;
   _keys = new Key[vecs.length];
   for (int i = 0; i < vecs.length; i++) {
     Key k = _keys[i] = vecs[i]._key;
      if (DKV.get(k) == null) // If not already in KV, put it there
        DKV.put(k, vecs[i]);
   }
   Vec v0 = anyVec();
   if (v0 == null) return;
   VectorGroup grp = v0.group();
   for (int i = 0; i < vecs.length; i++) assert grp.equals(vecs[i].group());
 }
Example #19
 protected void testExecFail(String expr, int errorPos) {
   DKV.write_barrier();
   int keys = H2O.store_size();
   try {
      int i = UNIQUE.getAndIncrement();
      String resultName = "result" + i;
      System.err.println(resultName + ": " + expr);
      Key key = Exec.exec(expr, resultName);
     UKV.remove(key);
     assertTrue("An exception should have been thrown.", false);
   } catch (ParserException e) {
      assertTrue("Unexpected ParserException for expression " + expr, false);
   } catch (EvaluationException e) {
     if (errorPos != -1) assertEquals(errorPos, e._pos);
   }
   DKV.write_barrier();
   assertEquals("Keys were not properly deleted for expression " + expr, keys, H2O.store_size());
 }
Example #20
 public JobsV3 cancel(int version, JobsV3 c) {
   Job j = DKV.getGet(c.job_id.key());
   if (j == null) {
     throw new IllegalArgumentException("No job with key " + c.job_id.key());
   }
   j.stop(); // Request Job stop
   return c;
 }
Example #21
 public static ValueArray loadAndParseKey(Key okey, String path) {
    FileIntegrityChecker c = FileIntegrityChecker.check(new File(path), false);
    Key k = c.syncDirectory(null, null, null, null);
    ParseDataset.forkParseDataset(okey, new Key[] {k}, null).get();
   UKV.remove(k);
   ValueArray res = DKV.get(okey).get();
   return res;
 }
Example #22
 @Override
 public byte[] atomic(byte[] bits1) {
   byte[] mem = DKV.get(_key).get();
   int len = Math.max(_dst_off + mem.length, bits1 == null ? 0 : bits1.length);
   byte[] bits2 = MemoryManager.malloc1(len);
   if (bits1 != null) System.arraycopy(bits1, 0, bits2, 0, bits1.length);
   System.arraycopy(mem, 0, bits2, _dst_off, mem.length);
   return bits2;
 }
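A worked example of the length arithmetic above (illustrative numbers, not from the original source): with _dst_off = 4, mem.length = 3, and an existing bits1 of length 5, len = max(4 + 3, 5) = 7; the arraycopy calls first place bits1 into bytes 0-4 of bits2 and then overwrite bytes 4-6 with mem, so the old prefix is preserved and the new bytes land at the requested offset.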
Example #23
 private FrameTask(Key jobKey, Key dinfoKey, int[] activeCols, long seed, int iteration) {
   super(null);
   assert dinfoKey == null || DKV.get(dinfoKey) != null;
   _jobKey = jobKey;
   _dinfoKey = dinfoKey;
   _activeCols = activeCols;
   _seed = seed;
   _iteration = iteration;
 }
Example #24
 /** Actually remove/delete all Vecs from memory, not just from the Frame. */
 public void remove(Futures fs) {
   if (_vecs.length > 0) {
     VectorGroup vg = _vecs[0].group();
     for (Vec v : _vecs) UKV.remove(v._key, fs);
     DKV.remove(vg._key);
   }
   _names = new String[0];
   _vecs = new Vec[0];
 }
Example #25
  @Test
  public void testCategoricalProstate() throws InterruptedException, ExecutionException {
    GLRM job = null;
    GLRMModel model = null;
    Frame train = null;
    final int[] cats = new int[] {1, 3, 4, 5}; // Categoricals: CAPSULE, RACE, DPROS, DCAPS

    try {
      Scope.enter();
      train = parse_test_file(Key.make("prostate.hex"), "smalldata/logreg/prostate.csv");
      for (int i = 0; i < cats.length; i++)
        Scope.track(train.replace(cats[i], train.vec(cats[i]).toCategoricalVec())._key);
      train.remove("ID").remove();
      DKV.put(train._key, train);

      GLRMParameters parms = new GLRMParameters();
      parms._train = train._key;
      parms._k = 8;
      parms._gamma_x = parms._gamma_y = 0.1;
      parms._regularization_x = GLRMModel.GLRMParameters.Regularizer.Quadratic;
      parms._regularization_y = GLRMModel.GLRMParameters.Regularizer.Quadratic;
      parms._init = GLRM.Initialization.PlusPlus;
      parms._transform = DataInfo.TransformType.STANDARDIZE;
      parms._recover_svd = false;
      parms._max_iterations = 200;

      try {
        job = new GLRM(parms);
        model = job.trainModel().get();
        Log.info(
            "Iteration "
                + model._output._iterations
                + ": Objective value = "
                + model._output._objective);
        model.score(train).delete();
        ModelMetricsGLRM mm = (ModelMetricsGLRM) ModelMetrics.getFromDKV(model, train);
        Log.info(
            "Numeric Sum of Squared Error = "
                + mm._numerr
                + "\tCategorical Misclassification Error = "
                + mm._caterr);
      } catch (Throwable t) {
        t.printStackTrace();
        throw new RuntimeException(t);
      } finally {
        if (job != null) job.remove();
      }
    } catch (Throwable t) {
      t.printStackTrace();
      throw new RuntimeException(t);
    } finally {
      if (train != null) train.delete();
      if (model != null) model.delete();
      Scope.exit();
    }
  }
Example #26
 private static void addFolder(FileSystem fs, Path p, JsonArray succeeded, JsonArray failed) {
   try {
     if (fs == null) return;
     for (FileStatus file : fs.listStatus(p)) {
       Path pfs = file.getPath();
       if (file.isDir()) {
         addFolder(fs, pfs, succeeded, failed);
       } else {
         Key k = Key.make(pfs.toString());
         long size = file.getLen();
         Value val = null;
         if (pfs.getName().endsWith(Extensions.JSON)) {
           JsonParser parser = new JsonParser();
           JsonObject json = parser.parse(new InputStreamReader(fs.open(pfs))).getAsJsonObject();
           JsonElement v = json.get(Constants.VERSION);
           if (v == null) throw new RuntimeException("Missing version");
           JsonElement type = json.get(Constants.TYPE);
           if (type == null) throw new RuntimeException("Missing type");
           Class c = Class.forName(type.getAsString());
           OldModel model = (OldModel) c.newInstance();
           model.fromJson(json);
         } else if (pfs.getName().endsWith(Extensions.HEX)) { // Hex file?
           FSDataInputStream s = fs.open(pfs);
           int sz = (int) Math.min(1L << 20, size); // Read up to the 1st meg
           byte[] mem = MemoryManager.malloc1(sz);
           s.readFully(mem);
           // Convert to a ValueArray (hope it fits in 1Meg!)
           ValueArray ary = new ValueArray(k, 0).read(new AutoBuffer(mem));
           val = new Value(k, ary, Value.HDFS);
         } else if (size >= 2 * ValueArray.CHUNK_SZ) {
           val =
               new Value(
                   k,
                   new ValueArray(k, size),
                   Value.HDFS); // ValueArray byte wrapper over a large file
         } else {
           val = new Value(k, (int) size, Value.HDFS); // Plain Value
           val.setdsk();
         }
         DKV.put(k, val);
         Log.info("PersistHdfs: DKV.put(" + k + ")");
         JsonObject o = new JsonObject();
         o.addProperty(Constants.KEY, k.toString());
         o.addProperty(Constants.FILE, pfs.toString());
         o.addProperty(Constants.VALUE_SIZE, file.getLen());
         succeeded.add(o);
       }
     }
   } catch (Exception e) {
     Log.err(e);
     JsonObject o = new JsonObject();
     o.addProperty(Constants.FILE, p.toString());
     o.addProperty(Constants.ERROR, e.getMessage());
     failed.add(o);
   }
 }
Example #27
 // Close all AppendableVec
 public Futures closeAppendables(Futures fs) {
   _col0 = null; // Reset cache
   int len = vecs().length;
   for (int i = 0; i < len; i++) {
     Vec v = _vecs[i];
     if (v instanceof AppendableVec)
       DKV.put(_keys[i], _vecs[i] = ((AppendableVec) v).close(fs), fs);
   }
   return fs;
 }
Example #28
  /**
   * Initialize the ModelBuilder, validating all arguments and preparing the training frame. This
   * call is expected to be overridden in the subclasses and each subclass will start with
   * "super.init();". This call is made by the front-end whenever the GUI is clicked, and needs to
   * be fast; heavy-weight prep needs to wait for the trainModel() call.
   *
   * <p>Validate the requested ntrees; precompute actual ntrees. Validate the number of classes to
   * predict on; validate a checkpoint.
   */
  @Override
  public void init(boolean expensive) {
    super.init(expensive);
    if (H2O.ARGS.client && _parms._build_tree_one_node)
      error("_build_tree_one_node", "Cannot run on a single node in client mode");
    if (_vresponse != null) _vresponse_key = _vresponse._key;
    if (_response != null) _response_key = _response._key;
    if (_nclass > SharedTreeModel.SharedTreeParameters.MAX_SUPPORTED_LEVELS)
      error("_nclass", "Too many levels in response column!");

    if (_parms._min_rows < 0) error("_min_rows", "Requested min_rows must be greater than 0");

    if (_parms._ntrees < 0 || _parms._ntrees > 100000)
      error("_ntrees", "Requested ntrees must be between 1 and 100000");
    _ntrees = _parms._ntrees; // Total trees in final model
    if (_parms._checkpoint) { // Asking to continue from checkpoint?
      Value cv = DKV.get(_parms._model_id);
      if (cv != null) { // Look for prior model
        M checkpointModel = cv.get();
        if (_parms._ntrees < checkpointModel._output._ntrees + 1)
          error(
              "_ntrees",
              "Requested ntrees must be between "
                  + (checkpointModel._output._ntrees + 1)
                  + " and 100000");
        _ntrees = _parms._ntrees - checkpointModel._output._ntrees; // Needed trees
      }
    }
    if (_parms._nbins <= 1) error("_nbins", "_nbins must be > 1.");
    if (_parms._nbins >= 1 << 16) error("_nbins", "_nbins must be < " + (1 << 16));
    if (_parms._nbins_cats <= 1) error("_nbins_cats", "_nbins_cats must be > 1.");
    if (_parms._nbins_cats >= 1 << 16) error("_nbins_cats", "_nbins_cats must be < " + (1 << 16));
    if (_parms._max_depth <= 0) error("_max_depth", "_max_depth must be > 0.");
    if (_parms._min_rows <= 0) error("_min_rows", "_min_rows must be > 0.");
    if (_parms._distribution == Distributions.Family.tweedie) {
      _parms._distribution.tweedie.p = _parms._tweedie_power;
    }
    if (_train != null) {
      double sumWeights =
          _train.numRows() * (hasWeightCol() ? _train.vec(_parms._weights_column).mean() : 1);
      if (sumWeights
          < 2 * _parms._min_rows) // Need at least 2*min_rows weighted rows to split even once
        error(
            "_min_rows",
            "The dataset size is too small to split for min_rows="
                + _parms._min_rows
                + ": must have at least "
                + 2 * _parms._min_rows
                + " (weighted) rows, but have only "
                + sumWeights
                + ".");
    }
    if (_train != null) _ncols = _train.numCols() - 1 - numSpecialCols();
  }
Example #29
  /**
   * Initialize the ModelBuilder, validating all arguments and preparing the training frame. This
   * call is expected to be overridden in the subclasses and each subclass will start with
   * "super.init();". This call is made by the front-end whenever the GUI is clicked, and needs to
   * be fast; heavy-weight prep needs to wait for the trainModel() call.
   *
   * <p>Validate the requested ntrees; precompute actual ntrees. Validate the number of classes to
   * predict on; validate a checkpoint.
   */
  @Override
  public void init(boolean expensive) {
    super.init(expensive);
    if (H2O.ARGS.client && _parms._build_tree_one_node)
      error("_build_tree_one_node", "Cannot run on a single node in client mode");
    if (_vresponse != null) _vresponse_key = _vresponse._key;
    if (_response != null) _response_key = _response._key;

    if (_parms._min_rows < 0) error("_min_rows", "Requested min_rows must be greater than 0");

    if (_parms._ntrees < 0 || _parms._ntrees > MAX_NTREES)
      error("_ntrees", "Requested ntrees must be between 1 and " + MAX_NTREES);
    _ntrees = _parms._ntrees; // Total trees in final model
    if (_parms.hasCheckpoint()) { // Asking to continue from checkpoint?
      Value cv = DKV.get(_parms._checkpoint);
      if (cv != null) { // Look for prior model
        M checkpointModel = cv.get();
        try {
          _parms.validateWithCheckpoint(checkpointModel._parms);
        } catch (H2OIllegalArgumentException e) {
          error(e.values.get("argument").toString(), e.values.get("value").toString());
        }
        if (_parms._ntrees < checkpointModel._output._ntrees + 1)
          error(
              "_ntrees",
              "If checkpoint is specified then requested ntrees must be higher than "
                  + (checkpointModel._output._ntrees + 1));

        // Compute number of trees to build for this checkpoint
        _ntrees = _parms._ntrees - checkpointModel._output._ntrees; // Needed trees
      }
    }
    if (_parms._nbins <= 1) error("_nbins", "_nbins must be > 1.");
    if (_parms._nbins >= 1 << 16) error("_nbins", "_nbins must be < " + (1 << 16));
    if (_parms._nbins_cats <= 1) error("_nbins_cats", "_nbins_cats must be > 1.");
    if (_parms._nbins_cats >= 1 << 16) error("_nbins_cats", "_nbins_cats must be < " + (1 << 16));
    if (_parms._max_depth <= 0) error("_max_depth", "_max_depth must be > 0.");
    if (_parms._min_rows <= 0) error("_min_rows", "_min_rows must be > 0.");
    if (_train != null) {
      double sumWeights =
          _train.numRows() * (hasWeightCol() ? _train.vec(_parms._weights_column).mean() : 1);
      if (sumWeights
          < 2 * _parms._min_rows) // Need at least 2*min_rows weighted rows to split even once
        error(
            "_min_rows",
            "The dataset size is too small to split for min_rows="
                + _parms._min_rows
                + ": must have at least "
                + 2 * _parms._min_rows
                + " (weighted) rows, but have only "
                + sumWeights
                + ".");
    }
    if (_train != null) _ncols = _train.numCols() - 1 - numSpecialCols();
  }
Example #30
  @Override
  Val apply(Env env, Env.StackHelp stk, AST asts[]) {
    QuantileModel.QuantileParameters parms = new QuantileModel.QuantileParameters();
    Frame fr = stk.track(asts[1].exec(env)).getFrame();
    Frame fr_wkey = new Frame(fr); // Force a bogus Key for Quantiles ModelBuilder
    DKV.put(fr_wkey);
    parms._train = fr_wkey._key;

    parms._probs = ((ASTNumList) asts[2]).expand();
    for (double d : parms._probs)
      if (d < 0 || d > 1)
        throw new IllegalArgumentException("Probability must be between 0 and 1: " + d);

    String inter = asts[3].exec(env).getStr();
    parms._combine_method = QuantileModel.CombineMethod.valueOf(inter.toUpperCase());
    parms._weights_column = asts[4].str().equals("_") ? null : asts[4].str();

    // Compute Quantiles
    QuantileModel q = new Quantile(parms).trainModel().get();

    // Remove bogus Key
    DKV.remove(fr_wkey._key);

    // Reshape all outputs as a Frame, with probs in col 0 and the
    // quantiles in cols 1 thru fr.numCols() - except the optional weights vec
    int ncols = fr.numCols();
    if (parms._weights_column != null) ncols--;
    Vec[] vecs = new Vec[1 /*1 more for the probs themselves*/ + ncols];
    String[] names = new String[vecs.length];
    vecs[0] = Vec.makeCon(null, parms._probs);
    names[0] = "Probs";
    int w = 0;
    for (int i = 0; i < vecs.length - 1; ++i) {
      if (fr._names[i].equals(parms._weights_column)) w = 1;
      assert (w == 0 || w == 1);
      vecs[i + 1] = Vec.makeCon(null, q._output._quantiles[i]);
      names[i + 1] = fr._names[w + i] + "Quantiles";
    }
    q.delete();

    return new ValFrame(new Frame(names, vecs));
  }