Exemplo n.º 1
0
  @Override
  protected Response serve() {
    try {
      // pull everything local
      Log.info("ExportFiles processing (" + path + ")");
      if (DKV.get(src_key) == null)
        throw new IllegalArgumentException(src_key.toString() + " not found.");
      Object value = DKV.get(src_key).get();
      // create a stream to read the entire VA or Frame
      if (!(value instanceof ValueArray) && !(value instanceof Frame))
        throw new UnsupportedOperationException("Can only export Frames or ValueArrays.");
      InputStream csv =
          value instanceof ValueArray
              ? new ValueArray.CsvVAStream((ValueArray) value, null)
              : ((Frame) value).toCSV(true);
      String p2 = path.toLowerCase();
      if (p2.startsWith("hdfs://")) serveHdfs(csv);
      else if (p2.startsWith("s3n://")) serveHdfs(csv);
      else serveLocalDisk(csv);

      return RequestBuilders.Response.done(this);
    } catch (Throwable t) {
      return RequestBuilders.Response.error(t);
    }
  }
Exemplo n.º 2
0
 @Override
 public float progress() {
   if (DKV.get(dest()) == null) return 0;
   GLMModel m = DKV.get(dest()).get();
   float progress = (float) m.iteration() / (float) max_iter; // TODO, do something smarter here
   return progress;
 }
Exemplo n.º 3
0
  /**
   * Score a frame with the given model and return just the metrics.
   *
   * <p>NOTE: ModelMetrics are now always being created by model.score. . .
   */
  @SuppressWarnings("unused") // called through reflection by RequestServer
  public ModelMetricsListSchemaV3 score(int version, ModelMetricsListSchemaV3 s) {
    // parameters checking:
    if (null == s.model) throw new H2OIllegalArgumentException("model", "predict", s.model);
    if (null == DKV.get(s.model.name))
      throw new H2OKeyNotFoundArgumentException("model", "predict", s.model.name);

    if (null == s.frame) throw new H2OIllegalArgumentException("frame", "predict", s.frame);
    if (null == DKV.get(s.frame.name))
      throw new H2OKeyNotFoundArgumentException("frame", "predict", s.frame.name);

    ModelMetricsList parms = s.createAndFillImpl();
    parms
        ._model
        .score(parms._frame, parms._predictions_name)
        .remove(); // throw away predictions, keep metrics as a side-effect
    ModelMetricsListSchemaV3 mm = this.fetch(version, s);

    // TODO: for now only binary predictors write an MM object.
    // For the others cons one up here to return the predictions frame.
    if (null == mm) mm = new ModelMetricsListSchemaV3();

    if (null == mm.model_metrics || 0 == mm.model_metrics.length) {
      Log.warn(
          "Score() did not return a ModelMetrics for model: " + s.model + " on frame: " + s.frame);
    }

    return mm;
  }
Exemplo n.º 4
0
  /** Score a frame with the given model and return the metrics AND the prediction frame. */
  @SuppressWarnings("unused") // called through reflection by RequestServer
  public JobV3 predict2(int version, final ModelMetricsListSchemaV3 s) {
    // parameters checking:
    if (null == s.model) throw new H2OIllegalArgumentException("model", "predict", s.model);
    if (null == DKV.get(s.model.name))
      throw new H2OKeyNotFoundArgumentException("model", "predict", s.model.name);

    if (null == s.frame) throw new H2OIllegalArgumentException("frame", "predict", s.frame);
    if (null == DKV.get(s.frame.name))
      throw new H2OKeyNotFoundArgumentException("frame", "predict", s.frame.name);

    final ModelMetricsList parms = s.createAndFillImpl();

    // predict2 does not return modelmetrics, so cannot handle deeplearning: reconstruction_error
    // (anomaly) or GLRM: reconstruct and archetypes
    // predict2 can handle deeplearning: deepfeatures and predict

    if (s.deep_features_hidden_layer > 0) {
      if (null == parms._predictions_name)
        parms._predictions_name =
            "deep_features"
                + Key.make().toString().substring(0, 5)
                + "_"
                + parms._model._key.toString()
                + "_on_"
                + parms._frame._key.toString();
    } else if (null == parms._predictions_name)
      parms._predictions_name =
          "predictions"
              + Key.make().toString().substring(0, 5)
              + "_"
              + parms._model._key.toString()
              + "_on_"
              + parms._frame._key.toString();

    final Job<Frame> j =
        new Job(Key.make(parms._predictions_name), Frame.class.getName(), "prediction");

    H2O.H2OCountedCompleter work =
        new H2O.H2OCountedCompleter() {
          @Override
          public void compute2() {
            if (s.deep_features_hidden_layer < 0) {
              parms._model.score(parms._frame, parms._predictions_name, j);
            } else {
              Frame predictions =
                  ((Model.DeepFeatures) parms._model)
                      .scoreDeepFeatures(parms._frame, s.deep_features_hidden_layer, j);
              predictions =
                  new Frame(
                      Key.make(parms._predictions_name), predictions.names(), predictions.vecs());
              DKV.put(predictions._key, predictions);
            }
            tryComplete();
          }
        };
    j.start(work, parms._frame.anyVec().nChunks());
    return new JobV3().fillFromImpl(j);
  }
Exemplo n.º 5
0
 public static Response redirect(JsonObject fromPageResponse, Key rfModelKey) {
   RFModel rfModel = DKV.get(rfModelKey).get();
   ValueArray data = DKV.get(rfModel._dataKey).get();
   return redirect(
       fromPageResponse,
       null,
       rfModelKey,
       rfModel._dataKey,
       rfModel._totalTrees,
       data.numCols() - 1,
       null,
       true,
       false);
 }
Exemplo n.º 6
0
  @Override
  public Response serve() {
    Frame fr = DKV.get(data_key.value()).get();
    if (fr == null) return RequestServer._http404.serve();
    // Build a frame with the selected Vecs
    Frame fr2 = new Frame(new String[0], new Vec[0]);
    int[] idxs = vecs.value();
    for (int idx : idxs) // The selected frame columns
    fr2.add(fr._names[idx], fr._vecs[idx]);
    // Add the class-vec last
    Vec cvec = class_vec.value();
    fr2.add(fr._names[class_vec._colIdx.get()], cvec);
    domain = cvec.domain(); // Class/enum/factor names
    mtrys = features.value() == null ? (int) (Math.sqrt(idxs.length) + 0.5) : features.value();

    DRF drf =
        DRF.start(
            DRF.makeKey(),
            fr2,
            depth.value(),
            ntrees.value(),
            mtrys,
            sample_rate.value(),
            seed.value());

    drf.get(); // Block for result
    cm = drf.cm(); // Get CM result

    return new Response(Response.Status.done, this, -1, -1, null);
  }
Exemplo n.º 7
0
 public static ValueArray loadAndParseKey(Key okey, String path) {
   FileIntegrityChecker c = FileIntegrityChecker.check(new File(path),false);
   Key k = c.syncDirectory(null,null,null,null);
   ParseDataset.forkParseDataset(okey, new Key[] { k }, null).get();
   UKV.remove(k);
   ValueArray res = DKV.get(okey).get();
   return res;
 }
Exemplo n.º 8
0
 @Override
 public byte[] atomic(byte[] bits1) {
   byte[] mem = DKV.get(_key).get();
   int len = Math.max(_dst_off + mem.length, bits1 == null ? 0 : bits1.length);
   byte[] bits2 = MemoryManager.malloc1(len);
   if (bits1 != null) System.arraycopy(bits1, 0, bits2, 0, bits1.length);
   System.arraycopy(mem, 0, bits2, _dst_off, mem.length);
   return bits2;
 }
Exemplo n.º 9
0
 private FrameTask(Key jobKey, Key dinfoKey, int[] activeCols, long seed, int iteration) {
   super(null);
   assert dinfoKey == null || DKV.get(dinfoKey) != null;
   _jobKey = jobKey;
   _dinfoKey = dinfoKey;
   _activeCols = activeCols;
   _seed = seed;
   _iteration = iteration;
 }
Exemplo n.º 10
0
 public Vec replace(int col, Vec nv) {
   assert col < _names.length;
   Vec rv = vecs()[col];
   assert rv.group().equals(nv.group());
   _vecs[col] = nv;
   _keys[col] = nv._key;
   if (DKV.get(nv._key) == null) // If not already in KV, put it there
   DKV.put(nv._key, nv);
   return rv;
 }
Exemplo n.º 11
0
  /**
   * Initialize the ModelBuilder, validating all arguments and preparing the training frame. This
   * call is expected to be overridden in the subclasses and each subclass will start with
   * "super.init();". This call is made by the front-end whenever the GUI is clicked, and needs to
   * be fast; heavy-weight prep needs to wait for the trainModel() call.
   *
   * <p>Validate the requested ntrees; precompute actual ntrees. Validate the number of classes to
   * predict on; validate a checkpoint.
   */
  @Override
  public void init(boolean expensive) {
    super.init(expensive);
    if (H2O.ARGS.client && _parms._build_tree_one_node)
      error("_build_tree_one_node", "Cannot run on a single node in client mode");
    if (_vresponse != null) _vresponse_key = _vresponse._key;
    if (_response != null) _response_key = _response._key;
    if (_nclass > SharedTreeModel.SharedTreeParameters.MAX_SUPPORTED_LEVELS)
      error("_nclass", "Too many levels in response column!");

    if (_parms._min_rows < 0) error("_min_rows", "Requested min_rows must be greater than 0");

    if (_parms._ntrees < 0 || _parms._ntrees > 100000)
      error("_ntrees", "Requested ntrees must be between 1 and 100000");
    _ntrees = _parms._ntrees; // Total trees in final model
    if (_parms._checkpoint) { // Asking to continue from checkpoint?
      Value cv = DKV.get(_parms._model_id);
      if (cv != null) { // Look for prior model
        M checkpointModel = cv.get();
        if (_parms._ntrees < checkpointModel._output._ntrees + 1)
          error(
              "_ntrees",
              "Requested ntrees must be between "
                  + checkpointModel._output._ntrees
                  + 1
                  + " and 100000");
        _ntrees = _parms._ntrees - checkpointModel._output._ntrees; // Needed trees
      }
    }
    if (_parms._nbins <= 1) error("_nbins", "_nbins must be > 1.");
    if (_parms._nbins >= 1 << 16) error("_nbins", "_nbins must be < " + (1 << 16));
    if (_parms._nbins_cats <= 1) error("_nbins_cats", "_nbins_cats must be > 1.");
    if (_parms._nbins_cats >= 1 << 16) error("_nbins_cats", "_nbins_cats must be < " + (1 << 16));
    if (_parms._max_depth <= 0) error("_max_depth", "_max_depth must be > 0.");
    if (_parms._min_rows <= 0) error("_min_rows", "_min_rows must be > 0.");
    if (_parms._distribution == Distributions.Family.tweedie) {
      _parms._distribution.tweedie.p = _parms._tweedie_power;
    }
    if (_train != null) {
      double sumWeights =
          _train.numRows() * (hasWeightCol() ? _train.vec(_parms._weights_column).mean() : 1);
      if (sumWeights
          < 2 * _parms._min_rows) // Need at least 2*min_rows weighted rows to split even once
      error(
            "_min_rows",
            "The dataset size is too small to split for min_rows="
                + _parms._min_rows
                + ": must have at least "
                + 2 * _parms._min_rows
                + " (weighted) rows, but have only "
                + sumWeights
                + ".");
    }
    if (_train != null) _ncols = _train.numCols() - 1 - numSpecialCols();
  }
Exemplo n.º 12
0
  /**
   * Initialize the ModelBuilder, validating all arguments and preparing the training frame. This
   * call is expected to be overridden in the subclasses and each subclass will start with
   * "super.init();". This call is made by the front-end whenever the GUI is clicked, and needs to
   * be fast; heavy-weight prep needs to wait for the trainModel() call.
   *
   * <p>Validate the requested ntrees; precompute actual ntrees. Validate the number of classes to
   * predict on; validate a checkpoint.
   */
  @Override
  public void init(boolean expensive) {
    super.init(expensive);
    if (H2O.ARGS.client && _parms._build_tree_one_node)
      error("_build_tree_one_node", "Cannot run on a single node in client mode");
    if (_vresponse != null) _vresponse_key = _vresponse._key;
    if (_response != null) _response_key = _response._key;

    if (_parms._min_rows < 0) error("_min_rows", "Requested min_rows must be greater than 0");

    if (_parms._ntrees < 0 || _parms._ntrees > MAX_NTREES)
      error("_ntrees", "Requested ntrees must be between 1 and " + MAX_NTREES);
    _ntrees = _parms._ntrees; // Total trees in final model
    if (_parms.hasCheckpoint()) { // Asking to continue from checkpoint?
      Value cv = DKV.get(_parms._checkpoint);
      if (cv != null) { // Look for prior model
        M checkpointModel = cv.get();
        try {
          _parms.validateWithCheckpoint(checkpointModel._parms);
        } catch (H2OIllegalArgumentException e) {
          error(e.values.get("argument").toString(), e.values.get("value").toString());
        }
        if (_parms._ntrees < checkpointModel._output._ntrees + 1)
          error(
              "_ntrees",
              "If checkpoint is specified then requested ntrees must be higher than "
                  + (checkpointModel._output._ntrees + 1));

        // Compute number of trees to build for this checkpoint
        _ntrees = _parms._ntrees - checkpointModel._output._ntrees; // Needed trees
      }
    }
    if (_parms._nbins <= 1) error("_nbins", "_nbins must be > 1.");
    if (_parms._nbins >= 1 << 16) error("_nbins", "_nbins must be < " + (1 << 16));
    if (_parms._nbins_cats <= 1) error("_nbins_cats", "_nbins_cats must be > 1.");
    if (_parms._nbins_cats >= 1 << 16) error("_nbins_cats", "_nbins_cats must be < " + (1 << 16));
    if (_parms._max_depth <= 0) error("_max_depth", "_max_depth must be > 0.");
    if (_parms._min_rows <= 0) error("_min_rows", "_min_rows must be > 0.");
    if (_train != null) {
      double sumWeights =
          _train.numRows() * (hasWeightCol() ? _train.vec(_parms._weights_column).mean() : 1);
      if (sumWeights
          < 2 * _parms._min_rows) // Need at least 2*min_rows weighted rows to split even once
      error(
            "_min_rows",
            "The dataset size is too small to split for min_rows="
                + _parms._min_rows
                + ": must have at least "
                + 2 * _parms._min_rows
                + " (weighted) rows, but have only "
                + sumWeights
                + ".");
    }
    if (_train != null) _ncols = _train.numCols() - 1 - numSpecialCols();
  }
Exemplo n.º 13
0
  // Test kaggle/creditsample-test data
  @org.junit.Test
  public void kaggle_credit() {
    Key okey = loadAndParseFile("credit.hex", "smalldata/kaggle/creditsample-training.csv.gz");
    UKV.remove(Key.make("smalldata/kaggle/creditsample-training.csv.gz_UNZIPPED"));
    UKV.remove(Key.make("smalldata\\kaggle\\creditsample-training.csv.gz_UNZIPPED"));
    ValueArray val = DKV.get(okey).get();

    // Check parsed dataset
    final int n = new int[] {4, 2, 1}[ValueArray.LOG_CHK - 20];
    assertEquals("Number of chunks", n, val.chunks());
    assertEquals("Number of rows", 150000, val.numRows());
    assertEquals("Number of cols", 12, val.numCols());

    // setup default values for DRF
    int ntrees = 3;
    int depth = 30;
    int gini = StatType.GINI.ordinal();
    int seed = 42;
    StatType statType = StatType.values()[gini];
    final int cols[] =
        new int[] {0, 2, 3, 4, 5, 7, 8, 9, 10, 11, 1}; // ignore column 6, classify column 1

    // Start the distributed Random Forest
    final Key modelKey = Key.make("model");
    DRFJob result =
        hex.rf.DRF.execute(
            modelKey,
            cols,
            val,
            ntrees,
            depth,
            1024,
            statType,
            seed,
            true,
            null,
            -1,
            Sampling.Strategy.RANDOM,
            1.0f,
            null,
            0,
            0,
            false);
    // Wait for completion on all nodes
    RFModel model = result.get();

    assertEquals("Number of classes", 2, model.classes());
    assertEquals("Number of trees", ntrees, model.size());

    model.deleteKeys();
    UKV.remove(modelKey);
    UKV.remove(okey);
  }
Exemplo n.º 14
0
  @Override
  public void compute2() {
    String path = null; // getPathFromValue(val);
    ValueArray ary = DKV.get(_arykey).get();
    Key self = selfKey();

    while (_indexFrom < ary.chunks()) {
      Key ckey = ary.getChunkKey(_indexFrom++);
      if (!ckey.home()) { // Next chunk not At Home?
        RPC.call(chunkHome(), this); // Hand the baton off to the next node/chunk
        return;
      }
      Value val = DKV.get(ckey); // It IS home, so get the data
      _err = PersistHdfs.appendChunk(_arykey, val);
      if (_err != null) return;
      UKV.put(self, this); // Update the progress/self key
    }
    // We did the last chunk.  Removing the selfKey is the signal to the web
    // thread that All Done.
    UKV.remove(self);
  }
Exemplo n.º 15
0
  // TODO: almost identical to ModelsHandler; refactor
  public static ModelMetrics getFromDKV(Key key) {
    if (null == key) throw new IllegalArgumentException("Got null key.");

    Value v = DKV.get(key);
    if (null == v) throw new IllegalArgumentException("Did not find key: " + key.toString());

    Iced ice = v.get();
    if (!(ice instanceof ModelMetrics))
      throw new IllegalArgumentException(
          "Expected a Model for key: " + key.toString() + "; got a: " + ice.getClass());

    return (ModelMetrics) ice;
  }
Exemplo n.º 16
0
 @Override
 protected void setupLocal() {
   _model_mem_size = 0;
   for (int i = 0; i < trees_so_far; ++i) {
     Key<CompressedTree>[] per_class = _treeKeys[i];
     for (int j = 0; j < per_class.length; ++j) {
       if (per_class[j] == null) continue;
       if (!per_class[j].home()) continue;
       // only look at homed tree keys
       _model_mem_size += DKV.get(per_class[j])._max;
     }
   }
 }
Exemplo n.º 17
0
 public static Frame createFrame(String fname, long[] chunkLayout, String[][] data) {
   Frame f = new Frame(Key.make(fname));
   f.preparePartialFrame(new String[] {"C0"});
   f.update(null);
   // Create chunks
   for (int i = 0; i < chunkLayout.length; i++) {
     createNC(fname, data[i], i, (int) chunkLayout[i]);
   }
   // Reload frame from DKV
   f = DKV.get(fname).get();
   // Finalize frame
   f.finalizePartialFrame(chunkLayout, new String[][] {null}, new byte[] {Vec.T_STR});
   return f;
 }
Exemplo n.º 18
0
  @Test
  public void testChunks() {
    Frame frame = parse_test_file("smalldata/covtype/covtype.20k.data");

    AggregatorModel.AggregatorParameters parms = new AggregatorModel.AggregatorParameters();
    parms._train = frame._key;
    parms._radius_scale = 3.0;
    long start = System.currentTimeMillis();
    AggregatorModel agg = new Aggregator(parms).trainModel().get(); // 0.418
    System.out.println(
        "AggregatorModel finished in: "
            + (System.currentTimeMillis() - start) / 1000.
            + " seconds");
    agg.checkConsistency();
    Frame output = agg._output._output_frame.get();
    Log.info("Number of exemplars: " + agg._exemplars.length);
    //    Assert.assertTrue(agg._exemplars.length==1993);
    output.remove();
    agg.remove();

    for (int i : new int[] {1, 2, 5, 10, 50, 100}) {
      Key key = Key.make();
      RebalanceDataSet rb = new RebalanceDataSet(frame, key, i);
      H2O.submitTask(rb);
      rb.join();
      Frame rebalanced = DKV.get(key).get();

      parms = new AggregatorModel.AggregatorParameters();
      parms._train = frame._key;
      parms._radius_scale = 3.0;
      start = System.currentTimeMillis();
      AggregatorModel agg2 =
          new Aggregator(parms).trainModel().get(); // 0.373 0.504 0.357 0.454 0.368 0.355
      System.out.println(
          "AggregatorModel finished in: "
              + (System.currentTimeMillis() - start) / 1000.
              + " seconds");
      agg2.checkConsistency();
      Log.info("Number of exemplars for " + i + " chunks: " + agg2._exemplars.length);
      rebalanced.delete();
      Assert.assertTrue(
          Math.abs(agg._exemplars.length - agg2._exemplars.length)
              == 0); // < agg._exemplars.length*0);
      output = agg2._output._output_frame.get();
      output.remove();
      agg2.remove();
    }
    frame.delete();
  }
Exemplo n.º 19
0
 @Override
 public byte[] load(final Value v) {
   final byte[] b = MemoryManager.malloc1(v._max);
   long skip = 0;
   Key k = v._key;
   final Path p;
   if (_iceRoot != null) {
     p = new Path(_iceRoot, getIceName(v));
   } else {
     // Convert an arraylet chunk into a long-offset from the base file.
     if (k._kb[0] == Key.ARRAYLET_CHUNK) {
       skip = ValueArray.getChunkOffset(k); // The offset
       k = ValueArray.getArrayKey(k); // From the base file key
       if (k.toString().endsWith(Extensions.HEX)) { // Hex file?
         int value_len = DKV.get(k).memOrLoad().length; // How long is the ValueArray header?
         skip += value_len;
       }
     }
     p = new Path(k.toString());
   }
   final long skip_ = skip;
   run(
       new Callable() {
         @Override
         public Object call() throws Exception {
           FileSystem fs = FileSystem.get(p.toUri(), CONF);
           FSDataInputStream s = null;
           try {
             s = fs.open(p);
             // NOTE:
             // The following line degrades performance of HDFS load from S3 API:
             // s.readFully(skip,b,0,b.length);
             // Google API's simple seek has better performance
             // Load of 300MB file via Google API ~ 14sec, via s.readFully ~ 5min (under the same
             // condition)
             ByteStreams.skipFully(s, skip_);
             ByteStreams.readFully(s, b);
             assert v.isPersisted();
           } finally {
             Utils.close(s);
           }
           return null;
         }
       },
       true,
       v._max);
   return b;
 }
Exemplo n.º 20
0
 // Convert a chunk# into a chunk - does lazy-chunk creation. As chunks are
 // asked-for the first time, we make the Key and an empty backing DVec.
 // Touching the DVec will force the file load.
 @Override
 public Value chunkIdx(int cidx) {
   final long nchk = nChunks();
   assert 0 <= cidx && cidx < nchk;
   Key dkey = chunkKey(cidx);
   Value val1 = DKV.get(dkey); // Check for an existing one... will fetch data as needed
   if (val1 != null) return val1; // Found an existing one?
   // Lazily create a DVec for this chunk
   int len = (int) (cidx < nchk - 1 ? ValueArray.CHUNK_SZ : (_len - chunk2StartElem(cidx)));
   // DVec is just the raw file data with a null-compression scheme
   Value val2 = new Value(dkey, len, null, TypeMap.C1CHUNK, Value.NFS);
   val2.setdsk(); // It is already on disk.
   // Atomically insert: fails on a race, but then return the old version
   Value val3 = DKV.DputIfMatch(dkey, val2, null, null);
   return val3 == null ? val2 : val3;
 }
Exemplo n.º 21
0
 public Frame(String[] names, Vec[] vecs) {
   // assert names==null || names.length == vecs.length : "Number of columns does not match to
   // number of cols' names.";
   _names = names;
   _vecs = vecs;
   _keys = new Key[vecs.length];
   for (int i = 0; i < vecs.length; i++) {
     Key k = _keys[i] = vecs[i]._key;
     if (DKV.get(k) == null) // If not already in KV, put it there
     DKV.put(k, vecs[i]);
   }
   Vec v0 = anyVec();
   if (v0 == null) return;
   VectorGroup grp = v0.group();
   for (int i = 0; i < vecs.length; i++) assert grp.equals(vecs[i].group());
 }
Exemplo n.º 22
0
  public static String store2Hdfs(Key srcKey) {
    assert srcKey._kb[0] != Key.ARRAYLET_CHUNK;
    assert PersistHdfs.getPathForKey(srcKey) != null; // Validate key name
    Value v = DKV.get(srcKey);
    if (v == null) return "Key " + srcKey + " not found";
    if (v._isArray == 0) { // Simple chunk?
      v.setHdfs(); // Set to HDFS and be done
      return null; // Success
    }

    // For ValueArrays, make the .hex header
    ValueArray ary = ValueArray.value(v);
    String err = PersistHdfs.freeze(srcKey, ary);
    if (err != null) return err;

    // The task managing which chunks to write next,
    // store in a known key
    TaskStore2HDFS ts = new TaskStore2HDFS(srcKey);
    Key selfKey = ts.selfKey();
    UKV.put(selfKey, ts);

    // Then start writing chunks in-order with the zero chunk
    H2ONode chk0_home = ValueArray.getChunkKey(0, srcKey).home_node();
    RPC.call(ts.chunkHome(), ts);

    // Watch the progress key until it gets removed or an error appears
    long idx = 0;
    while (UKV.get(selfKey, ts) != null) {
      if (ts._indexFrom != idx) {
        System.out.print(" " + idx + "/" + ary.chunks());
        idx = ts._indexFrom;
      }
      if (ts._err != null) { // Found an error?
        UKV.remove(selfKey); // Cleanup & report
        return ts._err;
      }
      try {
        Thread.sleep(100);
      } catch (InterruptedException e) {
      }
    }
    System.out.println(" " + ary.chunks() + "/" + ary.chunks());

    // PersistHdfs.refreshHDFSKeys();
    return null;
  }
Exemplo n.º 23
0
 /**
  * Performs deep clone of given model.
  *
  * <p>FIXME: fetch all data to the caller node
  */
 protected M getModelDeepClone(M model) {
   M newModel = IcedUtils.clone(model, _dest);
   // Do not clone model metrics
   newModel._output._model_metrics = new Key[0];
   newModel._output._training_metrics = null;
   newModel._output._validation_metrics = null;
   // Clone trees
   Key[][] treeKeys = newModel._output._treeKeys;
   for (int i = 0; i < treeKeys.length; i++) {
     for (int j = 0; j < treeKeys[i].length; j++) {
       if (treeKeys[i][j] == null) continue;
       ;
       CompressedTree ct = DKV.get(treeKeys[i][j]).get();
       CompressedTree newCt = IcedUtils.clone(ct, CompressedTree.makeTreeKey(i, j), true);
       treeKeys[i][j] = newCt._key;
     }
   }
   return newModel;
 }
Exemplo n.º 24
0
 public Key importFile(int i, Futures fs) {
   if (_ok[i] < H2O.CLOUD.size()) return null;
   File f = new File(_files[i]);
   Key k;
   if (_newApi) {
     k = PersistNFS.decodeFile(f);
     NFSFileVec nfs = DKV.get(NFSFileVec.make(f, fs)).get();
     UKV.put(k, new Frame(new String[] {"0"}, new Vec[] {nfs}), fs);
   } else {
     k = PersistNFS.decodeFile(f);
     long size = f.length();
     Value val =
         (size < 2 * ValueArray.CHUNK_SZ)
             ? new Value(k, (int) size, Value.NFS)
             : new Value(k, new ValueArray(k, size), Value.NFS);
     val.setdsk();
     UKV.put(k, val, fs);
   }
   return k;
 }
Exemplo n.º 25
0
  public ModelMetricsGLRM scoreMetricsOnly(Frame frame) {
    final int ncols = _output._names.length;

    // Need [A,X] where A = adapted test frame, X = loading frame
    // Note: A is adapted to original training frame
    Frame adaptedFr = new Frame(frame);
    adaptTestForTrain(adaptedFr, true, false);
    assert ncols == adaptedFr.numCols();

    // Append loading frame X for calculating XY
    Frame fullFrm = new Frame(adaptedFr);
    Frame loadingFrm = DKV.get(_output._representation_key).get();
    fullFrm.add(loadingFrm);

    GLRMScore gs = new GLRMScore(ncols, _parms._k, false).doAll(fullFrm);
    ModelMetrics mm =
        gs._mb.makeModelMetrics(
            GLRMModel.this, adaptedFr, null, null); // save error metrics based on imputed data
    return (ModelMetricsGLRM) mm;
  }
Exemplo n.º 26
0
 @Override
 public void map(Key key) {
   _rows = new long[_clusters.length];
   _dist = new double[_clusters.length];
   assert key.home();
   ValueArray va = DKV.get(_arykey).get();
   AutoBuffer bits = va.getChunk(key);
   int rows = va.rpc(ValueArray.getChunkIndex(key));
   double[] values = new double[_cols.length - 1];
   ClusterDist cd = new ClusterDist();
   for (int row = 0; row < rows; row++) {
     KMeans.datad(va, bits, row, _cols, _normalized, values);
     KMeans.closest(_clusters, values, cd);
     _rows[cd._cluster]++;
     _dist[cd._cluster] += cd._dist;
   }
   _arykey = null;
   _cols = null;
   _clusters = null;
 }
Exemplo n.º 27
0
  @Override
  public void parse(Key key) throws IOException {
    _firstRow = true;
    InputStream is = DKV.get(key).openStream();
    try {
      _fs = new POIFSFileSystem(is);
      MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this);
      _formatListener = new FormatTrackingHSSFListener(listener);

      HSSFEventFactory factory = new HSSFEventFactory();
      HSSFRequest request = new HSSFRequest();
      request.addListenerForAllRecords(_formatListener);

      factory.processWorkbookEvents(request, _fs);
    } finally {
      try {
        is.close();
      } catch (IOException e) {
      }
    }
  }
Exemplo n.º 28
0
  @SuppressWarnings("unused") // called through reflection by RequestServer
  public JobsV3 fetch(int version, JobsV3 s) {
    Key key = s.job_id.key();
    Value val = DKV.get(key);
    if (null == val) throw new IllegalArgumentException("Job is missing");
    Iced ice = val.get();
    if (!(ice instanceof Job))
      throw new IllegalArgumentException("Must be a Job not a " + ice.getClass());

    Job j = (Job) ice;
    s.jobs = new JobV3[1];
    // s.fillFromImpl(jobs);
    try {
      s.jobs[0] = (JobV3) Schema.schema(version, j).fillFromImpl(j);
    }
    // no special schema for this job subclass, so fall back to JobV3
    catch (H2ONotFoundArgumentException e) {
      s.jobs[0] = new JobV3().fillFromImpl(j);
    }
    return s;
  }
Exemplo n.º 29
0
 // Convert a chunk# into a chunk - does lazy-chunk creation. As chunks are
 // asked-for the first time, we make the Key and an empty backing DVec.
 // Touching the DVec will force the file load.
 @Override
 public Value chunkIdx(int cidx) {
   final long nchk = nChunks();
   assert 0 <= cidx && cidx < nchk;
   Key dkey = chunkKey(cidx);
   Value val1 = DKV.get(dkey); // Check for an existing one... will fetch data as needed
   if (val1 != null) return val1; // Found an existing one?
   // Lazily create a DVec for this chunk
   int len = (int) (cidx < nchk - 1 ? CHUNK_SZ : (_len - chunk2StartElem(cidx)));
   // DVec is just the raw file data with a null-compression scheme
   Value val2 = new Value(dkey, len, null, TypeMap.C1NCHUNK, _be);
   val2.setdsk(); // It is already on disk.
   // If not-home, then block till the Key is everywhere.  Most calls here are
   // from the parser loading a text file, and the parser splits the work such
   // that most puts here are on home - so this is a simple speed optimization:
   // do not make a Futures nor block on it on home.
   Futures fs = dkey.home() ? null : new Futures();
   // Atomically insert: fails on a race, but then return the old version
   Value val3 = DKV.DputIfMatch(dkey, val2, null, fs);
   if (!dkey.home() && fs != null) fs.blockForPending();
   return val3 == null ? val2 : val3;
 }
Exemplo n.º 30
0
 static boolean checkSaneFrame_impl() {
   for (Key k : H2O.localKeySet()) {
     Value val = H2O.raw_get(k);
     if (val.isFrame()) {
       Frame fr = val.get();
       Vec vecs[] = fr.vecs();
       for (int i = 0; i < vecs.length; i++) {
         Vec v = vecs[i];
         if (DKV.get(v._key) == null) {
           System.err.println(
               "Frame "
                   + fr._key
                   + " in the DKV, is missing Vec "
                   + v._key
                   + ", name="
                   + fr._names[i]);
           return false;
         }
       }
     }
   }
   return true;
 }