Example #1
 @Override
 public Value lazyArrayChunk(final Key key) {
   final Key arykey = ValueArray.getArrayKey(key); // From the base file key
   final long off = (_iceRoot != null) ? 0 : ValueArray.getChunkOffset(key); // The offset
   final Path p =
       (_iceRoot != null)
           ? new Path(_iceRoot, getIceName(key, (byte) 'V'))
           : new Path(arykey.toString());
   final Size sz = new Size();
   run(
       new Callable() {
         @Override
         public Object call() throws Exception {
           FileSystem fs = FileSystem.get(p.toUri(), CONF);
           long rem = fs.getFileStatus(p).getLen() - off;
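            // A standard chunk is CHUNK_SZ bytes; the last chunk absorbs the remainder and can be
            // up to 2*CHUNK_SZ bytes.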
           sz._value = (rem > ValueArray.CHUNK_SZ * 2) ? (int) ValueArray.CHUNK_SZ : (int) rem;
           return null;
         }
       },
       true,
       0);
   Value val = new Value(key, sz._value, Value.HDFS);
    val.setdsk(); // But it's already on disk.
   return val;
 }
Example #2
  @Override
  public NanoHTTPD.Response serve(NanoHTTPD server, Properties args, RequestType type) {
    if (type == RequestType.json) {
      JsonObject resp = new JsonObject();
      resp.addProperty(ERROR, "This request is only provided for browser connections");
      return wrap(server, resp);
    } else if (type != RequestType.www) {
      return super.serve(server, args, type);
    }

    String query = checkArguments(args, type);
    if (query != null) return wrap(server, query, type);
    try {
      Value val = _key.value();
      Key key = val._key;
      if (!key.user_allowed()) return wrap(server, build(Response.error("Not a user key: " + key)));
      // HTML file save of Value
      NanoHTTPD.Response res =
          server.new Response(NanoHTTPD.HTTP_OK, NanoHTTPD.MIME_DEFAULT_BINARY, val.openStream());
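      // Download headers: report the payload size and have the browser save it under the key name.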
      res.addHeader("Content-Length", Long.toString(val.length()));
      res.addHeader("Content-Disposition", "attachment; filename=" + key.toString());
      return res;
    } catch (Throwable e) {
      return wrap(server, build(Response.error(e)));
    }
  }
Example #3
 private static void addFolder(FileSystem fs, Path p, JsonArray succeeded, JsonArray failed) {
   try {
     if (fs == null) return;
     for (FileStatus file : fs.listStatus(p)) {
       Path pfs = file.getPath();
       if (file.isDir()) {
         addFolder(fs, pfs, succeeded, failed);
       } else {
         Key k = Key.make(pfs.toString());
         long size = file.getLen();
         Value val = null;
         if (pfs.getName().endsWith(Extensions.JSON)) {
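            // JSON file: re-instantiate the saved model class named by the "type" field.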
           JsonParser parser = new JsonParser();
           JsonObject json = parser.parse(new InputStreamReader(fs.open(pfs))).getAsJsonObject();
           JsonElement v = json.get(Constants.VERSION);
           if (v == null) throw new RuntimeException("Missing version");
           JsonElement type = json.get(Constants.TYPE);
           if (type == null) throw new RuntimeException("Missing type");
           Class c = Class.forName(type.getAsString());
           OldModel model = (OldModel) c.newInstance();
           model.fromJson(json);
         } else if (pfs.getName().endsWith(Extensions.HEX)) { // Hex file?
           FSDataInputStream s = fs.open(pfs);
           int sz = (int) Math.min(1L << 20, size); // Read up to the 1st meg
           byte[] mem = MemoryManager.malloc1(sz);
           s.readFully(mem);
           // Convert to a ValueArray (hope it fits in 1Meg!)
           ValueArray ary = new ValueArray(k, 0).read(new AutoBuffer(mem));
           val = new Value(k, ary, Value.HDFS);
         } else if (size >= 2 * ValueArray.CHUNK_SZ) {
           val =
               new Value(
                   k,
                   new ValueArray(k, size),
                   Value.HDFS); // ValueArray byte wrapper over a large file
         } else {
           val = new Value(k, (int) size, Value.HDFS); // Plain Value
           val.setdsk();
         }
         DKV.put(k, val);
         Log.info("PersistHdfs: DKV.put(" + k + ")");
         JsonObject o = new JsonObject();
         o.addProperty(Constants.KEY, k.toString());
         o.addProperty(Constants.FILE, pfs.toString());
         o.addProperty(Constants.VALUE_SIZE, file.getLen());
         succeeded.add(o);
       }
     }
   } catch (Exception e) {
     Log.err(e);
     JsonObject o = new JsonObject();
     o.addProperty(Constants.FILE, p.toString());
     o.addProperty(Constants.ERROR, e.getMessage());
     failed.add(o);
   }
 }
Example #4
 @Override
 public void store(Value v) {
   // Should be used only if ice goes to HDFS
   assert this == getIce();
   assert !v.isPersisted();
   byte[] m = v.memOrLoad();
   assert (m == null || m.length == v._max); // Assert not saving partial files
   store(new Path(_iceRoot, getIceName(v)), m);
   v.setdsk(); // Set as write-complete to disk
 }
Example #5
  /**
   * Initialize the ModelBuilder, validating all arguments and preparing the training frame. This
   * call is expected to be overridden in the subclasses and each subclass will start with
   * "super.init();". This call is made by the front-end whenever the GUI is clicked, and needs to
   * be fast; heavy-weight prep needs to wait for the trainModel() call.
   *
   * <p>Validate the requested ntrees; precompute actual ntrees. Validate the number of classes to
   * predict on; validate a checkpoint.
   */
  @Override
  public void init(boolean expensive) {
    super.init(expensive);
    if (H2O.ARGS.client && _parms._build_tree_one_node)
      error("_build_tree_one_node", "Cannot run on a single node in client mode");
    if (_vresponse != null) _vresponse_key = _vresponse._key;
    if (_response != null) _response_key = _response._key;

    if (_parms._min_rows < 0) error("_min_rows", "Requested min_rows must be greater than 0");

    if (_parms._ntrees < 0 || _parms._ntrees > MAX_NTREES)
      error("_ntrees", "Requested ntrees must be between 1 and " + MAX_NTREES);
    _ntrees = _parms._ntrees; // Total trees in final model
    if (_parms.hasCheckpoint()) { // Asking to continue from checkpoint?
      Value cv = DKV.get(_parms._checkpoint);
      if (cv != null) { // Look for prior model
        M checkpointModel = cv.get();
        try {
          _parms.validateWithCheckpoint(checkpointModel._parms);
        } catch (H2OIllegalArgumentException e) {
          error(e.values.get("argument").toString(), e.values.get("value").toString());
        }
        if (_parms._ntrees < checkpointModel._output._ntrees + 1)
          error(
              "_ntrees",
              "If checkpoint is specified then requested ntrees must be higher than "
                  + (checkpointModel._output._ntrees + 1));

        // Compute number of trees to build for this checkpoint
        _ntrees = _parms._ntrees - checkpointModel._output._ntrees; // Needed trees
      }
    }
    if (_parms._nbins <= 1) error("_nbins", "_nbins must be > 1.");
    if (_parms._nbins >= 1 << 16) error("_nbins", "_nbins must be < " + (1 << 16));
    if (_parms._nbins_cats <= 1) error("_nbins_cats", "_nbins_cats must be > 1.");
    if (_parms._nbins_cats >= 1 << 16) error("_nbins_cats", "_nbins_cats must be < " + (1 << 16));
    if (_parms._max_depth <= 0) error("_max_depth", "_max_depth must be > 0.");
    if (_parms._min_rows <= 0) error("_min_rows", "_min_rows must be > 0.");
    if (_train != null) {
      double sumWeights =
          _train.numRows() * (hasWeightCol() ? _train.vec(_parms._weights_column).mean() : 1);
      // Need at least 2*min_rows weighted rows to split even once
      if (sumWeights < 2 * _parms._min_rows)
        error(
            "_min_rows",
            "The dataset size is too small to split for min_rows="
                + _parms._min_rows
                + ": must have at least "
                + 2 * _parms._min_rows
                + " (weighted) rows, but have only "
                + sumWeights
                + ".");
    }
    if (_train != null) _ncols = _train.numCols() - 1 - numSpecialCols();
  }
Example #6
  /**
   * Initialize the ModelBuilder, validating all arguments and preparing the training frame. This
   * call is expected to be overridden in the subclasses and each subclass will start with
   * "super.init();". This call is made by the front-end whenever the GUI is clicked, and needs to
   * be fast; heavy-weight prep needs to wait for the trainModel() call.
   *
   * <p>Validate the requested ntrees; precompute actual ntrees. Validate the number of classes to
   * predict on; validate a checkpoint.
   */
  @Override
  public void init(boolean expensive) {
    super.init(expensive);
    if (H2O.ARGS.client && _parms._build_tree_one_node)
      error("_build_tree_one_node", "Cannot run on a single node in client mode");
    if (_vresponse != null) _vresponse_key = _vresponse._key;
    if (_response != null) _response_key = _response._key;
    if (_nclass > SharedTreeModel.SharedTreeParameters.MAX_SUPPORTED_LEVELS)
      error("_nclass", "Too many levels in response column!");

    if (_parms._min_rows < 0) error("_min_rows", "Requested min_rows must be greater than 0");

    if (_parms._ntrees < 0 || _parms._ntrees > 100000)
      error("_ntrees", "Requested ntrees must be between 1 and 100000");
    _ntrees = _parms._ntrees; // Total trees in final model
    if (_parms._checkpoint) { // Asking to continue from checkpoint?
      Value cv = DKV.get(_parms._model_id);
      if (cv != null) { // Look for prior model
        M checkpointModel = cv.get();
        if (_parms._ntrees < checkpointModel._output._ntrees + 1)
          error(
              "_ntrees",
              "Requested ntrees must be between "
                  + (checkpointModel._output._ntrees + 1)
                  + " and 100000");
        _ntrees = _parms._ntrees - checkpointModel._output._ntrees; // Needed trees
      }
    }
    if (_parms._nbins <= 1) error("_nbins", "_nbins must be > 1.");
    if (_parms._nbins >= 1 << 16) error("_nbins", "_nbins must be < " + (1 << 16));
    if (_parms._nbins_cats <= 1) error("_nbins_cats", "_nbins_cats must be > 1.");
    if (_parms._nbins_cats >= 1 << 16) error("_nbins_cats", "_nbins_cats must be < " + (1 << 16));
    if (_parms._max_depth <= 0) error("_max_depth", "_max_depth must be > 0.");
    if (_parms._min_rows <= 0) error("_min_rows", "_min_rows must be > 0.");
    if (_parms._distribution == Distributions.Family.tweedie) {
      _parms._distribution.tweedie.p = _parms._tweedie_power;
    }
    if (_train != null) {
      double sumWeights =
          _train.numRows() * (hasWeightCol() ? _train.vec(_parms._weights_column).mean() : 1);
      // Need at least 2*min_rows weighted rows to split even once
      if (sumWeights < 2 * _parms._min_rows)
        error(
            "_min_rows",
            "The dataset size is too small to split for min_rows="
                + _parms._min_rows
                + ": must have at least "
                + 2 * _parms._min_rows
                + " (weighted) rows, but have only "
                + sumWeights
                + ".");
    }
    if (_train != null) _ncols = _train.numCols() - 1 - numSpecialCols();
  }
  // TODO: almost identical to ModelsHandler; refactor
  public static ModelMetrics getFromDKV(Key key) {
    if (null == key) throw new IllegalArgumentException("Got null key.");

    Value v = DKV.get(key);
    if (null == v) throw new IllegalArgumentException("Did not find key: " + key.toString());

    Iced ice = v.get();
    if (!(ice instanceof ModelMetrics))
      throw new IllegalArgumentException(
          "Expected a Model for key: " + key.toString() + "; got a: " + ice.getClass());

    return (ModelMetrics) ice;
  }
Example #8
  @Override
  public Value lazyArrayChunk(Key key) {
    Key arykey = ValueArray.getArrayKey(key); // From the base file key
    long off = ValueArray.getChunkOffset(key); // The offset
    long size = getFileForKey(arykey).length();
    long rem = size - off;

    // the last chunk can be fat, so it got packed into the earlier chunk
    if (rem < ValueArray.CHUNK_SZ && off > 0) return null;
    int sz = (rem >= ValueArray.CHUNK_SZ * 2) ? (int) ValueArray.CHUNK_SZ : (int) rem;
    Value val = new Value(key, sz, Value.NFS);
    val.setdsk(); // But it's already on disk.
    return val;
  }
Example #9
 // Convert a chunk# into a chunk - does lazy-chunk creation. As chunks are
 // asked-for the first time, we make the Key and an empty backing DVec.
 // Touching the DVec will force the file load.
 @Override
 public Value chunkIdx(int cidx) {
   final long nchk = nChunks();
   assert 0 <= cidx && cidx < nchk;
   Key dkey = chunkKey(cidx);
   Value val1 = DKV.get(dkey); // Check for an existing one... will fetch data as needed
   if (val1 != null) return val1; // Found an existing one?
   // Lazily create a DVec for this chunk
   int len = (int) (cidx < nchk - 1 ? ValueArray.CHUNK_SZ : (_len - chunk2StartElem(cidx)));
   // DVec is just the raw file data with a null-compression scheme
   Value val2 = new Value(dkey, len, null, TypeMap.C1CHUNK, Value.NFS);
   val2.setdsk(); // It is already on disk.
   // Atomically insert: fails on a race, but then return the old version
   Value val3 = DKV.DputIfMatch(dkey, val2, null, null);
   return val3 == null ? val2 : val3;
 }
Example #10
 // Read up to 'len' bytes of Value. Value should already be persisted to
 // disk.  A racing delete can trigger a failure where we get a null return,
 // but no crash (although one could argue that a racing load&delete is a bug
 // no matter what).
 @Override
 public byte[] load(Value v) {
   long skip = 0;
   Key k = v._key;
   // Convert an arraylet chunk into a long-offset from the base file.
   if (k._kb[0] == Key.ARRAYLET_CHUNK) {
     skip = ValueArray.getChunkOffset(k); // The offset
     k = ValueArray.getArrayKey(k); // From the base file key
   }
   if (k._kb[0] == Key.DVEC) {
     skip = water.fvec.NFSFileVec.chunkOffset(k); // The offset
   }
   try {
     FileInputStream s = null;
     try {
       s = new FileInputStream(getFileForKey(k));
       FileChannel fc = s.getChannel();
       fc.position(skip);
       AutoBuffer ab = new AutoBuffer(fc, true, Value.NFS);
       byte[] b = ab.getA1(v._max);
       ab.close();
       assert v.isPersisted();
       return b;
     } finally {
       if (s != null) s.close();
     }
   } catch (IOException e) { // Broken disk / short-file???
     H2O.ignore(e);
     return null;
   }
 }
Example #11
  public static String store2Hdfs(Key srcKey) {
    assert srcKey._kb[0] != Key.ARRAYLET_CHUNK;
    assert PersistHdfs.getPathForKey(srcKey) != null; // Validate key name
    Value v = DKV.get(srcKey);
    if (v == null) return "Key " + srcKey + " not found";
    if (v._isArray == 0) { // Simple chunk?
      v.setHdfs(); // Set to HDFS and be done
      return null; // Success
    }

    // For ValueArrays, make the .hex header
    ValueArray ary = ValueArray.value(v);
    String err = PersistHdfs.freeze(srcKey, ary);
    if (err != null) return err;

    // The task managing which chunks to write next,
    // store in a known key
    TaskStore2HDFS ts = new TaskStore2HDFS(srcKey);
    Key selfKey = ts.selfKey();
    UKV.put(selfKey, ts);

    // Then start writing chunks in-order with the zero chunk
    H2ONode chk0_home = ValueArray.getChunkKey(0, srcKey).home_node();
    RPC.call(ts.chunkHome(), ts);

    // Watch the progress key until it gets removed or an error appears
    long idx = 0;
    while (UKV.get(selfKey, ts) != null) {
      if (ts._indexFrom != idx) {
        System.out.print(" " + idx + "/" + ary.chunks());
        idx = ts._indexFrom;
      }
      if (ts._err != null) { // Found an error?
        UKV.remove(selfKey); // Cleanup & report
        return ts._err;
      }
      try {
        Thread.sleep(100);
      } catch (InterruptedException e) {
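        // Interrupted while polling; ignore and keep waiting for the store task to finish.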
      }
    }
    System.out.println(" " + ary.chunks() + "/" + ary.chunks());

    // PersistHdfs.refreshHDFSKeys();
    return null;
  }
 public Key importFile(int i, Futures fs) {
   if (_ok[i] < H2O.CLOUD.size()) return null;
   File f = new File(_files[i]);
   Key k;
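    // Two paths: the new fluid-vec API publishes the file as a single-column Frame;
    // the old path stores it as a raw Value or ValueArray wrapper.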
   if (_newApi) {
     k = PersistNFS.decodeFile(f);
     NFSFileVec nfs = DKV.get(NFSFileVec.make(f, fs)).get();
     UKV.put(k, new Frame(new String[] {"0"}, new Vec[] {nfs}), fs);
   } else {
     k = PersistNFS.decodeFile(f);
     long size = f.length();
     Value val =
         (size < 2 * ValueArray.CHUNK_SZ)
             ? new Value(k, (int) size, Value.NFS)
             : new Value(k, new ValueArray(k, size), Value.NFS);
     val.setdsk();
     UKV.put(k, val, fs);
   }
   return k;
 }
Example #13
  @SuppressWarnings("unused") // called through reflection by RequestServer
  public JobsV3 fetch(int version, JobsV3 s) {
    Key key = s.job_id.key();
    Value val = DKV.get(key);
    if (null == val) throw new IllegalArgumentException("Job is missing");
    Iced ice = val.get();
    if (!(ice instanceof Job))
      throw new IllegalArgumentException("Must be a Job not a " + ice.getClass());

    Job j = (Job) ice;
    s.jobs = new JobV3[1];
    // s.fillFromImpl(jobs);
    try {
      s.jobs[0] = (JobV3) Schema.schema(version, j).fillFromImpl(j);
    }
    // no special schema for this job subclass, so fall back to JobV3
    catch (H2ONotFoundArgumentException e) {
      s.jobs[0] = new JobV3().fillFromImpl(j);
    }
    return s;
  }
Example #14
 /**
  * Executes phase one of the parser.
  *
  * <p>The first phase detects the encoding and basic statistics of the parsed dataset.
  *
  * <p>For CSV parsers it detects the parser setup and then launches the distributed computation
  * on a per-chunk basis.
  *
  * <p>XLS and XLSX parsers do not work in a distributed way, so they parse the whole dataset.
  *
  * @throws Exception
  */
 public void passOne(CsvParser.Setup setup) throws Exception {
   switch (_parserType) {
     case CSV:
       // precompute the parser setup, column setup and other settings
       byte[] bits = _sourceDataset.getFirstBytes(); // Can limit to eg 256*1024
       if (setup == null) setup = CsvParser.guessCsvSetup(bits);
       if (setup._data == null) {
         _error = "Unable to determine the separator or number of columns on the dataset";
         return;
       }
       _colNames = setup._data[0];
       setColumnNames(_colNames);
       _skipFirstLine = setup._header;
       // set the separator
       this._sep = setup._separator;
       // if parsing value array, initialize the nrows array
       if (_sourceDataset._isArray != 0) {
         ValueArray ary = ValueArray.value(_sourceDataset);
         _nrows = new int[(int) ary.chunks()];
       }
       // launch the distributed parser on its chunks.
       this.invoke(_sourceDataset._key);
       break;
     case XLS:
       // XLS parsing is not distributed, just obtain the value stream and
       // run the parser
       CustomParser p = new XlsParser(this);
       p.parse(_sourceDataset._key);
       --_myrows; // do not count the header
       break;
     case XLSX:
        // XLSX parsing is not distributed, just obtain the value stream and
        // run the parser
       CustomParser px = new XlsxParser(this);
       px.parse(_sourceDataset._key);
       break;
     default:
       throw new Error("NOT IMPLEMENTED");
   }
   // calculate proper numbers of rows for the chunks
   if (_nrows != null) {
     _numRows = 0;
     for (int i = 0; i < _nrows.length; ++i) {
       _numRows += _nrows[i];
       _nrows[i] = _numRows;
     }
   } else {
     _numRows = _myrows;
   }
   // normalize mean
   for (int i = 0; i < _ncolumns; ++i) _mean[i] = _mean[i] / (_numRows - _invalidValues[i]);
 }
Example #15
 // Convert a chunk# into a chunk - does lazy-chunk creation. As chunks are
 // asked-for the first time, we make the Key and an empty backing DVec.
 // Touching the DVec will force the file load.
 @Override
 public Value chunkIdx(int cidx) {
   final long nchk = nChunks();
   assert 0 <= cidx && cidx < nchk;
   Key dkey = chunkKey(cidx);
   Value val1 = DKV.get(dkey); // Check for an existing one... will fetch data as needed
   if (val1 != null) return val1; // Found an existing one?
   // Lazily create a DVec for this chunk
   int len = (int) (cidx < nchk - 1 ? CHUNK_SZ : (_len - chunk2StartElem(cidx)));
   // DVec is just the raw file data with a null-compression scheme
   Value val2 = new Value(dkey, len, null, TypeMap.C1NCHUNK, _be);
   val2.setdsk(); // It is already on disk.
   // If not-home, then block till the Key is everywhere.  Most calls here are
   // from the parser loading a text file, and the parser splits the work such
   // that most puts here are on home - so this is a simple speed optimization:
   // do not make a Futures nor block on it on home.
   Futures fs = dkey.home() ? null : new Futures();
   // Atomically insert: fails on a race, but then return the old version
   Value val3 = DKV.DputIfMatch(dkey, val2, null, fs);
   if (!dkey.home() && fs != null) fs.blockForPending();
   return val3 == null ? val2 : val3;
 }
Example #16
 static boolean checkSaneFrame_impl() {
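    // Walk all locally-homed keys; for every Frame found, verify each of its Vecs is still in the DKV.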
   for (Key k : H2O.localKeySet()) {
     Value val = H2O.raw_get(k);
     if (val.isFrame()) {
       Frame fr = val.get();
        Vec[] vecs = fr.vecs();
       for (int i = 0; i < vecs.length; i++) {
         Vec v = vecs[i];
         if (DKV.get(v._key) == null) {
           System.err.println(
               "Frame "
                   + fr._key
                   + " in the DKV, is missing Vec "
                   + v._key
                   + ", name="
                   + fr._names[i]);
           return false;
         }
       }
     }
   }
   return true;
 }
Example #17
    @Override
    public void lcompute() {
      // Optional: cancel all jobs
      //      for (Job job : Job.all()) {
      //        job.cancel();
      //        Job.waitUntilJobEnded(job.self());
      //      }

      final Set<Key> keySet = H2O.globalKeySet(null);
      for (Key key : keySet) {
        if (!key.home()) continue; // only unlock local keys
        final Value val = DKV.get(key);
        if (val == null) continue;
        if (val.rawPOJO() == null) continue; // need to have a POJO to be locked
        if (!val.isLockable()) continue;
        final Object obj = val.rawPOJO();
        assert (obj instanceof Lockable<?>);
        final Lockable<?> lockable = (Lockable<?>) (obj);
        final Key[] lockers = ((Lockable) obj)._lockers;
        if (lockers != null) {
          // check that none of the locking jobs is still running
          for (Key locker : lockers) {
            if (locker != null && locker.type() == Key.JOB) {
              final Job job = UKV.get(locker);
              if (job != null && job.isRunning())
                throw new UnsupportedOperationException(
                    "Cannot unlock all keys since locking jobs are still running.");
            }
          }
          lockable.unlock_all();
          Log.info("Unlocked key '" + key + "' from " + lockers.length + " lockers.");
        }
      }
      Log.info("All keys are now unlocked.");
      tryComplete();
    }
Example #18
 @Override
 protected JsonArray serve(String filter, int limit) {
   JsonArray array = new JsonArray();
   Key[] keys = new Key[limit];
   int len = 0;
   // Gather some keys that pass all filters
   for (Key key : H2O.keySet()) {
      // Have a filter?  Ignore keys that don't match it.
      if (filter != null && key.toString().indexOf(filter) == -1) continue;
      // Skip non-user (system) keys
      if (!key.user_allowed()) continue;
     Value val = DKV.get(key);
     if (val == null) continue; // Deleted key?
     if (_typeid != 0 && val.type() != _typeid) continue; // Wrong type?
     if (!shouldIncludeKey(key)) continue; // Generic override
     keys[len++] = key; // Capture the key
     if (len == keys.length) break;
   }
   // sort the keys, for pretty display & reliable ordering
   Arrays.sort(keys, 0, len);
   for (int i = 0; i < len; ++i) array.add(new JsonPrimitive(keys[i].toString()));
   return array;
 }
Example #19
 // Store Value v to disk.
 @Override
 public void store(Value v) {
   // Only the home node does persistence on NFS
   if (!v._key.home()) return;
   // A perhaps useless cutout: the upper layers should test this first.
   if (v.isPersisted()) return;
   // Never store arraylets on NFS, instead we'll store the entire array.
   assert !v.isArray();
   try {
      File f = getFileForKey(v._key);
      f.getParentFile().mkdirs(); // Make sure the containing directory exists before writing
      FileOutputStream s = new FileOutputStream(f);
     try {
       byte[] m = v.memOrLoad();
       assert (m == null || m.length == v._max); // Assert not saving partial files
       if (m != null) new AutoBuffer(s.getChannel(), false, Value.NFS).putA1(m, m.length).close();
       v.setdsk(); // Set as write-complete to disk
     } finally {
       s.close();
     }
   } catch (IOException e) {
     H2O.ignore(e);
   }
 }
Example #20
 @Override
 public void delete(final Value v) {
   assert this == getIce();
   assert !v.isPersisted(); // Upper layers already cleared out
   run(
       new Callable() {
         @Override
         public Object call() throws Exception {
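            // Remove the HDFS file backing this Value under the ice root.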
           Path p = new Path(_iceRoot, getIceName(v));
           FileSystem fs = FileSystem.get(p.toUri(), CONF);
           fs.delete(p, true);
           if (v.isArray()) { // Also nuke directory if the top-level ValueArray dies
             p = new Path(_iceRoot, getIceDirectory(v._key));
             fs = FileSystem.get(p.toUri(), CONF);
             fs.delete(p, true);
           }
           return null;
         }
       },
       false,
       0);
 }
Example #21
  /**
   * Copy properties "of the same name" from one POJO to the other. If the fields are named
   * consistently (both sides have fields named "_foo" and/or "bar") this acts like Apache Commons
   * PojoUtils.copyProperties(). If one side has leading underscores and the other does not, then the
   * names are conformed according to the field_naming parameter.
   *
   * @param dest Destination POJO
   * @param origin Origin POJO
   * @param field_naming Are the fields named consistently, or does one side have underscores?
   * @param skip_fields Array of origin or destination field names to skip
   * @param only_fields Array of origin or destination field names to include; ones not in this list
   *     will be skipped
   */
  public static void copyProperties(
      Object dest,
      Object origin,
      FieldNaming field_naming,
      String[] skip_fields,
      String[] only_fields) {
    if (null == dest || null == origin) return;

    Field[] dest_fields = Weaver.getWovenFields(dest.getClass());
    Field[] orig_fields = Weaver.getWovenFields(origin.getClass());

    for (Field orig_field : orig_fields) {
      String origin_name = orig_field.getName();

      if (skip_fields != null && ArrayUtils.contains(skip_fields, origin_name)) continue;

      if (only_fields != null && !ArrayUtils.contains(only_fields, origin_name)) continue;

      String dest_name = null;
      if (field_naming == FieldNaming.CONSISTENT) {
        dest_name = origin_name;
      } else if (field_naming == FieldNaming.DEST_HAS_UNDERSCORES) {
        dest_name = "_" + origin_name;
      } else if (field_naming == FieldNaming.ORIGIN_HAS_UNDERSCORES) {
        dest_name = origin_name.substring(1);
      }

      if (skip_fields != null && ArrayUtils.contains(skip_fields, dest_name)) continue;

      if (only_fields != null && !ArrayUtils.contains(only_fields, dest_name)) continue;

      try {
        Field dest_field = null;
        for (Field fd : dest_fields) {
          if (fd.getName().equals(dest_name)) {
            dest_field = fd;
            break;
          }
        }

        if (dest_field != null) {
          dest_field.setAccessible(true);
          orig_field.setAccessible(true);
          // Log.info("PojoUtils.copyProperties, origin field: " + orig_field + "; destination
          // field: " + dest_field);
          if (null == orig_field.get(origin)) {
            //
            // Assigning null to dest.
            //
            dest_field.set(dest, null);
          } else if (dest_field.getType().isArray()
              && orig_field.getType().isArray()
              && (dest_field.getType().getComponentType()
                  != orig_field.getType().getComponentType())) {
            //
            // Assigning an array to another array.
            //
            // You can't use reflection to set an int[] with an Integer[].  Argh.
            // TODO: other types of arrays. . .
            if (dest_field.getType().getComponentType() == double.class
                && orig_field.getType().getComponentType() == Double.class) {
              //
              // Assigning a Double[] to a double[]: unbox element by element.
              //
              Double[] from = (Double[]) orig_field.get(origin);
              double[] copy = new double[from.length];
              for (int j = 0; j < from.length; j++) copy[j] = from[j];
              dest_field.set(dest, copy);
            } else if (dest_field.getType().getComponentType() == Double.class
                && orig_field.getType().getComponentType() == double.class) {
              //
              // Assigning a double[] to a Double[]: box element by element.
              //
              double[] from = (double[]) orig_field.get(origin);
              Double[] copy = new Double[from.length];
              for (int j = 0; j < from.length; j++) copy[j] = from[j];
              dest_field.set(dest, copy);
            } else if (dest_field.getType().getComponentType() == int.class
                && orig_field.getType().getComponentType() == Integer.class) {
              //
              // Assigning an Integer[] to an int[]: unbox element by element.
              //
              Integer[] from = (Integer[]) orig_field.get(origin);
              int[] copy = new int[from.length];
              for (int j = 0; j < from.length; j++) copy[j] = from[j];
              dest_field.set(dest, copy);
            } else if (dest_field.getType().getComponentType() == Integer.class
                && orig_field.getType().getComponentType() == int.class) {
              //
              // Assigning an int[] to an Integer[]: box element by element.
              //
              int[] from = (int[]) orig_field.get(origin);
              Integer[] copy = new Integer[from.length];
              for (int j = 0; j < from.length; j++) copy[j] = from[j];
              dest_field.set(dest, copy);
            } else if (Schema.class.isAssignableFrom(dest_field.getType().getComponentType())
                && (Schema.getImplClass(
                        (Class<? extends Schema>) dest_field.getType().getComponentType()))
                    .isAssignableFrom(orig_field.getType().getComponentType())) {
              //
              // Assigning an array of impl fields to an array of schema fields, e.g. a
              // DeepLearningParameters[] into a DeepLearningParametersV2[]
              //
              Class dest_component_class = dest_field.getType().getComponentType();
              Schema[] translation =
                  (Schema[])
                      Array.newInstance(
                          dest_component_class, Array.getLength(orig_field.get(origin)));
              int i = 0;
              int version = ((Schema) dest).getSchemaVersion();

              // Look up the schema for each element of the array; if not found fall back to the
              // schema for the base class.
              for (Iced impl : ((Iced[]) orig_field.get(origin))) {
                if (null == impl) {
                  translation[i++] = null;
                } else {
                  Schema s = null;
                  try {
                    s = Schema.schema(version, impl);
                  } catch (H2ONotFoundArgumentException e) {
                    s = ((Schema) dest_field.getType().getComponentType().newInstance());
                  }
                  translation[i++] = s.fillFromImpl(impl);
                }
              }
              dest_field.set(dest, translation);
            } else if (Schema.class.isAssignableFrom(orig_field.getType().getComponentType())
                && Iced.class.isAssignableFrom(dest_field.getType().getComponentType())) {
              //
              // Assigning an array of schema fields to an array of impl fields, e.g. a
              // DeepLearningParametersV2[] into a DeepLearningParameters[]
              //
              // We can't check against the actual impl class I, because we can't instantiate the
              // schema base classes to get the impl class from an instance:
              // dest_field.getType().getComponentType().isAssignableFrom(((Schema)f.getType().getComponentType().newInstance()).getImplClass())) {
              Class dest_component_class = dest_field.getType().getComponentType();
              Iced[] translation =
                  (Iced[])
                      Array.newInstance(
                          dest_component_class, Array.getLength(orig_field.get(origin)));
              int i = 0;
              for (Schema s : ((Schema[]) orig_field.get(origin))) {
                translation[i++] = s.createImpl();
              }
              dest_field.set(dest, translation);
            } else {
              throw H2O.fail(
                  "Don't know how to cast an array of: "
                      + orig_field.getType().getComponentType()
                      + " to an array of: "
                      + dest_field.getType().getComponentType());
            }
            // end of array handling
          } else if (dest_field.getType() == Key.class
              && Keyed.class.isAssignableFrom(orig_field.getType())) {
            //
            // Assigning a Keyed (e.g., a Frame or Model) to a Key.
            //
            dest_field.set(dest, ((Keyed) orig_field.get(origin))._key);
          } else if (orig_field.getType() == Key.class
              && Keyed.class.isAssignableFrom(dest_field.getType())) {
            //
            // Assigning a Key (for e.g., a Frame or Model) to a Keyed (e.g., a Frame or Model).
            //
            Value v = DKV.get((Key) orig_field.get(origin));
            dest_field.set(dest, (null == v ? null : v.get()));
          } else if (KeyV3.class.isAssignableFrom(dest_field.getType())
              && Keyed.class.isAssignableFrom(orig_field.getType())) {
            //
            // Assigning a Keyed (e.g., a Frame or Model) to a KeyV1.
            //
            dest_field.set(
                dest,
                KeyV3.make(
                    ((Class<? extends KeyV3>) dest_field.getType()),
                    ((Keyed) orig_field.get(origin))._key));
          } else if (KeyV3.class.isAssignableFrom(orig_field.getType())
              && Keyed.class.isAssignableFrom(dest_field.getType())) {
            //
            // Assigning a KeyV1 (for e.g., a Frame or Model) to a Keyed (e.g., a Frame or Model).
            //
            KeyV3 k = (KeyV3) orig_field.get(origin);
            Value v = DKV.get(Key.make(k.name));
            dest_field.set(dest, (null == v ? null : v.get()));
          } else if (KeyV3.class.isAssignableFrom(dest_field.getType())
              && Key.class.isAssignableFrom(orig_field.getType())) {
            //
            // Assigning a Key to a KeyV1.
            //
            dest_field.set(
                dest,
                KeyV3.make(
                    ((Class<? extends KeyV3>) dest_field.getType()), (Key) orig_field.get(origin)));
          } else if (KeyV3.class.isAssignableFrom(orig_field.getType())
              && Key.class.isAssignableFrom(dest_field.getType())) {
            //
            // Assigning a KeyV1 to a Key.
            //
            KeyV3 k = (KeyV3) orig_field.get(origin);
            dest_field.set(dest, (null == k.name ? null : Key.make(k.name)));
          } else if (dest_field.getType() == Pattern.class
              && String.class.isAssignableFrom(orig_field.getType())) {
            //
            // Assigning a String to a Pattern.
            //
            dest_field.set(dest, Pattern.compile((String) orig_field.get(origin)));
          } else if (orig_field.getType() == Pattern.class
              && String.class.isAssignableFrom(dest_field.getType())) {
            //
            // We are assigning a Pattern to a String.
            //
            dest_field.set(dest, orig_field.get(origin).toString());
          } else if (dest_field.getType() == FrameV3.ColSpecifierV3.class
              && String.class.isAssignableFrom(orig_field.getType())) {
            //
            // Assigning a String to a ColSpecifier.  Note that we currently support only the
            // colname, not a frame name too.
            //
            dest_field.set(dest, new FrameV3.ColSpecifierV3((String) orig_field.get(origin)));
          } else if (orig_field.getType() == FrameV3.ColSpecifierV3.class
              && String.class.isAssignableFrom(dest_field.getType())) {
            //
            // We are assigning a ColSpecifierV2 to a String.  The column_name gets copied.
            //
            dest_field.set(dest, ((FrameV3.ColSpecifierV3) orig_field.get(origin)).column_name);
          } else if (Enum.class.isAssignableFrom(dest_field.getType())
              && String.class.isAssignableFrom(orig_field.getType())) {
            //
            // Assigning a String into an enum field.
            //
            Class<Enum> dest_class = (Class<Enum>) dest_field.getType();
            dest_field.set(dest, Enum.valueOf(dest_class, (String) orig_field.get(origin)));
          } else if (Enum.class.isAssignableFrom(orig_field.getType())
              && String.class.isAssignableFrom(dest_field.getType())) {
            //
            // Assigning an enum field into a String.
            //
            Object o = orig_field.get(origin);
            dest_field.set(dest, (o == null ? null : o.toString()));
          } else if (Schema.class.isAssignableFrom(dest_field.getType())
              && Schema.getImplClass((Class<? extends Schema>) dest_field.getType())
                  .isAssignableFrom(orig_field.getType())) {
            //
            // Assigning an impl field into a schema field, e.g. a DeepLearningParameters into a
            // DeepLearningParametersV2.
            //
            dest_field.set(
                dest,
                Schema.schema(
                        /* ((Schema)dest).getSchemaVersion() TODO: remove HACK!! */ 3,
                        (Class<? extends Iced>) orig_field.get(origin).getClass())
                    .fillFromImpl((Iced) orig_field.get(origin)));
          } else if (Schema.class.isAssignableFrom(orig_field.getType())
              && Schema.getImplClass((Class<? extends Schema>) orig_field.getType())
                  .isAssignableFrom(dest_field.getType())) {
            //
            // Assigning a schema field into an impl field, e.g. a DeepLearningParametersV2 into a
            // DeepLearningParameters.
            //
            Schema s = ((Schema) orig_field.get(origin));
            dest_field.set(dest, s.fillImpl(s.createImpl()));
          } else if ((Schema.class.isAssignableFrom(dest_field.getType())
              && Key.class.isAssignableFrom(orig_field.getType()))) {
            //
            // Assigning an impl field fetched via a Key into a schema field, e.g. a
            // DeepLearningParameters into a DeepLearningParametersV2.
            // Note that unlike the cases above we don't know the type of the impl class until we
            // fetch in the body of the if.
            //
            Key origin_key = (Key) orig_field.get(origin);
            Value v = DKV.get(origin_key);
            if (null == v || null == v.get()) {
              dest_field.set(dest, null);
            } else {
              if (((Schema) dest_field.get(dest))
                  .getImplClass()
                  .isAssignableFrom(v.get().getClass())) {
                Schema s = ((Schema) dest_field.get(dest));
                dest_field.set(
                    dest,
                    Schema.schema(s.getSchemaVersion(), s.getImplClass()).fillFromImpl(v.get()));
              } else {
                Log.err(
                    "Can't fill Schema of type: "
                        + dest_field.getType()
                        + " with value of type: "
                        + v.getClass()
                        + " fetched from Key: "
                        + origin_key);
                dest_field.set(dest, null);
              }
            }
          } else if (Schema.class.isAssignableFrom(orig_field.getType())
              && Keyed.class.isAssignableFrom(dest_field.getType())) {
            //
            // Assigning a schema field into a Key field, e.g. a DeepLearningV2 into a
            // (DeepLearningParameters) key.
            //
            Schema s = ((Schema) orig_field.get(origin));
            dest_field.set(dest, ((Keyed) s.fillImpl(s.createImpl()))._key);
          } else {
            //
            // Normal case: not doing any type conversion.
            //
            dest_field.set(dest, orig_field.get(origin));
          }
        }
      } catch (IllegalAccessException e) {
        Log.err(
            "Illegal access exception trying to copy field: "
                + origin_name
                + " of class: "
                + origin.getClass()
                + " to field: "
                + dest_name
                + " of class: "
                + dest.getClass());
      } catch (InstantiationException e) {
        Log.err(
            "Instantiation exception trying to copy field: "
                + origin_name
                + " of class: "
                + origin.getClass()
                + " to field: "
                + dest_name
                + " of class: "
                + dest.getClass());
      }
    }
  }
Example #22
  public static byte[] getFirstUnzipedBytes(Value v) {
    byte[] bits = v.getFirstBytes();
    try {
      return unzipBytes(bits, guessCompressionMethod(bits));
    } catch (Exception e) {
      return null;
    }
  }