Example #1
0
  /**
   * Recursively walks the directory {@code p} and registers every regular file found beneath it.
   *
   * <p>For each regular file, {@code HdfsFileVec.make} is invoked and the string form of the
   * returned key is appended to {@code keys}. Sub-directories are handled by a recursive call.
   * File names ending in {@code Extensions.JSON} or {@code Extensions.HEX} are not supported and
   * raise {@code H2O.unimpl()}.
   *
   * <p>Any exception raised while processing {@code p} (including the unsupported-extension case)
   * is caught here, logged, and recorded by appending {@code p} to {@code failed}; the remaining
   * entries of {@code p} are then skipped.
   *
   * @param fs file system to list; when {@code null} the method returns without doing anything
   * @param p directory whose entries are listed
   * @param keys receives the string form of every key created
   * @param failed receives the path of each directory whose processing threw
   */
  private static void addFolder2(
      FileSystem fs, Path p, ArrayList<String> keys, ArrayList<String> failed) {
    try {
      if (fs == null) return;

      // NOTE(review): these futures are handed to HdfsFileVec.make but never waited on in this
      // method - confirm whether a blocking call is expected before returning.
      Futures futures = new Futures();
      for (FileStatus file : fs.listStatus(p)) {
        Path pfs = file.getPath();
        if (file.isDir()) {
          addFolder2(fs, pfs, keys, failed);
          continue;
        }
        String name = pfs.getName();
        if (name.endsWith(Extensions.JSON) || name.endsWith(Extensions.HEX)) {
          // JSON and Hex files are not handled yet
          throw H2O.unimpl();
        }
        Key k = HdfsFileVec.make(file, futures);
        keys.add(k.toString());
        Log.info("PersistHdfs: DKV.put(" + k + ")");
      }
    } catch (Exception e) {
      Log.err(e);
      failed.add(p.toString());
    }
  }
Example #2
0
 /**
  * Copies runs of rows from the selected input columns into the output chunks.
  *
  * <p>Rows are copied in half-open runs {@code [rlo, rhi)}. When {@code _rows} is null a single
  * run covering the whole chunk ({@code 0} to {@code chks[0]._len}) is copied. Otherwise the
  * selectors in {@code _rows} are consumed one run at a time: a selector whose value minus one is
  * negative is treated as an exclusion, anything else as an inclusion. For every run, column
  * {@code _cols[i]} of {@code chks} is appended to {@code nchks[i]}.
  *
  * @param chks input chunks; {@code chks[0]} supplies the starting row and row count
  * @param nchks output chunks, one per entry of {@code _cols}
  */
 @Override
 public void map(Chunk chks[], NewChunk nchks[]) {
   long rstart = chks[0]._start;
   int rlen = chks[0]._len; // Total row count
   int rx = 0; // Which row to in/ex-clude
   int rlo = 0; // Lo/Hi for this block of rows
   int rhi = rlen;
   while (true) { // Still got rows to include?
     if (_rows != null) { // Got a row selector?
       if (rx >= _rows.length) break; // All done with row selections
       long r = _rows[rx++] - 1; // Next row selector
       if (r < 0) { // Row exclusion
         // NOTE(review): rx was already advanced above, so _rows[rx] is read here without a
         // bounds check; this looks like it can index past the end when the exclusion is the
         // last selector - confirm.
         if (rx > 0 && _rows[rx - 1] < _rows[rx]) throw H2O.unimpl();
         // For a selector -k, r is -k-1, so er is k-1
         long er = Math.abs(r) - 2;
         if (er < rstart) continue;
         // scoop up all of the rows before the first exclusion
         if (rx == 1 && ((int) (er + 1 - rstart)) > 0 && _ex) {
           // NOTE(review): elsewhere rlo is computed relative to rstart; assigning rstart itself
           // looks suspicious when rstart != 0 - confirm.
           rlo = (int) rstart;
           rhi = (int) (er - rstart);
           _ex = false;
           rx--; // Revisit the same selector on the next pass, now with _ex cleared
         } else {
           rlo = (int) (er + 1 - rstart);
           // TODO: handle jumbled row indices ( e.g. -c(1,5,3) )
           while (rx < _rows.length && (_rows[rx] + 1 == _rows[rx - 1] && rlo < rlen)) {
             if (rx < _rows.length - 1 && _rows[rx] < _rows[rx + 1]) throw H2O.unimpl();
             rx++;
             rlo++; // Exclude consecutive rows
           }
           // Run ends at the next remaining selector, or at the end of the chunk if none is left
           rhi = rx >= _rows.length ? rlen : (int) Math.abs(_rows[rx] - 1) - 2;
           if (rx < _rows.length - 1 && _rows[rx] < _rows[rx + 1]) throw H2O.unimpl();
         }
       } else { // Positive row list?
         if (r < rstart) continue;
         rlo = (int) (r - rstart);
         rhi = rlo + 1; // Stop at the next row
         while (rx < _rows.length && (_rows[rx] - 1 - rstart) == rhi && rhi < rlen) {
           rx++;
           rhi++; // Grab sequential rows
         }
       }
     }
     // Process this next set of rows
     // For all cols in the new set
     for (int i = 0; i < _cols.length; i++) {
       Chunk oc = chks[_cols[i]];
       NewChunk nc = nchks[i];
       if (oc._vec.isInt()) { // Slice on integer columns
         for (int j = rlo; j < rhi; j++)
           if (oc.isNA0(j)) nc.addNA();
           else nc.addNum(oc.at80(j), 0);
       } else { // Slice on double columns
         for (int j = rlo; j < rhi; j++) nc.addNum(oc.at0(j));
       }
     }
     rlo = rhi;
     if (_rows == null) break; // No selector: the single whole-chunk run is done
   }
 }
Example #3
0
 // Storing into a NewChunk is unusual: it only happens after the NewChunk was
 // inflated from some other chunk.  By then it is at full size, appending is
 // over, and the xs exponent array is expected to hold nothing but zeros.
 // The index must be in range and names an inflated value of the original
 // Chunk.
 @Override
 boolean set_impl(int i, long l) {
   // Neither a present _ds array nor a _len2/_len mismatch is supported here
   if (_ds != null || _len2 != _len) {
     throw H2O.unimpl();
   }
   _ls[i] = l;
   _xs[i] = 0; // plain long value: exponent slot is zeroed
   return true;
 }
Example #4
0
  /**
   * Builds one new Frame out of the evaluated arguments {@code asts[1..]}.
   *
   * <p>Frame arguments contribute their columns (after {@code makeCompatible}); numeric
   * arguments become a constant column named after the number. All non-empty frame arguments
   * must agree on their row count. Function, string and any other value types are not
   * implemented.
   *
   * @throws IllegalArgumentException if two non-empty frame arguments differ in row count
   */
  @Override
  Val apply(Env env, Env.StackHelp stk, AST asts[]) {
    final int nargs = asts.length;

    // Pass 1: evaluate every argument (slot 0 is skipped) and settle on the shared row count,
    // taken from the first non-empty frame.
    Val[] vals = new Val[nargs];
    Vec vec = null;
    for (int i = 1; i < nargs; i++) {
      vals[i] = stk.track(asts[i].exec(env));
      if (!vals[i].isFrame()) continue;
      Vec anyvec = vals[i].getFrame().anyVec();
      if (anyvec == null) continue; // An empty frame has no say in the row count
      if (vec == null) {
        vec = anyvec;
        continue;
      }
      if (vec.length() != anyvec.length()) {
        throw new IllegalArgumentException(
            "cbind frames must have all the same rows, found "
                + vec.length()
                + " and "
                + anyvec.length()
                + " rows.");
      }
    }

    // No frame supplied a row count: fall back to a temporary one-row Vec, removed at the end
    final boolean clean = (vec == null);
    if (clean) {
      vec = Vec.makeZero(1);
    }

    // Pass 2: assemble the result frame, argument by argument
    Frame fr = new Frame();
    for (int i = 1; i < nargs; i++) {
      switch (vals[i].type()) {
        case Val.FRM:
          fr.add(fr.makeCompatible(vals[i].getFrame()));
          break;
        case Val.NUM:
          // A scalar becomes a constant column built from the reference Vec
          double d = vals[i].getNum();
          fr.add(Double.toString(d), vec.makeCon(d));
          break;
        case Val.FUN:
        case Val.STR:
        default:
          throw H2O.unimpl();
      }
    }
    if (clean) {
      vec.remove();
    }

    return new ValFrame(fr);
  }
Example #5
0
 /**
  * Reads element {@code i} as a double: straight from {@code _ds} when that array is present,
  * otherwise by delegating to {@code at8_impl}. A {@code _len2}/{@code _len} mismatch is not
  * implemented.
  */
 @Override
 public double atd_impl(int i) {
   if (_len2 != _len) {
     throw H2O.unimpl();
   }
   if (_ds != null) {
     assert _xs == null;
     return _ds[i];
   }
   return at8_impl(i);
 }
Example #6
0
 // ------------------------------------------------------------------------
 // Zipped file; no parallel decompression; decompress into local chunks,
 // parse local chunks; distribute chunks later.
 //
 // Reads from 'is' while it reports available bytes, parsing one chunk per
 // iteration into the current writer.  'bvs' is polled with a zero-length
 // read as a back-channel for a chunk index; whenever that index advances,
 // the current writer is closed, reduced into 'dout' (unless it is 'dout'
 // itself) and replaced by its nextChunk().  Returns 'dout'.
 // Throws H2O.unimpl() when the parse type does not support parallel parse.
 ParseWriter streamParseZip(final InputStream is, final StreamParseWriter dout, InputStream bvs)
     throws IOException {
   // All output into a fresh pile of NewChunks, one per column
   if (!_setup._parse_type._parallelParseSupported) throw H2O.unimpl();
   StreamData din = new StreamData(is);
   int cidx = 0;
   StreamParseWriter nextChunk = dout;
   int zidx = bvs.read(null, 0, 0); // Back-channel read of chunk index
   assert zidx == 1;
   while (is.available() > 0) {
     int xidx = bvs.read(null, 0, 0); // Back-channel read of chunk index
     if (xidx > zidx) { // Advanced chunk index of underlying ByteVec stream?
       zidx = xidx; // Record advancing of chunk
       nextChunk.close(); // Match output chunks to input zipfile chunks
       if (dout != nextChunk) {
         dout.reduce(nextChunk);
         // NOTE(review): on this break nextChunk has already been closed and reduced, yet the
         // code after the loop parses into it, closes it and reduces it again - confirm that
         // this is intended for a cancelled/crashed job.
         if (_jobKey != null && ((Job) DKV.getGet(_jobKey)).isCancelledOrCrashed()) break;
       }
       nextChunk = nextChunk.nextChunk();
     }
     parseChunk(cidx++, din, nextChunk);
   }
   parseChunk(cidx, din, nextChunk); // Parse the remaining partial 32K buffer
   nextChunk.close();
   if (dout != nextChunk) dout.reduce(nextChunk); // Fold the last writer into the result
   return dout;
 }
Example #7
0
    /**
     * Evaluates the derivative of the configured link function at {@code x}.
     *
     * <p>Original author's note: this computes an inverse of what R does.
     *
     * @param x point at which the derivative is evaluated
     * @return the derivative value; for logit the result is capped at 1e6 when the denominator
     *     {@code x * (1 - x)} falls below 1e-6
     */
    public final double linkDeriv(double x) {
      switch (_link) {
        case logit:
          {
            // (a multinomial case label was left commented out at this spot)
            final double denom = x * (1 - x);
            // Cap the result instead of dividing by a tiny denominator
            return denom < 1e-6 ? 1e6 : 1.0 / denom;
          }
        case identity:
          return 1;
        case log:
          return 1.0 / x;
        case inverse:
          return -1.0 / (x * x);
        case tweedie:
          {
            if (_tweedie_link_power == 0) {
              // Power 0: same shape as the log case, with x floored at 2e-16
              return 1.0 / Math.max(2e-16, x);
            }
            return _tweedie_link_power * Math.pow(x, _tweedie_link_power - 1);
          }
        default:
          throw H2O.unimpl();
      }
    }
Example #8
0
 /**
  * Fills {@code normMul} and {@code normSub} for {@code n} vecs of {@code _adaptedFrame},
  * starting at {@code vecStart}, according to the transform {@code t}.
  *
  * <p>The multiplier falls back to 1.0 whenever the would-be divisor is not usable (sigma equal
  * to 0, or max minus min not greater than 0).
  *
  * @param t transform to apply; unhandled values are not implemented
  * @param normMul receives one multiplier per vec
  * @param normSub receives one offset per vec
  * @param vecStart index of the first vec in {@code _adaptedFrame}
  * @param n number of vecs to process
  */
 private void setTransform(
     TransformType t, double[] normMul, double[] normSub, int vecStart, int n) {
   for (int col = 0; col < n; col++) {
     final Vec v = _adaptedFrame.vec(vecStart + col);
     switch (t) {
       case STANDARDIZE:
         {
           final double sigma = v.sigma();
           normMul[col] = (sigma != 0) ? 1.0 / sigma : 1.0;
           normSub[col] = v.mean();
           break;
         }
       case NORMALIZE:
         {
           final double range = v.max() - v.min();
           normMul[col] = (range > 0) ? 1.0 / range : 1.0;
           normSub[col] = v.mean();
           break;
         }
       case DEMEAN:
         {
           normMul[col] = 1;
           normSub[col] = v.mean();
           break;
         }
       case DESCALE:
         {
           final double sigma = v.sigma();
           normMul[col] = (sigma != 0) ? 1.0 / sigma : 1.0;
           normSub[col] = 0;
           break;
         }
       default:
         throw H2O.unimpl();
     }
     assert !Double.isNaN(normMul[col]);
     assert !Double.isNaN(normSub[col]);
   }
 }
Example #9
0
 /**
  * Fills {@code normMul} and {@code normSub} for {@code n} vecs of {@code _adaptedFrame},
  * starting at {@code vecStart}, according to the transform {@code t}.
  *
  * <p>An interaction vec occupies {@code nextNumericIdx(i)} output slots: the first gets the
  * computed multiplier and offset, the following ones get a multiplier of 1.
  *
  * @param t transform to apply; unhandled values are not implemented
  * @param normMul receives the multipliers
  * @param normSub receives the offsets
  * @param vecStart index of the first vec in {@code _adaptedFrame}
  * @param n number of vecs to process
  */
 private void setTransform(
     TransformType t, double[] normMul, double[] normSub, int vecStart, int n) {
   int slot = 0; // output position; equals the vec counter unless interactions are in play
   for (int col = 0; col < n; ++col) {
     final Vec v = _adaptedFrame.vec(vecStart + col);
     final boolean interaction = isInteractionVec(vecStart + col);
     switch (t) {
       case STANDARDIZE:
         {
           final double sigma = v.sigma();
           normMul[slot] = (sigma != 0) ? 1.0 / sigma : 1.0;
           normSub[slot] = v.mean();
           break;
         }
       case NORMALIZE:
         {
           final double range = v.max() - v.min();
           normMul[slot] = (range > 0) ? 1.0 / range : 1.0;
           normSub[slot] = v.mean();
           break;
         }
       case DEMEAN:
         {
           normMul[slot] = 1;
           normSub[slot] = v.mean();
           break;
         }
       case DESCALE:
         {
           final double sigma = v.sigma();
           normMul[slot] = (sigma != 0) ? 1.0 / sigma : 1.0;
           normSub[slot] = 0;
           break;
         }
       default:
         throw H2O.unimpl();
     }
     if (interaction) {
       // The remaining slots of an interaction vec get a neutral multiplier
       for (int j = slot + 1; j < nextNumericIdx(col) + slot; j++) {
         normMul[j] = 1;
       }
     }
     assert !Double.isNaN(normMul[slot]);
     assert !Double.isNaN(normSub[slot]);
     if (interaction) {
       slot = slot + nextNumericIdx(col);
     } else {
       slot = slot + 1;
     }
   }
 }
Example #10
0
 // TODO: A constant response should not be treated as regression; getModelCategory() needs an
 // override for that.
 /**
  * Creates the metric builder that matches the model category of {@code _output}.
  *
  * @param domain response domain handed to the builder
  * @return a binomial or multinomial metric builder; other categories are not implemented
  */
 @Override
 public ModelMetrics.MetricBuilder makeMetricBuilder(String[] domain) {
   final ModelMetrics.MetricBuilder builder;
   switch (_output.getModelCategory()) {
     case Binomial:
       builder = new ModelMetricsBinomial.MetricBuilderBinomial(domain);
       break;
     case Multinomial:
       builder = new ModelMetricsMultinomial.MetricBuilderMultinomial(domain.length, domain);
       break;
     default:
       throw H2O.unimpl();
   }
   return builder;
 }
Example #11
0
 /**
  * Checks whether {@code v} is in this list of numbers.
  *
  * <p>NB: all contiguous ranges have already been checked to have stride 1. Must only be called
  * once the list is sorted. Negative {@code v} (negative indexing) is not implemented.
  *
  * @param v value to look up
  * @return {@code true} if {@code v} equals a base or falls inside the range that starts at the
  *     closest base below it; {@code false} otherwise, including when {@code v} is smaller than
  *     every base
  */
 boolean has(long v) {
   assert _isSort; // Only called when already sorted
   // do something special for negative indexing... that does not involve
   // allocating arrays, once per list element!
   if (v < 0) throw H2O.unimpl();
   int idx = Arrays.binarySearch(_bases, v);
   if (idx >= 0) return true;
   idx = -idx - 2; // See Arrays.binarySearch; returns (-idx-1), we want +idx-1
   // v sorts before the first base: no range can contain it.  Without this guard the lookups
   // below would index _bases[-1].
   if (idx < 0) return false;
   assert _bases[idx] < v; // Sanity check binary search
   return v < _bases[idx] + _cnts[idx] * _strides[idx];
 }
Example #12
0
 /**
  * Parses the stream {@code is} chunk by chunk into {@code dout}.
  *
  * <p>Chunks are parsed while the stream reports available bytes and the job (if a job key is
  * set) is neither cancelled nor crashed; one more chunk is parsed after the loop for the
  * trailing partial buffer.
  *
  * @return {@code dout}
  * @throws IOException propagated from the stream
  */
 ParseWriter streamParse(final InputStream is, final ParseWriter dout) throws IOException {
   if (!_setup._parse_type._parallelParseSupported) {
     throw H2O.unimpl();
   }
   StreamData din = new StreamData(is);
   int cidx = 0;
   // FIXME _jobKey stays null until sampling is done, which means entire zip files
   // FIXME get parsed for parseSetup
   while (is.available() > 0) {
     if (_jobKey != null && ((Job) DKV.getGet(_jobKey)).isCancelledOrCrashed()) {
       break;
     }
     parseChunk(cidx++, din, dout);
   }
   parseChunk(cidx, din, dout); // Parse the remaining partial 32K buffer
   return dout;
 }
Example #13
0
 /**
  * Marks element {@code i} as missing. Nothing is written when it already is missing; otherwise
  * whichever of the {@code _ls}/{@code _xs} and {@code _ds} arrays are present get updated.
  *
  * @return always {@code true}
  */
 @Override
 boolean setNA_impl(int i) {
   if (!isNA(i)) {
     if (_len2 != _len) {
       throw H2O.unimpl();
     }
     if (_ls != null) {
       _ls[i] = 0;
       _xs[i] = Integer.MIN_VALUE;
     }
     if (_ds != null) {
       _ds[i] = Double.NaN;
     }
   }
   return true;
 }
Example #14
0
 /**
  * Reports whether this transform is one of the sigma-scaled ones.
  *
  * @return {@code true} for STANDARDIZE and DESCALE, {@code false} for NONE, DEMEAN and
  *     NORMALIZE; any other constant is not implemented
  */
 public boolean isSigmaScaled() {
   switch (this) {
     case STANDARDIZE:
     case DESCALE:
       return true;
     case NONE:
     case DEMEAN:
     case NORMALIZE:
       return false;
     default:
       throw H2O.unimpl();
   }
 }
Example #15
0
 /**
  * Stores the double {@code d} at index {@code i}.
  *
  * <p>If the values are currently held in {@code _ls}/{@code _xs}, they are first converted
  * into a freshly allocated {@code _ds} array (NA and enum entries become NaN) and the
  * {@code _ls}/{@code _xs} arrays are dropped.
  *
  * @return always {@code true}
  */
 @Override
 boolean set_impl(int i, double d) {
   if (_ls != null) { // Switch the backing storage over to doubles
     if (_len2 != _len) {
       throw H2O.unimpl();
     }
     final double[] inflated = MemoryManager.malloc8d(_len);
     for (int row = 0; row < _len; row++) {
       if (isNA(row) || isEnum(row)) {
         inflated[row] = Double.NaN;
       } else {
         inflated[row] = _ls[row] * Math.pow(10, _xs[row]);
       }
     }
     _ds = inflated;
     _ls = null;
     _xs = null;
   }
   _ds[i] = d;
   return true;
 }
Example #16
0
 /**
  * Tells whether {@code _link} is the link this method pairs with {@code _family}: identity for
  * gaussian, logit for binomial, log for poisson, inverse for gamma.
  *
  * <p>Other families are not implemented (a tweedie case returning false was left commented
  * out).
  */
 public final boolean canonical() {
   final Link expected;
   switch (_family) {
     case gaussian:
       expected = Link.identity;
       break;
     case binomial:
       expected = Link.logit;
       break;
     case poisson:
       expected = Link.log;
       break;
     case gamma:
       expected = Link.inverse;
       break;
     default:
       throw H2O.unimpl();
   }
   return _link == expected;
 }
Example #17
0
 /**
  * Evaluates the derivative of the configured link function at {@code x}.
  *
  * @param x point at which the derivative is evaluated
  * @return the derivative value; unhandled links are not implemented
  */
 public final double linkDeriv(double x) {
   final double deriv;
   switch (link) {
     case logit:
       deriv = 1 / (x * (1 - x));
       break;
     case identity:
       deriv = 1;
       break;
     case log:
       deriv = 1.0 / x;
       break;
     case inverse:
       deriv = -1.0 / (x * x);
       break;
     case tweedie:
       deriv = tweedie_link_power * Math.pow(x, tweedie_link_power - 1);
       break;
     default:
       throw H2O.unimpl();
   }
   return deriv;
 }
Example #18
0
    /**
     * Expands this row into a dense array of length {@code fullN()}, plus one trailing slot when
     * {@code _intercept} is set.
     *
     * <p>Each of the first {@code nBins} entries of {@code binIds} marks a position set to 1.
     * Numeric values are copied in bulk starting at {@code numStart()} when {@code numIds} is
     * null, otherwise each value is placed at the position named by {@code numIds}. The
     * intercept slot, if any, is set to 1. Sparse rows and rows with responses are not
     * implemented.
     *
     * @return the newly allocated expanded array
     */
    public double[] expandCats() {
      if (isSparse() || _responses > 0) {
        throw H2O.unimpl();
      }

      final int width = fullN();
      final int firstNum = numStart();
      final double[] expanded = new double[_intercept ? width + 1 : width];

      for (int b = 0; b < nBins; ++b) {
        expanded[binIds[b]] = 1;
      }
      if (numIds != null) {
        for (int k = 0; k < nNums; ++k) {
          expanded[numIds[k]] = numVals[k];
        }
      } else {
        System.arraycopy(numVals, 0, expanded, firstNum, numVals.length);
      }
      if (_intercept) {
        expanded[expanded.length - 1] = 1;
      }
      return expanded;
    }
Example #19
0
 /**
  * Evaluates the derivative of the configured link function at {@code x}.
  *
  * <p>Original author's note: this computes an inverse of what R does.
  *
  * @param x point at which the derivative is evaluated
  * @return the derivative value; for logit the result is capped at 1e6 when the denominator
  *     {@code x * (1 - x)} falls below 1e-6
  */
 public final double linkDeriv(double x) {
   switch (_link) {
     case logit:
       {
         // (a multinomial case label was left commented out at this spot)
         final double denom = x * (1 - x);
         // Cap the result instead of dividing by a tiny denominator
         return denom < 1e-6 ? 1e6 : 1.0 / denom;
       }
     case identity:
       return 1;
     case log:
       return 1.0 / x;
     case inverse:
       return -1.0 / (x * x);
     case tweedie:
       {
         if (_link_power == 0) {
           // Power 0: same shape as the log case, with x floored at 2e-16
           return 1.0 / Math.max(2e-16, x);
         }
         return _link_power * Math.pow(x, _link_power - 1);
       }
     default:
       throw H2O.unimpl();
   }
 }
Example #20
0
  /**
   * Replaces the parsed type of every column in {@code _colTypes} with a storage encoding,
   * adjusting {@code _bases}, {@code _scale}, {@code _min} and {@code _max} along the way.
   *
   * <p>Integer and enum columns get the narrowest of BYTE/SHORT/INT/LONG that covers their
   * range. Float/double columns are stored as DBYTE/DSHORT when the scaled range is small
   * enough and the scaled base fits an int; otherwise they stay FLOAT/DOUBLE. Unknown column
   * types are not implemented.
   */
  @SuppressWarnings("fallthrough")
  private void calculateColumnEncodings() {
    assert (_bases != null);
    assert (_min != null);
    for (int col = 0; col < _ncolumns; ++col) {
      switch (_colTypes[col]) {
        case UCOL: // only missing values
          _colTypes[col] = BYTE;
          break;
        case ECOL: // enum
          if (_enums[col] != null && !_enums[col].isKilled()) {
            _max[col] = _colDomains[col].length - 1;
            _min[col] = 0;
            if (_max[col] < 256) {
              _colTypes[col] = BYTE;
            } else if (_max[col] < 65536) {
              _colTypes[col] = SHORT;
            } else {
              _colTypes[col] = INT;
            }
          } else {
            // No usable enum: the column is kept as a string column
            _max[col] = 0;
            _min[col] = 0;
            _colTypes[col] = STRINGCOL;
          }
          break;
        case ICOL: // number
          {
            final double span = _max[col] - _min[col];
            if (span < 255) {
              _colTypes[col] = BYTE;
              _bases[col] = (int) _min[col];
            } else if (span < 65535) {
              _colTypes[col] = SHORT;
              _bases[col] = (int) _min[col];
            } else if (span < (1L << 32)) {
              _colTypes[col] = INT;
              _bases[col] = (int) _min[col];
            } else {
              _colTypes[col] = LONG;
            }
            break;
          }
        case FCOL:
        case DCOL:
          {
            // Try the compact scaled encodings first
            boolean packed = false;
            if (_scale[col] >= -4
                && (_max[col] <= powers10i[powers10i.length - 1])
                && (_min[col] >= -powers10i[powers10i.length - 1])) {
              final double s = pow10(-_scale[col]);
              final double range = s * (_max[col] - _min[col]);
              final double base = s * _min[col];
              if (range < 256) {
                if (fitsIntoInt(base)) { // the base must fit into an int
                  _colTypes[col] = DBYTE;
                  _bases[col] = (int) base;
                  packed = true;
                }
              } else if (range < 65535) {
                if (fitsIntoInt(base)) {
                  _colTypes[col] = DSHORT;
                  _bases[col] = (int) (base);
                  packed = true;
                }
              }
            }
            if (!packed) {
              // No compact form applies: store as plain float/double
              _scale[col] = 0;
              _bases[col] = 0;
              if (_colTypes[col] == FCOL) {
                _colTypes[col] = FLOAT;
              } else {
                _colTypes[col] = DOUBLE;
              }
            }
            break;
          }
        case TCOL: // Time; millis since jan 1, 1970
          _scale[col] = -1;
          _bases[col] = 0;
          _min[col] = 0.0;
          _max[col] = System.currentTimeMillis();
          _colTypes[col] = LONG;
          break;

        default:
          throw H2O.unimpl();
      }
    }
  }
Example #21
0
 /**
  * Checksum hook; not implemented here. Every call throws the exception produced by
  * {@code H2O.unimpl()}.
  */
 @Override
 protected long checksum_impl() {
   throw H2O.unimpl();
 } // don't really need checksum
Example #22
0
 /**
  * Not implemented here. Every call throws the exception produced by {@code H2O.unimpl()}
  * instead of returning a string.
  */
 @Override
 protected String errStr() {
   throw H2O.unimpl();
 }
Example #23
0
 /**
  * Not implemented here. Every call throws the exception produced by {@code H2O.unimpl()}
  * instead of returning a string.
  */
 @Override
 public String str() {
   throw H2O.unimpl();
 }
Example #24
0
 /**
  * Not implemented here. Every call throws the exception produced by {@code H2O.unimpl()};
  * neither {@code sb} nor {@code jname} is used.
  */
 public StringBuilder toJavaStr(StringBuilder sb, String jname) {
   throw H2O.unimpl();
 }
Example #25
0
 /**
  * Not implemented here. Every call throws the exception produced by {@code H2O.unimpl()}
  * instead of returning a name.
  */
 public String unique_name() {
   throw H2O.unimpl();
 }
Example #26
0
 /**
  * Not implemented here. Every call throws the exception produced by {@code H2O.unimpl()};
  * neither {@code sb} nor {@code jname} is used.
  */
 @Override
 public StringBuilder toJavaNum(StringBuilder sb, String jname) {
   throw H2O.unimpl();
 }
Example #27
0
 /**
  * Reads element {@code i} as a long: from {@code _ls} multiplied by
  * {@code DParseTask.pow10i(_xs[i])} when {@code _ls} is present, otherwise by casting the
  * {@code _ds} entry. A {@code _len2}/{@code _len} mismatch is not implemented.
  */
 @Override
 public long at8_impl(int i) {
   if (_len2 != _len) {
     throw H2O.unimpl();
   }
   if (_ls != null) {
     return _ls[i] * DParseTask.pow10i(_xs[i]);
   }
   return (long) _ds[i];
 }
Example #28
0
 /**
  * Convert a collection of hyper-parameters into an array-of-doubles. Missing hyper parms will be
  * filled in with the default value. Error if the value cannot be represented as a double.
  *
  * <p>NOTE: the conversion described above is not implemented yet; the body currently always
  * throws the exception produced by {@code H2O.unimpl()} and never reads {@code hypers}.
  *
  * @param hypers A set of {hyper-parameter field names, values}
  * @return The same set as a double[]
  */
 private double[] hyper2double(Map<String, Object> hypers) {
   throw H2O.unimpl();
 }
Example #29
0
 /**
  * Reports whether element {@code i} is missing by delegating to {@code isNA}. A
  * {@code _len2}/{@code _len} mismatch is not implemented.
  */
 @Override
 public boolean isNA_impl(int i) {
   if (_len2 == _len) {
     return isNA(i);
   }
   throw H2O.unimpl();
 }
Example #30
0
 /**
  * Checksum hook; not implemented here. Every call throws the exception produced by
  * {@code H2O.unimpl()}.
  */
 @Override
 protected long checksum_impl() {
   throw H2O.unimpl();
 }