コード例 #1
0
ファイル: DataInfo.java プロジェクト: laurendiperna/h2o-3
 /**
  * Populates {@code row} with the dense representation of row {@code rid} read from
  * {@code chunks}.
  *
  * <p>Processing order:
  *
  * <ol>
  *   <li>Clear {@code row.bad}; set {@code row.rid} to {@code rid + chunks[0].start()} and
  *       {@code row.cid} to {@code rid}.
  *   <li>When {@code _weights} is set, read the weight; return immediately if the row's weight
  *       is zero.
  *   <li>When {@code _skipMissing} is set, mark the row bad and return if any of the first
  *       {@code _cats + _nums} chunks is NA at {@code rid}.
  *   <li>Store the non-negative categorical ids in {@code row.binIds} and their count in
  *       {@code row.nBins}; an NA categorical is replaced by {@code _catModes[i]}.
  *   <li>Store numeric values in {@code row.numVals}. Interaction columns are written at an
  *       offset from the current write position; other columns have NaN replaced by
  *       {@code _numMeans[i]} and are shifted/scaled when both {@code _normSub} and
  *       {@code _normMul} are present.
  *   <li>Read each response, shift/scale it when {@code _normRespMul} is present, and mark the
  *       row bad and return on a NaN response.
  *   <li>When {@code _offset} is set, read the offset.
  * </ol>
  *
  * @param chunks chunks to read values from
  * @param rid row index used for every chunk access
  * @param row row object to fill; it is mutated in place
  * @return the same {@code row} instance that was passed in
  * @throws RuntimeException wrapping anything thrown while reading a response value
  */
 public final Row extractDenseRow(Chunk[] chunks, int rid, Row row) {
   row.bad = false;
   row.rid = rid + chunks[0].start();
   row.cid = rid;
   if (_weights) {
     row.weight = chunks[weightChunkId()].atd(rid);
   }
   // When weights are disabled this checks whatever weight the row already holds.
   if (row.weight == 0) {
     return row;
   }

   if (_skipMissing) {
     final int predictorCount = _cats + _nums;
     for (int col = 0; col < predictorCount; col++) {
       if (chunks[col].isNA(rid)) {
         row.bad = true;
         return row;
       }
     }
   }

   // Categorical columns: keep only ids that getCategoricalId reports as non-negative.
   int binCount = 0;
   for (int cat = 0; cat < _cats; cat++) {
     final int binId =
         getCategoricalId(
             cat, !chunks[cat].isNA(rid) ? (int) chunks[cat].at8(rid) : _catModes[cat]);
     if (binId < 0) {
       continue;
     }
     row.binIds[binCount] = binId;
     binCount++;
   }
   row.nBins = binCount;

   // Numeric columns: denseIdx is the write cursor into row.numVals. It moves by one for a
   // plain column and by nextNumericIdx(num) for an interaction column.
   final int numericCount = _nums;
   int denseIdx = 0;
   for (int num = 0; num < numericCount; num++) {
     final int col = _cats + num;
     if (!isInteractionVec(col)) {
       double value = chunks[col].atd(rid); // can be NA if skipMissing() == false
       if (Double.isNaN(value)) {
         value = _numMeans[num];
       }
       if (_normMul != null && _normSub != null) {
         value = (value - _normSub[denseIdx]) * _normMul[denseIdx];
       }
       row.numVals[denseIdx] = value;
       denseIdx++;
       continue;
     }
     // Categorical-categorical interactions are handled as plain categoricals above, so an
     // interaction reaching this point has at most one categorical side. The chunk value is
     // essentially chunks[v1].atd(rid) * chunks[v2].atd(rid) (see InteractionWrappedVec).
     final int slot = denseIdx + getInteractionOffset(chunks, col, rid);
     row.numVals[slot] = chunks[col].atd(rid);
     denseIdx += nextNumericIdx(num);
   }

   // Responses: a NaN response (after optional shift/scale) invalidates the whole row.
   for (int r = 0; r < _responses; r++) {
     try {
       row.response[r] = chunks[responseChunkId(r)].atd(rid);
     } catch (Throwable t) {
       throw new RuntimeException(t);
     }
     if (_normRespMul != null) {
       row.response[r] = (row.response[r] - _normRespSub[r]) * _normRespMul[r];
     }
     if (Double.isNaN(row.response[r])) {
       row.bad = true;
       return row;
     }
   }

   if (_offset) {
     row.offset = chunks[offsetChunkId()].atd(rid);
   }
   return row;
 }
コード例 #2
0
ファイル: DataInfo.java プロジェクト: liaochy/h2o-3
  /**
   * Populates {@code row} with the dense representation of row {@code rid} read from
   * {@code chunks}.
   *
   * <p>Processing order:
   *
   * <ol>
   *   <li>Clear {@code row.bad} and set {@code row.rid} to {@code rid + chunks[0].start()}.
   *   <li>When {@code _weights} is set, read the weight; return immediately if the row's weight
   *       is zero.
   *   <li>When {@code _skipMissing} is set, mark the row bad and return if any chunk in the
   *       array is NA at {@code rid}.
   *   <li>Store categorical ids in {@code row.binIds} and their count in {@code row.nBins}. An
   *       NA categorical becomes {@code _catModes[i]} when {@code _imputeMissing} is set, and
   *       {@code _catOffsets[i + 1] - 1} otherwise.
   *   <li>Store numeric values in {@code row.numVals}; NaN is replaced by {@code _numMeans[i]}
   *       only when {@code _imputeMissing} is set, and values are shifted/scaled when both
   *       {@code _normSub} and {@code _normMul} are present.
   *   <li>Read each response, shift/scale it when {@code _normRespMul} is present, and mark the
   *       row bad and return on a NaN response.
   *   <li>When {@code _offset} is set, read the offset.
   * </ol>
   *
   * @param chunks chunks to read values from
   * @param rid row index used for every chunk access
   * @param row row object to fill; it is mutated in place
   * @return the same {@code row} instance that was passed in
   */
  public final Row extractDenseRow(Chunk[] chunks, int rid, Row row) {
    row.bad = false;
    row.rid = rid + chunks[0].start();
    if (_weights) {
      row.weight = chunks[weightChunkId()].atd(rid);
    }
    // When weights are disabled this checks whatever weight the row already holds.
    if (row.weight == 0) {
      return row;
    }

    if (_skipMissing) {
      // Every chunk handed in is inspected, not only the predictor columns.
      for (int k = 0; k < chunks.length; k++) {
        if (chunks[k].isNA(rid)) {
          row.bad = true;
          return row;
        }
      }
    }

    int binCount = 0;
    for (int cat = 0; cat < _cats; cat++) {
      if (!chunks[cat].isNA(rid)) {
        final int observedId = getCategoricalId(cat, (int) chunks[cat].at8(rid));
        if (observedId >= 0) {
          row.binIds[binCount] = observedId;
          binCount++;
        }
        continue;
      }
      if (!_imputeMissing) {
        // TODO: What if missingBucket = false?
        // missing value turns into extra (last) factor
        row.binIds[binCount] = _catOffsets[cat + 1] - 1;
        binCount++;
        continue;
      }
      final int imputedId = getCategoricalId(cat, _catModes[cat]);
      if (imputedId >= 0) {
        row.binIds[binCount] = imputedId;
        binCount++;
      }
    }
    row.nBins = binCount;

    final int numericCount = _nums;
    for (int num = 0; num < numericCount; num++) {
      double value = chunks[_cats + num].atd(rid); // can be NA if skipMissing() == false
      if (_imputeMissing && Double.isNaN(value)) {
        value = _numMeans[num];
      }
      if (_normMul != null && _normSub != null) {
        value = (value - _normSub[num]) * _normMul[num];
      }
      row.numVals[num] = value;
    }

    // NOTE(review): every response slot is read from the same responseChunkId() chunk;
    // confirm this is intended when _responses > 1.
    for (int r = 0; r < _responses; r++) {
      row.response[r] = chunks[responseChunkId()].atd(rid);
      if (_normRespMul != null) {
        row.response[r] = (row.response[r] - _normRespSub[r]) * _normRespMul[r];
      }
      if (Double.isNaN(row.response[r])) {
        row.bad = true;
        return row;
      }
    }

    if (_offset) {
      row.offset = chunks[offsetChunkId()].atd(rid);
    }
    return row;
  }