Exemple #1
0
 public DataInfo validDinfo(Frame valid) {
   DataInfo res =
       new DataInfo(
           _adaptedFrame,
           null,
           1,
           _useAllFactorLevels,
           TransformType.NONE,
           TransformType.NONE,
           _skipMissing,
           _imputeMissing,
           !(_skipMissing || _imputeMissing),
           _weights,
           _offset,
           _fold);
   res._adaptedFrame = new Frame(_adaptedFrame.names(), valid.vecs(_adaptedFrame.names()));
   res._valid = true;
   return res;
 }
Exemple #2
0
 public DataInfo scoringInfo() {
   DataInfo res =
       new DataInfo(
           _adaptedFrame,
           null,
           1,
           _useAllFactorLevels,
           TransformType.NONE,
           TransformType.NONE,
           _skipMissing,
           _imputeMissing,
           !_skipMissing,
           _weights,
           _offset,
           _fold);
   res._adaptedFrame = null;
   res._weights = false;
   res._offset = false;
   res._fold = false;
   res._responses = 0;
   res._valid = true;
   res._interactions = _interactions;
   return res;
 }
Exemple #3
0
 public DataInfo filterExpandedColumns(int[] cols) {
   assert _predictor_transform != null;
   assert _response_transform != null;
   if (cols == null) return deep_clone();
   int hasIcpt = (cols.length > 0 && cols[cols.length - 1] == fullN()) ? 1 : 0;
   int i = 0, j = 0, ignoredCnt = 0;
   // public DataInfo(Frame fr, int hasResponses, boolean useAllFactorLvls, double [] normSub,
   // double [] normMul, double [] normRespSub, double [] normRespMul){
   int[][] catLvls = new int[_cats][];
   int[] ignoredCols = MemoryManager.malloc4(_nums + _cats);
   // first do categoricals...
   if (_catOffsets != null) {
     int coff = _useAllFactorLevels ? 0 : 1;
     while (i < cols.length && cols[i] < _catOffsets[_catOffsets.length - 1]) {
       int[] levels = MemoryManager.malloc4(_catOffsets[j + 1] - _catOffsets[j]);
       int k = 0;
       while (i < cols.length && cols[i] < _catOffsets[j + 1])
         levels[k++] = (cols[i++] - _catOffsets[j]) + coff;
       if (k > 0) catLvls[j] = Arrays.copyOf(levels, k);
       ++j;
     }
   }
   int[] catModes = _catModes;
   for (int k = 0; k < catLvls.length; ++k) if (catLvls[k] == null) ignoredCols[ignoredCnt++] = k;
   if (ignoredCnt > 0) {
     int[][] cs = new int[_cats - ignoredCnt][];
     catModes = new int[_cats - ignoredCnt];
     int y = 0;
     for (int c = 0; c < catLvls.length; ++c)
       if (catLvls[c] != null) {
         catModes[y] = _catModes[c];
         cs[y++] = catLvls[c];
       }
     assert y == cs.length;
     catLvls = cs;
   }
   // now numerics
   int prev = j = 0;
   for (; i < cols.length; ++i) {
     for (int k = prev; k < (cols[i] - numStart()); ++k) {
       ignoredCols[ignoredCnt++] = k + _cats;
       ++j;
     }
     prev = ++j;
   }
   for (int k = prev; k < _nums; ++k) ignoredCols[ignoredCnt++] = k + _cats;
   Frame f = new Frame(_adaptedFrame.names().clone(), _adaptedFrame.vecs().clone());
   if (ignoredCnt > 0) f.remove(Arrays.copyOf(ignoredCols, ignoredCnt));
   assert catLvls.length < f.numCols() : "cats = " + catLvls.length + " numcols = " + f.numCols();
   double[] normSub = null;
   double[] normMul = null;
   int id = Arrays.binarySearch(cols, numStart());
   if (id < 0) id = -id - 1;
   int nnums = cols.length - id - hasIcpt;
   int off = numStart();
   if (_normSub != null) {
     normSub = new double[nnums];
     for (int k = id; k < (id + nnums); ++k) normSub[k - id] = _normSub[cols[k] - off];
   }
   if (_normMul != null) {
     normMul = new double[nnums];
     for (int k = id; k < (id + nnums); ++k) normMul[k - id] = _normMul[cols[k] - off];
   }
   // public DataInfo(Frame train, Frame valid, int nResponses, boolean useAllFactorLevels,
   // TransformType predictor_transform, TransformType response_transform, boolean skipMissing,
   // boolean imputeMissing, boolean missingBucket, boolean weight, boolean offset, boolean fold) {
   DataInfo dinfo = new DataInfo(this, f, normMul, normSub, catLvls, catModes);
   dinfo._activeCols = cols;
   return dinfo;
 }