Java Frame.domains примеры использования

Язык программирования: Java

Пространство имен/Пакет: water.fvec

Класс/Тип: Frame

Метод/Функция: domains

Примеров на hotexamples.com: 10

Java Frame.domains — найдено 10 примеров. Это лучшие примеры Java-кода для water.fvec.Frame.domains, полученные из проектов с открытым исходным кодом. Вы можете оценивать каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

vecs(30)

delete(30)

vec(30)

remove(30)

numCols(30)

numRows(27)

add(23)

names(23)

anyVec(14)

domains(10)

find(8)

replace(8)

types(7)

toString(6)

name(6)

update(3)

unlock(3)

lastVec(3)

delete_and_lock(2)

restructure(2)

closeNewChunks(2)

read_lock(2)

insertVec(2)

extractFrame(2)

makeCompatible(2)

createNewChunks(2)

toStringHdr(1)

closeAppendables(1)

finalizePartialFrame(1)

getUniqueId(1)

toTwoDimTable(1)

preparePartialFrame(1)

toStringAll(1)

toCSV(1)

subframe(1)

lastVecName(1)

deepSlice(1)

reloadVecs(1)

defaultColName(1)

Пример #1

Показать файл

Файл: AggregatorTest.java Проект: h2oai/h2o-3

 /**
  * Converts three weather columns to categoricals, trains an Aggregator on the frame and checks
  * that (a) the aggregated frame has fewer than half of the input rows and (b) at least one
  * categorical column ends up with a different domain size in the aggregated frame.
  */
 @Test
 public void testDomains() {
   final String[] toCategorical = {"MaxWindSpeed", "RelHumid9am", "Cloud9am"};
   Frame frame = parse_test_file("smalldata/junit/weather.csv");
   // Swap every listed column for its categorical counterpart and drop the replaced vector.
   for (int n = 0; n < toCategorical.length; n++) {
     String colName = toCategorical[n];
     Vec replaced = frame.vec(colName);
     Vec categorical = replaced.toCategoricalVec();
     frame.remove(colName);
     frame.add(colName, categorical);
     replaced.remove();
   }
   DKV.put(frame);

   AggregatorModel.AggregatorParameters parms = new AggregatorModel.AggregatorParameters();
   parms._train = frame._key;
   parms._radius_scale = 10;
   Aggregator builder = new Aggregator(parms);
   AggregatorModel agg = builder.trainModel().get();
   Frame output = agg._output._output_frame.get();
   Assert.assertTrue(output.numRows() < 0.5 * frame.numRows());

   // Walk the columns until the first categorical one whose domain size differs.
   boolean same = true;
   int col = 0;
   while (same && col < frame.numCols()) {
     if (frame.vec(col).isCategorical()) {
       same = frame.domains()[col].length == output.domains()[col].length;
     }
     col++;
   }

   // Release everything before the final assertion.
   frame.remove();
   output.remove();
   agg.remove();
   Assert.assertFalse(same);
 }

Пример #2

Показать файл

Файл: MRUtils.java Проект: Jrobinso09/h2o

 /**
  * Draws a random subset of the rows of a frame.
  *
  * <p>Every row is kept with probability {@code rows / fr.numRows()}. In addition, the last row
  * of a chunk is always kept when no earlier row of that chunk was picked. Should the result
  * still be empty, a warning is logged and the sampling is repeated with {@code seed + 1}.
  *
  * @param fr Input frame; {@code null} is passed straight through
  * @param rows Approximate number of rows to sample (across all chunks); a non-positive value
  *     or a value of at least {@code fr.numRows()} returns {@code fr} itself
  * @param seed Seed for RNG (offset by the chunk index inside each chunk)
  * @return Sampled frame, or {@code fr} when no sampling is required
  */
 public static Frame sampleFrame(Frame fr, final long rows, final long seed) {
   if (fr == null) return null;
   final float fraction;
   if (rows > 0) {
     fraction = (float) rows / fr.numRows();
   } else {
     fraction = 1.f;
   }
   if (fraction >= 1.f) return fr;

   Frame sampled =
       new MRTask2() {
         @Override
         public void map(Chunk[] cs, NewChunk[] ncs) {
           final Random rng = getDeterRNG(seed + cs[0].cidx());
           boolean pickedAny = false;
           for (int row = 0; row < cs[0]._len; row++) {
             // The random draw always happens first so the RNG sequence is independent of the
             // fallback rule for the last row.
             boolean keep =
                 rng.nextFloat() < fraction || (!pickedAny && row == cs[0]._len - 1);
             if (!keep) continue;
             pickedAny = true;
             for (int col = 0; col < ncs.length; col++) {
               ncs[col].addNum(cs[col].at0(row));
             }
           }
         }
       }.doAll(fr.numCols(), fr).outputFrame(fr.names(), fr.domains());

   if (sampled.numRows() != 0) {
     return sampled;
   }
   Log.warn(
       "You asked for "
           + rows
           + " rows (out of "
           + fr.numRows()
           + "), but you got none (seed="
           + seed
           + ").");
   Log.warn("Let's try again. You've gotta ask yourself a question: \"Do I feel lucky?\"");
   return sampleFrame(fr, rows, seed + 1);
 }

Пример #3

Показать файл

Файл: GLRMModel.java Проект: vijaykiran/h2o-3

  /**
   * Maps every archetype back into the original feature space.
   *
   * @param frame Original training data with m rows and n columns
   * @param destination_key Frame Id for output; a fresh key is made when this is {@code null}
   * @param reverse_transform when {@code true}, each numeric projection is divided by the
   *     matching {@code _normMul} entry and shifted by the matching {@code _normSub} entry
   * @return Frame containing k rows and n columns, where each row corresponds to an archetype
   */
  public Frame scoreArchetypes(Frame frame, Key destination_key, boolean reverse_transform) {
    final int ncols = _output._names.length;
    Frame adaptedFr = new Frame(frame);
    adaptTestForTrain(adaptedFr, true, false);
    assert ncols == adaptedFr.numCols();
    final String[][] domains = adaptedFr.domains();
    final int ncats = _output._ncats;
    final int total = _output._ncats + _output._nnums;
    final double[][] proj = new double[_parms._k][_output._nnums + _output._ncats];

    // Categorical part: impute a level from each archetype's block for that column.
    for (int d = 0; d < ncats; d++) {
      final double[][] block = _output._archetypes_raw.getCatBlock(d);
      final int target = _output._permutation[d];
      for (int k = 0; k < _parms._k; k++) {
        proj[k][target] = _parms.mimpute(block[k], _output._lossFunc[d]);
      }
    }

    // Numeric part: impute the value and optionally undo the normalization.
    for (int d = ncats; d < total; d++) {
      final int ds = d - ncats;
      final int target = _output._permutation[d];
      for (int k = 0; k < _parms._k; k++) {
        final double raw = _output._archetypes_raw.getNum(ds, k);
        double value = _parms.impute(raw, _output._lossFunc[d]);
        if (reverse_transform) {
          value = value / _output._normMul[ds] + _output._normSub[ds];
        }
        proj[k][target] = value;
      }
    }

    // Wrap the projection in a frame and carry over the adapted frame's domains.
    final Key outKey = (null == destination_key) ? Key.make() : destination_key;
    Frame f = ArrayUtils.frame(outKey, adaptedFr.names(), proj);
    for (int c = 0; c < ncols; c++) {
      f.vec(c).setDomain(domains[c]);
    }
    return f;
  }

Пример #4

Показать файл

Файл: RebalanceDataSet.java Проект: EPBaron/h2o

 /**
  * Lays out {@code _nchunks} chunks over the rows of {@code _in}, creates the zero-filled output
  * frame {@code _out} with that layout and starts a {@code RebalanceTask} to fill it.
  */
 @Override
 public void compute2() {
   _in.read_lock(_jobKey);
   // A throw-away vector (built directly from the chunk layout) serves as the template for
   // makeZeros, which yields one empty vec per source column. RebalanceTask then pulls the
   // data from the matching source chunks.
   final long totalRows = _in.numRows();
   final int rowsPerChunk = (int) (totalRows / _nchunks);
   final int extra = (int) (totalRows % _nchunks);

   // espc[c] is the first row of chunk c; the first `extra` chunks hold one more row each.
   final long[] espc = new long[_nchunks + 1];
   long offset = 0;
   for (int c = 0; c < espc.length; c++) {
     espc[c] = offset;
     long len = rowsPerChunk;
     if (c < extra) len++;
     offset += len;
   }
   assert espc[espc.length - 1] == totalRows
       : "unexpected number of rows, expected " + totalRows + ", got " + espc[espc.length - 1];

   final Vec[] srcVecs = _in.vecs();
   final String[] colNames = _in.names();
   final Vec[] emptyVecs = new Vec(Vec.newKey(), espc).makeZeros(srcVecs.length, _in.domains());
   _out = new Frame(_okey, colNames, emptyVecs);
   _out.delete_and_lock(_jobKey);
   new RebalanceTask(this, srcVecs).asyncExec(_out);
 }

Пример #5

Показать файл

Файл: ASTColSlice.java Проект: hickeye/h2o-3

 /**
  * Evaluates the first argument to a frame and, when that frame is a single cell (one column,
  * one row), unwraps it to a scalar: a number for numeric or bad vecs, the string for string
  * vecs, otherwise the domain label selected by the cell's integer value. Any other frame is
  * returned unchanged.
  */
 @Override
 Val apply(Env env, Env.StackHelp stk, AST asts[]) {
   Frame fr = stk.track(asts[1].exec(env)).getFrame();
   if (fr.numCols() != 1 || fr.numRows() != 1) {
     return new ValFrame(fr); // did not flatten
   }
   if (fr.anyVec().isNumeric() || fr.anyVec().isBad()) {
     return new ValNum(fr.anyVec().at(0));
   }
   if (fr.anyVec().isString()) {
     BufferedString buffer = new BufferedString();
     return new ValStr(fr.anyVec().atStr(buffer, 0).toString());
   }
   // Remaining case: look the cell's level up in the column's domain.
   String[] levels = fr.domains()[0];
   int code = (int) fr.anyVec().at8(0);
   return new ValStr(levels[code]);
 }

Пример #6

Показать файл

Файл: FrameSplitter.java Проект: Jfeng3/h2o

 /**
  * Creates the vector templates for all output frames: one array of zero-filled vecs per split,
  * each array holding as many vecs as the input frame has columns and sharing its domains.
  *
  * @param dataset frame to be split
  * @param ratios split ratios handed to {@code computeEspcPerSplit}
  * @return templates indexed as {@code [split][column]}
  */
 private Vec[][] makeTemplates(Frame dataset, float[] ratios) {
   final Vec layoutVec = dataset.anyVec();
   final long[][] splitLayouts =
       computeEspcPerSplit(layoutVec._espc, layoutVec.length(), ratios);
   final int colCount = dataset.numCols();
   final String[][] colDomains = dataset.domains();
   final Vec[][] templates = new Vec[splitLayouts.length][];
   int split = 0;
   for (long[] espc : splitLayouts) {
     // Vectors of the current split, all laid out according to its espc.
     templates[split++] = new Vec(Vec.newKey(), espc).makeZeros(colCount, colDomains);
   }
   return templates;
 }

Пример #7

Показать файл

Файл: MRUtils.java Проект: Jrobinso09/h2o

 /**
  * Builds a frame whose rows are those of {@code fr}, reordered inside each chunk; rows never
  * move between chunks. Every chunk passes the same {@code seed} to {@code Utils.shuffleArray}.
  *
  * @param outputFrameKey key of the resulting frame
  * @param fr frame to shuffle
  * @param seed seed handed to the shuffle
  * @return the shuffled frame, with the names and domains of {@code fr}
  */
 public static Frame shuffleFramePerChunk(Key outputFrameKey, Frame fr, final long seed) {
   return new MRTask2() {
     @Override
     public void map(Chunk[] cs, NewChunk[] ncs) {
       // Start from the identity order 0..len-1, then shuffle it in place.
       final long[] order = new long[cs[0]._len];
       int pos = 0;
       while (pos < order.length) {
         order[pos] = pos;
         pos++;
       }
       Utils.shuffleArray(order, seed);
       // Emit the rows in shuffled order, column by column.
       for (long source : order) {
         final int row = (int) source;
         for (int col = 0; col < ncs.length; col++) {
           ncs[col].addNum(cs[col].at0(row));
         }
       }
     }
   }.doAll(fr.numCols(), fr).outputFrame(outputFrameKey, fr.names(), fr.domains());
 }

Пример #8

Показать файл

Файл: GLRMModel.java Проект: vijaykiran/h2o-3

  /**
   * GLRM scoring: imputes the data from the reconstructed XY, based on the feature domains (see
   * Udell (2015), Section 5.3).
   *
   * @param orig frame passed on to the model-metrics builder
   * @param adaptedFr frame adapted to the training frame; must have {@code _output._names.length}
   *     columns
   * @param destination_key key of the returned frame; a fresh key is made when {@code null}
   * @param save_imputed forwarded to {@code GLRMScore}
   * @param reverse_transform forwarded to {@code GLRMScore}
   * @return the imputed frame, already stored in the DKV
   */
  private Frame reconstruct(
      Frame orig,
      Frame adaptedFr,
      Key destination_key,
      boolean save_imputed,
      boolean reverse_transform) {
    final int ncols = _output._names.length;
    assert ncols == adaptedFr.numCols();
    final String prefix = "reconstr_";

    // Build [A,X,P]: A = adaptedFr, X = loading frame, P = imputed frame.
    // A is adapted to the original training frame; in P the categorical columns come before the
    // numeric ones.
    Frame fullFrm = new Frame(adaptedFr);
    Frame loadingFrm = DKV.get(_output._representation_key).get();
    fullFrm.add(loadingFrm);
    final String[][] domains = adaptedFr.domains();
    int col = 0;
    while (col < ncols) {
      Vec imputed = fullFrm.anyVec().makeZero();
      imputed.setDomain(domains[col]);
      fullFrm.add(prefix + _output._names[col], imputed);
      col++;
    }
    GLRMScore gs = new GLRMScore(ncols, _parms._k, save_imputed, reverse_transform).doAll(fullFrm);

    // The imputed columns are the ones after the first ncols + k columns.
    final int from = ncols + _parms._k;
    final int to = fullFrm.numCols();
    // Original author's note: extractFrame will call vec_impl(), so a delete() cannot be issued
    // at this point yet.
    Frame extracted = fullFrm.extractFrame(from, to);

    final Key outKey = (null == destination_key) ? Key.make() : destination_key;
    Frame f = new Frame(outKey, extracted.names(), extracted.vecs());
    DKV.put(f);
    // Save error metrics based on imputed data.
    gs._mb.makeModelMetrics(GLRMModel.this, orig, null, null);
    return f;
  }

Пример #9

Показать файл

Файл: ASTColSlice.java Проект: hickeye/h2o-3

  /**
   * Row-slices the frame produced by {@code asts[1]} according to the selector in {@code asts[2]}.
   *
   * <ul>
   *   <li>{@code ASTNumList}: an explicit list is expanded and sorted; a non-negative list selects
   *       rows, a list starting with a negative value excludes rows (index {@code -i - 1}). A
   *       number list that is not an explicit list is matched through {@code nums.has(row)}.
   *   <li>{@code ASTNum}: a single row, via {@code deepSlice}.
   *   <li>{@code ASTExec} / {@code ASTId}: evaluated to a one-column predicate frame, via {@code
   *       deepSlice}.
   * </ul>
   *
   * @return the selected rows wrapped in a {@code ValFrame}
   * @throws IllegalArgumentException if an inclusion list contains a row index outside {@code 0 ..
   *     nrows - 1}, if the predicate frame does not have exactly one column, or if the selector
   *     has an unsupported type
   */
  @Override
  Val apply(Env env, Env.StackHelp stk, AST asts[]) {
    Frame fr = stk.track(asts[1].exec(env)).getFrame();
    Frame returningFrame;
    long nrows = fr.numRows();
    if (asts[2] instanceof ASTNumList) {
      final ASTNumList nums = (ASTNumList) asts[2];
      long[] rows = nums._isList ? nums.expand8Sort() : null;
      if (rows != null) {
        if (rows.length == 0) { // Empty inclusion list?
        } else if (rows[0] >= 0) { // Positive (inclusion) list
          // The code treats rows[] as ascending: its last entry is the largest requested index.
          // Valid indices are 0 .. nrows - 1, so an index equal to nrows must be rejected as well.
          if (rows[rows.length - 1] >= nrows)
            throw new IllegalArgumentException("Row must be an integer from 0 to " + (nrows - 1));
        } else { // Negative (exclusion) list
          // Invert the list to make a positive list, ignoring out-of-bounds values
          BitSet bs = new BitSet((int) nrows);
          for (int i = 0; i < rows.length; i++) {
            int idx = (int) (-rows[i] - 1); // The positive index
            if (idx >= 0 && idx < nrows) bs.set(idx); // Mark row to EXCLUDE
          }
          // Every bit left clear is a row to keep.
          rows = new long[(int) nrows - bs.cardinality()];
          for (int i = bs.nextClearBit(0), j = 0; i < nrows; i = bs.nextClearBit(i + 1))
            rows[j++] = i;
        }
      }
      final long[] ls = rows;

      returningFrame =
          new MRTask() {
            @Override
            public void map(Chunk[] cs, NewChunk[] ncs) {
              if (nums.cnt() == 0) return;
              long start = cs[0].start();
              long end = start + cs[0]._len;
              long min = ls == null ? (long) nums.min() : ls[0],
                  max =
                      ls == null
                          ? (long) nums.max() - 1
                          : ls[ls.length - 1]; // exclusive max to inclusive max when stride == 1
              //     [ start, ...,  end ]     the chunk
              // 1 []                          nums out left:  nums.max() < start
              // 2                         []  nums out right: nums.min() > end
              // 3 [ nums ]                    nums run left:  nums.min() < start && nums.max() <=
              // end
              // 4          [ nums ]           nums run in  :  start <= nums.min() && nums.max() <=
              // end
              // 5                   [ nums ]  nums run right: start <= nums.min() && end <
              // nums.max()
              if (!(max < start || min > end)) { // not situation 1 or 2 above
                long startOffset = (min > start ? min : start); // situation 4 and 5 => min > start;
                for (int i = (int) (startOffset - start); i < cs[0]._len; ++i) {
                  if ((ls == null && nums.has(start + i))
                      || (ls != null && Arrays.binarySearch(ls, start + i) >= 0)) {
                    // Copy the selected row, choosing the add-call that fits the chunk type.
                    for (int c = 0; c < cs.length; ++c) {
                      if (cs[c] instanceof CStrChunk) ncs[c].addStr(cs[c], i);
                      else if (cs[c] instanceof C16Chunk) ncs[c].addUUID(cs[c], i);
                      else if (cs[c].isNA(i)) ncs[c].addNA();
                      else ncs[c].addNum(cs[c].atd(i));
                    }
                  }
                }
              }
            }
          }.doAll(fr.types(), fr).outputFrame(fr.names(), fr.domains());
    } else if ((asts[2] instanceof ASTNum)) {
      long[] rows = new long[] {(long) (((ASTNum) asts[2])._v.getNum())};
      returningFrame = fr.deepSlice(rows, null);
    } else if ((asts[2] instanceof ASTExec) || (asts[2] instanceof ASTId)) {
      Frame predVec = stk.track(asts[2].exec(env)).getFrame();
      if (predVec.numCols() != 1)
        throw new IllegalArgumentException(
            "Conditional Row Slicing Expression evaluated to "
                + predVec.numCols()
                + " columns.  Must be a boolean Vec.");
      returningFrame = fr.deepSlice(predVec, null);
    } else
      throw new IllegalArgumentException(
          "Row slicing requires a number-list as the last argument, but found a "
              + asts[2].getClass());
    return new ValFrame(returningFrame);
  }

Пример #10

Показать файл

Файл: MRUtils.java Проект: Jrobinso09/h2o

  /**
   * Internal version of stratified sampling with a repeat counter.
   *
   * <p>Each row whose label is not missing is emitted {@code floor(ratio)} times, plus once more
   * with probability equal to the fractional part of its class ratio. If some class ends up
   * without any row, the sampling is repeated with {@code seed + 1}; this is currently hardcoded
   * to at most 10 retries, because certain wrong sampling ratios make it impossible to get a row
   * from each class. The accepted sample is finally shuffled inside each chunk.
   *
   * @param fr frame to sample from; {@code null} is passed straight through
   * @param label label column of {@code fr}; must be an enum
   * @param sampling_ratios one sampling ratio per label class
   * @param seed seed for the RNG (offset by the chunk index inside each chunk)
   * @param debug when {@code true}, the resulting class distribution is logged
   * @param count number of retries performed so far
   * @return the sampled and shuffled frame, or {@code fr} itself when no class distribution could
   *     be computed for the sample
   */
  private static Frame sampleFrameStratified(
      final Frame fr,
      Vec label,
      final float[] sampling_ratios,
      final long seed,
      final boolean debug,
      int count) {
    if (fr == null) return null;
    assert (label.isEnum());
    assert (sampling_ratios != null && sampling_ratios.length == label.domain().length);
    final int labelidx = fr.find(label); // which column is the label?
    assert (labelidx >= 0);

    final boolean poisson = false; // beta feature

    Frame r =
        new MRTask2() {
          @Override
          public void map(Chunk[] cs, NewChunk[] ncs) {
            final Random rng = getDeterRNG(seed + cs[0].cidx());
            for (int r = 0; r < cs[0]._len; r++) {
              if (cs[labelidx].isNA0(r)) continue; // skip missing labels
              final int label = (int) cs[labelidx].at80(r);
              assert (sampling_ratios.length > label && label >= 0);
              int sampling_reps;
              if (poisson) {
                sampling_reps = Utils.getPoisson(sampling_ratios[label], rng);
              } else {
                // Whole part of the ratio = guaranteed copies; fractional part = chance of one more.
                final float remainder = sampling_ratios[label] - (int) sampling_ratios[label];
                sampling_reps =
                    (int) sampling_ratios[label] + (rng.nextFloat() < remainder ? 1 : 0);
              }
              for (int i = 0; i < ncs.length; i++) {
                for (int j = 0; j < sampling_reps; ++j) {
                  ncs[i].addNum(cs[i].at0(r));
                }
              }
            }
          }
        }.doAll(fr.numCols(), fr).outputFrame(fr.names(), fr.domains());

    // Confirm the validity of the distribution
    long[] dist = new ClassDist(r.vecs()[labelidx]).doAll(r.vecs()[labelidx]).dist();

    // if there are no training labels in the test set, then there is no point in sampling the test
    // set
    if (dist == null) {
      // The sample is abandoned here, so release it just like the retry and shuffle paths do.
      r.delete();
      return fr;
    }

    if (debug) {
      long sumdist = Utils.sum(dist);
      Log.info("After stratified sampling: " + sumdist + " rows.");
      for (int i = 0; i < dist.length; ++i) {
        Log.info(
            "Class "
                + r.vecs()[labelidx].domain(i)
                + ": count: "
                + dist[i]
                + " sampling ratio: "
                + sampling_ratios[i]
                + " actual relative frequency: "
                + (float) dist[i] / sumdist * dist.length);
      }
    }

    // Re-try if we didn't get at least one example from each class
    if (Utils.minValue(dist) == 0 && count < 10) {
      Log.info(
          "Re-doing stratified sampling because not all classes were represented (unlucky draw).");
      r.delete();
      return sampleFrameStratified(fr, label, sampling_ratios, seed + 1, debug, ++count);
    }

    // shuffle intra-chunk
    Frame shuffled = shuffleFramePerChunk(r, seed + 0x580FF13);
    r.delete();

    return shuffled;
  }