Java NewChunk.type Examples

Programming Language: Java

Namespace/Package Name: water.fvec

Class/Type: NewChunk

Method/Function: type

Examples at hotexamples.com: 2

Java NewChunk.type - 2 examples found. These are the top-rated real-world Java examples of water.fvec.NewChunk.type, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

addNum(13)

set_sparseLen(11)

cancel_sparse(10)

addNA(10)

addStr(8)

append2(7)

set_sparse(5)

close(5)

addUUID(4)

values(4)

sparseLen(4)

_xs(3)

append_ss(3)

_ls(3)

len(3)

alloc_indices(3)

compress(3)

sparse(2)

set_len(2)

setNA_impl2(2)

switch_to_doubles(2)

type(2)

at0(2)

at(2)

_ds(2)

append2slowstr(2)

alloc_mantissa(2)

_len(2)

alloc_doubles(2)

_id(2)

append2slowd(2)

append2slow(2)

alloc_str_indices(2)

isNA(1)

isNA0(1)

alloc_exponent(1)

cidx(1)

setNA_impl(1)

addZeros(1)

add(1)

_ss(1)

_sparseLen(1)

_len2(1)

append2slowUUID(1)

_is(1)

setDoubles(1)

Example #1

Show file

File: NewChunk.java Project: hihihippp/h2o

  /**
   * Chooses a compressed {@link Chunk} representation for the values buffered in this NewChunk.
   *
   * <p>Steps, in the order they appear below:
   *
   * <ol>
   *   <li>Ask {@code type()} for the column mode; an all-NA column returns a constant NaN chunk.
   *   <li>Turn every entry whose kind disagrees with the mode into an NA and re-run {@code type()}.
   *   <li>If values were buffered as doubles ({@code _ds != null}), either return {@code chunkD()}
   *       (some value is not a whole number) or convert the buffer to {@code _ls}/{@code _xs}.
   *   <li>Scan the mantissa/exponent pairs to find min/max and the smallest decimal exponent.
   *   <li>Return the first chunk class whose range test passes: constant, boolean/bit-vector,
   *       sparse short, scaled byte/short/int, plain byte/short/int/long, falling back to
   *       doubles.
   * </ol>
   *
   * <p>The method mutates this object's buffers ({@code _ds}, {@code _ls}, {@code _xs}) and
   * {@code _naCnt} along the way.
   *
   * @return the chunk selected for the buffered data
   */
  Chunk compress() {
    // Check for basic mode info: all missing or all strings or mixed stuff
    byte mode = type();
    if (mode == AppendableVec.NA) // ALL NAs, nothing to do
    return new C0DChunk(Double.NaN, _len);
    // Entries that disagree with the column mode (a non-enum in an ENUM column, or an enum in a
    // NUMBER column) are overwritten with NA.
    for (int i = 0; i < _len; i++)
      if (mode == AppendableVec.ENUM && !isEnum(i) || mode == AppendableVec.NUMBER && isEnum(i))
        setNA_impl(i);
    // NOTE(review): -1 presumably marks the cached NA count as stale so that type() recomputes
    // it - confirm against type().
    _naCnt = -1;
    type(); // Re-run rollups after dropping all numbers/enums

    // If the data was set8 as doubles, we do a quick check to see if it's
    // plain longs.  If not, we give up and use doubles.
    if (_ds != null) {
      int i = 0;
      // The double -> long -> double round trip differs for any value that is not exactly
      // representable as a long (e.g. it has a fractional part); NaNs are skipped.
      for (; i < _len; i++) // Attempt to inject all doubles into longs
      if (!Double.isNaN(_ds[i]) && (double) (long) _ds[i] != _ds[i]) break;
      if (i < _len) return chunkD(); // Loop stopped early: keep the data as doubles
      _ls = new long[_ds.length]; // Else flip to longs
      _xs = new int[_ds.length];
      // NaN entries get _xs[i] = Integer.MIN_VALUE (presumably the NA marker that isNA() tests -
      // confirm); every other entry keeps exponent 0 and stores the value in _ls.
      for (i = 0; i < _len; i++) // Inject all doubles into longs
      if (Double.isNaN(_ds[i])) _xs[i] = Integer.MIN_VALUE;
        else _ls[i] = (long) _ds[i];
      _ds = null;
    }

    // IF (_len2 > _len) THEN Sparse
    // Check for compressed *during appends*.  Here we know:
    // - No specials; _xs[]==0.
    // - No floats; _ds==null
    // - NZ length in _len, actual length in _len2.
    // - Huge ratio between _len2 and _len, and we do NOT want to inflate to
    //   the larger size; we need to keep it all small all the time.
    // - Rows in _xs

    // Data in some fixed-point format, not doubles
    // See if we can sanely normalize all the data to the same fixed-point.
    int xmin = Integer.MAX_VALUE; // min exponent found
    long lemin = 0, lemax = lemin; // min/max at xmin fixed-point
    boolean overflow = false;
    boolean floatOverflow = false;
    boolean first = true;
    // When _len2 != _len (the sparse case described above) min/max start at 0, so the zeros
    // that are not stored explicitly are taken into account.
    double min = _len2 == _len ? Double.MAX_VALUE : 0;
    double max = _len2 == _len ? -Double.MAX_VALUE : 0;

    for (int i = 0; i < _len; i++) {
      if (isNA(i)) continue;
      long l = _ls[i];
      int x = _xs[i];
      // In the sparse case _xs holds rows (see the block comment above), not exponents, so the
      // exponent is forced to 0 there as well.
      if (x == Integer.MIN_VALUE + 1 || _len2 != _len) x = 0; // Replace enum flag with no scaling
      assert l != 0 || x == 0; // Exponent of zero is always zero
      // Compute per-chunk min/max
      double d = l * DParseTask.pow10(x);
      if (d < min) min = d;
      if (d > max) max = d;
      long t; // Remove extra scaling
      // Strip trailing decimal zeros from the mantissa, bumping the exponent for each one.
      while (l != 0 && (t = l / 10) * 10 == l) {
        l = t;
        x++;
      }
      // NOTE(review): this is a plain assignment, so after the loop floatOverflow reflects only
      // the last non-NA element - confirm that is intended rather than an accumulating |=.
      floatOverflow = Math.abs(l) > MAX_FLOAT_MANTISSA;
      if (first) {
        // The first non-NA value seeds the running exponent and mantissa range.
        first = false;
        xmin = x;
        lemin = lemax = l;
        continue;
      }
      // Remove any trailing zeros / powers-of-10
      // Once overflow is set it stays set; it is set when the exponent differs from xmin by 10
      // or more. From then on elements are skipped for the fixed-point range tracking below.
      if (overflow || (overflow = (Math.abs(xmin - x)) >= 10)) continue;
      // Track largest/smallest values at xmin scale.  Note overflow.
      if (x < xmin) {
        // A smaller exponent was found: rescale the running range to the new, finer scale.
        lemin *= DParseTask.pow10i(xmin - x);
        lemax *= DParseTask.pow10i(xmin - x);
        xmin = x; // Smaller xmin
      }
      // *this* value, as a long scaled at the smallest scale
      long le = l * DParseTask.pow10i(x - xmin);
      if (le < lemin) lemin = le;
      if (le > lemax) lemax = le;
    }
    if (_len2 != _len) { // sparse? compare xmin/lemin/lemax with 0
      lemin = Math.min(0, lemin);
      lemax = Math.max(0, lemax);
    }

    // Constant column?
    if (_naCnt == 0 && min == max) {
      // A whole-number constant is stored as a long constant, anything else as a double constant.
      return ((long) min == min) ? new C0LChunk((long) min, _len2) : new C0DChunk(min, _len2);
    }

    // Boolean column?
    if (max == 1 && min == 0 && xmin == 0) {
      // The else below binds to the inner if (_len2 == _len); when the outer sparsity test
      // fails, control falls through to the bit-vector encoding.
      if (_nzCnt * 32 < _len2
          && _naCnt == 0
          && _len2 < 65535
          && xmin == 0) // Very sparse? (and not too big?)
      if (_len2 == _len) return new CX0Chunk(_ls, _len2, _nzCnt); // Dense  constructor
        else return new CX0Chunk(_xs, _len2, _len); // Sparse constructor
      // Two bits per value when there are strings or NAs to encode, otherwise one.
      int bpv = _strCnt + _naCnt > 0 ? 2 : 1; // Bit-vector
      byte[] cbuf = bufB(bpv);
      return new CBSChunk(cbuf, cbuf[0], cbuf[1]);
    }

    // True when a negative decimal exponent was seen, or min/max fall outside the long range.
    final boolean fpoint = xmin < 0 || min < Long.MIN_VALUE || max > Long.MAX_VALUE;

    // Result column must hold floats?
    // Highly sparse but not a bitvector or constant?
    // As above, the else binds to the inner if (_len2 == _len).
    if (!fpoint
        && (_nzCnt + _naCnt) * 8 < _len2
        && _len2 < 65535
        && xmin == 0
        && // (and not too big?)
        lemin > Short.MIN_VALUE
        && lemax <= Short.MAX_VALUE) // Only handling unbiased shorts here
    if (_len2 == _len) return new CX2Chunk(_ls, _xs, _len2, _nzCnt, _naCnt); // Sparse byte chunk
      else return new CX2Chunk(_ls, _xs, _len2, _len);

    // Exponent scaling: replacing numbers like 1.3 with 13e-1.  '13' fits in a
    // byte and we scale the column by 0.1.  A set of numbers like
    // {1.2,23,0.34} then is normalized to always be represented with 2 digits
    // to the right: {1.20,23.00,0.34} and we scale by 100: {120,2300,34}.
    // This set fits in a 2-byte short.

    // We use exponent-scaling for bytes & shorts only; it's uncommon (and not
    // worth it) for larger numbers.  We need to get the exponents to be
    // uniform, so we scale up the largest lmax by the largest scale we need
    // and if that fits in a byte/short - then it's worth compressing.  Other
    // wise we just flip to a float or double representation.
    // && binds tighter than ||, so this reads: overflow || (fpoint && floatOverflow) || ...
    if (overflow || fpoint && floatOverflow || -35 > xmin || xmin > 35) return chunkD();
    if (fpoint) {
      if (lemax - lemin < 255) // Fits in scaled biased byte?
      return new C1SChunk(bufX(lemin, xmin, C1SChunk.OFF, 0), (int) lemin, DParseTask.pow10(xmin));
      if (lemax - lemin < 65535) { // we use signed 2B short, add -32k to the bias!
        long bias = 32767 + lemin;
        return new C2SChunk(bufX(bias, xmin, C2SChunk.OFF, 1), (int) bias, DParseTask.pow10(xmin));
      }
      if (lemax - lemin < Integer.MAX_VALUE)
        return new C4SChunk(
            bufX(lemin, xmin, C4SChunk.OFF, 2), (int) lemin, DParseTask.pow10(xmin));
      return chunkD(); // Range too wide for a scaled int: store doubles
    } // else an integer column
    // Compress column into a byte
    if (xmin == 0 && 0 <= lemin && lemax <= 255 && ((_naCnt + _strCnt) == 0))
      return new C1NChunk(bufX(0, 0, C1NChunk.OFF, 0));
    if (lemax - lemin < 255) { // Span fits in a byte?
      if (0 <= min && max < 255) // Span fits in an unbiased byte?
      return new C1Chunk(bufX(0, 0, C1Chunk.OFF, 0));
      return new C1SChunk(bufX(lemin, xmin, C1SChunk.OFF, 0), (int) lemin, DParseTask.pow10i(xmin));
    }

    // Compress column into a short
    if (lemax - lemin < 65535) { // Span fits in a biased short?
      if (xmin == 0
          && Short.MIN_VALUE < lemin
          && lemax <= Short.MAX_VALUE) // Span fits in an unbiased short?
      return new C2Chunk(bufX(0, 0, C2Chunk.OFF, 1));
      // Bias chosen so that lemin maps to Short.MIN_VALUE + 1.
      int bias = (int) (lemin - (Short.MIN_VALUE + 1));
      return new C2SChunk(bufX(bias, xmin, C2SChunk.OFF, 1), bias, DParseTask.pow10i(xmin));
    }
    // Compress column into ints
    if (Integer.MIN_VALUE < min && max <= Integer.MAX_VALUE) return new C4Chunk(bufX(0, 0, 0, 2));
    return new C8Chunk(bufX(0, 0, 0, 3)); // Last resort: full 8-byte longs
  }

Example #2

Show file

File: NewChunk.java Project: asRodelgo/h2o-3

  /**
   * Chooses a compressed {@link Chunk} representation for the values buffered in this NewChunk.
   *
   * <p>Steps, in the order they appear below:
   *
   * <ol>
   *   <li>Ask {@code type()} for the column mode; all-NA and string columns return immediately.
   *   <li>For categorical or numeric columns, turn entries of the wrong kind into NAs.
   *   <li>Decide between sparse and dense storage from {@code _sparseRatio}, {@code _naCnt},
   *       {@code _nzCnt} and {@code _len}, calling {@code set_sparse}/{@code cancel_sparse}.
   *   <li>Handle UUID data, the all-NA case and double-buffered data (constant, non-integer, or
   *       converted to longs).
   *   <li>Scan the mantissa/exponent pairs for min/max and the smallest decimal exponent, then
   *       rescale the extremes to that exponent while checking for long overflow.
   *   <li>Return the first chunk class whose range test passes, falling back to doubles.
   * </ol>
   *
   * <p>The method mutates this object's buffers ({@code _ds}, {@code _ls}, {@code _xs}) and
   * {@code _naCnt} along the way.
   *
   * @return the chunk selected for the buffered data
   */
  private Chunk compress2() {
    // Check for basic mode info: all missing or all strings or mixed stuff
    byte mode = type();
    if (mode == Vec.T_BAD) // ALL NAs, nothing to do
    return new C0DChunk(Double.NaN, sparseLen());
    if (mode == Vec.T_STR) return new CStrChunk(_sslen, _ss, sparseLen(), _len, _is, _isAllASCII);
    boolean rerun = false;
    if (mode == Vec.T_CAT) {
      // Categorical column: categorical entries get exponent 0; any other non-NA entry becomes
      // an NA and is counted in _naCnt by hand.
      for (int i = 0; i < sparseLen(); i++)
        if (isCategorical2(i)) _xs[i] = 0;
        else if (!isNA2(i)) {
          setNA_impl2(i);
          ++_naCnt;
        }
      // Smack any mismatched string/numbers
    } else if (mode == Vec.T_NUM) {
      // Numeric column: categorical entries become NAs and the rollups are recomputed below.
      for (int i = 0; i < sparseLen(); i++)
        if (isCategorical2(i)) {
          setNA_impl2(i);
          rerun = true;
        }
    }
    if (rerun) {
      // NOTE(review): -1 presumably marks the cached NA count as stale so that type()
      // recomputes it - confirm against type().
      _naCnt = -1;
      type();
    } // Re-run rollups after dropping all numbers/categoricals

    boolean sparse = false;
    // sparse? treat as sparse iff we have at least MIN_SPARSE_RATIOx more zeros than nonzeros
    if (_sparseRatio * (_naCnt + _nzCnt) < _len) {
      set_sparse(_naCnt + _nzCnt);
      sparse = true;
    } else if (sparseLen() != _len) cancel_sparse();

    // If the data is UUIDs there's not much compression going on
    if (_ds != null && _ls != null) return chunkUUID();
    // cut out the easy all NaNs case
    if (_naCnt == _len) return new C0DChunk(Double.NaN, _len);
    // If the data was set8 as doubles, we do a quick check to see if it's
    // plain longs.  If not, we give up and use doubles.
    if (_ds != null) {
      int i; // check if we can flip to ints
      // The double -> long -> double round trip differs for any value that is not exactly
      // representable as a long (e.g. it has a fractional part); NaNs are skipped.
      for (i = 0; i < sparseLen(); ++i)
        if (!Double.isNaN(_ds[i]) && (double) (long) _ds[i] != _ds[i]) break;
      boolean isInteger = i == sparseLen();
      // Dense data starts as "maybe constant" and is checked below; sparse data is constant
      // only when nothing is stored explicitly.
      boolean isConstant = !sparse || sparseLen() == 0;
      double constVal = 0;
      if (!sparse) { // check the values, sparse with some nonzeros can not be constant - has 0s and
        // (at least 1) nonzero
        constVal = _ds[0];
        for (int j = 1; j < _len; ++j)
          if (_ds[j] != constVal) {
            isConstant = false;
            break;
          }
      }
      if (isConstant)
        return isInteger ? new C0LChunk((long) constVal, _len) : new C0DChunk(constVal, _len);
      if (!isInteger) return sparse ? new CXDChunk(_len, sparseLen(), 8, bufD(8)) : chunkD();
      // Else flip to longs
      _ls = new long[_ds.length];
      _xs = new int[_ds.length];
      // Keep a local reference to the doubles and null the field before filling _ls/_xs.
      double[] ds = _ds;
      _ds = null;
      final int naCnt = _naCnt;
      for (i = 0; i < sparseLen(); i++) // Inject all doubles into longs
      if (Double.isNaN(ds[i])) setNA_impl2(i);
        else _ls[i] = (long) ds[i];
      // setNA_impl2 will set _naCnt to -1!
      // we already know what the naCnt is (it did not change!) so set it back to correct value
      _naCnt = naCnt;
    }

    // IF (_len > _sparseLen) THEN Sparse
    // Check for compressed *during appends*.  Here we know:
    // - No specials; _xs[]==0.
    // - No floats; _ds==null
    // - NZ length in _sparseLen, actual length in _len.
    // - Huge ratio between _len and _sparseLen, and we do NOT want to inflate to
    //   the larger size; we need to keep it all small all the time.
    // - Rows in _xs

    // Data in some fixed-point format, not doubles
    // See if we can sanely normalize all the data to the same fixed-point.
    int xmin = Integer.MAX_VALUE; // min exponent found
    boolean floatOverflow = false;
    double min = Double.POSITIVE_INFINITY;
    double max = Double.NEGATIVE_INFINITY;
    int p10iLength = PrettyPrint.powers10i.length;
    // Mantissa/exponent pairs of the smallest (llo, xlo) and largest (lhi, xhi) value seen.
    long llo = Long.MAX_VALUE, lhi = Long.MIN_VALUE;
    int xlo = Integer.MAX_VALUE, xhi = Integer.MIN_VALUE;

    for (int i = 0; i < sparseLen(); i++) {
      if (isNA2(i)) continue;
      long l = _ls[i];
      int x = _xs[i];
      assert x != Integer.MIN_VALUE : "l = " + l + ", x = " + x;
      if (x == Integer.MIN_VALUE + 1) x = 0; // Replace categorical flag with no scaling
      assert l != 0 || x == 0
          : "l == 0 while x = "
              + x
              + " ls = "
              + Arrays.toString(_ls); // Exponent of zero is always zero
      long t; // Remove extra scaling
      // Strip trailing decimal zeros from the mantissa, bumping the exponent for each one.
      while (l != 0 && (t = l / 10) * 10 == l) {
        l = t;
        x++;
      }
      // Compute per-chunk min/max
      double d = l * PrettyPrint.pow10(x);
      if (d < min) {
        min = d;
        llo = l;
        xlo = x;
      }
      if (d > max) {
        max = d;
        lhi = l;
        xhi = x;
      }
      // NOTE(review): this is a plain assignment, so after the loop floatOverflow reflects only
      // the last non-NA element - confirm that is intended rather than an accumulating |=.
      floatOverflow = l < Integer.MIN_VALUE + 1 || l > Integer.MAX_VALUE;
      xmin = Math.min(xmin, x);
    }
    if (sparse) { // sparse?  then compare vs implied 0s
      if (min > 0) {
        min = 0;
        llo = 0;
        xlo = 0;
      }
      if (max < 0) {
        max = 0;
        lhi = 0;
        xhi = 0;
      }
      xmin = Math.min(xmin, 0);
    }
    // Constant column?
    if (_naCnt == 0 && (min == max)) {
      // Prefer the exact long mantissa when both extremes carry exponent 0; otherwise use the
      // double value, as a long constant if it is a whole number.
      if (llo == lhi && xlo == 0 && xhi == 0) return new C0LChunk(llo, _len);
      else if ((long) min == min) return new C0LChunk((long) min, _len);
      else return new C0DChunk(min, _len);
    }

    // Compute min & max, as scaled integers in the xmin scale.
    // Check for overflow along the way
    // The exponent gap is compared against the length of PrettyPrint.powers10i (presumably the
    // table behind pow10i - confirm) before pow10i is called with that gap.
    boolean overflow = ((xhi - xmin) >= p10iLength) || ((xlo - xmin) >= p10iLength);
    long lemax = 0, lemin = 0;
    if (!overflow) { // Can at least get the power-of-10 without overflow
      long pow10 = PrettyPrint.pow10i(xhi - xmin);
      lemax = lhi * pow10;
      // Hacker's Delight, Section 2-13, checking overflow.
      // Note that the power-10 is always positive, so the test devolves this:
      if ((lemax / pow10) != lhi) overflow = true;
      // Note that xlo might be > xmin; e.g. { 101e-49 , 1e-48}.
      long pow10lo = PrettyPrint.pow10i(xlo - xmin);
      lemin = llo * pow10lo;
      if ((lemin / pow10lo) != llo) overflow = true;
    }

    // Boolean column?
    if (max == 1 && min == 0 && xmin == 0 && !overflow) {
      if (sparse) { // Very sparse?
        return _naCnt == 0
            ? new CX0Chunk(_len, sparseLen(), bufS(0)) // No NAs, can store as sparse bitvector
            : new CXIChunk(_len, sparseLen(), 1, bufS(1)); // have NAs, store as sparse 1byte values
      }

      // Two bits per value when there are categoricals or NAs to encode, otherwise one.
      int bpv = _catCnt + _naCnt > 0 ? 2 : 1; // Bit-vector
      byte[] cbuf = bufB(bpv);
      return new CBSChunk(cbuf, cbuf[0], cbuf[1]);
    }

    // True when a negative decimal exponent was seen, or min/max fall outside the long range.
    final boolean fpoint = xmin < 0 || min < Long.MIN_VALUE || max > Long.MAX_VALUE;

    if (sparse) {
      // Sparse data: doubles when fractional, otherwise the narrowest of 2/4/8-byte integers
      // that holds both min and max.
      if (fpoint) return new CXDChunk(_len, sparseLen(), 8, bufD(8));
      int sz = 8;
      if (Short.MIN_VALUE <= min && max <= Short.MAX_VALUE) sz = 2;
      else if (Integer.MIN_VALUE <= min && max <= Integer.MAX_VALUE) sz = 4;
      return new CXIChunk(_len, sparseLen(), sz, bufS(sz));
    }
    // Exponent scaling: replacing numbers like 1.3 with 13e-1.  '13' fits in a
    // byte and we scale the column by 0.1.  A set of numbers like
    // {1.2,23,0.34} then is normalized to always be represented with 2 digits
    // to the right: {1.20,23.00,0.34} and we scale by 100: {120,2300,34}.
    // This set fits in a 2-byte short.

    // We use exponent-scaling for bytes & shorts only; it's uncommon (and not
    // worth it) for larger numbers.  We need to get the exponents to be
    // uniform, so we scale up the largest lmax by the largest scale we need
    // and if that fits in a byte/short - then it's worth compressing.  Other
    // wise we just flip to a float or double representation.
    if (overflow || (fpoint && floatOverflow) || -35 > xmin || xmin > 35) return chunkD();
    // NOTE(review): leRange is defined outside this method; presumably the span lemax - lemin
    // with some overflow handling - confirm.
    final long leRange = leRange(lemin, lemax);
    if (fpoint) {
      // Byte and short encodings are only tried when both scaled extremes fit in an int.
      if ((int) lemin == lemin && (int) lemax == lemax) {
        if (leRange < 255) // Fits in scaled biased byte?
        return new C1SChunk(bufX(lemin, xmin, C1SChunk._OFF, 0), lemin, PrettyPrint.pow10(xmin));
        if (leRange < 65535) { // we use signed 2B short, add -32k to the bias!
          long bias = 32767 + lemin;
          return new C2SChunk(bufX(bias, xmin, C2SChunk._OFF, 1), bias, PrettyPrint.pow10(xmin));
        }
      }
      // 4294967295 is 2^32 - 1 and 2147483647 is Integer.MAX_VALUE: the 4-byte analogue of the
      // short case above.
      if (leRange < 4294967295l) {
        long bias = 2147483647l + lemin;
        return new C4SChunk(bufX(bias, xmin, C4SChunk._OFF, 2), bias, PrettyPrint.pow10(xmin));
      }
      return chunkD(); // Range too wide for a scaled int: store doubles
    } // else an integer column

    // Compress column into a byte
    if (xmin == 0 && 0 <= lemin && lemax <= 255 && ((_naCnt + _catCnt) == 0))
      return new C1NChunk(bufX(0, 0, C1NChunk._OFF, 0));
    // Values below the int range go straight to 8-byte longs, before any biased encoding is
    // considered.
    if (lemin < Integer.MIN_VALUE) return new C8Chunk(bufX(0, 0, 0, 3));
    if (leRange < 255) { // Span fits in a byte?
      if (0 <= min && max < 255) // Span fits in an unbiased byte?
      return new C1Chunk(bufX(0, 0, C1Chunk._OFF, 0));
      return new C1SChunk(bufX(lemin, xmin, C1SChunk._OFF, 0), lemin, PrettyPrint.pow10i(xmin));
    }

    // Compress column into a short
    if (leRange < 65535) { // Span fits in a biased short?
      if (xmin == 0
          && Short.MIN_VALUE < lemin
          && lemax <= Short.MAX_VALUE) // Span fits in an unbiased short?
      return new C2Chunk(bufX(0, 0, C2Chunk._OFF, 1));
      // Bias chosen so that lemin maps to Short.MIN_VALUE + 1.
      long bias = (lemin - (Short.MIN_VALUE + 1));
      return new C2SChunk(bufX(bias, xmin, C2SChunk._OFF, 1), bias, PrettyPrint.pow10i(xmin));
    }
    // Compress column into ints
    if (Integer.MIN_VALUE < min && max <= Integer.MAX_VALUE) return new C4Chunk(bufX(0, 0, 0, 2));
    return new C8Chunk(bufX(0, 0, 0, 3)); // Last resort: full 8-byte longs
  }