// Read up to 'len' bytes of Value.  Value should already be persisted to
// disk.  A racing delete can trigger a failure where we get a null return,
// but no crash (although one could argue that a racing load&delete is a bug
// no matter what).
@Override
public byte[] load(Value v) {
  long skip = 0;
  Key k = v._key;
  // Convert an arraylet chunk into a long-offset from the base file.
  if (k._kb[0] == Key.ARRAYLET_CHUNK) {
    skip = ValueArray.getChunkOffset(k); // The offset
    k = ValueArray.getArrayKey(k);       // From the base file key
  }
  if (k._kb[0] == Key.DVEC) {
    skip = water.fvec.NFSFileVec.chunkOffset(k); // The offset
  }
  try {
    FileInputStream s = null;
    try {
      s = new FileInputStream(getFileForKey(k));
      FileChannel fc = s.getChannel();
      fc.position(skip);
      AutoBuffer ab = new AutoBuffer(fc, true, Value.NFS);
      byte[] b = ab.getA1(v._max);
      ab.close();
      assert v.isPersisted();
      return b;
    } finally {
      if (s != null) s.close();
    }
  } catch (IOException e) {
    // Broken disk / short-file???
    H2O.ignore(e);
    return null;
  }
}
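
// Illustrative sketch (not part of the original class): the position-then-read
// pattern used above, expressed with only standard java.io/java.nio types.
// The helper name and arguments are hypothetical; H2O's Key/Value/AutoBuffer
// machinery is replaced by a plain byte[] read so the chunk-offset handling is
// visible in isolation.
static byte[] readChunkSketch(java.io.File f, long offset, int maxLen) throws java.io.IOException {
  try (java.io.FileInputStream s = new java.io.FileInputStream(f)) {
    java.nio.channels.FileChannel fc = s.getChannel();
    fc.position(offset);                              // seek to the chunk's byte offset
    long rem = fc.size() - offset;                    // bytes left in the file from that offset
    int len = (int) Math.min(maxLen, Math.max(rem, 0));
    byte[] b = new byte[len];
    java.nio.ByteBuffer bb = java.nio.ByteBuffer.wrap(b);
    while (bb.hasRemaining() && fc.read(bb) >= 0) ;   // fill the buffer or stop at EOF
    return b;
  }
}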
@Override
public Value lazyArrayChunk(final Key key) {
  final Key arykey = ValueArray.getArrayKey(key); // From the base file key
  final long off = (_iceRoot != null) ? 0 : ValueArray.getChunkOffset(key); // The offset
  final Path p = (_iceRoot != null)
      ? new Path(_iceRoot, getIceName(key, (byte) 'V'))
      : new Path(arykey.toString());
  final Size sz = new Size();
  run(new Callable() {
    @Override
    public Object call() throws Exception {
      FileSystem fs = FileSystem.get(p.toUri(), CONF);
      long rem = fs.getFileStatus(p).getLen() - off;
      sz._value = (rem > ValueArray.CHUNK_SZ * 2) ? (int) ValueArray.CHUNK_SZ : (int) rem;
      return null;
    }
  }, true, 0);
  Value val = new Value(key, sz._value, Value.HDFS);
  val.setdsk(); // But it's already on disk.
  return val;
}
@Override
public Value lazyArrayChunk(Key key) {
  Key arykey = ValueArray.getArrayKey(key);  // From the base file key
  long off = ValueArray.getChunkOffset(key); // The offset
  long size = getFileForKey(arykey).length();
  long rem = size - off;
  // The last chunk can be fat, so it got packed into the earlier chunk.
  if (rem < ValueArray.CHUNK_SZ && off > 0) return null;
  int sz = (rem >= ValueArray.CHUNK_SZ * 2) ? (int) ValueArray.CHUNK_SZ : (int) rem;
  Value val = new Value(key, sz, Value.NFS);
  val.setdsk(); // But it's already on disk.
  return val;
}
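
// Illustrative sketch (hypothetical helper, not in the original source): the
// chunk-sizing rule used by both lazyArrayChunk variants above, isolated as a
// pure function.  A chunk gets the standard CHUNK_SZ unless fewer than two
// full chunks remain, in which case the tail is folded into one "fat" final
// chunk; a short tail at a non-zero offset returns -1 here (the method above
// returns null) because that data already belongs to the previous chunk.
static int chunkSizeSketch(long fileLen, long off, long chunkSz) {
  long rem = fileLen - off;                 // bytes from this offset to end-of-file
  if (rem < chunkSz && off > 0) return -1;  // short tail was packed into the prior chunk
  return (rem >= chunkSz * 2) ? (int) chunkSz : (int) rem; // otherwise a normal or fat chunk
}
// Example: with chunkSz = 4MB and a 9MB file, offsets 0 and 4MB each yield
// 4MB and 5MB respectively; there is no separate 1MB tail chunk.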
@Override
public byte[] load(final Value v) {
  final byte[] b = MemoryManager.malloc1(v._max);
  long skip = 0;
  Key k = v._key;
  final Path p;
  if (_iceRoot != null) {
    p = new Path(_iceRoot, getIceName(v));
  } else {
    // Convert an arraylet chunk into a long-offset from the base file.
    if (k._kb[0] == Key.ARRAYLET_CHUNK) {
      skip = ValueArray.getChunkOffset(k); // The offset
      k = ValueArray.getArrayKey(k);       // From the base file key
      if (k.toString().endsWith(Extensions.HEX)) { // Hex file?
        int value_len = DKV.get(k).memOrLoad().length; // How long is the ValueArray header?
        skip += value_len;
      }
    }
    p = new Path(k.toString());
  }
  final long skip_ = skip;
  run(new Callable() {
    @Override
    public Object call() throws Exception {
      FileSystem fs = FileSystem.get(p.toUri(), CONF);
      FSDataInputStream s = null;
      try {
        s = fs.open(p);
        // NOTE: the following line degrades performance of HDFS load from the S3 API:
        //   s.readFully(skip, b, 0, b.length);
        // Google API's simple seek has better performance.
        // Load of a 300MB file via Google API ~ 14sec, via s.readFully ~ 5min
        // (under the same conditions).
        ByteStreams.skipFully(s, skip_);
        ByteStreams.readFully(s, b);
        assert v.isPersisted();
      } finally {
        Utils.close(s);
      }
      return null;
    }
  }, true, v._max);
  return b;
}
@Override
public byte[] load(final Value v) {
  final byte[] b = MemoryManager.malloc1(v._max);
  long skip = 0;
  Key k = v._key;
  if (k._kb[0] == Key.ARRAYLET_CHUNK) {
    skip = ValueArray.getChunkOffset(k); // The offset
    k = ValueArray.getArrayKey(k);       // From the base file key
  } else if (k._kb[0] == Key.DVEC) {
    skip = water.fvec.NFSFileVec.chunkOffset(k); // The offset
  }
  final Path p = _iceRoot == null ? new Path(getPathForKey(k)) : new Path(_iceRoot, getIceName(v));
  final long skip_ = skip;
  run(new Callable() {
    @Override
    public Object call() throws Exception {
      FileSystem fs = FileSystem.get(p.toUri(), CONF);
      FSDataInputStream s = null;
      try {
        s = fs.open(p);
        // NOTE: the following line degrades performance of HDFS load from the S3 API:
        //   s.readFully(skip, b, 0, b.length);
        // Google API's simple seek has better performance.
        // Load of a 300MB file via Google API ~ 14sec, via s.readFully ~ 5min
        // (under the same conditions).
        ByteStreams.skipFully(s, skip_);
        ByteStreams.readFully(s, b);
        assert v.isPersisted();
      } finally {
        Utils.close(s);
      }
      return null;
    }
  }, true, v._max);
  return b;
}
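
// Illustrative sketch (hypothetical, JDK-only): the skip-then-sequential-read
// pattern used by both HDFS load methods above, without Guava's ByteStreams or
// the Hadoop FileSystem API.  The point mirrors the NOTE in those methods:
// skip forward on the already-open stream, then read the buffer fully, rather
// than issuing a positioned readFully.
static void skipThenReadFully(java.io.InputStream in, long skip, byte[] dst)
    throws java.io.IOException {
  long remaining = skip;
  while (remaining > 0) {                       // InputStream.skip may skip fewer bytes than asked
    long n = in.skip(remaining);
    if (n <= 0) {                               // fall back to a single-byte read near EOF
      if (in.read() < 0) throw new java.io.EOFException("hit EOF while skipping");
      n = 1;
    }
    remaining -= n;
  }
  int off = 0;
  while (off < dst.length) {                    // loop until the buffer is completely filled
    int n = in.read(dst, off, dst.length - off);
    if (n < 0) throw new java.io.EOFException("hit EOF while reading");
    off += n;
  }
}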
/**
 * Map function for distributed parsing of the CSV files.
 *
 * <p>The first pass calculates the min, max, means, encodings and other statistics about the
 * dataset, and determines the number of columns.
 *
 * <p>The second pass then encodes the parsed dataset to the result key, splitting it into
 * equal-sized chunks.
 */
@Override
public void map(Key key) {
  try {
    Key aryKey = null;
    boolean arraylet = key._kb[0] == Key.ARRAYLET_CHUNK;
    boolean skipFirstLine = _skipFirstLine;
    if (arraylet) {
      aryKey = ValueArray.getArrayKey(key);
      _chunkId = ValueArray.getChunkIndex(key);
      skipFirstLine = skipFirstLine || (ValueArray.getChunkIndex(key) != 0);
    }
    switch (_phase) {
      case ONE:
        assert (_ncolumns != 0);
        // Initialize the column statistics.
        phaseOneInitialize();
        // Perform the parse.
        CsvParser p = new CsvParser(aryKey, _ncolumns, _sep, _decSep, this, skipFirstLine);
        p.parse(key);
        if (arraylet) {
          long idx = ValueArray.getChunkIndex(key);
          int idx2 = (int) idx;
          assert idx2 == idx;
          assert (_nrows[idx2] == 0)
              : idx + ": " + Arrays.toString(_nrows) + " (" + _nrows[idx2] + " -- " + _myrows + ")";
          _nrows[idx2] = _myrows;
        }
        break;
      case TWO:
        assert (_ncolumns != 0);
        // Initialize statistics - invalid rows, sigma and row size.
        phaseTwoInitialize();
        // Calculate the first row and the number of rows to parse.
        int firstRow = 0;
        int lastRow = _myrows;
        _myrows = 0;
        if (arraylet) {
          long origChunkIdx = ValueArray.getChunkIndex(key);
          firstRow = (origChunkIdx == 0) ? 0 : _nrows[(int) origChunkIdx - 1];
          lastRow = _nrows[(int) origChunkIdx];
        }
        int rowsToParse = lastRow - firstRow;
        // Create the output streams.
        _outputStreams2 = createRecords(firstRow, rowsToParse);
        assert (_outputStreams2.length > 0);
        _ab = _outputStreams2[0].initialize();
        // Perform the second parse pass.
        CsvParser p2 = new CsvParser(aryKey, _ncolumns, _sep, _decSep, this, skipFirstLine);
        p2.parse(key);
        // Store the last stream if not stored during the parse.
        if (_ab != null) _outputStreams2[_outputIdx].store();
        break;
      default:
        assert (false);
    }
    ParseStatus.update(_resultKey, DKV.get(key).length(), _phase);
  } catch (Exception e) {
    e.printStackTrace();
    _error = e.getMessage();
  }
}
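
// Illustrative sketch (hypothetical, not H2O code): the row-accounting scheme
// implied by the two phases above.  Phase ONE records how many rows each chunk
// parsed; turning those counts into a running (cumulative) total gives each
// chunk its global row range, which is what phase TWO reads back as
// firstRow = _nrows[chunk - 1] and lastRow = _nrows[chunk].
static int[] toCumulativeRowCounts(int[] rowsPerChunk) {
  int[] cumulative = new int[rowsPerChunk.length];
  int total = 0;
  for (int i = 0; i < rowsPerChunk.length; i++) {
    total += rowsPerChunk[i]; // rows parsed by chunk i in the first pass
    cumulative[i] = total;    // rows up to and including chunk i
  }
  return cumulative;          // chunk i owns rows [cumulative[i-1], cumulative[i])
}
// Example: rowsPerChunk {100, 98, 7} -> cumulative {100, 198, 205};
// chunk 1 then writes rows [100, 198) in the second pass.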