public Key localModelInfoKey(H2ONode node) {
   return Key.make(
       get_params()._model_id + ".node" + node.index(),
       (byte) 1 /*replica factor*/,
       (byte) 31 /*hidden user-key*/,
       true,
       node);
 }
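A brief usage sketch may help here: the method above builds a Key homed to a specific node, so callers typically create one such key per cluster member. The helper name allLocalModelInfoKeys is hypothetical, and H2O.CLOUD.members() is assumed to be the usual accessor for the cloud's nodes; neither comes from this snippet.

// Hedged sketch: one node-homed key per cluster member.  The helper name is
// hypothetical; H2O.CLOUD.members() is assumed to list the cloud's nodes.
public Key[] allLocalModelInfoKeys() {
  H2ONode[] members = H2O.CLOUD.members();
  Key[] keys = new Key[members.length];
  for (int i = 0; i < members.length; i++)
    keys[i] = localModelInfoKey(members[i]);
  return keys;
}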
Example #2
File: Key.java  Project: BersaKAIN/h2o
  // *Desired* distribution function on keys & replication factor. Replica #0
  // is the master, replica #1, 2, 3, etc represent additional desired
  // replication nodes. Note that this function is just the distribution
  // function - it does not DO any replication, nor does it dictate any policy
  // on how fast replication occurs. Returns -1 if the desired replica
  // is nonsense, e.g. asking for replica #3 in a 2-Node system.
  int D(int repl) {
    int hsz = H2O.CLOUD.size();

    // See if this is a specifically homed Key
    if (!user_allowed() && repl < _kb[1]) { // Asking for a replica# from the homed list?
      assert _kb[0] != Key.DVEC;
      H2ONode h2o = H2ONode.intern(_kb, 2 + repl * (4 + 2 /*serialized bytesize of H2OKey*/));
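      // Illustrative reading of the offsets above (an inference from this call,
      // not a documented layout): _kb[1] holds the count of desired home nodes,
      // and their serialized addresses (4 + 2 bytes each, presumably IP + port)
      // start at offset 2, so replica #repl sits at offset 2 + repl*6.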
      // Reverse the home to the index
      int idx = h2o.index();
      if (idx >= 0) return idx;
      // Else homed to a node which is no longer in the cloud!
      // Fall back to the normal home mode
    }

    // Distribution of Fluid Vectors is a special case.
    // Fluid Vectors are grouped into vector groups, each of which must have
    // the same distribution of chunks so that an MRTask2 run over a group of
    // vectors keeps data-locality.  Fluid vecs from the same group share the
    // same key pattern, plus each has 4 bytes identifying the particular
    // vector within the group.  Since corresponding chunks of a group must
    // end up on the same node, we skip those 4 vec# bytes when computing the
    // hash.  Apart from that, we keep the previous mode of operation, so that
    // a ByteVec has its first 64MB distributed around the cloud randomly and
    // then goes round-robin in 64MB chunks.
    if (_kb[0] == DVEC) {
      // Homed Chunk?
      if (_kb[1] != -1) throw H2O.unimpl();
      // Otherwise, round-robin Chunks in the following pattern:
      // 1 Chunk-per-node, until all nodes have 1 chunk (max parallelism).
      // Then 2 chunks-per-node, once around, then 4, then 8, then 16.
      // Getting several chunks-in-a-row on a single Node means that stencil
      // calculations that step off the end of one chunk into the next won't
      // force the next chunk to be pulled local (replicating the data).  If
      // all chunks round-robin exactly, then any stencil calc will double the
      // cached volume of data (every node will have its own chunk, plus a
      // cached next-chunk).
      // Above 16-chunks-in-a-row we hit diminishing returns.
      int cidx = UnsafeUtils.get4(_kb, 1 + 1 + 4); // Chunk index
      int x = cidx / hsz; // Multiples of cluster size
      // 0 -> 1st trip around the cluster;            nidx= (cidx- 0*hsz)>>0
      // 1,2 -> 2nd & 3rd trip; allocate in pairs:    nidx= (cidx- 1*hsz)>>1
      // 3,4,5,6 -> next 4 rounds; allocate in quads: nidx= (cidx- 3*hsz)>>2
      // 7-14 -> next 8 rounds in octets:             nidx= (cidx- 7*hsz)>>3
      // 15+ -> remaining rounds in groups of 16:     nidx= (cidx-15*hsz)>>4
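      // Worked example (illustrative, for a hypothetical 4-node cloud, hsz=4):
      // cidx=5 gives x=1, so z=1 and nidx=(5-1*hsz)>>1=0; cidx=13 gives x=3,
      // so z=2 and nidx=(13-3*hsz)>>2=0.  Chunks 0-3 thus land one per node,
      // chunks 4-11 in pairs, chunks 12-27 in quads, and so on.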
      int z = x == 0 ? 0 : (x <= 2 ? 1 : (x <= 6 ? 2 : (x <= 14 ? 3 : 4)));
      int nidx = (cidx - ((1 << z) - 1) * hsz) >> z;
      return ((nidx + repl) & 0x7FFFFFFF) % hsz;
    }

    // Easy Cheesy Stupid:
    return ((_hash + repl) & 0x7FFFFFFF) % hsz;
  }
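To sanity-check the placement arithmetic in isolation, here is a minimal standalone sketch (not H2O code; the class and method names are hypothetical) that reproduces the round-robin math from the non-homed DVEC branch of D() above, with hsz standing in for H2O.CLOUD.size():

public class ChunkPlacementSketch {
  // Mirrors the non-homed DVEC branch of D(): pick a stride of 2^z chunks per
  // node based on how many full trips around the cluster cidx has completed.
  static int homeNode(int cidx, int repl, int hsz) {
    int x = cidx / hsz; // multiples of cluster size
    int z = x == 0 ? 0 : (x <= 2 ? 1 : (x <= 6 ? 2 : (x <= 14 ? 3 : 4)));
    int nidx = (cidx - ((1 << z) - 1) * hsz) >> z;
    return ((nidx + repl) & 0x7FFFFFFF) % hsz;
  }

  public static void main(String[] args) {
    int hsz = 4; // hypothetical 4-node cloud
    for (int cidx = 0; cidx < 16; cidx++)
      System.out.println("chunk " + cidx + " -> node " + homeNode(cidx, 0, hsz));
    // Expected: nodes 0,1,2,3 for chunks 0-3, pairs for chunks 4-11,
    // then quads starting at chunk 12.
  }
}

Grouping chunks into progressively longer runs per node trades perfect round-robin balance for fewer cross-node fetches in stencil-style computations, as the comments inside D() explain.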