/**
  * Initialization of neural net weights cf.
  * http://machinelearning.wustl.edu/mlpapers/paper_files/AISTATS2010_GlorotB10.pdf
  */
 private void randomizeWeights() {
   for (int w = 0; w < dense_row_weights.length; ++w) {
     final Random rng =
         water.util.RandomUtils.getRNG(
             get_params()._seed + 0xBAD5EED + w + 1); // to match NeuralNet behavior
     final double range = Math.sqrt(6. / (units[w] + units[w + 1]));
     for (int i = 0; i < get_weights(w).rows(); i++) {
       for (int j = 0; j < get_weights(w).cols(); j++) {
         if (get_params()._initial_weight_distribution
             == DeepLearningParameters.InitialWeightDistribution.UniformAdaptive) {
           // cf. http://machinelearning.wustl.edu/mlpapers/paper_files/AISTATS2010_GlorotB10.pdf
            if (w == dense_row_weights.length - 1 && _classification)
              // Softmax might need an extra factor 4, since it's like a sigmoid
              get_weights(w).set(i, j, (float) (4. * uniformDist(rng, -range, range)));
            else
              get_weights(w).set(i, j, (float) uniformDist(rng, -range, range));
          } else if (get_params()._initial_weight_distribution
              == DeepLearningParameters.InitialWeightDistribution.Uniform) {
            get_weights(w).set(i, j, (float) uniformDist(rng,
                -get_params()._initial_weight_scale, get_params()._initial_weight_scale));
          } else if (get_params()._initial_weight_distribution
              == DeepLearningParameters.InitialWeightDistribution.Normal) {
            get_weights(w).set(i, j,
                (float) (rng.nextGaussian() * get_params()._initial_weight_scale));
         }
       }
     }
   }
 }
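A note on the UniformAdaptive branch: it implements the "normalized initialization" from the Glorot & Bengio paper linked above, drawing each weight uniformly from (-r, r) with r = sqrt(6 / (fan_in + fan_out)), and scaling the output layer by 4 when the model is a classifier with softmax outputs. Below is a minimal standalone sketch of that rule; the class name, layer sizes, and seed are illustrative assumptions and not part of the H2O code above.

import java.util.Random;

// Standalone sketch of Glorot/Xavier "uniform adaptive" initialization.
// All names and sizes here are made up for illustration.
public class GlorotInitSketch {
  public static void main(String[] args) {
    int[] units = {784, 200, 10};        // example layer sizes: input, hidden, output
    Random rng = new Random(0xBAD5EEDL); // fixed seed for reproducibility

    for (int w = 0; w < units.length - 1; w++) {
      // r = sqrt(6 / (fan_in + fan_out)), then W ~ U(-r, r)
      double range = Math.sqrt(6. / (units[w] + units[w + 1]));
      float[][] weights = new float[units[w]][units[w + 1]];
      for (int i = 0; i < units[w]; i++)
        for (int j = 0; j < units[w + 1]; j++)
          weights[i][j] = (float) (-range + 2 * range * rng.nextDouble()); // uniformDist(rng, -range, range)
      System.out.printf("layer %d -> %d: range=%.4f, w[0][0]=%.4f%n", w, w + 1, range, weights[0][0]);
    }
  }
}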
Example #2
    @Override
    public void map(Chunk[] cs) {
      int N = cs.length - (_hasWeight ? 1 : 0);
      double[] values = new double[N];
      ArrayList<double[]> list = new ArrayList<>();
      Random rand = RandomUtils.getRNG(_seed + cs[0].start());
      ClusterDist cd = new ClusterDist();

      for (int row = 0; row < cs[0]._len; row++) {
        data(values, cs, row, _means, _mults, _modes);
        double sqr = minSqr(_centers, values, _isCats, cd);
        if (_probability * sqr > rand.nextDouble() * _sqr) list.add(values.clone());
      }

      _sampled = new double[list.size()][];
      list.toArray(_sampled);
      _centers = null;
      _means = _mults = null;
      _modes = null;
    }
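The keep/drop test above (_probability * sqr > rand.nextDouble() * _sqr) accepts a row with probability min(1, _probability * sqr / _sqr), i.e. proportional to its squared distance from the nearest current center. A toy, self-contained version of the same test follows; the class name, distances, and factors are made-up values for illustration only.

import java.util.ArrayList;
import java.util.List;
import java.util.Random;

// Toy illustration of distance-proportional row sampling as in the map() above:
// keep a row with probability min(1, probability * sqr / totalSqr).
public class DistanceSamplerSketch {
  public static void main(String[] args) {
    double[] sqrDistances = {0.1, 2.5, 0.3, 9.0, 4.2}; // squared distance of each row to its nearest center
    double totalSqr = 16.1;                            // sum over the whole data set (plays the role of _sqr)
    double probability = 2 * 3;                        // oversampling factor, cf. _parms._k * 3 with k = 2
    Random rand = new Random(42);

    List<Integer> sampled = new ArrayList<>();
    for (int row = 0; row < sqrDistances.length; row++) {
      // same shape as: if (_probability * sqr > rand.nextDouble() * _sqr) list.add(values.clone());
      if (probability * sqrDistances[row] > rand.nextDouble() * totalSqr) sampled.add(row);
    }
    System.out.println("sampled rows: " + sampled);
  }
}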
Example #3
    // Initialize cluster centers
    double[][] initial_centers(
        KMeansModel model,
        final Vec[] vecs,
        final double[] means,
        final double[] mults,
        final int[] modes) {

      // Categoricals use a different distance metric than numeric columns.
      model._output._categorical_column_count = 0;
      _isCats = new String[vecs.length][];
      for (int v = 0; v < vecs.length; v++) {
        _isCats[v] = vecs[v].isCategorical() ? new String[0] : null;
        if (_isCats[v] != null) model._output._categorical_column_count++;
      }

      Random rand = water.util.RandomUtils.getRNG(_parms._seed - 1);
      double[][] centers; // Cluster centers
      if (null != _parms._user_points) { // User-specified starting points
        Frame user_points = _parms._user_points.get();
        int numCenters = (int) user_points.numRows();
        int numCols = model._output.nfeatures();
        centers = new double[numCenters][numCols];
        Vec[] centersVecs = user_points.vecs();
        // Get the centers and standardize them if requested
        for (int r = 0; r < numCenters; r++) {
          for (int c = 0; c < numCols; c++) {
            centers[r][c] = centersVecs[c].at(r);
            centers[r][c] = data(centers[r][c], c, means, mults, modes);
          }
        }
      } else { // Random, Furthest, or PlusPlus initialization
        if (_parms._init == Initialization.Random) {
          // Initialize all cluster centers to random rows
          centers = new double[_parms._k][model._output.nfeatures()];
          for (double[] center : centers) randomRow(vecs, rand, center, means, mults, modes);
        } else {
          centers = new double[1][model._output.nfeatures()];
          // Initialize first cluster center to random row
          randomRow(vecs, rand, centers[0], means, mults, modes);

          model._output._iterations = 0;
          while (model._output._iterations < 5) {
            // Sum squared distances to the nearest cluster center
            SumSqr sqr = new SumSqr(centers, means, mults, modes, _isCats).doAll(vecs);

            // Sample new candidates with probability proportional to the squared distance
            Sampler sampler = new Sampler(centers, means, mults, modes, _isCats, sqr._sqr,
                _parms._k * 3, _parms._seed, hasWeightCol()).doAll(vecs);
            centers = ArrayUtils.append(centers, sampler._sampled);

            // Fill in sample centers into the model
            if (!isRunning()) return null; // Stopped/cancelled
            model._output._centers_raw = destandardize(centers, _isCats, means, mults);
            model._output._tot_withinss = sqr._sqr / _train.numRows();

            model._output._iterations++; // One iteration done

            // Make early version of model visible, but don't update progress using update(1)
            model.update(_key);
          }
          // Recluster down to k cluster centers
          centers = recluster(centers, rand, _parms._k, _parms._init, _isCats);
          model._output._iterations = 0; // Reset iteration count
        }
      }
      return centers;
    }
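The non-random branch of initial_centers() is a parallel (k-means||-style) take on k-means++ seeding: measure each row's squared distance to the nearest chosen center, oversample roughly k*3 new candidates per round with probability proportional to that distance, and finally recluster the candidates down to k centers. The sketch below shows the classic one-center-per-round k-means++ form of the same idea on plain arrays; it is a simplified illustration under those assumptions, not the H2O implementation, and all names are made up.

import java.util.Arrays;
import java.util.Random;

// Simplified k-means++ seeding: pick the first center at random, then repeatedly
// add a row chosen with probability proportional to its squared distance to the
// nearest already-chosen center.
public class KMeansPlusPlusSketch {
  static double sqrDist(double[] a, double[] b) {
    double s = 0;
    for (int i = 0; i < a.length; i++) s += (a[i] - b[i]) * (a[i] - b[i]);
    return s;
  }

  static double[][] seed(double[][] data, int k, long seedValue) {
    Random rand = new Random(seedValue);
    double[][] centers = new double[k][];
    centers[0] = data[rand.nextInt(data.length)].clone(); // first center: random row

    for (int c = 1; c < k; c++) {
      double[] minSqr = new double[data.length];
      double total = 0;
      for (int r = 0; r < data.length; r++) {
        double best = Double.MAX_VALUE;
        for (int j = 0; j < c; j++) best = Math.min(best, sqrDist(data[r], centers[j]));
        minSqr[r] = best;
        total += best;
      }
      // draw the next center with probability proportional to minSqr[r]
      double key = rand.nextDouble() * total, cum = 0;
      int pick = data.length - 1;
      for (int r = 0; r < data.length; r++) {
        cum += minSqr[r];
        if (cum >= key) { pick = r; break; }
      }
      centers[c] = data[pick].clone();
    }
    return centers;
  }

  public static void main(String[] args) {
    double[][] data = {{0, 0}, {0, 1}, {10, 10}, {10, 11}, {-5, 7}};
    for (double[] ctr : seed(data, 2, 1234L)) System.out.println(Arrays.toString(ctr));
  }
}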
Example #4
  /**
   * Extracts the values, applies regularization to numerics, adds appropriate offsets to
   * categoricals, and adapts response according to the CaseMode/CaseValue if set.
   */
  @Override
  public final void map(Chunk[] chunks, NewChunk[] outputs) {
    if (_jobKey != null && !Job.isRunning(_jobKey)) throw new JobCancelledException();
    final int nrows = chunks[0]._len;
    final long offset = chunks[0].start();
    boolean doWork = chunkInit();
    if (!doWork) return;
    final boolean obs_weights = _dinfo._weights && !_fr.vecs()[_dinfo.weightChunkId()].isConst();
    final double global_weight_sum =
        obs_weights ? _fr.vecs()[_dinfo.weightChunkId()].mean() * _fr.numRows() : 0;

    DataInfo.Row row = _dinfo.newDenseRow();
    double[] weight_map = null;
    double relative_chunk_weight = 1;
    // TODO: store node-local helper arrays in _dinfo -> avoid re-allocation and construction
    if (obs_weights) {
      weight_map = new double[nrows];
      double weight_sum = 0;
      for (int i = 0; i < nrows; ++i) {
        row = _dinfo.extractDenseRow(chunks, i, row);
        weight_sum += row.weight;
        weight_map[i] = weight_sum;
        assert (i == 0 || row.weight == 0 || weight_map[i] > weight_map[i - 1]);
      }
      if (weight_sum > 0) {
        ArrayUtils.div(weight_map, weight_sum); // normalize to 0...1
        relative_chunk_weight = global_weight_sum * nrows / _fr.numRows() / weight_sum;
      } else return; // nothing to do here - all rows have 0 weight
    }

    // Example:
    // _useFraction = 0.8 -> 1 repeat with fraction = 0.8
    // _useFraction = 1.0 -> 1 repeat with fraction = 1.0
    // _useFraction = 1.1 -> 2 repeats with fraction = 0.55
    // _useFraction = 2.1 -> 3 repeats with fraction = 0.7
    // _useFraction = 3.0 -> 3 repeats with fraction = 1.0
    final int repeats = (int) Math.ceil(_useFraction * relative_chunk_weight);
    final float fraction = (float) (_useFraction * relative_chunk_weight) / repeats;
    assert (fraction <= 1.0);

    final boolean sample = (fraction < 0.999 || obs_weights || _shuffle);
    final Random skip_rng =
        sample
            ? RandomUtils.getRNG(
                (0x8734093502429734L + _seed + offset) * (_iteration + 0x9823423497823423L))
            : null;

    long num_processed_rows = 0;
    for (int rep = 0; rep < repeats; ++rep) {
      for (int row_idx = 0; row_idx < nrows; ++row_idx) {
        int r = sample ? -1 : 0;
        // only train with a given number of training samples (fraction*nrows)
        if (sample && !obs_weights && skip_rng.nextDouble() > fraction) continue;
        if (obs_weights && num_processed_rows % 2 == 0) {
          // every second row is randomly sampled -> that way we won't "forget" rare rows
          // importance sampling based on inverse of cumulative distribution
          double key = skip_rng.nextDouble();
          r = Arrays.binarySearch(weight_map, 0, nrows, key);
          //          Log.info(Arrays.toString(weight_map));
          //          Log.info("key: " + key + " idx: " + (r >= 0 ? r : (-r-1)));
          if (r < 0) r = -r - 1;
          assert (r == 0 || weight_map[r] > weight_map[r - 1]);
        } else if (r == -1) {
          // if we have weights, and we did the %2 skipping above, then we need to find an
          // alternate row with non-zero weight
          do {
            r = skip_rng.nextInt(nrows); // random sampling (with replacement)
          } while (obs_weights
              && ((r == 0 && weight_map[0] == 0) || (r > 0 && weight_map[r] == weight_map[r - 1])));
        } else {
          assert (!obs_weights);
          r = row_idx; // linear scan - slightly faster
        }
        assert (r >= 0 && r <= nrows);

        row = _dinfo.extractDenseRow(chunks, r, row);
        if (!row.bad) {
          // check that we never process a row that was held out via row.weight = 0
          assert (row.weight > 0);
          long seed = offset + rep * nrows + r;
          if (outputs != null && outputs.length > 0) processRow(seed++, row, outputs);
          else processRow(seed++, row);
        }
        num_processed_rows++;
      }
    }
    assert (fraction != 1 || num_processed_rows == repeats * nrows);
    chunkDone(num_processed_rows);
  }
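When observation weights are present, the importance-sampling step above inverts the cumulative weight distribution: it draws a uniform key and binary-searches the normalized cumulative weights (weight_map), so rows with zero weight are effectively never selected. The sketch below reproduces just that mechanism with toy weights; the class name and data are assumptions for illustration.

import java.util.Arrays;
import java.util.Random;

// Weighted row sampling via inverse CDF: build a normalized cumulative weight
// array, draw a uniform key, and locate the row with a binary search.
public class WeightedRowSamplerSketch {
  public static void main(String[] args) {
    double[] weights = {1.0, 0.0, 3.0, 0.5, 0.5}; // per-row observation weights
    int nrows = weights.length;

    // cumulative sum, then normalize to 0...1 (same role as weight_map above)
    double[] weightMap = new double[nrows];
    double sum = 0;
    for (int i = 0; i < nrows; i++) { sum += weights[i]; weightMap[i] = sum; }
    for (int i = 0; i < nrows; i++) weightMap[i] /= sum;

    Random rng = new Random(0x8734093502429734L);
    int[] counts = new int[nrows];
    for (int draw = 0; draw < 10000; draw++) {
      double key = rng.nextDouble();
      int r = Arrays.binarySearch(weightMap, 0, nrows, key);
      if (r < 0) r = -r - 1; // insertion point = first cumulative value above the key
      counts[r]++;           // rows with zero weight are (almost surely) never hit
    }
    System.out.println(Arrays.toString(counts)); // counts roughly proportional to weights
  }
}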