Пример #1
0
 /**
  * Consumes the result of a finished line-search task.
  *
  * <p>The candidate solutions in {@code glmt._betas} are tried in index order. Each candidate is
  * paired with a step size that starts at 0.5 and is halved after every rejected candidate. The
  * first candidate for which {@code needLineSearch} returns {@code false} is used to start a new
  * {@code GLMIterationTask}. If no candidate qualifies, the previous result is passed on to
  * {@code nextLambda}.
  *
  * @param glmt the completed line-search task
  */
 @Override
 public void callback(final GLMTask.GLMLineSearchTask glmt) {
   double step = 0.5;
   for (int idx = 0; idx < glmt._objvals.length; ++idx, step *= 0.5) {
     if (needLineSearch(glmt._betas[idx], glmt._objvals[idx], step)) {
       continue; // candidate rejected; move on to the next one with half the step
     }
     Log.info("GLM2 (iteration=" + _iter + ") line search: found admissible step=" + step);
     // Clear the last result so that the Iteration will not attempt to verify whether or not it
     // should do the line search.
     _lastResult = null;
     GLMIterationTask nextIteration =
         new GLMIterationTask(
             GLM2.this,
             _activeData,
             _glm,
             true,
             true,
             true,
             glmt._betas[idx],
             _ymu,
             _reg,
             new Iteration());
     nextIteration.asyncExec(_activeData._adaptedFrame);
     return;
   }
   // No line step worked, forcibly converge.
   Log.info(
       "GLM2 (iteration="
           + _iter
           + ") line search failed to find feasible step. Forcibly converged.");
   nextLambda(
       _lastResult._glmt.clone(),
       resizeVec(_lastResult._glmt._beta, _activeCols, _lastResult._activeCols));
 }
Пример #2
0
  /**
   * Recursively walks the directory {@code p} and registers every plain file found below it.
   *
   * <p>For each regular file a key is created through {@code HdfsFileVec.make} and its string form
   * is appended to {@code keys}. Files whose names end with {@code Extensions.JSON} or {@code
   * Extensions.HEX} are not supported and raise {@code H2O.unimpl()}. Any exception raised while
   * processing {@code p} is logged and {@code p} is appended to {@code failed}.
   *
   * @param fs file system to list; when {@code null} the method does nothing
   * @param p directory to traverse
   * @param keys receives the string form of every key that was created
   * @param failed receives the path of every directory whose traversal raised an exception
   */
  private static void addFolder2(
      FileSystem fs, Path p, ArrayList<String> keys, ArrayList<String> failed) {
    try {
      if (fs == null) return;

      // NOTE(review): this Futures instance is handed to HdfsFileVec.make but is never blocked on
      // in this method - confirm that callers do not need the pending work to be complete.
      Futures futures = new Futures();
      for (FileStatus file : fs.listStatus(p)) {
        Path pfs = file.getPath();
        if (file.isDir()) {
          addFolder2(fs, pfs, keys, failed);
        } else if (pfs.getName().endsWith(Extensions.JSON)) {
          throw H2O.unimpl();
        } else if (pfs.getName().endsWith(Extensions.HEX)) { // Hex file?
          throw H2O.unimpl();
        } else {
          Key k = HdfsFileVec.make(file, futures);
          keys.add(k.toString());
          Log.info("PersistHdfs: DKV.put(" + k + ")");
        }
      }
    } catch (Exception e) {
      Log.err(e);
      failed.add(p.toString());
    }
  }
Пример #3
0
    /**
     * Handles the case where some centers go dry. Rescues only 1 cluster per iteration ('cause we
     * only tracked the 1 worst row).
     *
     * <p>The first cluster with size 0 is re-initialized from {@code task._worst_row}. If another
     * empty cluster remains afterwards, the caller is asked to re-run Lloyds, but at most until
     * {@code _reinit_attempts} reaches {@code _parms._k}; after that the counter is reset and no
     * further re-run is requested.
     *
     * @param task the Lloyds pass whose cluster sizes, means and worst row are inspected
     * @param vecs, means, mults, modes forwarded unchanged to {@code data(...)}
     * @param centers cluster centers; the entry of the rescued cluster is replaced
     * @return {@code true} if Lloyds should be re-run to rescue another cluster, {@code false}
     *     otherwise
     */
    boolean cleanupBadClusters(
        Lloyds task,
        final Vec[] vecs,
        final double[][] centers,
        final double[] means,
        final double[] mults,
        final int[] modes) {
      // Find any bad clusters
      int clu;
      for (clu = 0; clu < _parms._k; clu++) if (task._size[clu] == 0) break;
      if (clu == _parms._k) return false; // No bad clusters

      long row = task._worst_row;
      Log.warn("KMeans: Re-initializing cluster " + clu + " to row " + row);
      data(centers[clu] = task._cMeans[clu], vecs, row, means, mults, modes);
      // FIXME: PUBDEV-871 Some other cluster had their membership count reduced by one!
      // (which one?)
      task._size[clu] = 1;

      // Find any MORE bad clusters; we only fixed the first one
      for (clu = 0; clu < _parms._k; clu++) if (task._size[clu] == 0) break;
      if (clu == _parms._k) return false; // No MORE bad clusters

      // If we see 2 or more bad rows, just re-run Lloyds to get the
      // next-worst row.  We don't count this as an iteration, because
      // we're not really adjusting the centers, we're trying to get
      // some centers *at-all*.
      if (_reinit_attempts++ < _parms._k) {
        // Only announce the re-run when it is actually going to happen.
        Log.warn("KMeans: Re-running Lloyds to re-init another cluster");
        return true; // Rerun Lloyds, and assign points to centroids
      }
      _reinit_attempts = 0; // Attempt budget exhausted; start fresh next time
      return false;
    }
Пример #4
0
  /**
   * Builds a URL query string from redirect arguments.
   *
   * <p>When {@code args2} is non-null it takes precedence and is read as alternating field-name /
   * value pairs. Otherwise the entries of {@code args} are used. Values are URL-encoded as UTF-8;
   * names are appended as-is.
   *
   * @param args JSON object whose entries become {@code name=value} pairs; used only when {@code
   *     args2} is {@code null}
   * @param args2 flat array of name/value pairs; its length must be even
   * @return {@code ""} when both arguments are {@code null}; otherwise the query string. For
   *     {@code args2} it starts with {@code '?'} only if at least one pair is present, while for
   *     {@code args} it always starts with {@code '?'}
   */
  protected static String encodeRedirectArgs(JsonObject args, Object[] args2) {
    if (args == null && args2 == null) return "";
    if (args2 != null) {
      StringBuilder sb = new StringBuilder();
      assert (args2.length & 1) == 0
          : "Number of arguments must be even."; // Must be field-name / value pairs
      for (int i = 0; i < args2.length; i += 2) {
        sb.append(i == 0 ? '?' : '&').append(args2[i]).append('=');
        try {
          sb.append(URLEncoder.encode(args2[i + 1].toString(), "UTF-8"));
        } catch (UnsupportedEncodingException ex) {
          throw Log.errRTExcept(ex);
        }
      }
      return sb.toString();
    }

    StringBuilder sb = new StringBuilder();
    sb.append("?");
    for (Map.Entry<String, JsonElement> entry : args.entrySet()) {
      JsonElement e = entry.getValue();
      if (sb.length() != 1) sb.append("&"); // separator before every pair except the first
      sb.append(entry.getKey());
      sb.append("=");
      try {
        sb.append(URLEncoder.encode(e.getAsString(), "UTF-8"));
      } catch (UnsupportedEncodingException ex) {
        throw Log.errRTExcept(ex);
      }
    }
    return sb.toString();
  }
Пример #5
0
  /**
   * Removes everything: cancels and removes each job returned by {@code Job.jobs()}, waits for
   * those removals to finish, then runs a task through {@code doAllNodes()} whose {@code
   * setupLocal()} calls {@code H2O.raw_clear()} and clears {@code Vec.ESPC}.
   *
   * @param version API version (not used by this method)
   * @param u request schema, returned unchanged
   * @return {@code u}
   */
  @SuppressWarnings("unused") // called through reflection by RequestServer
  public RemoveAllV3 remove(int version, RemoveAllV3 u) {
    Log.info("Removing all objects");

    // Stop and drop all jobs first, and wait until their removal has completed.
    Futures pending = new Futures();
    for (Job job : Job.jobs()) {
      job.cancel();
      job.remove(pending);
    }
    pending.blockForPending();

    // Bulk brainless key removal.  Completely wipes all Keys without regard.
    MRTask wipeTask =
        new MRTask() {
          @Override
          public byte priority() {
            return H2O.GUI_PRIORITY;
          }

          @Override
          public void setupLocal() {
            H2O.raw_clear();
            water.fvec.Vec.ESPC.clear();
          }
        };
    wipeTask.doAllNodes();

    Log.info("Finished removing objects");
    return u;
  }
Пример #6
0
  /**
   * Checks {@code GLRM.expandCats} on five hand-picked iris rows: after permuting the columns with
   * {@code dinfo._permutation}, the expanded matrix must equal the expected matrix in which the
   * three indicator columns of the class come first, followed by the four numeric columns.
   */
  @Test
  public void testExpandCatsIris() throws InterruptedException, ExecutionException {
    String[] colNames = {"sepal_len", "sepal_wid", "petal_len", "petal_wid", "class"};
    String[][] domains = {null, null, null, null, {"setosa", "versicolor", "virginica"}};

    // Input rows: four numeric measurements followed by the class index.
    double[][] rawRows =
        ard(
            ard(6.3, 2.5, 4.9, 1.5, 1),
            ard(5.7, 2.8, 4.5, 1.3, 1),
            ard(5.6, 2.8, 4.9, 2.0, 2),
            ard(5.0, 3.4, 1.6, 0.4, 0),
            ard(6.0, 2.2, 5.0, 1.5, 2));
    // Expected result: class indicator columns first, then the numeric columns.
    double[][] expected =
        ard(
            ard(0, 1, 0, 6.3, 2.5, 4.9, 1.5),
            ard(0, 1, 0, 5.7, 2.8, 4.5, 1.3),
            ard(0, 0, 1, 5.6, 2.8, 4.9, 2.0),
            ard(1, 0, 0, 5.0, 3.4, 1.6, 0.4),
            ard(0, 0, 1, 6.0, 2.2, 5.0, 1.5));

    Frame frame = null;
    try {
      frame = parse_test_file(Key.make("iris.hex"), "smalldata/iris/iris_wheader.csv");
      DataInfo info =
          new DataInfo(
              Key.make(),
              frame,
              null,
              0,
              true,
              DataInfo.TransformType.NONE,
              DataInfo.TransformType.NONE,
              false,
              false,
              false, /* weights */
              false, /* offset */
              false, /* fold */
              false);

      String originalHeader = colFormat(colNames, "%8.7s");
      Log.info("Original matrix:\n" + originalHeader + ArrayUtils.pprint(rawRows));

      double[][] permuted = ArrayUtils.permuteCols(rawRows, info._permutation);
      String permutedHeader = colFormat(colNames, "%8.7s", info._permutation);
      Log.info("Permuted matrix:\n" + permutedHeader + ArrayUtils.pprint(permuted));

      double[][] expanded = GLRM.expandCats(permuted, info);
      String expandedHeader = colExpFormat(colNames, domains, "%8.7s", info._permutation);
      Log.info("Expanded matrix:\n" + expandedHeader + ArrayUtils.pprint(expanded));

      Assert.assertArrayEquals(expected, expanded);
    } catch (Throwable t) {
      t.printStackTrace();
      throw new RuntimeException(t);
    } finally {
      if (frame != null) {
        frame.delete();
      }
    }
  }
Пример #7
0
    /**
     * Sends the computed task back to the client as an ACK, retrying while {@code _dt} is still
     * set.
     *
     * <p>The loop ends either after a successful send (which sets {@code _computedAndReplied}) or
     * when {@code _dt} is observed to be {@code null}, which is reported as a cancellation. After
     * a successful send the task is recorded with the client for ACK / ACKACK retry handling.
     */
    private void sendAck() {
      // Send results back
      DTask dt, origDt = _dt; // _dt can go null the instant it is sent over the wire
      assert origDt != null; // Freed after completion
      while ((dt = _dt) != null) { // Retry loop for broken TCP sends
        AutoBuffer ab = null;
        try {
          // Start the ACK with results back to client.  If the client is
          // asking for a class/id mapping (or any job running at FETCH_ACK
          // priority) then return a udp.fetchack byte instead of a udp.ack.
          // The receiver thread then knows to handle the mapping at the higher
          // priority.

          UDP.udp udp = dt.priority() == H2O.FETCH_ACK_PRIORITY ? UDP.udp.fetchack : UDP.udp.ack;
          ab = new AutoBuffer(_client, udp._prior).putTask(udp, _tsknum).put1(SERVER_UDP_SEND);
          assert ab.position() == 1 + 2 + 4 + 1;
          dt.write(ab); // Write the DTask - could be very large write
          dt._repliedTcp = ab.hasTCP(); // Resends do not need to repeat TCP result
          ab.close(); // Then close; send final byte
          _computedAndReplied = true; // After the final handshake, set computed+replied bit
          break; // Break out of retry loop
        } catch (AutoBuffer.AutoBufferException e) {
          if (!_client._heartbeat._client) { // Report on servers only; clients allowed to be flaky
            Log.info(
                "IOException during ACK, "
                    + e._ioe.getMessage()
                    + ", t#"
                    + _tsknum
                    + " AB="
                    + ab
                    + ", waiting and retrying...");
          }
          // ab is still null if the failure happened before the buffer was assigned.
          if (ab != null) {
            ab.drainClose();
          }
          if (_client._heartbeat._client) { // Dead client will not accept a TCP ACK response?
            this.CAS_DT(dt, null); // cancel the ACK
          }
          try {
            Thread.sleep(100);
          } catch (InterruptedException ignore) {
            // Deliberately ignored: the retry loop simply goes around again.
          }
        } catch (Exception e) { // Custom serializer just barfed?
          Log.err(e); // Log custom serializer exception
          // ab is still null if the failure happened before the buffer was assigned.
          if (ab != null) {
            ab.drainClose();
          }
        }
      } // end of retry loop
      if (dt == null)
        Log.info(
            "Cancelled remote task#"
                + _tsknum
                + " "
                + origDt.getClass()
                + " to "
                + _client
                + " has been cancelled by remote");
      else {
        if (dt instanceof MRTask && dt.logVerbose())
          Log.debug("Done remote task#" + _tsknum + " " + dt.getClass() + " to " + _client);
        _client.record_task_answer(this); // Setup for retrying Ack & AckAck, if not canceled
      }
    }
Пример #8
0
  /**
   * Trains a GLRM model on the prostate data set with four columns converted to categoricals,
   * scores the training frame and logs the numeric and categorical error of the resulting
   * metrics. The test passes as long as training and scoring complete without throwing.
   */
  @Test
  public void testCategoricalProstate() throws InterruptedException, ExecutionException {
    GLRM job = null;
    GLRMModel model = null;
    Frame train = null;
    final int[] cats = new int[] {1, 3, 4, 5}; // Categoricals: CAPSULE, RACE, DPROS, DCAPS

    try {
      Scope.enter();
      train = parse_test_file(Key.make("prostate.hex"), "smalldata/logreg/prostate.csv");
      for (int i = 0; i < cats.length; i++)
        Scope.track(train.replace(cats[i], train.vec(cats[i]).toCategoricalVec())._key);
      train.remove("ID").remove();
      DKV.put(train._key, train);

      GLRMParameters parms = new GLRMParameters();
      parms._train = train._key;
      parms._k = 8;
      parms._gamma_x = parms._gamma_y = 0.1;
      parms._regularization_x = GLRMModel.GLRMParameters.Regularizer.Quadratic;
      parms._regularization_y = GLRMModel.GLRMParameters.Regularizer.Quadratic;
      parms._init = GLRM.Initialization.PlusPlus;
      parms._transform = DataInfo.TransformType.STANDARDIZE;
      parms._recover_svd = false;
      parms._max_iterations = 200;

      try {
        job = new GLRM(parms);
        model = job.trainModel().get();
        Log.info(
            "Iteration "
                + model._output._iterations
                + ": Objective value = "
                + model._output._objective);
        model.score(train).delete();
        ModelMetricsGLRM mm = (ModelMetricsGLRM) ModelMetrics.getFromDKV(model, train);
        Log.info(
            "Numeric Sum of Squared Error = "
                + mm._numerr
                + "\tCategorical Misclassification Error = "
                + mm._caterr);
      } finally {
        // The constructor itself may have thrown, in which case there is no job to remove and a
        // NullPointerException here would hide the real failure.
        if (job != null) job.remove();
      }
    } catch (Throwable t) {
      // Single reporting point: print once and wrap once.
      t.printStackTrace();
      throw new RuntimeException(t);
    } finally {
      if (train != null) train.delete();
      if (model != null) model.delete();
      Scope.exit();
    }
  }
Пример #9
0
 /**
  * Recursively walks the directory {@code p} and puts one entry into the DKV per file found.
  *
  * <p>How the value is built depends on the file:
  *
  * <ul>
  *   <li>names ending with {@code Extensions.JSON}: the file is parsed as JSON, must contain a
  *       version and a type entry, and is loaded into an {@code OldModel} instance; no value is
  *       created, so {@code null} is passed to {@code DKV.put};
  *   <li>names ending with {@code Extensions.HEX}: up to the first megabyte is read and decoded as
  *       a {@code ValueArray};
  *   <li>files of at least {@code 2 * ValueArray.CHUNK_SZ} bytes: wrapped in a {@code ValueArray};
  *   <li>everything else: a plain {@code Value}.
  * </ul>
  *
  * <p>Each processed file is reported in {@code succeeded}. Any exception raised while processing
  * {@code p} is logged and reported in {@code failed} together with its message.
  *
  * @param fs file system to list; when {@code null} the method does nothing
  * @param p directory to traverse
  * @param succeeded receives one JSON object (key, file, size) per processed file
  * @param failed receives one JSON object (file, error) per directory whose traversal failed
  */
 private static void addFolder(FileSystem fs, Path p, JsonArray succeeded, JsonArray failed) {
   try {
     if (fs == null) return;
     for (FileStatus file : fs.listStatus(p)) {
       Path pfs = file.getPath();
       if (file.isDir()) {
         addFolder(fs, pfs, succeeded, failed);
       } else {
         Key k = Key.make(pfs.toString());
         long size = file.getLen();
         Value val = null;
         if (pfs.getName().endsWith(Extensions.JSON)) {
           JsonObject json;
           FSDataInputStream in = fs.open(pfs);
           try {
             json = new JsonParser().parse(new InputStreamReader(in)).getAsJsonObject();
           } finally {
             in.close(); // always release the stream, also when parsing fails
           }
           JsonElement v = json.get(Constants.VERSION);
           if (v == null) throw new RuntimeException("Missing version");
           JsonElement type = json.get(Constants.TYPE);
           if (type == null) throw new RuntimeException("Missing type");
           Class c = Class.forName(type.getAsString());
           OldModel model = (OldModel) c.newInstance();
           model.fromJson(json);
         } else if (pfs.getName().endsWith(Extensions.HEX)) { // Hex file?
           int sz = (int) Math.min(1L << 20, size); // Read up to the 1st meg
           byte[] mem = MemoryManager.malloc1(sz);
           FSDataInputStream s = fs.open(pfs);
           try {
             s.readFully(mem);
           } finally {
             s.close(); // always release the stream, also when the read fails
           }
           // Convert to a ValueArray (hope it fits in 1Meg!)
           ValueArray ary = new ValueArray(k, 0).read(new AutoBuffer(mem));
           val = new Value(k, ary, Value.HDFS);
         } else if (size >= 2 * ValueArray.CHUNK_SZ) {
           // ValueArray byte wrapper over a large file
           val = new Value(k, new ValueArray(k, size), Value.HDFS);
         } else {
           val = new Value(k, (int) size, Value.HDFS); // Plain Value
           val.setdsk();
         }
         DKV.put(k, val);
         Log.info("PersistHdfs: DKV.put(" + k + ")");
         JsonObject o = new JsonObject();
         o.addProperty(Constants.KEY, k.toString());
         o.addProperty(Constants.FILE, pfs.toString());
         o.addProperty(Constants.VALUE_SIZE, size);
         succeeded.add(o);
       }
     }
   } catch (Exception e) {
     Log.err(e);
     JsonObject o = new JsonObject();
     o.addProperty(Constants.FILE, p.toString());
     o.addProperty(Constants.ERROR, e.getMessage());
     failed.add(o);
   }
 }
Пример #10
0
  /**
   * Launches a local three-node cloud in separate VMs, runs the {@code Master} class on one of
   * them and exits with the master's exit code. The master's captured output is echoed when that
   * exit code is non-zero.
   *
   * @param args extra arguments appended to the arguments of every node
   */
  public static void main(String[] args) throws Exception {
    // Can be necessary to run in parallel to other clouds, so find open ports
    final int[] ports = new int[3];
    int candidate = 54321;
    for (int slot = 0; slot < ports.length; slot++) {
      // Each node needs two consecutive free ports.
      while (!(isOpen(candidate) && isOpen(candidate + 1))) {
        candidate++;
      }
      ports[slot] = candidate;
      candidate += 2;
    }

    StringBuilder flat = new StringBuilder();
    for (int p : ports) {
      flat.append("127.0.0.1:").append(p).append("\n");
    }

    // Force all IPs to local so that users can run with a firewall
    String flatPath = Utils.writeFile(flat.toString()).getAbsolutePath();
    String[] localArgs = new String[] {"-ip", "127.0.0.1", "-flatfile", flatPath};
    H2O.OPT_ARGS.ip = "127.0.0.1";
    args = (String[]) ArrayUtils.addAll(localArgs, args);

    // Worker nodes use ports[1..]; the master uses ports[0] and is added last.
    ArrayList<Node> nodes = new ArrayList<Node>();
    for (int w = 1; w < ports.length; w++) {
      nodes.add(new NodeVM(Utils.append(args, "-port", "" + ports[w])));
    }
    args = Utils.append(new String[] {"-mainClass", Master.class.getName()}, args);
    Node master = new NodeVM(Utils.append(args, "-port", "" + ports[0]));
    nodes.add(master);

    File sandbox = new File("sandbox");
    sandbox.mkdirs();
    Utils.clearFolder(sandbox);

    // After the loop, out/err refer to the files of the last node started, i.e. the master.
    File out = null;
    File err = null;
    for (int n = 0; n < nodes.size(); n++) {
      out = File.createTempFile("junit-" + n + "-out-", null, sandbox);
      err = File.createTempFile("junit-" + n + "-err-", null, sandbox);
      Node node = nodes.get(n);
      node.persistIO(out.getAbsolutePath(), err.getAbsolutePath());
      node.start();
    }

    final int exit = master.waitFor();
    if (exit != 0) {
      Log.log(out, System.out);
      Thread.sleep(100); // Or mixed (?)
      Log.log(err, System.err);
    }
    for (Node node : nodes) {
      node.kill();
    }
    if (exit == 0) {
      System.out.println("OK");
    }
    System.exit(exit);
  }
Пример #11
0
  /**
   * Trains an Aggregator on the covtype sample once as a baseline and then once per chunk count
   * in {1, 2, 5, 10, 50, 100}, asserting that every run yields the same number of exemplars as
   * the baseline.
   */
  @Test
  public void testChunks() {
    Frame frame = parse_test_file("smalldata/covtype/covtype.20k.data");

    // Baseline run on the frame as parsed.
    AggregatorModel.AggregatorParameters parms = new AggregatorModel.AggregatorParameters();
    parms._train = frame._key;
    parms._radius_scale = 3.0;
    long start = System.currentTimeMillis();
    AggregatorModel agg = new Aggregator(parms).trainModel().get(); // 0.418
    double seconds = (System.currentTimeMillis() - start) / 1000.;
    System.out.println("AggregatorModel finished in: " + seconds + " seconds");
    agg.checkConsistency();
    Frame output = agg._output._output_frame.get();
    Log.info("Number of exemplars: " + agg._exemplars.length);
    //    Assert.assertTrue(agg._exemplars.length==1993);
    output.remove();
    agg.remove();

    final int[] chunkCounts = {1, 2, 5, 10, 50, 100};
    for (int nChunks : chunkCounts) {
      Key key = Key.make();
      RebalanceDataSet rb = new RebalanceDataSet(frame, key, nChunks);
      H2O.submitTask(rb);
      rb.join();
      Frame rebalanced = DKV.get(key).get();

      // NOTE(review): training below uses the original frame's key, not the rebalanced frame
      // created above - confirm whether rebalanced._key was intended.
      parms = new AggregatorModel.AggregatorParameters();
      parms._train = frame._key;
      parms._radius_scale = 3.0;
      start = System.currentTimeMillis();
      // 0.373 0.504 0.357 0.454 0.368 0.355
      AggregatorModel agg2 = new Aggregator(parms).trainModel().get();
      seconds = (System.currentTimeMillis() - start) / 1000.;
      System.out.println("AggregatorModel finished in: " + seconds + " seconds");
      agg2.checkConsistency();
      Log.info("Number of exemplars for " + nChunks + " chunks: " + agg2._exemplars.length);
      rebalanced.delete();
      // The exemplar count must match the baseline exactly.
      Assert.assertTrue(agg._exemplars.length == agg2._exemplars.length);
      output = agg2._output._output_frame.get();
      output.remove();
      agg2.remove();
    }
    frame.delete();
  }
Пример #12
0
 // Loads /page.html from the resources into _htmlTemplate, substituting the cloud name for the
 // "%cloud_name" placeholder.  A missing resource or any failure while reading it is reported
 // through Log.die.
 static {
   InputStream resource = Boot._init.getResource2("/page.html");
   try {
     if (resource == null) {
       // Test for the missing resource explicitly rather than catching NullPointerException,
       // which would also misreport unrelated null dereferences as "not found".
       Log.die("page.html not found in resources.");
     } else {
       _htmlTemplate =
           new String(ByteStreams.toByteArray(resource)).replace("%cloud_name", H2O.NAME);
     }
   } catch (Exception e) {
     Log.err(e);
     Log.die(e.getMessage());
   } finally {
     Closeables.closeQuietly(resource);
   }
 }
Пример #13
0
    /**
     * Rejects requests whose authenticated login name differs from the cluster owner name.
     *
     * <p>Does nothing unless {@code H2O.ARGS.ldap_login} is set. When the login name is missing
     * or does not equal {@code H2O.ARGS.user_name}, a warning is logged, an {@code
     * SC_UNAUTHORIZED} error response is sent and the request is marked as handled.
     *
     * @param target not used by this method
     * @param baseRequest marked as handled when the request is rejected
     * @param request source of the user principal
     * @param response receives the error when the request is rejected
     */
    public void handle(
        String target,
        Request baseRequest,
        HttpServletRequest request,
        HttpServletResponse response)
        throws IOException, ServletException {
      if (!H2O.ARGS.ldap_login) {
        return;
      }

      // getUserPrincipal() returns null for a request that has not been authenticated; treat
      // that as a mismatch rather than failing with a NullPointerException.
      String loginName =
          request.getUserPrincipal() == null ? null : request.getUserPrincipal().getName();
      if (loginName == null || !loginName.equals(H2O.ARGS.user_name)) {
        Log.warn(
            "Login name ("
                + loginName
                + ") does not match cluster owner name ("
                + H2O.ARGS.user_name
                + ")");
        sendResponseError(
            response,
            HttpServletResponse.SC_UNAUTHORIZED,
            "Login name does not match cluster owner name");
        baseRequest.setHandled(true);
      }
    }
Пример #14
0
 // TCP large RECEIVE of results.  Note that 'this' is NOT the RPC object
 // that is hoping to get the received object, nor is the current thread the
 // RPC thread blocking for the object.  The current thread is the TCP
 // reader thread.
 //
 // The task number is read from the buffer and the matching RPC is looked up.
 // A live RPC receives the response; otherwise the buffer is drained and
 // closed.  In both cases an ACKACK for the task is sent back afterwards.
 static void tcp_ack(final AutoBuffer ab) throws IOException {
   // Get the RPC we're waiting on
   final int taskNum = ab.getTask();
   final RPC pending = ab._h2o.taskGet(taskNum);
   if (pending != null && !pending._done) {
     assert pending._tasknum == taskNum;
     assert !pending._done;
     // Here we have the result, and we're on the correct Node but wrong
     // Thread.  If we just return, the TCP reader thread will close the
     // remote, the remote will UDP ACK the RPC back, and back on the current
     // Node but in the correct Thread, we'd wake up and realize we received a
     // large result.
     try {
       pending.response(ab);
     } catch (AutoBuffer.AutoBufferException e) {
       // If TCP fails, we will have done a short-read crushing the original
       // _dt object, and be unable to resend.  This is fatal right now.
       // Really: an unimplemented feature; fix is to notice that a partial
       // TCP read means that the server (1) got our remote_exec request, (2)
       // has computed an answer and was trying to send it to us, (3) failed
       // sending via TCP hence the server knows it failed and will send again
       // without any further work from us.  We need to disable all the resend
       // & retry logic, and wait for the server to re-send our result.
       // Meanwhile the _dt object is crushed with half-read crap, and cannot
       // be trusted except in the base fields.
       throw Log.throwErr(e._ioe);
     }
   } else {
     // Race with canceling a large RPC fetch: Task is already dead.  Do not
     // bother reading from the TCP socket, just bail out & close socket.
     ab.drainClose();
   }
   // ACKACK the remote, telling him "we got the answer"
   AutoBuffer ackAck = new AutoBuffer(ab._h2o, H2O.ACK_ACK_PRIORITY);
   ackAck.putTask(UDP.udp.ackack.ordinal(), taskNum).close();
 }
Пример #15
0
  /**
   * Estimates whether the finished model will fit into the driver node's memory.
   *
   * <p>The size of the trees built so far is measured, recorded in the tree statistics and
   * averaged per tree. If the average multiplied by the requested number of trees exceeds the
   * node's maximum memory, an error is registered on {@code _ntrees} and the build is cancelled.
   * Nothing happens while no tree has been built yet.
   */
  @Override
  protected void checkMemoryFootPrint() {
    final int builtTrees = _model._output._ntrees; // existing trees
    if (builtTrees == 0) {
      return;
    }

    final long totalBytes =
        new ComputeModelSize(builtTrees, _model._output._treeKeys).doAllNodes()._model_mem_size;
    _model._output._treeStats._byte_size = totalBytes;

    final double bytesPerTree = (double) totalBytes / builtTrees;
    final String perTreeText = PrettyPrint.bytes((long) bytesPerTree);
    Log.debug("Average tree size (for all classes): " + perTreeText);

    // all the compressed trees are stored on the driver node
    final long maxMem = H2O.SELF.get_max_mem();
    final boolean tooBig = _parms._ntrees * bytesPerTree > maxMem;
    if (tooBig) {
      String msg =
          "The tree model will not fit in the driver node's memory ("
              + perTreeText
              + " per tree x "
              + _parms._ntrees
              + " > "
              + PrettyPrint.bytes(maxMem)
              + ") - try decreasing ntrees and/or max_depth or increasing min_rows!";
      error("_ntrees", msg);
      cancel(msg);
    }
  }
Пример #16
0
  /**
   * Scores a frame with a model and returns only the resulting metrics.
   *
   * <p>The predictions produced by scoring are removed immediately; the metrics are then looked
   * up through {@code fetch}. A warning is logged when no metrics come back.
   *
   * @param version API version, forwarded to {@code fetch}
   * @param s request schema naming the model and the frame
   * @return the fetched metrics schema, or a fresh empty schema when {@code fetch} returns null
   * @throws H2OIllegalArgumentException if the model or the frame is not specified
   * @throws H2OKeyNotFoundArgumentException if the model or the frame is not in the DKV
   */
  @SuppressWarnings("unused") // called through reflection by RequestServer
  public ModelMetricsListSchemaV3 score(int version, ModelMetricsListSchemaV3 s) {
    // Validate the model argument.
    if (s.model == null) {
      throw new H2OIllegalArgumentException("model", "predict", s.model);
    }
    if (DKV.get(s.model.name) == null) {
      throw new H2OKeyNotFoundArgumentException("model", "predict", s.model.name);
    }

    // Validate the frame argument.
    if (s.frame == null) {
      throw new H2OIllegalArgumentException("frame", "predict", s.frame);
    }
    if (DKV.get(s.frame.name) == null) {
      throw new H2OKeyNotFoundArgumentException("frame", "predict", s.frame.name);
    }

    // Score, then throw away predictions and keep metrics as a side-effect.
    ModelMetricsList parms = s.createAndFillImpl();
    parms._model.score(parms._frame, parms._predictions_name).remove();

    ModelMetricsListSchemaV3 mm = this.fetch(version, s);

    // TODO: for now only binary predictors write an MM object.
    // For the others cons one up here to return the predictions frame.
    if (mm == null) {
      mm = new ModelMetricsListSchemaV3();
    }

    boolean noMetrics = mm.model_metrics == null || mm.model_metrics.length == 0;
    if (noMetrics) {
      Log.warn(
          "Score() did not return a ModelMetrics for model: " + s.model + " on frame: " + s.frame);
    }

    return mm;
  }
Пример #17
0
 /**
  * Generates {@code src/main/java/water/TypeMapGen.java}: a class holding the sorted names of all
  * classes assignable to {@code Freezable}, preceded by two fixed entries.
  *
  * @param args not used
  */
 public static void main(String[] args) throws Exception {
   Log._dontDie = true; // Ignore fatal class load error

   ArrayList<String> freezables = new ArrayList<String>();
   for (String name : Boot.getClasses()) {
     boolean excluded =
         name.equals("water.api.RequestServer")
             || name.equals("water.External")
             || name.startsWith("water.r.");
     if (excluded) {
       continue;
     }
     Class c = Class.forName(name);
     if (Freezable.class.isAssignableFrom(c)) {
       freezables.add(c.getName());
     }
   }
   Collections.sort(freezables);

   // Fixed file header plus the two built-in entries.
   StringBuilder src = new StringBuilder();
   src.append("package water;\n");
   src.append("\n");
   src.append("// Do not edit - generated\n");
   src.append("public class TypeMapGen {\n");
   src.append("  static final String[] CLAZZES = {\n");
   src.append("    \" BAD\",                     // 0: BAD\n");
   src.append("    \"[B\",                       // 1: Array of Bytes\n");
   // One quoted entry per Freezable class.
   for (String className : freezables) {
     src.append("    \"").append(className).append("\",\n");
   }
   src.append("  };\n");
   src.append("}");

   Utils.writeFile(new File("src/main/java/water/TypeMapGen.java"), src.toString());
   Log.info("Generated TypeMap");
 }
Пример #18
0
 /**
  * Dispatches a request according to its type.
  *
  * <p>Arguments are always checked first. For {@code json} and {@code www} the request is
  * optionally logged, then either the query page is returned (when argument checking produced
  * one) or the request is served and its start time recorded.
  *
  * @param server the HTTP server, passed through to {@code wrap}
  * @param args request arguments
  * @param type kind of request being served
  * @return the wrapped response
  * @throws RuntimeException for a request type that is not handled
  */
 public NanoHTTPD.Response serve(NanoHTTPD server, Properties args, RequestType type) {
   // Needs to be done also for help to initialize or argument records
   String query = checkArguments(args, type);
   switch (type) {
     case help:
       return wrap(server, build(Response.done(serveHelp())));
     case json:
     case www:
       {
         if (log()) {
           // Log the request name followed by every non-empty argument.
           StringBuilder line = new StringBuilder(getClass().getSimpleName());
           for (Object arg : args.keySet()) {
             String value = args.getProperty((String) arg);
             if (value == null || value.length() == 0) {
               continue;
             }
             line.append(" ").append(arg).append("=").append(value);
           }
           Log.debug(Sys.HTTPD, line.toString());
         }
         if (query != null) {
           return wrap(server, query, type);
         }
         long time = System.currentTimeMillis();
         Response served = serve();
         served.setTimeStart(time);
         if (type == RequestType.json) {
           return wrap(server, served.toJson());
         }
         return wrap(server, build(served));
       }
     case debug:
       {
         Response debugResponse = serve_debug();
         return wrap(server, build(debugResponse));
       }
     case query:
       return wrap(server, query);
     default:
       throw new RuntimeException("Invalid request type " + type.toString());
   }
 }
Пример #19
0
  /**
   * Trains an Aggregator with binary categorical encoding on a generated 1000 x 10 frame, checks
   * the model's consistency, then prints the first rows of the output frame and logs the number
   * of exemplars.
   */
  @Test
  public void testAggregatorBinary() {
    // Synthetic data: 60% categorical columns, no integer or binary columns, 10% missing values.
    CreateFrame cf = new CreateFrame();
    cf.seed = 1234;
    cf.rows = 1000;
    cf.cols = 10;
    cf.factors = 5;
    cf.categorical_fraction = 0.6;
    cf.integer_fraction = 0.0;
    cf.binary_fraction = 0.0;
    cf.missing_fraction = 0.1;
    cf.real_range = 100;
    cf.integer_range = 100;
    Frame frame = cf.execImpl().get();

    AggregatorModel.AggregatorParameters parms = new AggregatorModel.AggregatorParameters();
    parms._train = frame._key;
    parms._radius_scale = 1.0;
    parms._transform = DataInfo.TransformType.NORMALIZE;
    parms._categorical_encoding = Model.Parameters.CategoricalEncodingScheme.Binary;

    long start = System.currentTimeMillis();
    AggregatorModel agg = new Aggregator(parms).trainModel().get(); // 0.905
    double seconds = (System.currentTimeMillis() - start) / 1000.;
    System.out.println("AggregatorModel finished in: " + seconds + " seconds");
    agg.checkConsistency();

    Frame output = agg._output._output_frame.get();
    System.out.println(output.toTwoDimTable(0, 10));
    Log.info("Number of exemplars: " + agg._exemplars.length);
    //    Assert.assertTrue(agg._exemplars.length==649);

    // Clean up everything the test created.
    output.remove();
    frame.remove();
    agg.remove();
  }
Пример #20
0
  /**
   * Exports the object stored under {@code src_key} as CSV to {@code path}.
   *
   * <p>Only {@code ValueArray} and {@code Frame} values can be exported. Paths starting with
   * {@code hdfs://} or {@code s3n://} (case-insensitive) go through {@code serveHdfs}; all others
   * through {@code serveLocalDisk}.
   *
   * @return a "done" response on success, or an error response wrapping whatever was thrown
   */
  @Override
  protected Response serve() {
    try {
      // pull everything local
      Log.info("ExportFiles processing (" + path + ")");
      if (DKV.get(src_key) == null) {
        throw new IllegalArgumentException(src_key.toString() + " not found.");
      }
      Object value = DKV.get(src_key).get();

      // create a stream to read the entire VA or Frame
      final boolean isValueArray = value instanceof ValueArray;
      if (!isValueArray && !(value instanceof Frame)) {
        throw new UnsupportedOperationException("Can only export Frames or ValueArrays.");
      }
      InputStream csv;
      if (isValueArray) {
        csv = new ValueArray.CsvVAStream((ValueArray) value, null);
      } else {
        csv = ((Frame) value).toCSV(true);
      }

      // Both remote schemes share the same writer.
      String lowerPath = path.toLowerCase();
      if (lowerPath.startsWith("hdfs://") || lowerPath.startsWith("s3n://")) {
        serveHdfs(csv);
      } else {
        serveLocalDisk(csv);
      }

      return RequestBuilders.Response.done(this);
    } catch (Throwable t) {
      return RequestBuilders.Response.error(t);
    }
  }
Пример #21
0
 /**
  * Computes the final quantile from the two elements bracketing it. If {@code lo} and {@code hi}
  * are equal, that value is the answer. If they differ, no single value exactly matches the
  * desired quantile and the result depends on {@code method}: linear interpolation, the average
  * of the two, the low element or the high element.
  *
  * @param lo the highest element less than or equal to the desired quantile
  * @param hi the lowest element greater than or equal to the desired quantile
  * @param row row number (zero based) of the lo element; high element is +1
  * @param nrows forwarded to {@code linearInterpolate}
  * @param prob forwarded to {@code linearInterpolate}
  * @param method how to combine {@code lo} and {@code hi}; {@code null} means interpolate
  * @return desired quantile.
  */
 static double computeQuantile(
     double lo,
     double hi,
     double row,
     double nrows,
     double prob,
     QuantileModel.CombineMethod method) {
   if (lo == hi) {
     return lo; // Equal; pick either
   }
   if (method == null) {
     method = QuantileModel.CombineMethod.INTERPOLATE;
   }

   if (method == QuantileModel.CombineMethod.INTERPOLATE) {
     return linearInterpolate(lo, hi, row, nrows, prob);
   } else if (method == QuantileModel.CombineMethod.AVERAGE) {
     return 0.5 * (hi + lo);
   } else if (method == QuantileModel.CombineMethod.LOW) {
     return lo;
   } else if (method == QuantileModel.CombineMethod.HIGH) {
     return hi;
   }

   // Any other method falls back to interpolation, with a log entry.
   Log.info(
       "Unknown even sample size quantile combination type: "
           + method
           + ". Doing linear interpolation.");
   return linearInterpolate(lo, hi, row, nrows, prob);
 }
Пример #22
0
 /**
  * Runs an I/O call, retrying until it succeeds, and records its timing.
  *
  * <p>{@code EOFException}, {@code SocketTimeoutException}, {@code S3Exception} and any other
  * {@code IOException} are passed to {@code ignoreAndWait} and the call is attempted again. Any
  * other exception is rethrown through {@code Log.errRTExcept}.
  *
  * @param c the I/O call to execute
  * @param read whether the call is a read; recorded as 1 for a read and 0 otherwise
  * @param size size value passed on to the timeline record
  */
 private static void run(Callable c, boolean read, int size) {
   // Count all i/o time from here, including all retry overheads
   final long ioStartMs = System.currentTimeMillis();
   boolean finished = false;
   while (!finished) {
     try {
       // Blocking i/o call timing - without counting repeats
       final long attemptStartNs = System.nanoTime();
       c.call();
       TimeLine.record_IOclose(attemptStartNs, ioStartMs, read ? 1 : 0, size, Value.HDFS);
       finished = true;
       // Explicitly ignore the following exceptions but
       // fail on the rest IOExceptions
     } catch (EOFException e) {
       ignoreAndWait(e, false);
     } catch (SocketTimeoutException e) {
       ignoreAndWait(e, false);
     } catch (S3Exception e) {
       // Preserve S3Exception before IOException
       // Since this is tricky code - we are supporting different HDFS version
       // New version declares S3Exception as IOException
       // But old versions (0.20.xxx) declares it as RuntimeException
       // So we have to catch it before IOException !!!
       ignoreAndWait(e, false);
     } catch (IOException e) {
       ignoreAndWait(e, true);
     } catch (Exception e) {
       throw Log.errRTExcept(e);
     }
   }
 }
Пример #23
0
 // Filter the current active columns using the strong rules.
 // Note: the strong rules are updated so that they keep all previous coefficients in, to prevent
 // issues with line-search.
 //
 // A column i is selected when it is already listed in _activeCols or when its gradient lies
 // outside [-rhs, rhs], where rhs = alpha[0] * (2 * l1 - l2).
 //
 // Side effects: updates _activeCols and _activeData.  Returns the new _activeCols, which is null
 // when strong rules are disabled or when every column was selected.
 private int[] activeCols(final double l1, final double l2, final double[] grad) {
   final double rhs = alpha[0] * (2 * l1 - l2);
   int[] cols = MemoryManager.malloc4(_dinfo.fullN());
   int selected = 0; // number of entries of cols filled so far
   int j = 0; // cursor into the previous _activeCols
   // {-1} is a placeholder that can never equal a column index (i starts at 0), so that the loop
   // below can index _activeCols without a null check.
   if (_activeCols == null) _activeCols = new int[] {-1};
   // NOTE(review): the single forward cursor j only matches previous columns if _activeCols is in
   // ascending order - confirm this invariant.
   for (int i = 0; i < _dinfo.fullN(); ++i)
     if ((j < _activeCols.length && i == _activeCols[j]) || grad[i] > rhs || grad[i] < -rhs) {
       cols[selected++] = i;
       if (j < _activeCols.length && i == _activeCols[j]) ++j; // consumed a previous column
     }
   if (!strong_rules_enabled || selected == _dinfo.fullN()) {
     // No filtering: fall back to the full data.
     _activeCols = null;
     // NOTE(review): this writes to the frame of the OLD _activeData before replacing it, and
     // would throw if _activeData were null here - confirm this is intended.
     _activeData._adaptedFrame = _dinfo._adaptedFrame;
     _activeData = _dinfo;
   } else {
     _activeCols = Arrays.copyOf(cols, selected); // trim to the selected prefix
     _activeData = _dinfo.filterExpandedColumns(_activeCols);
   }
   Log.info(
       "GLM2 strong rule at lambda="
           + l1
           + ", got "
           + selected
           + " active cols out of "
           + _dinfo.fullN()
           + " total.");
   return _activeCols;
 }
Пример #24
0
 /**
  * Runs the superclass initialisation, then checks that the requested mode (classification or
  * regression) is compatible with the response column.
  *
  * @throws H2OIllegalArgumentException if classification is requested on a float response, or
  *     regression is requested on an enum response
  */
 @Override
 protected void init() {
   super.init();
   // ROLLBACK: detecting whether the request actually carried a "classification" value (UI
   // control lookup plus the HTTP input) is disabled, so the field is always treated as
   // specified.
   final boolean classificationFieldSpecified = true;
   if (classificationFieldSpecified) {
     // Reject request if the requested mode contradicts the response column type.
     if (classification) {
       if (response.isFloat())
         throw new H2OIllegalArgumentException(
             find("classification"), "Requested classification on float column!");
     } else if (response.isEnum()) {
       throw new H2OIllegalArgumentException(
           find("classification"), "Requested regression on enum column!");
     }
     return;
   }
   // Can happen if a client sends a request which does not specify the classification
   // parameter: derive it from the response column instead.
   classification = response.isEnum();
   Log.warn(
       "Classification field is not specified - deriving according to response! The classification field set to "
           + classification);
 }
Пример #25
0
 /**
  * Builds the {@link DataInfo} for the given training/validation frames and Deep Learning
  * parameters, runs a {@code GLMTask.YMUTask} pass over the adapted frame, and - when observation
  * weights are used without an offset column - updates the weighted mean/sigma held by the
  * DataInfo.
  *
  * @param train Training frame
  * @param valid Validation frame
  * @param parms Model parameters
  * @param nClasses Number of response levels (1: regression, >=2: classification)
  * @return DataInfo
  * @throws H2OIllegalArgumentException if missing-value handling is {@code Skip} and the pass
  *     reports {@code _wsum == 0}
  */
 static DataInfo makeDataInfo(
     Frame train, Frame valid, DeepLearningParameters parms, int nClasses) {
   // Probe value: a link function that returns it unchanged is treated as the identity link.
   final double probe = 0.782347234;
   final boolean identityLink =
       new Distribution(parms._distribution, parms._tweedie_power).link(probe) == probe;

   final int nResponses = parms._autoencoder ? 0 : 1;
   // use all FactorLevels for auto-encoder
   final boolean useAllFactorLevels = parms._autoencoder || parms._use_all_factor_levels;

   // transform predictors
   final DataInfo.TransformType predictorTransform;
   if (!parms._standardize) {
     predictorTransform = DataInfo.TransformType.NONE;
   } else if (parms._autoencoder) {
     predictorTransform = DataInfo.TransformType.NORMALIZE;
   } else if (parms._sparse) {
     predictorTransform = DataInfo.TransformType.DESCALE;
   } else {
     predictorTransform = DataInfo.TransformType.STANDARDIZE;
   }

   // transform response for regression with identity link
   final boolean standardizeResponse =
       parms._standardize && !train.lastVec().isCategorical() && identityLink;
   final DataInfo.TransformType responseTransform =
       standardizeResponse ? DataInfo.TransformType.STANDARDIZE : DataInfo.TransformType.NONE;

   // whether to skip missing
   final boolean skipMissing =
       parms._missing_values_handling == DeepLearningParameters.MissingValuesHandling.Skip;
   final boolean hasWeights = parms._weights_column != null; // observation weights
   final boolean hasOffset = parms._offset_column != null;
   final boolean hasFold = parms._fold_column != null;

   final DataInfo dinfo =
       new DataInfo(
           train,
           valid,
           nResponses,
           useAllFactorLevels,
           predictorTransform,
           responseTransform,
           skipMissing,
           false, // do not replace NAs in numeric cols with mean
           true, // always add a bucket for missing values
           hasWeights,
           hasOffset,
           hasFold);

   // Checks and adjustments:
   // 1) observation weights (adjust mean/sigmas for predictors and response)
   // 2) NAs (check that there's enough rows left)
   final boolean supervised = !parms._autoencoder;
   final GLMTask.YMUTask ymt =
       new GLMTask.YMUTask(dinfo, nClasses, true, supervised && nClasses == 1, false, supervised)
           .doAll(dinfo._adaptedFrame);
   if (ymt._wsum == 0 && skipMissing) {
     throw new H2OIllegalArgumentException(
         "No rows left in the dataset after filtering out rows with missing values. Ignore columns with many NAs or set missing_values_handling to 'MeanImputation'.");
   }
   if (hasWeights) {
     if (hasOffset) {
       Log.warn(
           "Combination of offset and weights can lead to slight differences because Rollupstats aren't weighted - need to re-calculate weighted mean/sigma of the response including offset terms.");
     } else {
       // FIXME: offset not yet implemented - weighted stats are only refreshed without an offset
       dinfo.updateWeightedSigmaAndMean(ymt._basicStats.sigma(), ymt._basicStats.mean());
       if (nClasses == 1) {
         dinfo.updateWeightedSigmaAndMeanForResponse(
             ymt._basicStatsResponse.sigma(), ymt._basicStatsResponse.mean());
       }
     }
   }
   return dinfo;
 }
Пример #26
0
  /**
   * Blocks the calling thread until {@code m} bytes of task memory have been reserved.
   *
   * <p>Reservation is attempted with {@code tryReserveTaskMem}; while it fails, the thread parks
   * inside a {@link ManagedBlocker} on {@code _taskMemLock} until {@code _taskMem} reports at
   * least {@code m} bytes, then the reservation is retried.
   *
   * @param m number of bytes to reserve
   * @throws RuntimeException (via {@code Log.errRTExcept}) if {@code managedBlock} itself is
   *     interrupted
   */
  public static void reserveTaskMem(long m) {
    final long bytes = m;
    while (!tryReserveTaskMem(bytes)) {
      try {
        ForkJoinPool.managedBlock(
            new ManagedBlocker() {
              @Override
              public boolean isReleasable() {
                return _taskMem.get() >= bytes;
              }

              @Override
              public boolean block() throws InterruptedException {
                synchronized (_taskMemLock) {
                  // Re-check the condition while holding the monitor. managedBlock probes
                  // isReleasable() without the lock before calling block(); a notification on
                  // _taskMemLock delivered in that gap would otherwise be missed and this
                  // thread could wait with the memory already available.
                  if (!isReleasable()) {
                    try {
                      _taskMemLock.wait();
                    } catch (InterruptedException ignored) {
                      // Existing behaviour kept on purpose: an interrupt only ends this wait.
                      // The return value below tells managedBlock whether to block again.
                    }
                  }
                }
                return isReleasable();
              }
            });
      } catch (InterruptedException e) {
        throw Log.errRTExcept(e);
      }
    }
  }
Пример #27
0
  /**
   * Smoke test for the Storage vector and matrix containers: builds a sparse vector from a dense
   * one, walks it both densely and sparsely, and checks set/get round-trips on the four matrix
   * layouts.
   */
  @Test
  public void sparseTester() {
    // Dense source vector with three non-zero entries.
    Storage.DenseVector dense = new Storage.DenseVector(20);
    dense.set(3, 0.21f);
    dense.set(7, 0.13f);
    dense.set(18, 0.14f);

    Storage.SparseVector sparse = new Storage.SparseVector(dense);
    assert sparse.size() == 20;
    assert sparse.nnz() == 3;

    // Dense treatment: read every position through get().
    int pos = 0;
    while (pos < sparse.size()) {
      Log.info("sparse [" + pos + "] = " + sparse.get(pos));
      ++pos;
    }

    // Sparse treatment: walk the vector with its own iterator.
    Storage.SparseVector.Iterator cursor = sparse.begin();
    while (!cursor.equals(sparse.end())) {
      Log.info(cursor.index() + " -> " + cursor.value());
      cursor.next();
    }

    // Each matrix layout gets the same two writes, and an untouched cell must read back as zero.
    Storage.DenseColMatrix denseCol = new Storage.DenseColMatrix(3, 5);
    denseCol.set(2, 1, 3.2f);
    denseCol.set(1, 3, -1.2f);
    assert denseCol.get(2, 1) == 3.2f;
    assert denseCol.get(1, 3) == -1.2f;
    assert denseCol.get(0, 0) == 0f;

    Storage.DenseRowMatrix denseRow = new Storage.DenseRowMatrix(3, 5);
    denseRow.set(2, 1, 3.2f);
    denseRow.set(1, 3, -1.2f);
    assert denseRow.get(2, 1) == 3.2f;
    assert denseRow.get(1, 3) == -1.2f;
    assert denseRow.get(0, 0) == 0f;

    Storage.SparseColMatrix sparseCol = new Storage.SparseColMatrix(3, 5);
    sparseCol.set(2, 1, 3.2f);
    sparseCol.set(1, 3, -1.2f);
    assert sparseCol.get(2, 1) == 3.2f;
    assert sparseCol.get(1, 3) == -1.2f;
    assert sparseCol.get(0, 0) == 0f;

    Storage.SparseRowMatrix sparseRow = new Storage.SparseRowMatrix(3, 5);
    sparseRow.set(2, 1, 3.2f);
    sparseRow.set(1, 3, -1.2f);
    assert sparseRow.get(2, 1) == 3.2f;
    assert sparseRow.get(1, 3) == -1.2f;
    assert sparseRow.get(0, 0) == 0f;
  }
Пример #28
0
 /**
  * Clears the {@code CAN_ALLOC} flag (under {@code _lock}) and logs that the node is pausing to
  * swap to disk. Does nothing when the flag is already cleared.
  */
 public static void setMemLow() {
   if (CAN_ALLOC) {
     synchronized (_lock) {
       CAN_ALLOC = false;
     }
     // The message is emitted only after the lock has been released: NO LOGGING UNDER LOCK!
     Log.info(Sys.CLEAN, "Pausing to swap to disk; more memory may help");
   }
 }
Пример #29
0
 // Write-locks 'this' on behalf of job_key. If write_lock returns a previous object, that object
 // is deleted and the call blocks until the deletion's pending work is done. Returns 'this'.
 public T delete_and_lock(Key job_key) {
   final Lockable previous = write_lock(job_key);
   if (previous == null) return (T) this; // nothing old to clear
   Log.debug(Log.Tag.Sys.LOCKS, "lock-then-clear " + _key + " by job " + job_key);
   final Futures pending = new Futures();
   previous.delete_impl(pending).blockForPending();
   return (T) this;
 }
Пример #30
0
  /**
   * Trains a GLRM model (k=4, absolute loss, SVD initialisation, no transform) on the iris
   * dataset with its header, logs the final objective, then scores the training frame and logs
   * the numeric and categorical error metrics.
   *
   * <p>Any failure is printed and rethrown wrapped in a {@link RuntimeException}; the job, the
   * parsed frame and the model are cleaned up regardless of the outcome.
   */
  @Test
  public void testCategoricalIris() throws InterruptedException, ExecutionException {
    GLRM job = null;
    GLRMModel model = null;
    Frame train = null;

    try {
      train = parse_test_file(Key.make("iris.hex"), "smalldata/iris/iris_wheader.csv");
      GLRMParameters parms = new GLRMParameters();
      parms._train = train._key;
      parms._k = 4;
      parms._loss = GLRMParameters.Loss.Absolute;
      parms._init = GLRM.Initialization.SVD;
      parms._transform = DataInfo.TransformType.NONE;
      parms._recover_svd = true;
      parms._max_iterations = 1000;

      try {
        job = new GLRM(parms);
        model = job.trainModel().get();
        Log.info(
            "Iteration "
                + model._output._iterations
                + ": Objective value = "
                + model._output._objective);
        model.score(train).delete();
        ModelMetricsGLRM mm = (ModelMetricsGLRM) ModelMetrics.getFromDKV(model, train);
        Log.info(
            "Numeric Sum of Squared Error = "
                + mm._numerr
                + "\tCategorical Misclassification Error = "
                + mm._caterr);
      } finally {
        // job is still null if the GLRM constructor threw; an unguarded remove() would raise a
        // NullPointerException here and replace the real failure.
        if (job != null) job.remove();
      }
    } catch (Throwable t) {
      // Single reporting point: failures are printed and wrapped exactly once.
      t.printStackTrace();
      throw new RuntimeException(t);
    } finally {
      if (train != null) train.delete();
      if (model != null) model.delete();
    }
  }