Ejemplo n.º 1
0
  /**
   * Prints two wall-clock timings, in milliseconds, to standard output.
   *
   * <ol>
   *   <li>The time to generate 10000 rows of 1000 string features each and add every row to a
   *       {@code Dataset} as a {@code BasicDatum}.
   *   <li>The time for the same generation loop with no feature strings built and nothing added
   *       to a dataset, reported under the label "MultiVector".
   * </ol>
   *
   * <p>Each row creates a new {@code Random} with the fixed seed 42, so every row replays the same
   * pseudo-random sequence and the drawn label is the same for all rows.
   */
  public static void benchmarkDatum() {
    // Pass 1: feature generation plus Dataset insertion.
    long buildStart = System.currentTimeMillis();
    Dataset<String, String> dataset = new Dataset<>();
    for (int row = 0; row < 10000; row++) {
      // NOTE(review): re-seeding per row makes all rows share one random sequence — confirm
      // this is intended rather than a generator that should live outside the loop.
      Random rng = new Random(42);
      Set<String> featureSet = new HashSet<>();

      boolean label = rng.nextBoolean();
      boolean positiveRow = label && row % 2 == 0;

      for (int col = 0; col < 1000; col++) {
        // Exactly one draw per feature, whichever kind of row this is.
        boolean aboveThreshold = rng.nextDouble() > 0.3;
        // NOTE(review): non-positive rows get ":false" regardless of the draw — confirm.
        String suffix = (positiveRow && aboveThreshold) ? ":true" : ":false";
        featureSet.add("f:" + col + suffix);
      }

      dataset.add(new BasicDatum<String, String>(featureSet, "target:" + label));
    }
    long buildMillis = System.currentTimeMillis() - buildStart;
    System.out.println("Dataset construction took " + buildMillis + " ms");

    // Pass 2: same allocations and random draws, but no strings are built and nothing is stored.
    long baselineStart = System.currentTimeMillis();
    for (int row = 0; row < 10000; row++) {
      Random rng = new Random(42);
      // Allocated but never filled, matching the per-row allocation of the first pass.
      Set<String> featureSet = new HashSet<>();

      // Drawn to keep the random sequence aligned with the first pass; the value is unused.
      boolean label = rng.nextBoolean();

      for (int col = 0; col < 1000; col++) {
        // The draw is consumed and discarded.
        rng.nextDouble();
      }
    }
    long baselineMillis = System.currentTimeMillis() - baselineStart;
    System.out.println("MultiVector took " + baselineMillis + " ms");
  }
Ejemplo n.º 2
0
  /**
   * Builds a {@code Dataset} of 10000 {@code BasicDatum} rows with 1000 string features each, then
   * prints how many milliseconds {@code LinearClassifierFactory.trainClassifier} takes on it. Only
   * the training call is timed; dataset construction happens before the clock starts.
   *
   * <p>Measurements recorded with this benchmark (hardware and JVM are not stated here):
   *
   * <ul>
   *   <li>Profile: 57% of time spent in LogConditionalObjectiveFunction.calculateCLBatch(), 22%
   *       spent in constructing datums (expensive).
   *   <li>Single threaded, 4100 ms; multi threaded, 600 ms.
   *   <li>With same data, seed 42, 52 ms; with reordered accesses for caching, 38 ms -- down to
   *       73% of the time.
   *   <li>With 8 cpus, a 6.8x speedup -- basically the same as with RVFDatum.
   * </ul>
   */
  public static void benchmarkLogisticRegression() {
    Dataset<String, String> dataset = new Dataset<>();
    for (int row = 0; row < 10000; row++) {
      // A new generator with seed 42 per row: every row replays the same random sequence.
      // NOTE(review): confirm the per-row re-seed is intended; the timings above may rely on it.
      Random rng = new Random(42);
      Set<String> featureSet = new HashSet<>();

      boolean label = rng.nextBoolean();
      boolean positiveRow = label && row % 2 == 0;

      for (int col = 0; col < 1000; col++) {
        // Exactly one draw per feature, whichever kind of row this is.
        boolean aboveThreshold = rng.nextDouble() > 0.3;
        // NOTE(review): non-positive rows get ":false" regardless of the draw — confirm.
        String suffix = (positiveRow && aboveThreshold) ? ":true" : ":false";
        featureSet.add("f:" + col + suffix);
      }

      dataset.add(new BasicDatum<String, String>(featureSet, "target:" + label));
    }

    LinearClassifierFactory<String, String> classifierFactory = new LinearClassifierFactory<>();

    long trainStart = System.currentTimeMillis();
    classifierFactory.trainClassifier(dataset);
    long trainMillis = System.currentTimeMillis() - trainStart;
    System.out.println("Training took " + trainMillis + " ms");
  }
Ejemplo n.º 3
0
  /**
   * Builds an {@code RVFDataset} of 10000 {@code RVFDatum} rows, each carrying 1000 real-valued
   * features named "f0" through "f999", then prints how many milliseconds {@code
   * LinearClassifierFactory.trainClassifier} takes on it. Only the training call is timed.
   *
   * <p>Measurements recorded with this benchmark (hardware and JVM are not stated here):
   *
   * <ul>
   *   <li>Profile: 67% of time spent in LogConditionalObjectiveFunction.rvfcalculate(), 29% of
   *       time spent in dataset construction (11% in RVFDataset.addFeatures(), 7% rvf
   *       incrementCount(), 11% rest).
   *   <li>Single threaded, 4700 ms; multi threaded, 700 ms.
   *   <li>With same data, seed 42, 245 ms; with reordered accesses for caching, 195 ms -- down to
   *       80% of the time, not huge but a win nonetheless.
   *   <li>With 8 cpus, a 6.7x speedup -- almost, but not quite linear, pretty good.
   * </ul>
   */
  public static void benchmarkRVFLogisticRegression() {
    RVFDataset<String, String> dataset = new RVFDataset<>();
    for (int row = 0; row < 10000; row++) {
      // A new generator with seed 42 per row: every row replays the same random sequence.
      // NOTE(review): confirm the per-row re-seed is intended; the timings above may rely on it.
      Random rng = new Random(42);
      Counter<String> featureCounts = new ClassicCounter<>();

      boolean label = rng.nextBoolean();

      // Positive rows draw from [-0.6, 1.4); all other rows draw from [-1.4, 0.6).
      double offset = (label && row % 2 == 0) ? 0.6 : 1.4;

      for (int col = 0; col < 1000; col++) {
        double value = (rng.nextDouble() * 2.0) - offset;
        featureCounts.incrementCount("f" + col, value);
      }

      dataset.add(new RVFDatum<>(featureCounts, "target:" + label));
    }

    LinearClassifierFactory<String, String> classifierFactory = new LinearClassifierFactory<>();

    long trainStart = System.currentTimeMillis();
    classifierFactory.trainClassifier(dataset);
    long trainMillis = System.currentTimeMillis() - trainStart;
    System.out.println("Training took " + trainMillis + " ms");
  }
Ejemplo n.º 4
0
  /**
   * Builds a {@code Dataset} of 10000 {@code BasicDatum} rows with 1000 string features each,
   * configures a {@code LinearClassifierFactory} to create {@code SGDMinimizer} instances, and
   * prints how many milliseconds {@code trainClassifier} takes. Only the training call is timed;
   * dataset construction and factory setup happen before the clock starts.
   */
  public static void benchmarkSGD() {
    Dataset<String, String> dataset = new Dataset<>();
    for (int row = 0; row < 10000; row++) {
      // A new generator with seed 42 per row: every row replays the same random sequence.
      // NOTE(review): confirm the per-row re-seed is intended.
      Random rng = new Random(42);
      Set<String> featureSet = new HashSet<>();

      boolean label = rng.nextBoolean();
      boolean positiveRow = label && row % 2 == 0;

      for (int col = 0; col < 1000; col++) {
        // Exactly one draw per feature, whichever kind of row this is.
        boolean aboveThreshold = rng.nextDouble() > 0.3;
        // NOTE(review): non-positive rows get ":false" regardless of the draw — confirm.
        String suffix = (positiveRow && aboveThreshold) ? ":true" : ":false";
        featureSet.add("f:" + col + suffix);
      }

      dataset.add(new BasicDatum<String, String>(featureSet, "target:" + label));
    }

    LinearClassifierFactory<String, String> classifierFactory = new LinearClassifierFactory<>();

    // Each create() call hands back a new SGDMinimizer built with the same four arguments.
    // NOTE(review): the meaning of (0.1, 100, 0, 1000) is defined by SGDMinimizer's constructor,
    // which is not visible here.
    Factory<Minimizer<DiffFunction>> sgdMinimizerFactory =
        new Factory<Minimizer<DiffFunction>>() {
          @Override
          public Minimizer<DiffFunction> create() {
            return new SGDMinimizer<DiffFunction>(0.1, 100, 0, 1000);
          }
        };
    classifierFactory.setMinimizerCreator(sgdMinimizerFactory);

    long trainStart = System.currentTimeMillis();
    classifierFactory.trainClassifier(dataset);
    long trainMillis = System.currentTimeMillis() - trainStart;
    System.out.println("Training took " + trainMillis + " ms");
  }