Exemple #1
0
  /**
   * Times {@code LinearClassifierFactory.trainClassifier} on a synthetic {@code Dataset} of 10,000
   * {@code BasicDatum}s, each carrying 1,000 string features of the form {@code f:<j>:true} or
   * {@code f:<j>:false}, and prints the elapsed training time in milliseconds.
   *
   * <p>Data generation is deterministic: a single {@code Random} seeded with 42 drives the whole
   * dataset. The generator used to be re-created inside the datum loop, which replayed the same
   * stream for every datum, so every datum received the same label and only two distinct feature
   * sets ever occurred.
   *
   * <p>Measurements recorded by the original author (they predate the generator being hoisted out
   * of the loop, so absolute numbers will differ):
   *
   * <ul>
   *   <li>57% of time spent in LogConditionalObjectiveFunction.calculateCLBatch(), 22% spent in
   *       constructing datums (expensive)
   *   <li>Single threaded, 4100 ms; multi threaded, 600 ms
   *   <li>With same data, seed 42, 52 ms; with reordered accesses for caching, 38 ms (down to 73%
   *       of the time)
   *   <li>With 8 cpus, a 6.8x speedup -- basically the same as with RVFDatum
   * </ul>
   */
  public static void benchmarkLogisticRegression() {
    final int numDatums = 10000;
    final int numFeatures = 1000;

    // One generator for the whole dataset keeps runs reproducible while still giving every datum
    // its own label and feature draws.
    Random r = new Random(42);

    Dataset<String, String> data = new Dataset<>();
    for (int i = 0; i < numDatums; i++) {
      Set<String> features = new HashSet<>();

      boolean cl = r.nextBoolean();
      // Only positive datums at even indices can receive ":true" features; all other datums get
      // ":false" for every feature.
      boolean informative = cl && i % 2 == 0;

      for (int j = 0; j < numFeatures; j++) {
        // Draw unconditionally so stream consumption does not depend on the branch taken.
        double draw = r.nextDouble();
        boolean on = informative && draw > 0.3;
        features.add("f:" + j + ":" + on);
      }

      data.add(new BasicDatum<String, String>(features, "target:" + cl));
    }

    LinearClassifierFactory<String, String> factory = new LinearClassifierFactory<>();

    // nanoTime is the JDK's elapsed-time clock; currentTimeMillis follows the wall clock and can
    // jump if the system time is adjusted mid-run.
    long nsStart = System.nanoTime();
    factory.trainClassifier(data);
    long delay = (System.nanoTime() - nsStart) / 1000000L;
    System.out.println("Training took " + delay + " ms");
  }
Exemple #2
0
  /**
   * Times {@code LinearClassifierFactory.trainClassifier} on a synthetic {@code RVFDataset} of
   * 10,000 {@code RVFDatum}s, each with 1,000 real-valued features named {@code f<j>}, and prints
   * the elapsed training time in milliseconds.
   *
   * <p>Data generation is deterministic: a single {@code Random} seeded with 42 drives the whole
   * dataset. The generator used to be re-created inside the datum loop, which replayed the same
   * stream for every datum, so every datum received the same label and only two distinct feature
   * vectors ever occurred.
   *
   * <p>Measurements recorded by the original author (they predate the generator being hoisted out
   * of the loop, so absolute numbers will differ):
   *
   * <ul>
   *   <li>67% of time spent in LogConditionalObjectiveFunction.rvfcalculate(), 29% of time spent
   *       in dataset construction (11% in RVFDataset.addFeatures(), 7% rvf incrementCount(), 11%
   *       rest)
   *   <li>Single threaded, 4700 ms; multi threaded, 700 ms
   *   <li>With same data, seed 42, 245 ms; with reordered accesses for caching, 195 ms (down to
   *       80% of the time, not huge but a win nonetheless)
   *   <li>With 8 cpus, a 6.7x speedup -- almost, but not quite linear, pretty good
   * </ul>
   */
  public static void benchmarkRVFLogisticRegression() {
    final int numDatums = 10000;
    final int numFeatures = 1000;

    // One generator for the whole dataset keeps runs reproducible while still giving every datum
    // its own label and feature values.
    Random r = new Random(42);

    RVFDataset<String, String> data = new RVFDataset<>();
    for (int i = 0; i < numDatums; i++) {
      Counter<String> features = new ClassicCounter<>();

      boolean cl = r.nextBoolean();
      // Positive datums at even indices draw values from [-0.6, 1.4); all others from [-1.4, 0.6).
      double offset = (cl && i % 2 == 0) ? 0.6 : 1.4;

      for (int j = 0; j < numFeatures; j++) {
        double value = (r.nextDouble() * 2.0) - offset;
        features.incrementCount("f" + j, value);
      }

      data.add(new RVFDatum<>(features, "target:" + cl));
    }

    LinearClassifierFactory<String, String> factory = new LinearClassifierFactory<>();

    // nanoTime is the JDK's elapsed-time clock; currentTimeMillis follows the wall clock and can
    // jump if the system time is adjusted mid-run.
    long nsStart = System.nanoTime();
    factory.trainClassifier(data);
    long delay = (System.nanoTime() - nsStart) / 1000000L;
    System.out.println("Training took " + delay + " ms");
  }
Exemple #3
0
  /**
   * Runs {@link #testAdjacency(int, int)} with the default 10000 x 1000 matrix.
   *
   * <p>The original author noted: on my machine this results in a factor of two gain, roughly.
   */
  public static void testAdjacency() {
    testAdjacency(10000, 1000);
  }

  /**
   * Compares two traversal orders over a zero-filled {@code rows x cols} matrix of doubles and
   * prints the elapsed time of each in milliseconds.
   *
   * <p>Both scans visit the same {@code rows} randomly chosen rows. The first scan reads each
   * chosen row from start to end before moving to the next one; the second walks column by
   * column, touching every chosen row once per column.
   *
   * @param rows number of matrix rows, and also the number of random row picks; must be positive
   * @param cols number of columns read from each picked row; must not be negative
   * @throws IllegalArgumentException if {@code rows <= 0} or {@code cols < 0}
   */
  public static void testAdjacency(int rows, int cols) {
    if (rows <= 0 || cols < 0) {
      throw new IllegalArgumentException(
          "rows must be positive and cols non-negative, got rows=" + rows + ", cols=" + cols);
    }

    double[][] sqar = new double[rows][cols];
    Random r = new Random();

    // Pick the rows before any timing starts so both scans measure memory access only and visit
    // exactly the same rows.
    int[] randLocs = new int[rows];
    for (int i = 0; i < rows; i++) {
      randLocs[i] = r.nextInt(rows);
    }

    // Accumulate in a double: the elements are doubles, and "int += double" hides a narrowing cast.
    double rowMajorSum = 0.0;
    long nsStart = System.nanoTime();
    for (int i = 0; i < rows; i++) {
      double[] row = sqar[randLocs[i]];
      for (int j = 0; j < cols; j++) {
        rowMajorSum += row[j];
      }
    }
    long delay = (System.nanoTime() - nsStart) / 1000000L;
    System.out.println("Scanning with cache friendly lookups took " + delay + " ms");

    double columnMajorSum = 0.0;
    nsStart = System.nanoTime();
    for (int j = 0; j < cols; j++) {
      for (int i = 0; i < rows; i++) {
        columnMajorSum += sqar[randLocs[i]][j];
      }
    }
    delay = (System.nanoTime() - nsStart) / 1000000L;
    System.out.println("Scanning with cache UNfriendly lookups took " + delay + " ms");

    // Read the sums so the timed loops have an observable result and cannot be discarded as dead
    // code. The matrix is never written to, so both sums must be exactly zero.
    if (rowMajorSum != 0.0 || columnMajorSum != 0.0) {
      throw new IllegalStateException(
          "zero-filled matrix produced non-zero sums: " + rowMajorSum + ", " + columnMajorSum);
    }
  }
Exemple #4
0
  /**
   * Times {@code CRFClassifier.train} on 100 synthetic sentences of 20 tokens each and prints the
   * elapsed training time in milliseconds.
   *
   * <p>Token {@code j} of every sentence has the word {@code "j:" + j}. Its answer label is
   * {@code "target:true"} or {@code "target:false"}: the parity of {@code j} (even means true),
   * flipped whenever a draw from a {@code Random} seeded with 42 exceeds 0.7.
   *
   * <p>Measurements recorded by the original author:
   *
   * <ul>
   *   <li>29% in FactorTable.getValue(), 28% in CRFCliqueTree.getCalibratedCliqueTree(), 12.6%
   *       waiting for threads
   *   <li>Single threaded: 15000 ms - 26000 ms; multi threaded: 4500 ms - 7000 ms
   *   <li>With 8 cpus, 3.3x - 3.7x speedup, around 800% utilization
   * </ul>
   */
  public static void benchmarkCRF() {
    final int numSentences = 100;
    final int sentenceLength = 20;

    Properties props = new Properties();
    props.setProperty("macro", "true"); // use a generic CRF configuration
    props.setProperty("useIfInteger", "true");
    props.setProperty("featureFactory", "edu.stanford.nlp.benchmarks.BenchmarkFeatureFactory");
    props.setProperty("saveFeatureIndexToDisk", "false");

    CRFClassifier<CoreLabel> crf = new CRFClassifier<CoreLabel>(props);

    // Fixed seed so every run trains on the same label sequence.
    Random r = new Random(42);

    List<List<CoreLabel>> data = new ArrayList<>();
    for (int i = 0; i < numSentences; i++) {
      List<CoreLabel> sentence = new ArrayList<>();
      for (int j = 0; j < sentenceLength; j++) {
        CoreLabel token = new CoreLabel();
        token.setWord("j:" + j);

        // Label is the position's parity, inverted when the draw exceeds 0.7.
        boolean evenPosition = j % 2 == 0;
        boolean flip = r.nextDouble() > 0.7;
        boolean tag = evenPosition ^ flip;
        token.set(CoreAnnotations.AnswerAnnotation.class, "target:" + tag);

        sentence.add(token);
      }
      data.add(sentence);
    }

    // nanoTime is the JDK's elapsed-time clock; currentTimeMillis follows the wall clock and can
    // jump if the system time is adjusted mid-run.
    long nsStart = System.nanoTime();
    crf.train(data);
    long delay = (System.nanoTime() - nsStart) / 1000000L;
    System.out.println("Training took " + delay + " ms");
  }
Exemple #5
0
  /**
   * Times the construction of a synthetic {@code Dataset} of 10,000 {@code BasicDatum}s with 1,000
   * string features each, then times a second pass that performs only the same random draws, and
   * prints both elapsed times in milliseconds.
   *
   * <p>The second pass is reported under the label "MultiVector", but it builds nothing: it only
   * consumes the random numbers the first pass consumes. NOTE(review): it looks like a placeholder
   * for an alternative datum representation that was never filled in -- confirm before relying on
   * the comparison.
   *
   * <p>Each pass uses its own {@code Random} seeded with 42, created once before the pass. The
   * generator used to be re-created for every datum, which replayed the same stream each time, so
   * every datum received the same label.
   */
  public static void benchmarkDatum() {
    final int numDatums = 10000;
    final int numFeatures = 1000;

    // nanoTime is the JDK's elapsed-time clock; currentTimeMillis follows the wall clock and can
    // jump if the system time is adjusted mid-run.
    long nsStart = System.nanoTime();
    Random r = new Random(42);
    Dataset<String, String> data = new Dataset<>();
    for (int i = 0; i < numDatums; i++) {
      Set<String> features = new HashSet<>();

      boolean cl = r.nextBoolean();
      // Only positive datums at even indices can receive ":true" features; all other datums get
      // ":false" for every feature.
      boolean informative = cl && i % 2 == 0;

      for (int j = 0; j < numFeatures; j++) {
        // Draw unconditionally so stream consumption does not depend on the branch taken.
        double draw = r.nextDouble();
        boolean on = informative && draw > 0.3;
        features.add("f:" + j + ":" + on);
      }

      data.add(new BasicDatum<String, String>(features, "target:" + cl));
    }
    long delay = (System.nanoTime() - nsStart) / 1000000L;
    System.out.println("Dataset construction took " + delay + " ms");

    // Baseline pass: same seed and same sequence of draws as above, with nothing constructed.
    nsStart = System.nanoTime();
    Random baseline = new Random(42);
    for (int i = 0; i < numDatums; i++) {
      baseline.nextBoolean();
      for (int j = 0; j < numFeatures; j++) {
        baseline.nextDouble();
      }
    }
    delay = (System.nanoTime() - nsStart) / 1000000L;
    System.out.println("MultiVector took " + delay + " ms");
  }
Exemple #6
0
  /**
   * Times {@code LinearClassifierFactory.trainClassifier} when the factory is configured to create
   * an {@code SGDMinimizer} (constructed with arguments {@code 0.1, 100, 0, 1000}), and prints the
   * elapsed training time in milliseconds.
   *
   * <p>The training data is a synthetic {@code Dataset} of 10,000 {@code BasicDatum}s, each
   * carrying 1,000 string features of the form {@code f:<j>:true} or {@code f:<j>:false}.
   *
   * <p>Data generation is deterministic: a single {@code Random} seeded with 42 drives the whole
   * dataset. The generator used to be re-created inside the datum loop, which replayed the same
   * stream for every datum, so every datum received the same label and only two distinct feature
   * sets ever occurred.
   */
  public static void benchmarkSGD() {
    final int numDatums = 10000;
    final int numFeatures = 1000;

    // One generator for the whole dataset keeps runs reproducible while still giving every datum
    // its own label and feature draws.
    Random r = new Random(42);

    Dataset<String, String> data = new Dataset<>();
    for (int i = 0; i < numDatums; i++) {
      Set<String> features = new HashSet<>();

      boolean cl = r.nextBoolean();
      // Only positive datums at even indices can receive ":true" features; all other datums get
      // ":false" for every feature.
      boolean informative = cl && i % 2 == 0;

      for (int j = 0; j < numFeatures; j++) {
        // Draw unconditionally so stream consumption does not depend on the branch taken.
        double draw = r.nextDouble();
        boolean on = informative && draw > 0.3;
        features.add("f:" + j + ":" + on);
      }

      data.add(new BasicDatum<String, String>(features, "target:" + cl));
    }

    LinearClassifierFactory<String, String> factory = new LinearClassifierFactory<>();
    factory.setMinimizerCreator(
        new Factory<Minimizer<DiffFunction>>() {
          @Override
          public Minimizer<DiffFunction> create() {
            return new SGDMinimizer<DiffFunction>(0.1, 100, 0, 1000);
          }
        });

    // nanoTime is the JDK's elapsed-time clock; currentTimeMillis follows the wall clock and can
    // jump if the system time is adjusted mid-run.
    long nsStart = System.nanoTime();
    factory.trainClassifier(data);
    long delay = (System.nanoTime() - nsStart) / 1000000L;
    System.out.println("Training took " + delay + " ms");
  }