  /**
   * 57% of time spent in LogConditionalObjectiveFunction.calculateCLBatch(); 22% spent in
   * constructing datums (expensive).
   *
   * <p>Single threaded: 4100 ms. Multi threaded: 600 ms.
   *
   * <p>With same data, seed 42: 52 ms. With reordered accesses for caching: 38 ms -- down to 73%
   * of the time.
   *
   * <p>With 8 cpus, a 6.8x speedup -- basically the same as with RVFDatum.
   */
  public static void benchmarkLogisticRegression() {
    Dataset<String, String> data = new Dataset<>();
    for (int i = 0; i < 10000; i++) {
      // Re-seeding inside the loop deliberately makes every datum draw the same random
      // sequence, so runs are reproducible ("with same data, seed 42" above).
      Random r = new Random(42);
      Set<String> features = new HashSet<>();
      boolean cl = r.nextBoolean();
      for (int j = 0; j < 1000; j++) {
        if (cl && i % 2 == 0) {
          if (r.nextDouble() > 0.3) {
            features.add("f:" + j + ":true");
          } else {
            features.add("f:" + j + ":false");
          }
        } else {
          // Both branches add the same feature: this case always gets "f:j:false", and
          // nextDouble() is still called so the RNG state stays in step with the branch above.
          if (r.nextDouble() > 0.3) {
            features.add("f:" + j + ":false");
          } else {
            features.add("f:" + j + ":false");
          }
        }
      }
      data.add(new BasicDatum<String, String>(features, "target:" + cl));
    }

    LinearClassifierFactory<String, String> factory = new LinearClassifierFactory<>();
    long msStart = System.currentTimeMillis();
    factory.trainClassifier(data);
    long delay = System.currentTimeMillis() - msStart;
    System.out.println("Training took " + delay + " ms");
  }
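  /**
   * A sketch of a more robust timing wrapper than the one-shot currentTimeMillis() pattern
   * used in these benchmarks (this helper is an illustration added here, not part of the
   * original class): repeat the task and keep the fastest run, so JIT warm-up and GC pauses
   * in early iterations do not dominate the number reported. The name and the repetition
   * count are arbitrary choices.
   */
  private static long bestOfRunsMs(Runnable task, int runs) {
    long bestMs = Long.MAX_VALUE;
    for (int i = 0; i < runs; i++) {
      long start = System.nanoTime(); // nanoTime() is monotonic, unlike currentTimeMillis()
      task.run();
      bestMs = Math.min(bestMs, (System.nanoTime() - start) / 1_000_000L);
    }
    return bestMs;
  }
  // Usage (hypothetical): bestOfRunsMs(() -> benchmarkLogisticRegression(), 5)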
  /**
   * 67% of time spent in LogConditionalObjectiveFunction.rvfcalculate(); 29% of time spent in
   * dataset construction (11% in RVFDataset.addFeatures(), 7% rvf incrementCount(), 11% rest).
   *
   * <p>Single threaded: 4700 ms. Multi threaded: 700 ms.
   *
   * <p>With same data, seed 42: 245 ms. With reordered accesses for caching: 195 ms -- down to
   * 80% of the time; not huge, but a win nonetheless.
   *
   * <p>With 8 cpus, a 6.7x speedup -- almost, but not quite, linear; pretty good.
   */
  public static void benchmarkRVFLogisticRegression() {
    RVFDataset<String, String> data = new RVFDataset<>();
    for (int i = 0; i < 10000; i++) {
      Random r = new Random(42);
      Counter<String> features = new ClassicCounter<>();
      boolean cl = r.nextBoolean();
      for (int j = 0; j < 1000; j++) {
        double value;
        if (cl && i % 2 == 0) {
          value = (r.nextDouble() * 2.0) - 0.6;
        } else {
          value = (r.nextDouble() * 2.0) - 1.4;
        }
        features.incrementCount("f" + j, value);
      }
      data.add(new RVFDatum<>(features, "target:" + cl));
    }

    LinearClassifierFactory<String, String> factory = new LinearClassifierFactory<>();
    long msStart = System.currentTimeMillis();
    factory.trainClassifier(data);
    long delay = System.currentTimeMillis() - msStart;
    System.out.println("Training took " + delay + " ms");
  }
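  /**
   * The two benchmarks above differ only in datum type: benchmarkLogisticRegression() uses
   * binary indicator features (present or absent in a Set), while this one attaches a real
   * value to every feature via a Counter. A minimal side-by-side sketch using the same
   * classes (this method is illustrative, not part of the original benchmarks):
   */
  private static void datumKindsExample() {
    // Binary features: set membership is the only signal the classifier sees.
    Set<String> binaryFeatures = new HashSet<>();
    binaryFeatures.add("f:0:true");
    BasicDatum<String, String> basic =
        new BasicDatum<String, String>(binaryFeatures, "target:true");

    // Real-valued features: each feature carries a magnitude as well.
    Counter<String> valuedFeatures = new ClassicCounter<>();
    valuedFeatures.incrementCount("f0", 0.85);
    RVFDatum<String, String> rvf = new RVFDatum<>(valuedFeatures, "target:true");

    System.out.println(basic.label() + " / " + rvf.label());
  }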
  public static void benchmarkDatum() {
    long msStart = System.currentTimeMillis();
    Dataset<String, String> data = new Dataset<>();
    for (int i = 0; i < 10000; i++) {
      Random r = new Random(42);
      Set<String> features = new HashSet<>();
      boolean cl = r.nextBoolean();
      for (int j = 0; j < 1000; j++) {
        if (cl && i % 2 == 0) {
          if (r.nextDouble() > 0.3) {
            features.add("f:" + j + ":true");
          } else {
            features.add("f:" + j + ":false");
          }
        } else {
          // As in benchmarkLogisticRegression(), both branches add the same feature;
          // nextDouble() is still consumed to keep the RNG sequence identical.
          if (r.nextDouble() > 0.3) {
            features.add("f:" + j + ":false");
          } else {
            features.add("f:" + j + ":false");
          }
        }
      }
      data.add(new BasicDatum<String, String>(features, "target:" + cl));
    }
    long delay = System.currentTimeMillis() - msStart;
    System.out.println("Dataset construction took " + delay + " ms");

    // Baseline: the same loop structure, set allocation, and RNG calls, but with empty
    // branches, so no feature strings or datums are built. This isolates the loop/RNG
    // overhead from the construction cost measured above.
    msStart = System.currentTimeMillis();
    for (int i = 0; i < 10000; i++) {
      Random r = new Random(42);
      Set<String> features = new HashSet<>();
      boolean cl = r.nextBoolean();
      for (int j = 0; j < 1000; j++) {
        if (cl && i % 2 == 0) {
          if (r.nextDouble() > 0.3) {
            // intentionally empty: only the RNG call and branch are timed
          } else {
            // intentionally empty
          }
        } else {
          if (r.nextDouble() > 0.3) {
            // intentionally empty
          } else {
            // intentionally empty
          }
        }
      }
    }
    delay = System.currentTimeMillis() - msStart;
    System.out.println("MultiVector took " + delay + " ms");
  }
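  /**
   * Combining the two numbers printed by benchmarkDatum(): the second loop is a baseline
   * that repeats the control flow and RNG calls without building anything, so subtracting
   * it isolates what the feature strings, sets, and datums themselves cost. A trivial
   * hypothetical helper to make the arithmetic explicit:
   */
  private static long netDatumConstructionMs(long datasetConstructionMs, long baselineLoopMs) {
    // e.g. 900 ms of construction over a 200 ms baseline means ~700 ms of actual allocation
    return datasetConstructionMs - baselineLoopMs;
  }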
  /**
   * 29% in FactorTable.getValue(), 28% in CRFCliqueTree.getCalibratedCliqueTree(), 12.6%
   * waiting for threads.
   *
   * <p>Single threaded: 15000 ms - 26000 ms. Multi threaded: 4500 ms - 7000 ms.
   *
   * <p>With 8 cpus, a 3.3x - 3.7x speedup, around 800% utilization.
   */
  public static void benchmarkCRF() {
    Properties props = new Properties();
    props.setProperty("macro", "true"); // use a generic CRF configuration
    props.setProperty("useIfInteger", "true");
    props.setProperty("featureFactory", "edu.stanford.nlp.benchmarks.BenchmarkFeatureFactory");
    props.setProperty("saveFeatureIndexToDisk", "false");
    CRFClassifier<CoreLabel> crf = new CRFClassifier<CoreLabel>(props);

    Random r = new Random(42);
    List<List<CoreLabel>> data = new ArrayList<>();
    for (int i = 0; i < 100; i++) {
      List<CoreLabel> sentence = new ArrayList<>();
      for (int j = 0; j < 20; j++) {
        CoreLabel l = new CoreLabel();
        l.setWord("j:" + j);
        // Mostly alternating tags, flipped ~30% of the time; parentheses added for
        // readability, since == already binds tighter than ^.
        boolean tag = (j % 2 == 0) ^ (r.nextDouble() > 0.7);
        l.set(CoreAnnotations.AnswerAnnotation.class, "target:" + tag);
        sentence.add(l);
      }
      data.add(sentence);
    }

    long msStart = System.currentTimeMillis();
    crf.train(data);
    long delay = System.currentTimeMillis() - msStart;
    System.out.println("Training took " + delay + " ms");
  }
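  /**
   * Sketch of sanity-checking the trained CRF by labelling a sentence (illustrative, not
   * part of the benchmark; assumes the classify(List) method inherited from
   * AbstractSequenceClassifier, which annotates the tokens with AnswerAnnotation and
   * returns them):
   */
  private static void printCrfPredictions(CRFClassifier<CoreLabel> crf, List<CoreLabel> sentence) {
    for (CoreLabel token : crf.classify(sentence)) {
      System.out.println(token.word() + " -> " + token.get(CoreAnnotations.AnswerAnnotation.class));
    }
  }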
  public static void benchmarkSGD() {
    Dataset<String, String> data = new Dataset<>();
    for (int i = 0; i < 10000; i++) {
      Random r = new Random(42);
      Set<String> features = new HashSet<>();
      boolean cl = r.nextBoolean();
      for (int j = 0; j < 1000; j++) {
        if (cl && i % 2 == 0) {
          if (r.nextDouble() > 0.3) {
            features.add("f:" + j + ":true");
          } else {
            features.add("f:" + j + ":false");
          }
        } else {
          // Same collapsed branch as in benchmarkLogisticRegression(): always "false",
          // with nextDouble() consumed to keep the RNG sequence aligned.
          if (r.nextDouble() > 0.3) {
            features.add("f:" + j + ":false");
          } else {
            features.add("f:" + j + ":false");
          }
        }
      }
      data.add(new BasicDatum<String, String>(features, "target:" + cl));
    }

    LinearClassifierFactory<String, String> factory = new LinearClassifierFactory<>();
    // Swap the factory's default batch optimizer for stochastic gradient descent.
    factory.setMinimizerCreator(
        new Factory<Minimizer<DiffFunction>>() {
          @Override
          public Minimizer<DiffFunction> create() {
            return new SGDMinimizer<DiffFunction>(0.1, 100, 0, 1000);
          }
        });

    long msStart = System.currentTimeMillis();
    factory.trainClassifier(data);
    long delay = System.currentTimeMillis() - msStart;
    System.out.println("Training took " + delay + " ms");
  }
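  /**
   * For an A/B comparison against benchmarkSGD(), the same factory hook can pin the
   * optimizer back to quasi-Newton. A sketch: QNMinimizer lives in
   * edu.stanford.nlp.optimization (an extra import would be needed), and 15 is an assumed,
   * not tuned, history size for its limited-memory updates.
   */
  private static void useQuasiNewtonMinimizer(LinearClassifierFactory<String, String> factory) {
    factory.setMinimizerCreator(
        new Factory<Minimizer<DiffFunction>>() {
          @Override
          public Minimizer<DiffFunction> create() {
            return new QNMinimizer(15); // L-BFGS-style batch optimizer
          }
        });
  }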