/**
 * Builds a nine-row data set with a single continuous variable "X",
 * requests an {@code equalCounts(x, 3)} discretization, and checks the
 * category index assigned to every row.
 *
 * <p>The expected indices put the three smallest values (1.2, 2.2, 3.4) in
 * category 0, the three middle values (4.5, 5.5, 7.5) in category 1 and the
 * three largest values (10.1, 12.005, 13.0) in category 2.
 */
public static void testManualDiscretize() {
    // Row i of the data set holds values[i]; expected[i] is the category
    // index the test requires for that row.
    double[] values = {13.0, 1.2, 2.2, 4.5, 12.005, 5.5, 10.1, 7.5, 3.4};
    int[] expected = {2, 0, 0, 1, 2, 1, 2, 1, 0};

    Node x = new ContinuousVariable("X");
    List<Node> nodes = Collections.singletonList(x);
    DataSet data = new ColtDataSet(values.length, nodes);

    for (int row = 0; row < values.length; row++) {
        data.setDouble(row, 0, values[row]);
    }

    System.out.println(data);

    Discretizer discretizer = new Discretizer(data);
    discretizer.setVariablesCopied(true);
    discretizer.equalCounts(x, 3);

    DataSet discretized = discretizer.discretize();

    System.out.println(discretized);

    // Expected value goes first so that a failure reports
    // "expected:<...> but was:<...>" the right way round.
    for (int row = 0; row < expected.length; row++) {
        assertEquals("category of row " + row, expected[row], discretized.getInt(row, 0));
    }
}
// Causes a package cycle. public void testManualDiscretize2() { Graph graph = new Dag(GraphUtils.randomGraph(5, 0, 5, 3, 3, 3, false)); SemPm pm = new SemPm(graph); SemIm im = new SemIm(pm); DataSet data = im.simulateData(100, false); List<Node> nodes = data.getVariables(); Discretizer discretizer = new Discretizer(data); // discretizer.setVariablesCopied(true); discretizer.equalCounts(nodes.get(0), 3); discretizer.equalIntervals(nodes.get(1), 2); discretizer.equalCounts(nodes.get(2), 5); discretizer.equalIntervals(nodes.get(3), 8); discretizer.equalCounts(nodes.get(4), 4); DataSet discretized = discretizer.discretize(); System.out.println(discretized); assertEquals(2, maxInColumn(discretized, 0)); assertEquals(1, maxInColumn(discretized, 1)); assertEquals(4, maxInColumn(discretized, 2)); assertEquals(7, maxInColumn(discretized, 3)); assertEquals(3, maxInColumn(discretized, 4)); }
/**
 * Checks the breakpoints returned by
 * {@code Discretizer.getEqualFrequencyBreakPoints} for a fixed nine-value
 * sample, first for three categories (two breakpoints expected) and then
 * for four categories (three breakpoints expected). In between, the
 * three-category breakpoints are passed to {@code Discretizer.discretize}
 * and the result is printed.
 */
public static void testBreakpointCalculation() {
    double[] data = {13, 1.2, 2.2, 4.5, 12.005, 5.5, 10.1, 7.5, 3.4};

    double[] breakpoints = Discretizer.getEqualFrequencyBreakPoints(data, 3);

    // assertEquals on the length reports the actual count on failure,
    // unlike assertTrue(length == n).
    assertEquals(2, breakpoints.length);

    // Doubles are compared with the explicit-delta overload; the expected
    // breakpoints are values taken verbatim from the sample, so a delta of
    // 0.0 keeps the comparison exact.
    assertEquals(4.5, breakpoints[0], 0.0);
    assertEquals(10.1, breakpoints[1], 0.0);

    Discretizer.Discretization dis = Discretizer.discretize(data, breakpoints,
            "after", Arrays.asList("0", "1", "2"));

    System.out.println(dis);

    breakpoints = Discretizer.getEqualFrequencyBreakPoints(data, 4);

    assertEquals(3, breakpoints.length);

    assertEquals(3.4, breakpoints[0], 0.0);
    assertEquals(5.5, breakpoints[1], 0.0);
    assertEquals(10.1, breakpoints[2], 0.0);
}
/**
 * Simulates a five-variable data set from a SEM built on a random DAG,
 * discretizes only the first variable (with variable copying switched on),
 * and checks the variable types of the result: column 0 must be a
 * {@code DiscreteVariable} and columns 1 to 4 must remain
 * {@code ContinuousVariable}s.
 */
public void testManualDiscretize3() {
    Graph graph = new Dag(GraphUtils.randomGraph(5, 0, 5, 3, 3, 3, false));
    SemPm pm = new SemPm(graph);
    SemIm im = new SemIm(pm);

    DataSet data = im.simulateData(100, false);

    List<Node> nodes = data.getVariables();

    Discretizer discretizer = new Discretizer(data);
    // The original set this flag twice in a row; one call is enough.
    discretizer.setVariablesCopied(true);

    discretizer.equalCounts(nodes.get(0), 3);

    DataSet discretized = discretizer.discretize();

    System.out.println(discretized);

    assertTrue("column 0 should be discrete",
            discretized.getVariable(0) instanceof DiscreteVariable);

    // Columns without a discretization spec are expected to stay continuous.
    for (int col = 1; col <= 4; col++) {
        assertTrue("column " + col + " should be continuous",
                discretized.getVariable(col) instanceof ContinuousVariable);
    }
}
/**
 * Discretizes six fixed values with the cutoffs 2.5 and 3.2 into the
 * categories "lo", "med" and "hi", then checks the category name assigned
 * to each value. The expectations require 1 and 2 to be "lo", 2.5 and 3 to
 * be "med" (so a value equal to a cutoff lands in the higher category),
 * and 4 and 5 to be "hi".
 */
public void testContinuous() {
    final double[] values = {1, 2, 2.5, 3, 4, 5};
    final double[] breakpoints = {2.5, 3.2};
    final List<String> labels = Arrays.asList("lo", "med", "hi");

    // Expected category name for values[i].
    final String[] expected = {"lo", "lo", "med", "med", "hi", "hi"};

    Discretizer.Discretization result =
            Discretizer.discretize(values, breakpoints, "after", labels);

    System.out.println(result);

    List<String> names = result.getVariable().getCategories();
    int[] codes = result.getData();

    // Each code is an index into the discretized variable's category list.
    for (int i = 0; i < expected.length; i++) {
        assertEquals(expected[i], names.get(codes[i]));
    }
}