Ejemplo n.º 1
0
  /**
   * Builds a one-column data set of nine continuous values, asks the discretizer for an
   * {@code equalCounts} split of that column into 3 categories, and checks the category index
   * assigned to every row.
   *
   * <p>The expected indices put the three smallest values in category 0, the three middle values
   * in category 1 and the three largest values in category 2.
   */
  public static void testManualDiscretize() {
    Node x = new ContinuousVariable("X");
    List<Node> nodes = Collections.singletonList(x);

    // Row-aligned fixture: values[i] is stored in row i and must end up in expectedBins[i].
    double[] values = {13.0, 1.2, 2.2, 4.5, 12.005, 5.5, 10.1, 7.5, 3.4};
    int[] expectedBins = {2, 0, 0, 1, 2, 1, 2, 1, 0};

    DataSet data = new ColtDataSet(values.length, nodes);
    for (int row = 0; row < values.length; row++) {
      data.setDouble(row, 0, values[row]);
    }

    System.out.println(data);

    Discretizer discretizer = new Discretizer(data);
    discretizer.setVariablesCopied(true);

    discretizer.equalCounts(x, 3);
    DataSet discretized = discretizer.discretize();

    System.out.println(discretized);

    for (int row = 0; row < expectedBins.length; row++) {
      // Expected value goes first so that a failure message labels the two values correctly.
      assertEquals("row " + row, expectedBins[row], discretized.getInt(row, 0));
    }
  }
Ejemplo n.º 2
0
  /**
   * Simulates 100 rows from a SEM built on a random graph, applies a separate discretization
   * (alternating {@code equalCounts} and {@code equalIntervals}) to each of the first five
   * variables, and checks the largest value found in each discretized column.
   *
   * <p>Causes a package cycle.
   */
  public void testManualDiscretize2() {
    SemIm im = new SemIm(new SemPm(new Dag(GraphUtils.randomGraph(5, 0, 5, 3, 3, 3, false))));
    DataSet data = im.simulateData(100, false);
    List<Node> variables = data.getVariables();

    Discretizer discretizer = new Discretizer(data);
    // NOTE(review): setVariablesCopied(true) is not called in this test (the call was
    // commented out) -- confirm that this is intentional.

    discretizer.equalCounts(variables.get(0), 3);
    discretizer.equalIntervals(variables.get(1), 2);
    discretizer.equalCounts(variables.get(2), 5);
    discretizer.equalIntervals(variables.get(3), 8);
    discretizer.equalCounts(variables.get(4), 4);

    DataSet result = discretizer.discretize();
    System.out.println(result);

    // Each expected maximum is one less than the category count requested for that column.
    int[] expectedMax = {2, 1, 4, 7, 3};
    for (int col = 0; col < expectedMax.length; col++) {
      assertEquals(expectedMax[col], maxInColumn(result, col));
    }
  }
Ejemplo n.º 3
0
  /**
   * Checks the breakpoints returned by {@code Discretizer.getEqualFrequencyBreakPoints} for a
   * fixed nine-value sample, once for 3 categories (2 breakpoints expected) and once for 4
   * categories (3 breakpoints expected). The 3-category breakpoints are also passed to
   * {@code Discretizer.discretize} and the result is printed, not asserted.
   */
  public static void testBreakpointCalculation() {
    double[] data = {13, 1.2, 2.2, 4.5, 12.005, 5.5, 10.1, 7.5, 3.4};

    double[] breakpoints = Discretizer.getEqualFrequencyBreakPoints(data, 3);

    // assertEquals on the length reports the actual length when the check fails.
    assertEquals(2, breakpoints.length);
    // The delta-less assertEquals(double, double) is deprecated and fails unconditionally in
    // JUnit 4; a delta of 0.0 keeps the comparison exact.
    assertEquals(4.5, breakpoints[0], 0.0);
    assertEquals(10.1, breakpoints[1], 0.0);

    Discretizer.Discretization dis =
        Discretizer.discretize(data, breakpoints, "after", Arrays.asList("0", "1", "2"));
    System.out.println(dis);

    breakpoints = Discretizer.getEqualFrequencyBreakPoints(data, 4);

    assertEquals(3, breakpoints.length);
    assertEquals(3.4, breakpoints[0], 0.0);
    assertEquals(5.5, breakpoints[1], 0.0);
    assertEquals(10.1, breakpoints[2], 0.0);
  }
Ejemplo n.º 4
0
  /**
   * Simulates 100 rows from a SEM built on a random graph, discretizes only the first variable
   * with {@code setVariablesCopied(true)} enabled, and checks that the first output variable is
   * a {@code DiscreteVariable} while every other output variable is still a
   * {@code ContinuousVariable}.
   */
  public void testManualDiscretize3() {
    Graph graph = new Dag(GraphUtils.randomGraph(5, 0, 5, 3, 3, 3, false));
    SemPm pm = new SemPm(graph);
    SemIm im = new SemIm(pm);
    DataSet data = im.simulateData(100, false);

    List<Node> nodes = data.getVariables();

    Discretizer discretizer = new Discretizer(data);
    // Set once; the original repeated this identical call twice in a row.
    discretizer.setVariablesCopied(true);
    discretizer.equalCounts(nodes.get(0), 3);

    DataSet discretized = discretizer.discretize();

    System.out.println(discretized);

    assertTrue(
        "column 0 should be discrete", discretized.getVariable(0) instanceof DiscreteVariable);

    // No discretization was requested for the remaining columns.
    for (int col = 1; col < nodes.size(); col++) {
      assertTrue(
          "column " + col + " should stay continuous",
          discretized.getVariable(col) instanceof ContinuousVariable);
    }
  }
Ejemplo n.º 5
0
  /**
   * Discretizes six values with the cutoffs 2.5 and 3.2 into the categories "lo", "med" and
   * "hi", then checks the category name assigned to each value.
   *
   * <p>Per the expected labels, the value 2.5 (equal to the first cutoff) is expected to land in
   * "med", not "lo".
   */
  public void testContinuous() {
    final double[] data = {1, 2, 2.5, 3, 4, 5};
    final double[] cutoffs = {2.5, 3.2};
    // Expected category name for data[i], in the same order as the input values.
    final String[] expected = {"lo", "lo", "med", "med", "hi", "hi"};

    Discretizer.Discretization discretization =
        Discretizer.discretize(data, cutoffs, "after", Arrays.asList("lo", "med", "hi"));
    System.out.println(discretization);

    List<String> labels = discretization.getVariable().getCategories();
    int[] codes = discretization.getData();

    for (int i = 0; i < expected.length; i++) {
      // codes[i] is used as an index into the category names of the discretized variable.
      assertEquals(expected[i], labels.get(codes[i]));
    }
  }