/**
 * Checks that creating a configuration and setting its metric to {@code null} raises a
 * {@link NullPointerException}. The test fails when no such exception is thrown.
 */
  @Test
  public void testMetric() {
    try {
      ARXConfiguration.create().setMetric(null);
      // Only reached when the null metric was accepted.
      Assert.fail();
    } catch (NullPointerException expected) {
      // Expected outcome: the null metric was rejected.
    }
  }
  /**
   * Expects an {@link IllegalArgumentException} when the provider's data is anonymized with a
   * 2-anonymity criterion and a maximum-outliers value of 1.2. The test fails when no such
   * exception is thrown.
   *
   * @throws IOException declared by the method; propagated if one of the calls raises it
   */
  @Test
  public void testEmptyDefinition() throws IOException {
    ARXAnonymizer arx = new ARXAnonymizer();
    try {
      ARXConfiguration configuration = ARXConfiguration.create();
      configuration.addCriterion(new KAnonymity(2));
      configuration.setMaxOutliers(1.2d);
      arx.anonymize(provider.getData(), configuration);
      // Only reached when nothing above was rejected.
      Assert.fail();
    } catch (IllegalArgumentException expected) {
      // Expected outcome: the setup was rejected.
    }
  }
 /**
  * Expects a {@link NullPointerException} when the attribute type of {@code "age"} is set to
  * {@code null} and the data is then anonymized with a 2-anonymity criterion. The test fails
  * when no such exception is thrown.
  *
  * @throws IOException declared by the method; propagated if one of the calls raises it
  */
 @Test
 public void testNullHierarchy() throws IOException {
   try {
     ARXAnonymizer arx = new ARXAnonymizer();
     Data input = provider.getData();
     input.getDefinition().setAttributeType("age", (AttributeType) null);
     ARXConfiguration configuration = ARXConfiguration.create();
     configuration.addCriterion(new KAnonymity(2));
     configuration.setMaxOutliers(1.2d);
     arx.anonymize(input, configuration);
     // Only reached when the null attribute type was accepted.
     Assert.fail();
   } catch (NullPointerException expected) {
     // Expected outcome: the null attribute type was rejected.
   }
 }
  /**
   * Expects an {@link IllegalArgumentException} when a freshly created dataset, to which neither
   * rows nor attribute types have been added, is anonymized with a 2-anonymity criterion. The
   * test fails when no such exception is thrown.
   *
   * @throws IOException declared by the method; propagated if one of the calls raises it
   */
  @Test
  public void testEmptyDatasetWithoutAttributeDefinition() throws IOException {
    try {
      ARXAnonymizer arx = new ARXAnonymizer();
      Data emptyInput = Data.create();

      ARXConfiguration configuration = ARXConfiguration.create();
      configuration.addCriterion(new KAnonymity(2));
      configuration.setMaxOutliers(1.2d);
      arx.anonymize(emptyInput, configuration);
      // Only reached when the empty dataset was accepted.
      Assert.fail();
    } catch (IllegalArgumentException expected) {
      // Expected outcome: the setup was rejected.
    }
  }
  /**
   * Expects an {@link IllegalArgumentException} when a dataset without any rows, whose
   * definition marks {@code "age"} as an identifying attribute, is anonymized with a
   * 2-anonymity criterion. The test fails when no such exception is thrown.
   *
   * @throws IOException declared by the method; propagated if one of the calls raises it
   */
  @Test
  public void testEmptyDatasetWithAttributeDefinition() throws IOException {
    try {
      final ARXAnonymizer anonymizer = new ARXAnonymizer();
      final Data data = Data.create();

      data.getDefinition().setAttributeType("age", AttributeType.IDENTIFYING_ATTRIBUTE);
      final ARXConfiguration config = ARXConfiguration.create();
      config.addCriterion(new KAnonymity(2));
      config.setMaxOutliers(1.2d);
      // Anonymize the empty dataset configured above (not the provider's data), so that the
      // attribute definition set on it is what this test actually exercises.
      anonymizer.anonymize(data, config);

    } catch (final IllegalArgumentException expected) {
      // Expected outcome: the setup was rejected.
      return;
    }
    Assert.fail();
  }
  /**
   * Expects an {@link IllegalArgumentException} when the data obtained from the provider after
   * {@code createDataDefinitionMissing()} is anonymized with a 2-anonymity criterion and a
   * maximum-outliers value of 0. The test fails when no such exception is thrown.
   *
   * <p>NOTE(review): presumably {@code createDataDefinitionMissing()} installs a hierarchy that
   * lacks a value occurring in the data - confirm against the provider.
   *
   * @throws IOException declared by the method; propagated if one of the calls raises it
   */
  @Test
  public void testMissingHierarchyValue() throws IOException {
    provider.createDataDefinitionMissing();
    Data input = provider.getData();
    ARXAnonymizer arx = new ARXAnonymizer();

    try {
      ARXConfiguration configuration = ARXConfiguration.create();
      configuration.addCriterion(new KAnonymity(2));
      configuration.setMaxOutliers(0d);
      arx.anonymize(input, configuration);
      // Only reached when the data definition was accepted.
      Assert.fail();
    } catch (IllegalArgumentException expected) {
      // Expected outcome: the data definition was rejected.
    }
  }
Exemple #7
0
  /**
   * Runs the example: assigns hierarchies to {@code age} and {@code zipcode}, marks
   * {@code disease1} and {@code disease2} as sensitive, anonymizes the data with 3-anonymity,
   * hierarchical-distance t-closeness on {@code disease1} and recursive (c,l)-diversity on
   * {@code disease2}, then prints the result and, if a global optimum exists, the transformed
   * rows.
   *
   * @param args the command-line arguments (not used)
   * @throws IOException declared by the method; propagated if one of the calls raises it
   */
  public static void main(String[] args) throws IOException {

    // Input data
    Data input = getData();

    // Attribute types: two generalized attributes, two sensitive ones
    input.getDefinition().setAttributeType("age", getHierarchyAge());
    input.getDefinition().setAttributeType("zipcode", getHierarchyZipcode());
    input.getDefinition().setAttributeType("disease1", AttributeType.SENSITIVE_ATTRIBUTE);
    input.getDefinition().setAttributeType("disease2", AttributeType.SENSITIVE_ATTRIBUTE);

    // Anonymizer and its configuration
    ARXAnonymizer arx = new ARXAnonymizer();
    ARXConfiguration configuration = ARXConfiguration.create();
    configuration.addPrivacyModel(new KAnonymity(3));
    HierarchicalDistanceTCloseness closeness =
        new HierarchicalDistanceTCloseness("disease1", 0.6d, getHierarchyDisease());
    configuration.addPrivacyModel(closeness);
    configuration.addPrivacyModel(new RecursiveCLDiversity("disease2", 3d, 2));
    configuration.setMaxOutliers(0d);
    configuration.setQualityModel(Metric.createEntropyMetric());

    // Run the anonymization
    ARXResult outcome = arx.anonymize(input, configuration);

    // Summary
    printResult(outcome, input);

    // Nothing more to print when no global optimum is reported
    if (outcome.getGlobalOptimum() == null) {
      return;
    }

    // Dump every transformed row
    System.out.println(" - Transformed data:");
    for (Iterator<String[]> rows = outcome.getOutput(false).iterator(); rows.hasNext(); ) {
      System.out.print("   ");
      System.out.println(Arrays.toString(rows.next()));
    }
  }
 /**
  * Returns the test cases for the k-map privacy model.
  *
  * <p>Every entry is a one-element {@code Object[]} holding an {@code ARXAnonymizationTestCase}
  * that combines a configuration carrying a {@code KMap} privacy model with the attribute name
  * {@code "occupation"}, the dataset path {@code ./data/adult.csv}, a {@code double}, an
  * {@code int[]} and a {@code boolean}. NOTE(review): the {@code double} and {@code int[]} look
  * like the expected result value and expected transformation of each run - confirm against the
  * {@code ARXAnonymizationTestCase} constructor.
  *
  * <p>NOTE(review): the list contains two pairs of identical entries: the
  * {@code createAECSMetric()} case with {@code 0.0d} and {@code KMap(5, subset)}, and the
  * {@code createPrecomputedEntropyMetric(0.1d, false)} case with {@code 0.05d} and
  * {@code KMap(3, subset)}. Confirm whether the repetition is intended.
  *
  * @return the test cases, one single-element {@code Object[]} per case
  * @throws IOException declared by the method; presumably raised by {@code Data.create} when a
  *     CSV file cannot be read - confirm
  */
 @Parameters(name = "{index}:[{0}]")
 public static Collection<Object[]> cases() throws IOException {
   return Arrays.asList(
       new Object[][] {
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.05d, Metric.createPrecomputedEntropyMetric(0.1d, false))
                   .addPrivacyModel(
                       new KMap(
                           3,
                           0.01d,
                           ARXPopulationModel.create(Region.USA),
                           CellSizeEstimator.ZERO_TRUNCATED_POISSON)),
               "occupation",
               "./data/adult.csv",
               130804.5332092598,
               new int[] {0, 0, 1, 1, 0, 2, 2, 0},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.05d, Metric.createPrecomputedEntropyMetric(0.1d, false))
                   .addPrivacyModel(
                       new KMap(
                           1000,
                           0.01d,
                           ARXPopulationModel.create(Region.USA),
                           CellSizeEstimator.ZERO_TRUNCATED_POISSON)),
               "occupation",
               "./data/adult.csv",
               151894.1394841501,
               new int[] {0, 0, 1, 1, 1, 2, 1, 0},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.0d, Metric.createAECSMetric())
                   .addPrivacyModel(
                       new KMap(
                           5,
                           DataSubset.create(
                               Data.create("./data/adult.csv", StandardCharsets.UTF_8, ';'),
                               Data.create(
                                   "./data/adult_subset.csv", StandardCharsets.UTF_8, ';')))),
               "occupation",
               "./data/adult.csv",
               45.014925373134325,
               new int[] {1, 0, 1, 2, 3, 2, 2, 1},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, true))
                   .addPrivacyModel(
                       new KMap(
                           3,
                           DataSubset.create(
                               Data.create("./data/adult.csv", StandardCharsets.UTF_8, ';'),
                               Data.create(
                                   "./data/adult_subset.csv", StandardCharsets.UTF_8, ';')))),
               "occupation",
               "./data/adult.csv",
               23387.494246375998,
               new int[] {0, 0, 1, 2, 3, 2, 2, 0},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, false))
                   .addPrivacyModel(
                       new KMap(
                           5,
                           DataSubset.create(
                               Data.create("./data/adult.csv", StandardCharsets.UTF_8, ';'),
                               Data.create(
                                   "./data/adult_subset.csv", StandardCharsets.UTF_8, ';')))),
               "occupation",
               "./data/adult.csv",
               28551.7222913157,
               new int[] {1, 0, 1, 2, 3, 2, 2, 1},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.05d, Metric.createAECSMetric())
                   .addPrivacyModel(
                       new KMap(
                           20,
                           DataSubset.create(
                               Data.create("./data/adult.csv", StandardCharsets.UTF_8, ';'),
                               Data.create(
                                   "./data/adult_subset.csv", StandardCharsets.UTF_8, ';')))),
               "occupation",
               "./data/adult.csv",
               11.424242424242424,
               new int[] {1, 0, 1, 1, 3, 2, 1, 0},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.05d, Metric.createPrecomputedEntropyMetric(0.1d, true))
                   .addPrivacyModel(
                       new KMap(
                           7,
                           DataSubset.create(
                               Data.create("./data/adult.csv", StandardCharsets.UTF_8, ';'),
                               Data.create(
                                   "./data/adult_subset.csv", StandardCharsets.UTF_8, ';')))),
               "occupation",
               "./data/adult.csv",
               17075.7181747451,
               new int[] {0, 0, 1, 1, 2, 2, 2, 0},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.05d, Metric.createPrecomputedEntropyMetric(0.1d, false))
                   .addPrivacyModel(
                       new KMap(
                           3,
                           DataSubset.create(
                               Data.create("./data/adult.csv", StandardCharsets.UTF_8, ';'),
                               Data.create(
                                   "./data/adult_subset.csv", StandardCharsets.UTF_8, ';')))),
               "occupation",
               "./data/adult.csv",
               15121.633326877098,
               new int[] {0, 0, 1, 1, 1, 2, 1, 0},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.0d, Metric.createAECSMetric())
                   .addPrivacyModel(
                       new KMap(
                           5,
                           DataSubset.create(
                               Data.create("./data/adult.csv", StandardCharsets.UTF_8, ';'),
                               Data.create(
                                   "./data/adult_subset.csv", StandardCharsets.UTF_8, ';')))),
               "occupation",
               "./data/adult.csv",
               45.014925373134325,
               new int[] {1, 0, 1, 2, 3, 2, 2, 1},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, true))
                   .addPrivacyModel(
                       new KMap(
                           2,
                           DataSubset.create(
                               Data.create("./data/adult.csv", StandardCharsets.UTF_8, ';'),
                               Data.create(
                                   "./data/adult_subset.csv", StandardCharsets.UTF_8, ';')))),
               "occupation",
               "./data/adult.csv",
               23108.1673304724,
               new int[] {1, 0, 1, 1, 3, 2, 2, 0},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, false))
                   .addPrivacyModel(
                       new KMap(
                           10,
                           DataSubset.create(
                               Data.create("./data/adult.csv", StandardCharsets.UTF_8, ';'),
                               Data.create(
                                   "./data/adult_subset.csv", StandardCharsets.UTF_8, ';')))),
               "occupation",
               "./data/adult.csv",
               30238.2081484441,
               new int[] {0, 1, 1, 2, 3, 2, 2, 0},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.05d, Metric.createAECSMetric())
                   .addPrivacyModel(
                       new KMap(
                           10,
                           DataSubset.create(
                               Data.create("./data/adult.csv", StandardCharsets.UTF_8, ';'),
                               Data.create(
                                   "./data/adult_subset.csv", StandardCharsets.UTF_8, ';')))),
               "occupation",
               "./data/adult.csv",
               7.215311004784689,
               new int[] {0, 0, 1, 1, 3, 2, 1, 0},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.05d, Metric.createPrecomputedEntropyMetric(0.1d, true))
                   .addPrivacyModel(
                       new KMap(
                           5,
                           DataSubset.create(
                               Data.create("./data/adult.csv", StandardCharsets.UTF_8, ';'),
                               Data.create(
                                   "./data/adult_subset.csv", StandardCharsets.UTF_8, ';')))),
               "occupation",
               "./data/adult.csv",
               17053.8743069776,
               new int[] {0, 0, 1, 0, 2, 2, 2, 1},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.05d, Metric.createPrecomputedEntropyMetric(0.1d, false))
                   .addPrivacyModel(
                       new KMap(
                           3,
                           DataSubset.create(
                               Data.create("./data/adult.csv", StandardCharsets.UTF_8, ';'),
                               Data.create(
                                   "./data/adult_subset.csv", StandardCharsets.UTF_8, ';')))),
               "occupation",
               "./data/adult.csv",
               15121.633326877098,
               new int[] {0, 0, 1, 1, 1, 2, 1, 0},
               false)
         },
       });
 }
Exemple #9
0
  /**
   * Entry point. Builds a small in-memory dataset with the columns {@code age}, {@code gender}
   * and {@code zipcode}, attaches a generalization hierarchy to each column, anonymizes the data
   * with 2-anonymity and a maximum-outliers value of 0, and prints the input, the transformed
   * output, the frequency distribution of column 0 and the contingency table of columns 0 and 2
   * for both input and output.
   *
   * @param args the arguments (not used)
   * @throws IOException declared by the method; propagated if one of the calls raises it
   */
  public static void main(String[] args) throws IOException {

    // Define data
    DefaultData data = Data.create();
    data.add("age", "gender", "zipcode");
    data.add("45", "female", "81675");
    data.add("34", "male", "81667");
    data.add("66", "male", "81925");
    data.add("70", "female", "81931");
    data.add("34", "female", "81931");
    data.add("70", "male", "81931");
    data.add("45", "male", "81931");

    // Define hierarchies
    DefaultHierarchy age = Hierarchy.create();
    age.add("34", "<50", "*");
    age.add("45", "<50", "*");
    age.add("66", ">=50", "*");
    age.add("70", ">=50", "*");

    DefaultHierarchy gender = Hierarchy.create();
    gender.add("male", "*");
    gender.add("female", "*");

    // Only excerpts for readability
    DefaultHierarchy zipcode = Hierarchy.create();
    zipcode.add("81667", "8166*", "816**", "81***", "8****", "*****");
    zipcode.add("81675", "8167*", "816**", "81***", "8****", "*****");
    zipcode.add("81925", "8192*", "819**", "81***", "8****", "*****");
    zipcode.add("81931", "8193*", "819**", "81***", "8****", "*****");

    data.getDefinition().setAttributeType("age", age);
    data.getDefinition().setAttributeType("gender", gender);
    data.getDefinition().setAttributeType("zipcode", zipcode);

    // Create an instance of the anonymizer
    ARXAnonymizer anonymizer = new ARXAnonymizer();
    ARXConfiguration config = ARXConfiguration.create();
    config.addCriterion(new KAnonymity(2));
    config.setMaxOutliers(0d);

    ARXResult result = anonymizer.anonymize(data, config);

    // Print info
    printResult(result, data);

    // Print input
    System.out.println(" - Input data:");
    Iterator<String[]> original = data.getHandle().iterator();
    while (original.hasNext()) {
      System.out.print("   ");
      System.out.println(Arrays.toString(original.next()));
    }

    // Print results
    System.out.println(" - Transformed data:");
    Iterator<String[]> transformed = result.getOutput(false).iterator();
    while (transformed.hasNext()) {
      System.out.print("   ");
      System.out.println(Arrays.toString(transformed.next()));
    }

    // Print frequencies of column 0 ("age") in the input
    StatisticsFrequencyDistribution distribution;
    System.out.println(" - Distribution of attribute 'age' in input:");
    distribution = data.getHandle().getStatistics().getFrequencyDistribution(0, false);
    System.out.println("   " + Arrays.toString(distribution.values));
    System.out.println("   " + Arrays.toString(distribution.frequency));

    // Print frequencies of column 0 ("age") in the output
    System.out.println(" - Distribution of attribute 'age' in output:");
    distribution = result.getOutput(false).getStatistics().getFrequencyDistribution(0, true);
    System.out.println("   " + Arrays.toString(distribution.values));
    System.out.println("   " + Arrays.toString(distribution.frequency));

    // Print contingency table of the input. The table is built for columns 0 and 2, which are
    // "age" and "zipcode" in the column order defined above, so the label names those two.
    StatisticsContingencyTable contingency;
    System.out.println(" - Contingency of attribute 'age' and 'zipcode' in input:");
    contingency = data.getHandle().getStatistics().getContingencyTable(0, true, 2, true);
    System.out.println("   " + Arrays.toString(contingency.values1));
    System.out.println("   " + Arrays.toString(contingency.values2));
    while (contingency.iterator.hasNext()) {
      Entry e = contingency.iterator.next();
      System.out.println("   [" + e.value1 + ", " + e.value2 + ", " + e.frequency + "]");
    }

    // Print contingency table of the output, for the same two columns (0 and 2)
    System.out.println(" - Contingency of attribute 'age' and 'zipcode' in output:");
    contingency = result.getOutput(false).getStatistics().getContingencyTable(0, true, 2, true);
    System.out.println("   " + Arrays.toString(contingency.values1));
    System.out.println("   " + Arrays.toString(contingency.values2));
    while (contingency.iterator.hasNext()) {
      Entry e = contingency.iterator.next();
      System.out.println("   [" + e.value1 + ", " + e.value2 + ", " + e.frequency + "]");
    }
  }
 /**
  * Returns the test cases for the k-anonymity privacy model.
  *
  * <p>Every entry is a one-element {@code Object[]} holding an {@code ARXAnonymizationTestCase}
  * that combines a configuration carrying a {@code KAnonymity} privacy model (k = 5 or k = 100)
  * with a dataset path ({@code adult.csv}, {@code cup.csv} or {@code fars.csv}), a
  * {@code double}, an {@code int[]} and a {@code boolean}. NOTE(review): the {@code double} and
  * {@code int[]} look like the expected result value and expected transformation of each run -
  * confirm against the {@code ARXAnonymizationTestCase} constructor.
  *
  * @return the test cases, one single-element {@code Object[]} per case
  */
 @Parameters(name = "{index}:[{0}]")
 public static Collection<Object[]> cases() {
   return Arrays.asList(
       new Object[][] {
         /* 0 */ {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, true))
                   .addPrivacyModel(new KAnonymity(5)),
               "./data/adult.csv",
               255559.85455731067,
               new int[] {1, 0, 1, 1, 3, 2, 2, 0, 1},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, true))
                   .addPrivacyModel(new KAnonymity(100)),
               "./data/adult.csv",
               379417.3460570988,
               new int[] {1, 1, 1, 1, 3, 2, 2, 1, 1},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, true))
                   .addPrivacyModel(new KAnonymity(5)),
               "./data/adult.csv",
               407289.5388925293,
               new int[] {1, 2, 1, 1, 3, 2, 2, 1, 1},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, true))
                   .addPrivacyModel(new KAnonymity(100)),
               "./data/adult.csv",
               453196.8932458743,
               new int[] {0, 4, 1, 1, 3, 2, 2, 1, 1},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, true))
                   .addPrivacyModel(new KAnonymity(5)),
               "./data/adult.csv",
               255559.85455731067,
               new int[] {1, 0, 1, 1, 3, 2, 2, 0, 1},
               true)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, true))
                   .addPrivacyModel(new KAnonymity(100)),
               "./data/adult.csv",
               379417.3460570988,
               new int[] {1, 1, 1, 1, 3, 2, 2, 1, 1},
               true)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, false))
                   .addPrivacyModel(new KAnonymity(5)),
               "./data/cup.csv",
               1764006.4033760305,
               new int[] {2, 4, 0, 1, 0, 4, 4, 4},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, false))
                   .addPrivacyModel(new KAnonymity(100)),
               "./data/cup.csv",
               1994002.8308631124,
               new int[] {3, 4, 1, 1, 0, 4, 4, 4},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, false))
                   .addPrivacyModel(new KAnonymity(5)),
               "./data/cup.csv",
               2445878.424834677,
               new int[] {4, 4, 1, 1, 1, 4, 4, 4},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, false))
                   .addPrivacyModel(new KAnonymity(100)),
               "./data/cup.csv",
               2517471.5816586106,
               new int[] {5, 4, 1, 0, 1, 4, 4, 4},
               false)
         },
         /* 10 */ {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, false))
                   .addPrivacyModel(new KAnonymity(5)),
               "./data/cup.csv",
               1764006.4033760305,
               new int[] {2, 4, 0, 1, 0, 4, 4, 4},
               true)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, false))
                   .addPrivacyModel(new KAnonymity(100)),
               "./data/cup.csv",
               2001343.4737485605,
               new int[] {3, 4, 1, 1, 0, 1, 2, 1},
               true)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.04d, Metric.createDiscernabilityMetric(true))
                   .addPrivacyModel(new KAnonymity(5)),
               "./data/fars.csv",
               4469271.0,
               new int[] {0, 2, 2, 2, 1, 2, 1, 0},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.04d, Metric.createDiscernabilityMetric(true))
                   .addPrivacyModel(new KAnonymity(100)),
               "./data/fars.csv",
               5.6052481E7,
               new int[] {0, 2, 3, 3, 1, 2, 2, 2},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.0d, Metric.createDiscernabilityMetric(true))
                   .addPrivacyModel(new KAnonymity(5)),
               "./data/fars.csv",
               1.42377891E8,
               new int[] {1, 2, 3, 3, 1, 2, 1, 2},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.0d, Metric.createDiscernabilityMetric(true))
                   .addPrivacyModel(new KAnonymity(100)),
               "./data/fars.csv",
               4.36925397E8,
               new int[] {5, 2, 3, 3, 1, 2, 0, 2},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.04d, Metric.createDiscernabilityMetric(true))
                   .addPrivacyModel(new KAnonymity(5)),
               "./data/fars.csv",
               4469271.0,
               new int[] {0, 2, 2, 2, 1, 2, 1, 0},
               true)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.04d, Metric.createDiscernabilityMetric(true))
                   .addPrivacyModel(new KAnonymity(100)),
               "./data/fars.csv",
               5.6052481E7,
               new int[] {0, 2, 3, 3, 1, 2, 2, 2},
               true)
         },
       });
 }
 /**
  * Returns the test cases for the entropy l-diversity privacy model.
  *
  * <p>Every entry is a one-element {@code Object[]} holding an {@code ARXAnonymizationTestCase}
  * that combines a configuration carrying an {@code EntropyLDiversity} privacy model with the
  * name of the attribute the model is defined on ({@code "occupation"}, {@code "RAMNTALL"} or
  * {@code "EDUC"}), a dataset path ({@code adult.csv}, {@code cup.csv} or {@code ihis.csv}), a
  * {@code double}, an {@code int[]} (or {@code null}) and a {@code boolean}. NOTE(review): the
  * {@code double} and {@code int[]} look like the expected result value and expected
  * transformation of each run, with {@code 0.0d}/{@code null} presumably marking runs for which
  * no solution is expected - confirm against the {@code ARXAnonymizationTestCase} constructor.
  *
  * @return the test cases, one single-element {@code Object[]} per case
  */
 @Parameters(name = "{index}:[{0}]")
 public static Collection<Object[]> cases() {
   return Arrays.asList(
       new Object[][] {
           /* 0 */
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, false))
                   .addPrivacyModel(
                       new EntropyLDiversity("occupation", 5, EntropyEstimator.GRASSBERGER)),
               "occupation",
               "./data/adult.csv",
               216092.124036387,
               new int[] {1, 0, 1, 0, 3, 2, 2, 0},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, false))
                   .addPrivacyModel(
                       new EntropyLDiversity("occupation", 100, EntropyEstimator.SHANNON)),
               "occupation",
               "./data/adult.csv",
               0.0d,
               null,
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, false))
                   .addPrivacyModel(
                       new EntropyLDiversity("occupation", 5, EntropyEstimator.GRASSBERGER)),
               "occupation",
               "./data/adult.csv",
               324620.5269918692,
               new int[] {1, 1, 1, 1, 3, 2, 2, 1},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.05d, Metric.createPrecomputedEntropyMetric(0.1d, false))
                   .addPrivacyModel(
                       new EntropyLDiversity("occupation", 3, EntropyEstimator.GRASSBERGER)),
               "occupation",
               "./data/adult.csv",
               180347.4325366015,
               new int[] {0, 0, 1, 1, 2, 2, 2, 0},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, false))
                   .addPrivacyModel(
                       new EntropyLDiversity("occupation", 5, EntropyEstimator.SHANNON)),
               "occupation",
               "./data/adult.csv",
               228878.2039109517,
               new int[] {1, 0, 1, 1, 2, 2, 2, 1},
               true)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.1d, Metric.createPrecomputedEntropyMetric(0.1d, false))
                   .addPrivacyModel(
                       new EntropyLDiversity("occupation", 100, EntropyEstimator.GRASSBERGER)),
               "occupation",
               "./data/adult.csv",
               0.0d,
               null,
               true)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.04d, Metric.createDiscernabilityMetric(true))
                   .addPrivacyModel(
                       new EntropyLDiversity("RAMNTALL", 5, EntropyEstimator.GRASSBERGER)),
               "RAMNTALL",
               "./data/cup.csv",
               1833435.0,
               new int[] {4, 0, 1, 0, 1, 3, 1},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.03d, Metric.createDiscernabilityMetric(true))
                   .addPrivacyModel(
                       new EntropyLDiversity("RAMNTALL", 100, EntropyEstimator.GRASSBERGER)),
               "RAMNTALL",
               "./data/cup.csv",
               4.5168281E7,
               new int[] {4, 4, 0, 0, 1, 3, 1},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.0d, Metric.createDiscernabilityMetric(true))
                   .addPrivacyModel(new EntropyLDiversity("RAMNTALL", 5)),
               "RAMNTALL",
               "./data/cup.csv",
               3.01506905E8,
               new int[] {4, 4, 1, 1, 1, 4, 4},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.0d, Metric.createDiscernabilityMetric(true))
                   .addPrivacyModel(new EntropyLDiversity("RAMNTALL", 3)),
               "RAMNTALL",
               "./data/cup.csv",
               9.2264547E7,
               new int[] {4, 4, 1, 0, 1, 4, 4},
               false)
         },
         /* 10 */ {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.04d, Metric.createDiscernabilityMetric(true))
                   .addPrivacyModel(
                       new EntropyLDiversity("RAMNTALL", 5, EntropyEstimator.SHANNON)),
               "RAMNTALL",
               "./data/cup.csv",
               2823649.0,
               new int[] {4, 0, 0, 1, 1, 3, 1},
               true)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.1d, Metric.createDiscernabilityMetric(true))
                   .addPrivacyModel(
                       new EntropyLDiversity("RAMNTALL", 100, EntropyEstimator.GRASSBERGER)),
               "RAMNTALL",
               "./data/cup.csv",
               3.4459973E7,
               new int[] {5, 0, 0, 2, 1, 2, 1},
               true)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, true))
                   .addPrivacyModel(
                       new EntropyLDiversity("EDUC", 5, EntropyEstimator.GRASSBERGER)),
               "EDUC",
               "./data/ihis.csv",
               7735322.29514608,
               new int[] {0, 0, 0, 1, 3, 0, 0, 1},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, true))
                   .addPrivacyModel(
                       new EntropyLDiversity("EDUC", 2, EntropyEstimator.GRASSBERGER)),
               "EDUC",
               "./data/ihis.csv",
               5428093.534997522,
               new int[] {0, 0, 0, 0, 2, 0, 0, 1},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, true))
                   .addPrivacyModel(new EntropyLDiversity("EDUC", 5, EntropyEstimator.SHANNON)),
               "EDUC",
               "./data/ihis.csv",
               1.2258628558792587E7,
               new int[] {0, 0, 0, 3, 3, 2, 0, 1},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, true))
                   .addPrivacyModel(
                       new EntropyLDiversity("EDUC", 100, EntropyEstimator.GRASSBERGER)),
               "EDUC",
               "./data/ihis.csv",
               0.0d,
               null,
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, true))
                   .addPrivacyModel(
                       new EntropyLDiversity("EDUC", 5, EntropyEstimator.GRASSBERGER)),
               "EDUC",
               "./data/ihis.csv",
               7735322.29514608,
               new int[] {0, 0, 0, 1, 3, 0, 0, 1},
               true)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.02d, Metric.createPrecomputedEntropyMetric(0.1d, true))
                   .addPrivacyModel(new EntropyLDiversity("EDUC", 3, EntropyEstimator.SHANNON)),
               "EDUC",
               "./data/ihis.csv",
               7578152.206004559,
               new int[] {0, 0, 0, 2, 2, 0, 0, 1},
               true)
         },
       });
 }
Example #12
0
  /**
   * Runs the example: assembles a small in-memory table together with value hierarchies for its
   * three attributes, anonymizes it with a {@link KAnonymity} and a
   * {@link HierarchicalDistanceTCloseness} criterion, and prints the outcome to standard output.
   *
   * @param args command-line arguments (never read)
   * @throws IOException if one of the library calls made here reports an I/O failure
   */
  public static void main(String[] args) throws IOException {

    // Input table; the first row lists the attribute names referenced further down
    final String[][] records = {
      {"zipcode", "age", "disease"},
      {"47677", "29", "gastric ulcer"},
      {"47602", "22", "gastritis"},
      {"47678", "27", "stomach cancer"},
      {"47905", "43", "gastritis"},
      {"47909", "52", "flu"},
      {"47906", "47", "bronchitis"},
      {"47605", "30", "bronchitis"},
      {"47673", "36", "pneumonia"},
      {"47607", "32", "stomach cancer"},
    };
    final DefaultData data = Data.create();
    for (final String[] record : records) {
      data.add(record);
    }

    // Age hierarchy: exact value, then a coarse bucket, then "*"
    final String[][] ageLevels = {
      {"29", "<=40", "*"},
      {"22", "<=40", "*"},
      {"27", "<=40", "*"},
      {"43", ">40", "*"},
      {"52", ">40", "*"},
      {"47", ">40", "*"},
      {"30", "<=40", "*"},
      {"36", "<=40", "*"},
      {"32", "<=40", "*"},
    };
    final DefaultHierarchy age = Hierarchy.create();
    for (final String[] levels : ageLevels) {
      age.add(levels);
    }

    // Zip code hierarchy (only excerpts for readability): one more digit is masked per level
    final String[][] zipcodeLevels = {
      {"47677", "4767*", "476**", "47***", "4****", "*****"},
      {"47602", "4760*", "476**", "47***", "4****", "*****"},
      {"47678", "4767*", "476**", "47***", "4****", "*****"},
      {"47905", "4790*", "479**", "47***", "4****", "*****"},
      {"47909", "4790*", "479**", "47***", "4****", "*****"},
      {"47906", "4790*", "479**", "47***", "4****", "*****"},
      {"47605", "4760*", "476**", "47***", "4****", "*****"},
      {"47673", "4767*", "476**", "47***", "4****", "*****"},
      {"47607", "4760*", "476**", "47***", "4****", "*****"},
    };
    final DefaultHierarchy zipcode = Hierarchy.create();
    for (final String[] levels : zipcodeLevels) {
      zipcode.add(levels);
    }

    // Hierarchy over the sensitive values; the labels shared between rows are named once
    final String respiratory = "respiratory infection";
    final String lung = "vascular lung disease";
    final String stomach = "stomach disease";
    final String colon = "colon disease";
    final String digestive = "digestive system disease";
    final String root = "respiratory & digestive system disease";
    final String[][] diseaseLevels = {
      {"flu", respiratory, lung, root},
      {"pneumonia", respiratory, lung, root},
      {"bronchitis", respiratory, lung, root},
      {"pulmonary edema", lung, lung, root},
      {"pulmonary embolism", lung, lung, root},
      {"gastric ulcer", stomach, digestive, root},
      {"stomach cancer", stomach, digestive, root},
      {"gastritis", stomach, digestive, root},
      {"colitis", colon, digestive, root},
      {"colon cancer", colon, digestive, root},
    };
    final DefaultHierarchy disease = Hierarchy.create();
    for (final String[] levels : diseaseLevels) {
      disease.add(levels);
    }

    // Attach the hierarchies to their attributes; "disease" is flagged as sensitive instead
    data.getDefinition().setAttributeType("age", age);
    data.getDefinition().setAttributeType("zipcode", zipcode);
    data.getDefinition().setAttributeType("disease", AttributeType.SENSITIVE_ATTRIBUTE);

    // Configure the run: both criteria, max outliers set to 0, entropy metric
    final ARXConfiguration config = ARXConfiguration.create();
    config.addCriterion(new KAnonymity(3));
    config.addCriterion(new HierarchicalDistanceTCloseness("disease", 0.6d, disease));
    config.setMaxOutliers(0d);
    config.setMetric(Metric.createEntropyMetric());

    // Anonymize and report
    final ARXResult result = new ARXAnonymizer().anonymize(data, config);
    printResult(result, data);

    // Dump every row of the transformed output
    System.out.println(" - Transformed data:");
    for (final Iterator<String[]> rows = result.getOutput(false).iterator(); rows.hasNext(); ) {
      System.out.print("   ");
      System.out.println(Arrays.toString(rows.next()));
    }
  }