/** Verifies that passing {@code null} to {@code setMetric} raises a NullPointerException. */
  @Test
  public void testMetric() {
    try {
      final ARXConfiguration configuration = ARXConfiguration.create();
      configuration.setMetric(null);
      // Only reached when the null metric was accepted without complaint
      Assert.fail();
    } catch (final NullPointerException expected) {
      // This is the outcome the test is looking for
    }
  }
 /**
  * Renders the criteria of a configuration as a single string.
  *
  * <p>The result has the form <code>{c1, c2, ...}</code>, where each element is the
  * {@code toString()} value of one criterion, in the iteration order of
  * {@code config.getCriteria()}. A configuration without criteria yields <code>{}</code>.
  *
  * @param config the configuration whose criteria are rendered
  * @return the brace-enclosed, comma-separated list of criteria
  */
 private String getPrivacyModel(ARXConfiguration config) {
   StringBuilder builder = new StringBuilder();
   builder.append("{");
   // Empty before the first element, ", " before every following one
   String separator = "";
   for (PrivacyCriterion criterion : config.getCriteria()) {
     builder.append(separator).append(criterion.toString());
     separator = ", ";
   }
   return builder.append("}").toString();
 }
 /**
  * Verifies that a {@code null} attribute type leads to a NullPointerException.
  *
  * <p>The attribute type of "age" is set to {@code null} on the provider's data, which is then
  * anonymized with a 2-anonymity configuration. The test passes if any of these steps throws a
  * NullPointerException and fails if all of them complete.
  *
  * @throws IOException declared because the calls made by this test may raise it
  */
 @Test
 public void testNullHierarchy() throws IOException {
   try {
     final ARXAnonymizer arx = new ARXAnonymizer();
     final Data input = provider.getData();
     input.getDefinition().setAttributeType("age", (AttributeType) null);

     final ARXConfiguration configuration = ARXConfiguration.create();
     configuration.addCriterion(new KAnonymity(2));
     configuration.setMaxOutliers(1.2d);
     arx.anonymize(input, configuration);

     // Only reached when no NullPointerException was raised
     Assert.fail();
   } catch (final NullPointerException expected) {
     // This is the outcome the test is looking for
   }
 }
  /**
   * Verifies that anonymizing the provider's data, without this test assigning any attribute
   * types, is rejected with an IllegalArgumentException.
   *
   * <p>The anonymizer is created outside the guarded section, so an exception from its
   * constructor is not counted as the expected outcome.
   *
   * @throws IOException declared because the calls made by this test may raise it
   */
  @Test
  public void testEmptyDefinition() throws IOException {
    final ARXAnonymizer arx = new ARXAnonymizer();
    try {
      final ARXConfiguration configuration = ARXConfiguration.create();
      configuration.addCriterion(new KAnonymity(2));
      configuration.setMaxOutliers(1.2d);
      arx.anonymize(provider.getData(), configuration);

      // Only reached when no IllegalArgumentException was raised
      Assert.fail();
    } catch (final IllegalArgumentException expected) {
      // This is the outcome the test is looking for
    }
  }
  /**
   * Verifies that anonymizing an empty dataset which carries an attribute definition is rejected
   * with an IllegalArgumentException.
   *
   * <p>A dataset is created without adding any rows, "age" is declared an identifying attribute
   * on it, and that dataset is handed to the anonymizer together with a 2-anonymity
   * configuration. The test passes if an IllegalArgumentException is raised anywhere inside the
   * guarded section and fails otherwise.
   *
   * @throws IOException declared because the calls made by this test may raise it
   */
  @Test
  public void testEmptyDatasetWithAttributeDefinition() throws IOException {
    try {
      final ARXAnonymizer anonymizer = new ARXAnonymizer();
      final Data data = Data.create();

      data.getDefinition().setAttributeType("age", AttributeType.IDENTIFYING_ATTRIBUTE);
      final ARXConfiguration config = ARXConfiguration.create();
      config.addCriterion(new KAnonymity(2));
      config.setMaxOutliers(1.2d);
      // Anonymize the empty dataset prepared above rather than the provider's data,
      // otherwise the dataset under test is never exercised
      anonymizer.anonymize(data, config);

    } catch (final IllegalArgumentException e) {
      // This is the outcome the test is looking for
      return;
    }
    Assert.fail();
  }
  /**
   * Verifies that anonymization is rejected with an IllegalArgumentException after the provider
   * has been switched to its "missing" data definition.
   *
   * <p>The provider set-up and the anonymizer construction happen outside the guarded section,
   * so exceptions raised there are not counted as the expected outcome.
   *
   * @throws IOException declared because the calls made by this test may raise it
   */
  @Test
  public void testMissingHierarchyValue() throws IOException {
    provider.createDataDefinitionMissing();
    final Data input = provider.getData();
    final ARXAnonymizer arx = new ARXAnonymizer();

    try {
      final ARXConfiguration configuration = ARXConfiguration.create();
      configuration.addCriterion(new KAnonymity(2));
      configuration.setMaxOutliers(0d);
      arx.anonymize(input, configuration);

      // Only reached when no IllegalArgumentException was raised
      Assert.fail();
    } catch (final IllegalArgumentException expected) {
      // This is the outcome the test is looking for
    }
  }
  /**
   * Verifies that anonymizing a freshly created dataset, to which neither rows nor attribute
   * types have been added, is rejected with an IllegalArgumentException.
   *
   * @throws IOException declared because the calls made by this test may raise it
   */
  @Test
  public void testEmptyDatasetWithoutAttributeDefinition() throws IOException {
    try {
      final ARXAnonymizer arx = new ARXAnonymizer();
      final Data emptyInput = Data.create();

      final ARXConfiguration configuration = ARXConfiguration.create();
      configuration.addCriterion(new KAnonymity(2));
      configuration.setMaxOutliers(1.2d);
      arx.anonymize(emptyInput, configuration);

      // Only reached when no IllegalArgumentException was raised
      Assert.fail();
    } catch (final IllegalArgumentException expected) {
      // This is the outcome the test is looking for
    }
  }
Exemple #8
0
  /**
   * Creates a new NodeChecker instance and wires up its history, state machine, groupify
   * operators and transformer.
   *
   * @param manager The manager providing the data and the hierarchies
   * @param metric The metric
   * @param config The anonymization configuration
   * @param historyMaxSize The history max size, handed to the history
   * @param snapshotSizeDataset The history threshold, handed to the history
   * @param snapshotSizeSnapshot The history threshold replacement, handed to the history
   */
  public NodeChecker(
      final DataManager manager,
      final Metric<?> metric,
      final ARXConfiguration config,
      final int historyMaxSize,
      final double snapshotSizeDataset,
      final double snapshotSizeSnapshot) {

    // Keep references to the collaborators
    this.metric = metric;
    this.config = config;
    data = manager.getDataQI();

    // Initial capacity: one percent of the data length
    final int initialSize = (int) (manager.getDataQI().getDataLength() * 0.01d);

    // The dictionaries get a real capacity only if the configuration requires distributions;
    // otherwise they are created with size zero, just to allow bytecode instrumentation
    final boolean distributionRequired =
        (config.getRequirements() & ARXConfiguration.REQUIREMENT_DISTRIBUTION) != 0;
    final int dictionarySize = distributionRequired ? initialSize : 0;
    final IntArrayDictionary dictionarySensValue = new IntArrayDictionary(dictionarySize);
    final IntArrayDictionary dictionarySensFreq = new IntArrayDictionary(dictionarySize);

    // History and the state machine operating on it
    history =
        new History(
            manager.getDataQI().getArray().length,
            historyMaxSize,
            snapshotSizeDataset,
            snapshotSizeSnapshot,
            config,
            dictionarySensValue,
            dictionarySensFreq);
    stateMachine = new StateMachine(history);

    // Two groupify operators of equal initial size
    currentGroupify = new HashGroupify(initialSize, config);
    lastGroupify = new HashGroupify(initialSize, config);

    // Transformer sharing the dictionaries with the history
    transformer =
        new Transformer(
            manager.getDataQI().getArray(),
            manager.getHierarchies(),
            manager.getDataSE().getArray(),
            config,
            dictionarySensValue,
            dictionarySensFreq);
  }
Exemple #9
0
  /**
   * Applies the transformation of the given node, updates the node's state and returns the
   * buffer.
   *
   * <p>The node's anonymity flag is always refreshed. A node that has not been checked yet is
   * additionally marked as checked, evaluated with the metric and tagged. Outliers are marked in
   * the transformer's buffer unless the configured absolute outlier limit is zero.
   *
   * @param node the node whose transformation is applied
   * @return the result of {@code getBuffer()}
   */
  @Override
  public Data transformAndMarkOutliers(final Node node) {

    // Reset the groupify operator and run the node's transformation through it
    currentGroupify.clear();
    currentGroupify = transformer.apply(0L, node.getTransformation(), currentGroupify);

    // Record the anonymity property on the node
    final boolean anonymous = currentGroupify.isAnonymous();
    node.setAnonymous(anonymous);

    // Nodes seen for the first time are also evaluated and tagged
    final boolean alreadyChecked = node.isChecked();
    if (!alreadyChecked) {
      node.setChecked();
      metric.evaluate(node, currentGroupify);
      node.setTagged();
    }

    // Mark outliers only when some are permitted
    final boolean outliersPermitted = config.getAbsoluteMaxOutliers() != 0;
    if (outliersPermitted) {
      currentGroupify.markOutliers(transformer.getBuffer());
    }

    return getBuffer();
  }
Exemple #10
0
  /**
   * Entry point. Anonymizes the example data with three privacy models (3-anonymity,
   * hierarchical-distance t-closeness on "disease1" and recursive (c,l)-diversity on "disease2")
   * and prints the result.
   *
   * @param args the arguments (not used)
   * @throws IOException declared because the calls made by this example may raise it
   */
  public static void main(String[] args) throws IOException {

    // Obtain the input and describe its attributes
    Data input = getData();
    input.getDefinition().setAttributeType("age", getHierarchyAge());
    input.getDefinition().setAttributeType("zipcode", getHierarchyZipcode());
    input.getDefinition().setAttributeType("disease1", AttributeType.SENSITIVE_ATTRIBUTE);
    input.getDefinition().setAttributeType("disease2", AttributeType.SENSITIVE_ATTRIBUTE);

    // Set up the anonymizer and its configuration
    ARXAnonymizer arx = new ARXAnonymizer();
    ARXConfiguration configuration = ARXConfiguration.create();
    configuration.addPrivacyModel(new KAnonymity(3));
    configuration.addPrivacyModel(
        new HierarchicalDistanceTCloseness("disease1", 0.6d, getHierarchyDisease()));
    configuration.addPrivacyModel(new RecursiveCLDiversity("disease2", 3d, 2));
    configuration.setMaxOutliers(0d);
    configuration.setQualityModel(Metric.createEntropyMetric());

    // Run the anonymization and report on it
    ARXResult outcome = arx.anonymize(input, configuration);
    printResult(outcome, input);

    // Without a global optimum there is no transformed data to show
    if (outcome.getGlobalOptimum() == null) {
      return;
    }

    System.out.println(" - Transformed data:");
    for (Iterator<String[]> rows = outcome.getOutput(false).iterator(); rows.hasNext(); ) {
      System.out.print("   ");
      System.out.println(Arrays.toString(rows.next()));
    }
  }
 /**
  * Returns the parameter sets for the parameterized test; each run is named by its index and
  * the string form of its first parameter.
  *
  * <p>Every entry is a one-element array wrapping an {@code ARXAnonymizationTestCase}. Each test
  * case is built from a configuration carrying an {@code EntropyLDiversity} privacy model, the
  * attribute name that is also passed to the privacy model, the path of a CSV dataset, a
  * numeric value, an {@code int[]} (or {@code null}) and a boolean flag.
  *
  * <p>NOTE(review): the numeric value and the array look like the expected information loss and
  * the expected transformation, with {@code 0.0d}/{@code null} standing for "no result" —
  * confirm against the {@code ARXAnonymizationTestCase} constructor.
  *
  * @return the test cases, in declaration order
  */
 @Parameters(name = "{index}:[{0}]")
 public static Collection<Object[]> cases() {
   return Arrays.asList(
       new Object[][] {
           /* 0 */
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, false))
                   .addPrivacyModel(
                       new EntropyLDiversity("occupation", 5, EntropyEstimator.GRASSBERGER)),
               "occupation",
               "./data/adult.csv",
               216092.124036387,
               new int[] {1, 0, 1, 0, 3, 2, 2, 0},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, false))
                   .addPrivacyModel(
                       new EntropyLDiversity("occupation", 100, EntropyEstimator.SHANNON)),
               "occupation",
               "./data/adult.csv",
               0.0d,
               null,
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, false))
                   .addPrivacyModel(
                       new EntropyLDiversity("occupation", 5, EntropyEstimator.GRASSBERGER)),
               "occupation",
               "./data/adult.csv",
               324620.5269918692,
               new int[] {1, 1, 1, 1, 3, 2, 2, 1},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.05d, Metric.createPrecomputedEntropyMetric(0.1d, false))
                   .addPrivacyModel(
                       new EntropyLDiversity("occupation", 3, EntropyEstimator.GRASSBERGER)),
               "occupation",
               "./data/adult.csv",
               180347.4325366015,
               new int[] {0, 0, 1, 1, 2, 2, 2, 0},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, false))
                   .addPrivacyModel(
                       new EntropyLDiversity("occupation", 5, EntropyEstimator.SHANNON)),
               "occupation",
               "./data/adult.csv",
               228878.2039109517,
               new int[] {1, 0, 1, 1, 2, 2, 2, 1},
               true)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.1d, Metric.createPrecomputedEntropyMetric(0.1d, false))
                   .addPrivacyModel(
                       new EntropyLDiversity("occupation", 100, EntropyEstimator.GRASSBERGER)),
               "occupation",
               "./data/adult.csv",
               0.0d,
               null,
               true)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.04d, Metric.createDiscernabilityMetric(true))
                   .addPrivacyModel(
                       new EntropyLDiversity("RAMNTALL", 5, EntropyEstimator.GRASSBERGER)),
               "RAMNTALL",
               "./data/cup.csv",
               1833435.0,
               new int[] {4, 0, 1, 0, 1, 3, 1},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.03d, Metric.createDiscernabilityMetric(true))
                   .addPrivacyModel(
                       new EntropyLDiversity("RAMNTALL", 100, EntropyEstimator.GRASSBERGER)),
               "RAMNTALL",
               "./data/cup.csv",
               4.5168281E7,
               new int[] {4, 4, 0, 0, 1, 3, 1},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.0d, Metric.createDiscernabilityMetric(true))
                   .addPrivacyModel(new EntropyLDiversity("RAMNTALL", 5)),
               "RAMNTALL",
               "./data/cup.csv",
               3.01506905E8,
               new int[] {4, 4, 1, 1, 1, 4, 4},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.0d, Metric.createDiscernabilityMetric(true))
                   .addPrivacyModel(new EntropyLDiversity("RAMNTALL", 3)),
               "RAMNTALL",
               "./data/cup.csv",
               9.2264547E7,
               new int[] {4, 4, 1, 0, 1, 4, 4},
               false)
         },
         /* 10 */ {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.04d, Metric.createDiscernabilityMetric(true))
                   .addPrivacyModel(
                       new EntropyLDiversity("RAMNTALL", 5, EntropyEstimator.SHANNON)),
               "RAMNTALL",
               "./data/cup.csv",
               2823649.0,
               new int[] {4, 0, 0, 1, 1, 3, 1},
               true)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.1d, Metric.createDiscernabilityMetric(true))
                   .addPrivacyModel(
                       new EntropyLDiversity("RAMNTALL", 100, EntropyEstimator.GRASSBERGER)),
               "RAMNTALL",
               "./data/cup.csv",
               3.4459973E7,
               new int[] {5, 0, 0, 2, 1, 2, 1},
               true)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, true))
                   .addPrivacyModel(
                       new EntropyLDiversity("EDUC", 5, EntropyEstimator.GRASSBERGER)),
               "EDUC",
               "./data/ihis.csv",
               7735322.29514608,
               new int[] {0, 0, 0, 1, 3, 0, 0, 1},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, true))
                   .addPrivacyModel(
                       new EntropyLDiversity("EDUC", 2, EntropyEstimator.GRASSBERGER)),
               "EDUC",
               "./data/ihis.csv",
               5428093.534997522,
               new int[] {0, 0, 0, 0, 2, 0, 0, 1},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, true))
                   .addPrivacyModel(new EntropyLDiversity("EDUC", 5, EntropyEstimator.SHANNON)),
               "EDUC",
               "./data/ihis.csv",
               1.2258628558792587E7,
               new int[] {0, 0, 0, 3, 3, 2, 0, 1},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, true))
                   .addPrivacyModel(
                       new EntropyLDiversity("EDUC", 100, EntropyEstimator.GRASSBERGER)),
               "EDUC",
               "./data/ihis.csv",
               0.0d,
               null,
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, true))
                   .addPrivacyModel(
                       new EntropyLDiversity("EDUC", 5, EntropyEstimator.GRASSBERGER)),
               "EDUC",
               "./data/ihis.csv",
               7735322.29514608,
               new int[] {0, 0, 0, 1, 3, 0, 0, 1},
               true)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.02d, Metric.createPrecomputedEntropyMetric(0.1d, true))
                   .addPrivacyModel(new EntropyLDiversity("EDUC", 3, EntropyEstimator.SHANNON)),
               "EDUC",
               "./data/ihis.csv",
               7578152.206004559,
               new int[] {0, 0, 0, 2, 2, 0, 0, 1},
               true)
         },
       });
 }
 /**
  * Returns the estimated prosecutor risk, capped by the prosecutor risk threshold.
  *
  * <p>The estimate is the reciprocal of the first entry of the histogram. The cap is the
  * threshold from the configuration when a configuration is present and the {@code anonymous}
  * flag is set, and 1 otherwise.
  *
  * @return the smaller of the estimate and the cap
  */
 public double getEstimatedProsecutorRisk() {
   final double estimate = 1.0d / (double) getHistogram().getHistogram()[0];
   final double cap;
   if (config != null && anonymous) {
     cap = config.getRiskThresholdProsecutor();
   } else {
     cap = 1d;
   }
   return Math.min(estimate, cap);
 }
 /**
  * Returns the estimated marketer risk, capped by the marketer risk threshold.
  *
  * <p>The estimate is the reciprocal of the histogram's average class size. The cap is the
  * threshold from the configuration when a configuration is present and the {@code anonymous}
  * flag is set, and 1 otherwise.
  *
  * @return the smaller of the estimate and the cap
  */
 public double getEstimatedMarketerRisk() {
   final double estimate = 1.0d / getHistogram().getAvgClassSize();
   final double cap;
   if (config != null && anonymous) {
     cap = config.getRiskThresholdMarketer();
   } else {
     cap = 1d;
   }
   return Math.min(estimate, cap);
 }
Exemple #14
0
  /**
   * Entry point. Builds a small in-memory dataset with the columns "age", "gender" and
   * "zipcode", anonymizes it with 2-anonymity and prints the input, the transformed data,
   * the frequency distribution of 'age' and the contingency table of 'gender' and 'zipcode',
   * each for the input and for the output.
   *
   * @param args the arguments (not used)
   * @throws IOException declared because the calls made by this example may raise it
   */
  public static void main(String[] args) throws IOException {

    // Define data: the first row names the columns (age = 0, gender = 1, zipcode = 2)
    DefaultData data = Data.create();
    data.add("age", "gender", "zipcode");
    data.add("45", "female", "81675");
    data.add("34", "male", "81667");
    data.add("66", "male", "81925");
    data.add("70", "female", "81931");
    data.add("34", "female", "81931");
    data.add("70", "male", "81931");
    data.add("45", "male", "81931");

    // Define hierarchies
    DefaultHierarchy age = Hierarchy.create();
    age.add("34", "<50", "*");
    age.add("45", "<50", "*");
    age.add("66", ">=50", "*");
    age.add("70", ">=50", "*");

    DefaultHierarchy gender = Hierarchy.create();
    gender.add("male", "*");
    gender.add("female", "*");

    // Only excerpts for readability
    DefaultHierarchy zipcode = Hierarchy.create();
    zipcode.add("81667", "8166*", "816**", "81***", "8****", "*****");
    zipcode.add("81675", "8167*", "816**", "81***", "8****", "*****");
    zipcode.add("81925", "8192*", "819**", "81***", "8****", "*****");
    zipcode.add("81931", "8193*", "819**", "81***", "8****", "*****");

    data.getDefinition().setAttributeType("age", age);
    data.getDefinition().setAttributeType("gender", gender);
    data.getDefinition().setAttributeType("zipcode", zipcode);

    // Create an instance of the anonymizer
    ARXAnonymizer anonymizer = new ARXAnonymizer();
    ARXConfiguration config = ARXConfiguration.create();
    config.addCriterion(new KAnonymity(2));
    config.setMaxOutliers(0d);

    ARXResult result = anonymizer.anonymize(data, config);

    // Print info
    printResult(result, data);

    // Print input
    System.out.println(" - Input data:");
    Iterator<String[]> original = data.getHandle().iterator();
    while (original.hasNext()) {
      System.out.print("   ");
      System.out.println(Arrays.toString(original.next()));
    }

    // Print results
    System.out.println(" - Transformed data:");
    Iterator<String[]> transformed = result.getOutput(false).iterator();
    while (transformed.hasNext()) {
      System.out.print("   ");
      System.out.println(Arrays.toString(transformed.next()));
    }

    // Print frequencies of column 0 ('age') in the input
    StatisticsFrequencyDistribution distribution;
    System.out.println(" - Distribution of attribute 'age' in input:");
    distribution = data.getHandle().getStatistics().getFrequencyDistribution(0, false);
    System.out.println("   " + Arrays.toString(distribution.values));
    System.out.println("   " + Arrays.toString(distribution.frequency));

    // Print frequencies of column 0 ('age') in the output
    System.out.println(" - Distribution of attribute 'age' in output:");
    distribution = result.getOutput(false).getStatistics().getFrequencyDistribution(0, true);
    System.out.println("   " + Arrays.toString(distribution.values));
    System.out.println("   " + Arrays.toString(distribution.frequency));

    // Print contingency table of the input; columns 1 and 2 are 'gender' and 'zipcode',
    // matching the label printed below (column 0 would be 'age')
    StatisticsContingencyTable contingency;
    System.out.println(" - Contingency of attribute 'gender' and 'zipcode' in input:");
    contingency = data.getHandle().getStatistics().getContingencyTable(1, true, 2, true);
    System.out.println("   " + Arrays.toString(contingency.values1));
    System.out.println("   " + Arrays.toString(contingency.values2));
    while (contingency.iterator.hasNext()) {
      Entry e = contingency.iterator.next();
      System.out.println("   [" + e.value1 + ", " + e.value2 + ", " + e.frequency + "]");
    }

    // Print contingency table of the output for the same two columns
    System.out.println(" - Contingency of attribute 'gender' and 'zipcode' in output:");
    contingency = result.getOutput(false).getStatistics().getContingencyTable(1, true, 2, true);
    System.out.println("   " + Arrays.toString(contingency.values1));
    System.out.println("   " + Arrays.toString(contingency.values2));
    while (contingency.iterator.hasNext()) {
      Entry e = contingency.iterator.next();
      System.out.println("   [" + e.value1 + ", " + e.value2 + ", " + e.frequency + "]");
    }
  }
 /**
  * Returns the parameter sets for the parameterized test; each run is named by its index and
  * the string form of its first parameter.
  *
  * <p>Every entry is a one-element array wrapping an {@code ARXAnonymizationTestCase}. Each test
  * case is built from a configuration carrying a {@code KAnonymity} privacy model (k = 5 or
  * k = 100), the path of a CSV dataset, a numeric value, an {@code int[]} and a boolean flag.
  *
  * <p>NOTE(review): the numeric value and the array look like the expected information loss and
  * the expected transformation — confirm against the {@code ARXAnonymizationTestCase}
  * constructor.
  *
  * @return the test cases, in declaration order
  */
 @Parameters(name = "{index}:[{0}]")
 public static Collection<Object[]> cases() {
   return Arrays.asList(
       new Object[][] {
         /* 0 */ {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, true))
                   .addPrivacyModel(new KAnonymity(5)),
               "./data/adult.csv",
               255559.85455731067,
               new int[] {1, 0, 1, 1, 3, 2, 2, 0, 1},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, true))
                   .addPrivacyModel(new KAnonymity(100)),
               "./data/adult.csv",
               379417.3460570988,
               new int[] {1, 1, 1, 1, 3, 2, 2, 1, 1},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, true))
                   .addPrivacyModel(new KAnonymity(5)),
               "./data/adult.csv",
               407289.5388925293,
               new int[] {1, 2, 1, 1, 3, 2, 2, 1, 1},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, true))
                   .addPrivacyModel(new KAnonymity(100)),
               "./data/adult.csv",
               453196.8932458743,
               new int[] {0, 4, 1, 1, 3, 2, 2, 1, 1},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, true))
                   .addPrivacyModel(new KAnonymity(5)),
               "./data/adult.csv",
               255559.85455731067,
               new int[] {1, 0, 1, 1, 3, 2, 2, 0, 1},
               true)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, true))
                   .addPrivacyModel(new KAnonymity(100)),
               "./data/adult.csv",
               379417.3460570988,
               new int[] {1, 1, 1, 1, 3, 2, 2, 1, 1},
               true)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, false))
                   .addPrivacyModel(new KAnonymity(5)),
               "./data/cup.csv",
               1764006.4033760305,
               new int[] {2, 4, 0, 1, 0, 4, 4, 4},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, false))
                   .addPrivacyModel(new KAnonymity(100)),
               "./data/cup.csv",
               1994002.8308631124,
               new int[] {3, 4, 1, 1, 0, 4, 4, 4},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, false))
                   .addPrivacyModel(new KAnonymity(5)),
               "./data/cup.csv",
               2445878.424834677,
               new int[] {4, 4, 1, 1, 1, 4, 4, 4},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, false))
                   .addPrivacyModel(new KAnonymity(100)),
               "./data/cup.csv",
               2517471.5816586106,
               new int[] {5, 4, 1, 0, 1, 4, 4, 4},
               false)
         },
         /* 10 */ {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, false))
                   .addPrivacyModel(new KAnonymity(5)),
               "./data/cup.csv",
               1764006.4033760305,
               new int[] {2, 4, 0, 1, 0, 4, 4, 4},
               true)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, false))
                   .addPrivacyModel(new KAnonymity(100)),
               "./data/cup.csv",
               2001343.4737485605,
               new int[] {3, 4, 1, 1, 0, 1, 2, 1},
               true)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.04d, Metric.createDiscernabilityMetric(true))
                   .addPrivacyModel(new KAnonymity(5)),
               "./data/fars.csv",
               4469271.0,
               new int[] {0, 2, 2, 2, 1, 2, 1, 0},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.04d, Metric.createDiscernabilityMetric(true))
                   .addPrivacyModel(new KAnonymity(100)),
               "./data/fars.csv",
               5.6052481E7,
               new int[] {0, 2, 3, 3, 1, 2, 2, 2},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.0d, Metric.createDiscernabilityMetric(true))
                   .addPrivacyModel(new KAnonymity(5)),
               "./data/fars.csv",
               1.42377891E8,
               new int[] {1, 2, 3, 3, 1, 2, 1, 2},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.0d, Metric.createDiscernabilityMetric(true))
                   .addPrivacyModel(new KAnonymity(100)),
               "./data/fars.csv",
               4.36925397E8,
               new int[] {5, 2, 3, 3, 1, 2, 0, 2},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.04d, Metric.createDiscernabilityMetric(true))
                   .addPrivacyModel(new KAnonymity(5)),
               "./data/fars.csv",
               4469271.0,
               new int[] {0, 2, 2, 2, 1, 2, 1, 0},
               true)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.04d, Metric.createDiscernabilityMetric(true))
                   .addPrivacyModel(new KAnonymity(100)),
               "./data/fars.csv",
               5.6052481E7,
               new int[] {0, 2, 3, 3, 1, 2, 2, 2},
               true)
         },
       });
 }
Exemple #16
0
  /**
   * Prepares the hierarchy references, the per-level value counts and the result cache.
   *
   * <p>Counts at level 0 are taken from the input rows; if the configuration contains a
   * d-presence criterion, only rows inside its research subset are counted. Counts for higher
   * levels are derived by adding each value's level-0 count to the value it maps to at that
   * level. All cache entries start out as {@code NA}.
   *
   * @param definition the data definition (not read by this method)
   * @param input the input data, providing the dictionary and the encoded rows
   * @param ahierarchies the generalization hierarchies, one per column
   * @param config the configuration, consulted for a d-presence criterion
   * @throws IllegalArgumentException if more than one d-presence criterion is configured
   */
  @Override
  protected void initializeInternal(
      final DataDefinition definition,
      final Data input,
      final GeneralizationHierarchy[] ahierarchies,
      final ARXConfiguration config) {

    // Dictionary of the input data
    final Dictionary dictionary = input.getDictionary();

    // Research subset, present only when a d-presence criterion is configured
    RowSet subset = null;
    if (config.containsCriterion(DPresence.class)) {
      final Set<DPresence> presenceCriteria = config.getCriteria(DPresence.class);
      if (presenceCriteria.size() > 1) {
        throw new IllegalArgumentException("Only one d-presence criterion supported!");
      }
      for (final DPresence criterion : presenceCriteria) {
        subset = criterion.getSubset().getSet();
      }
    }

    // Hierarchy references: Column -> Id -> Level -> Output
    final int[][] rows = input.getArray();
    final int numColumns = rows[0].length;
    hierarchies = new int[numColumns][][];
    for (int column = 0; column < ahierarchies.length; column++) {
      hierarchies[column] = ahierarchies[column].getArray();
    }

    // Counts: Column -> Id -> Level -> Count
    cardinalities = new int[numColumns][][];
    for (int column = 0; column < cardinalities.length; column++) {
      final int numValues = dictionary.getMapping()[column].length;
      final int numLevels = ahierarchies[column].getArray()[0].length;
      cardinalities[column] = new int[numValues][numLevels];
    }

    // Level 0: count the values that actually occur in the (subset of the) rows
    for (int rowIndex = 0; rowIndex < rows.length; rowIndex++) {
      // Rows outside the research subset do not contribute
      if (subset != null && !subset.contains(rowIndex)) {
        continue;
      }
      final int[] row = rows[rowIndex];
      for (int column = 0; column < row.length; column++) {
        cardinalities[column][row[column]][0]++;
      }
    }

    // Higher levels: propagate every level-0 count to the generalized value
    for (int column = 0; column < hierarchies.length; column++) {
      final int[][] hierarchy = hierarchies[column];
      for (int value = 0; value < hierarchy.length; value++) {
        final int baseCount = cardinalities[column][value][0];
        final int[] generalizations = hierarchy[value];
        for (int level = 1; level < generalizations.length; level++) {
          cardinalities[column][generalizations[level]][level] += baseCount;
        }
      }
    }

    // Result cache, one slot per column and level, initially not available
    cache = new double[hierarchies.length][];
    for (int column = 0; column < cache.length; column++) {
      final double[] perLevel = new double[ahierarchies[column].getArray()[0].length];
      Arrays.fill(perLevel, NA);
      cache[column] = perLevel;
    }
  }
 /**
  * Returns the test cases.
  *
  * @return
  * @throws IOException
  */
 @Parameters(name = "{index}:[{0}]")
 public static Collection<Object[]> cases() throws IOException {
   return Arrays.asList(
       new Object[][] {
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.05d, Metric.createPrecomputedEntropyMetric(0.1d, false))
                   .addPrivacyModel(
                       new KMap(
                           3,
                           0.01d,
                           ARXPopulationModel.create(Region.USA),
                           CellSizeEstimator.ZERO_TRUNCATED_POISSON)),
               "occupation",
               "./data/adult.csv",
               130804.5332092598,
               new int[] {0, 0, 1, 1, 0, 2, 2, 0},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.05d, Metric.createPrecomputedEntropyMetric(0.1d, false))
                   .addPrivacyModel(
                       new KMap(
                           1000,
                           0.01d,
                           ARXPopulationModel.create(Region.USA),
                           CellSizeEstimator.ZERO_TRUNCATED_POISSON)),
               "occupation",
               "./data/adult.csv",
               151894.1394841501,
               new int[] {0, 0, 1, 1, 1, 2, 1, 0},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.0d, Metric.createAECSMetric())
                   .addPrivacyModel(
                       new KMap(
                           5,
                           DataSubset.create(
                               Data.create("./data/adult.csv", StandardCharsets.UTF_8, ';'),
                               Data.create(
                                   "./data/adult_subset.csv", StandardCharsets.UTF_8, ';')))),
               "occupation",
               "./data/adult.csv",
               45.014925373134325,
               new int[] {1, 0, 1, 2, 3, 2, 2, 1},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, true))
                   .addPrivacyModel(
                       new KMap(
                           3,
                           DataSubset.create(
                               Data.create("./data/adult.csv", StandardCharsets.UTF_8, ';'),
                               Data.create(
                                   "./data/adult_subset.csv", StandardCharsets.UTF_8, ';')))),
               "occupation",
               "./data/adult.csv",
               23387.494246375998,
               new int[] {0, 0, 1, 2, 3, 2, 2, 0},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, false))
                   .addPrivacyModel(
                       new KMap(
                           5,
                           DataSubset.create(
                               Data.create("./data/adult.csv", StandardCharsets.UTF_8, ';'),
                               Data.create(
                                   "./data/adult_subset.csv", StandardCharsets.UTF_8, ';')))),
               "occupation",
               "./data/adult.csv",
               28551.7222913157,
               new int[] {1, 0, 1, 2, 3, 2, 2, 1},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.05d, Metric.createAECSMetric())
                   .addPrivacyModel(
                       new KMap(
                           20,
                           DataSubset.create(
                               Data.create("./data/adult.csv", StandardCharsets.UTF_8, ';'),
                               Data.create(
                                   "./data/adult_subset.csv", StandardCharsets.UTF_8, ';')))),
               "occupation",
               "./data/adult.csv",
               11.424242424242424,
               new int[] {1, 0, 1, 1, 3, 2, 1, 0},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.05d, Metric.createPrecomputedEntropyMetric(0.1d, true))
                   .addPrivacyModel(
                       new KMap(
                           7,
                           DataSubset.create(
                               Data.create("./data/adult.csv", StandardCharsets.UTF_8, ';'),
                               Data.create(
                                   "./data/adult_subset.csv", StandardCharsets.UTF_8, ';')))),
               "occupation",
               "./data/adult.csv",
               17075.7181747451,
               new int[] {0, 0, 1, 1, 2, 2, 2, 0},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.05d, Metric.createPrecomputedEntropyMetric(0.1d, false))
                   .addPrivacyModel(
                       new KMap(
                           3,
                           DataSubset.create(
                               Data.create("./data/adult.csv", StandardCharsets.UTF_8, ';'),
                               Data.create(
                                   "./data/adult_subset.csv", StandardCharsets.UTF_8, ';')))),
               "occupation",
               "./data/adult.csv",
               15121.633326877098,
               new int[] {0, 0, 1, 1, 1, 2, 1, 0},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.0d, Metric.createAECSMetric())
                   .addPrivacyModel(
                       new KMap(
                           5,
                           DataSubset.create(
                               Data.create("./data/adult.csv", StandardCharsets.UTF_8, ';'),
                               Data.create(
                                   "./data/adult_subset.csv", StandardCharsets.UTF_8, ';')))),
               "occupation",
               "./data/adult.csv",
               45.014925373134325,
               new int[] {1, 0, 1, 2, 3, 2, 2, 1},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, true))
                   .addPrivacyModel(
                       new KMap(
                           2,
                           DataSubset.create(
                               Data.create("./data/adult.csv", StandardCharsets.UTF_8, ';'),
                               Data.create(
                                   "./data/adult_subset.csv", StandardCharsets.UTF_8, ';')))),
               "occupation",
               "./data/adult.csv",
               23108.1673304724,
               new int[] {1, 0, 1, 1, 3, 2, 2, 0},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, false))
                   .addPrivacyModel(
                       new KMap(
                           10,
                           DataSubset.create(
                               Data.create("./data/adult.csv", StandardCharsets.UTF_8, ';'),
                               Data.create(
                                   "./data/adult_subset.csv", StandardCharsets.UTF_8, ';')))),
               "occupation",
               "./data/adult.csv",
               30238.2081484441,
               new int[] {0, 1, 1, 2, 3, 2, 2, 0},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.05d, Metric.createAECSMetric())
                   .addPrivacyModel(
                       new KMap(
                           10,
                           DataSubset.create(
                               Data.create("./data/adult.csv", StandardCharsets.UTF_8, ';'),
                               Data.create(
                                   "./data/adult_subset.csv", StandardCharsets.UTF_8, ';')))),
               "occupation",
               "./data/adult.csv",
               7.215311004784689,
               new int[] {0, 0, 1, 1, 3, 2, 1, 0},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.05d, Metric.createPrecomputedEntropyMetric(0.1d, true))
                   .addPrivacyModel(
                       new KMap(
                           5,
                           DataSubset.create(
                               Data.create("./data/adult.csv", StandardCharsets.UTF_8, ';'),
                               Data.create(
                                   "./data/adult_subset.csv", StandardCharsets.UTF_8, ';')))),
               "occupation",
               "./data/adult.csv",
               17053.8743069776,
               new int[] {0, 0, 1, 0, 2, 2, 2, 1},
               false)
         },
         {
           new ARXAnonymizationTestCase(
               ARXConfiguration.create(0.05d, Metric.createPrecomputedEntropyMetric(0.1d, false))
                   .addPrivacyModel(
                       new KMap(
                           3,
                           DataSubset.create(
                               Data.create("./data/adult.csv", StandardCharsets.UTF_8, ';'),
                               Data.create(
                                   "./data/adult_subset.csv", StandardCharsets.UTF_8, ';')))),
               "occupation",
               "./data/adult.csv",
               15121.633326877098,
               new int[] {0, 0, 1, 1, 1, 2, 1, 0},
               false)
         },
       });
 }
Exemple #18
0
  /**
   * Runs the example end to end: builds a small in-memory table with the columns zipcode, age and
   * disease, attaches generalization hierarchies to age and zipcode, marks disease as a sensitive
   * attribute, anonymizes the table under 3-anonymity combined with hierarchical-distance
   * t-closeness (t = 0.6) on disease, and prints the outcome.
   *
   * @param args command-line arguments (not read by this method)
   * @throws IOException declared for the anonymization and output calls made here
   */
  public static void main(String[] args) throws IOException {

    // Input table: the first row is the header, each further row is one record
    final String[][] records = {
      {"zipcode", "age", "disease"},
      {"47677", "29", "gastric ulcer"},
      {"47602", "22", "gastritis"},
      {"47678", "27", "stomach cancer"},
      {"47905", "43", "gastritis"},
      {"47909", "52", "flu"},
      {"47906", "47", "bronchitis"},
      {"47605", "30", "bronchitis"},
      {"47673", "36", "pneumonia"},
      {"47607", "32", "stomach cancer"},
    };
    DefaultData data = Data.create();
    for (String[] record : records) {
      data.add(record);
    }

    // Age hierarchy: value, then its successively coarser generalizations
    final String[][] ageLevels = {
      {"29", "<=40", "*"},
      {"22", "<=40", "*"},
      {"27", "<=40", "*"},
      {"43", ">40", "*"},
      {"52", ">40", "*"},
      {"47", ">40", "*"},
      {"30", "<=40", "*"},
      {"36", "<=40", "*"},
      {"32", "<=40", "*"},
    };
    DefaultHierarchy age = Hierarchy.create();
    for (String[] levels : ageLevels) {
      age.add(levels);
    }

    // Zipcode hierarchy (only excerpts for readability): one more digit is masked per level
    final String[][] zipcodeLevels = {
      {"47677", "4767*", "476**", "47***", "4****", "*****"},
      {"47602", "4760*", "476**", "47***", "4****", "*****"},
      {"47678", "4767*", "476**", "47***", "4****", "*****"},
      {"47905", "4790*", "479**", "47***", "4****", "*****"},
      {"47909", "4790*", "479**", "47***", "4****", "*****"},
      {"47906", "4790*", "479**", "47***", "4****", "*****"},
      {"47605", "4760*", "476**", "47***", "4****", "*****"},
      {"47673", "4767*", "476**", "47***", "4****", "*****"},
      {"47607", "4760*", "476**", "47***", "4****", "*****"},
    };
    DefaultHierarchy zipcode = Hierarchy.create();
    for (String[] levels : zipcodeLevels) {
      zipcode.add(levels);
    }

    // Hierarchy over the sensitive values, filled group by group; every entry ends in the same
    // top-level label
    final String topLevel = "respiratory & digestive system disease";
    DefaultHierarchy disease = Hierarchy.create();
    for (String leaf : new String[] {"flu", "pneumonia", "bronchitis"}) {
      disease.add(leaf, "respiratory infection", "vascular lung disease", topLevel);
    }
    for (String leaf : new String[] {"pulmonary edema", "pulmonary embolism"}) {
      disease.add(leaf, "vascular lung disease", "vascular lung disease", topLevel);
    }
    for (String leaf : new String[] {"gastric ulcer", "stomach cancer", "gastritis"}) {
      disease.add(leaf, "stomach disease", "digestive system disease", topLevel);
    }
    for (String leaf : new String[] {"colitis", "colon cancer"}) {
      disease.add(leaf, "colon disease", "digestive system disease", topLevel);
    }

    // Attach the hierarchies to their columns and flag the sensitive column
    data.getDefinition().setAttributeType("age", age);
    data.getDefinition().setAttributeType("zipcode", zipcode);
    data.getDefinition().setAttributeType("disease", AttributeType.SENSITIVE_ATTRIBUTE);

    // Anonymizer plus configuration: both criteria, no outliers allowed, entropy metric
    ARXAnonymizer anonymizer = new ARXAnonymizer();
    ARXConfiguration config = ARXConfiguration.create();
    config.addCriterion(new KAnonymity(3));
    config.addCriterion(new HierarchicalDistanceTCloseness("disease", 0.6d, disease));
    config.setMaxOutliers(0d);
    config.setMetric(Metric.createEntropyMetric());

    // Run the anonymization
    ARXResult result = anonymizer.anonymize(data, config);

    // Summary of the result
    printResult(result, data);

    // Dump every output row, indented by three spaces
    System.out.println(" - Transformed data:");
    for (Iterator<String[]> rows = result.getOutput(false).iterator(); rows.hasNext(); ) {
      System.out.println("   " + Arrays.toString(rows.next()));
    }
  }