/** Performs a test. */
@Test
public void testMetric() {
    try {
        final ARXConfiguration config = ARXConfiguration.create();
        config.setMetric(null);
    } catch (final NullPointerException e) {
        return;
    }
    Assert.fail();
}
/** Returns a string representing the privacy model. */
private String getPrivacyModel(ARXConfiguration config) {
    StringBuilder result = new StringBuilder();
    result.append("{");
    int num = config.getCriteria().size();
    int count = 0;
    for (PrivacyCriterion c : config.getCriteria()) {
        result.append(c.toString());
        if (++count < num) {
            result.append(", ");
        }
    }
    result.append("}");
    return result.toString();
}
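/*
 * A minimal usage sketch for the helper above (not part of the original class): rendering the
 * configured criteria, e.g. for logging. The exact output depends on each criterion's toString()
 * in the ARX version at hand, and the method name below is illustrative only.
 */
private void logPrivacyModel(ARXConfiguration config) {
    // Prints something like "{5-anonymity, ...}", depending on the configured criteria
    System.out.println(getPrivacyModel(config));
}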
/**
 * Performs a test.
 *
 * @throws IOException
 */
@Test
public void testNullHierarchy() throws IOException {
    try {
        final ARXAnonymizer anonymizer = new ARXAnonymizer();
        final Data data = provider.getData();
        data.getDefinition().setAttributeType("age", (AttributeType) null);
        final ARXConfiguration config = ARXConfiguration.create();
        config.addCriterion(new KAnonymity(2));
        config.setMaxOutliers(1.2d);
        anonymizer.anonymize(data, config);
    } catch (final NullPointerException e) {
        return;
    }
    Assert.fail();
}
/**
 * Performs a test.
 *
 * @throws IOException
 */
@Test
public void testEmptyDefinition() throws IOException {
    final ARXAnonymizer anonymizer = new ARXAnonymizer();
    try {
        final ARXConfiguration config = ARXConfiguration.create();
        config.addCriterion(new KAnonymity(2));
        config.setMaxOutliers(1.2d);
        anonymizer.anonymize(provider.getData(), config);
    } catch (final IllegalArgumentException e) {
        return;
    }
    Assert.fail();
}
/**
 * Performs a test.
 *
 * @throws IOException
 */
@Test
public void testEmptyDatasetWithAttributeDefinition() throws IOException {
    try {
        final ARXAnonymizer anonymizer = new ARXAnonymizer();
        final Data data = Data.create();
        data.getDefinition().setAttributeType("age", AttributeType.IDENTIFYING_ATTRIBUTE);
        final ARXConfiguration config = ARXConfiguration.create();
        config.addCriterion(new KAnonymity(2));
        config.setMaxOutliers(1.2d);
        // Anonymize the empty dataset defined above, as the test name implies
        anonymizer.anonymize(data, config);
    } catch (final IllegalArgumentException e) {
        return;
    }
    Assert.fail();
}
/**
 * Performs a test.
 *
 * @throws IOException
 */
@Test
public void testMissingHierarchyValue() throws IOException {
    provider.createDataDefinitionMissing();
    final Data data = provider.getData();
    final ARXAnonymizer anonymizer = new ARXAnonymizer();
    try {
        final ARXConfiguration config = ARXConfiguration.create();
        config.addCriterion(new KAnonymity(2));
        config.setMaxOutliers(0d);
        anonymizer.anonymize(data, config);
    } catch (final IllegalArgumentException e) {
        return;
    }
    Assert.fail();
}
/**
 * Performs a test.
 *
 * @throws IOException
 */
@Test
public void testEmptyDatasetWithoutAttributeDefinition() throws IOException {
    try {
        final ARXAnonymizer anonymizer = new ARXAnonymizer();
        final Data data = Data.create();
        final ARXConfiguration config = ARXConfiguration.create();
        config.addCriterion(new KAnonymity(2));
        config.setMaxOutliers(1.2d);
        anonymizer.anonymize(data, config);
    } catch (final IllegalArgumentException e) {
        return;
    }
    Assert.fail();
}
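/*
 * The tests above use the try/catch + Assert.fail() idiom. As a minimal sketch (not part of the
 * original suite), the same expectation can be written more compactly with JUnit 4's "expected"
 * attribute; the test name below is illustrative only.
 */
/**
 * Performs a test.
 *
 * @throws IOException
 */
@Test(expected = IllegalArgumentException.class)
public void testEmptyDatasetWithoutAttributeDefinitionExpected() throws IOException {
    final ARXAnonymizer anonymizer = new ARXAnonymizer();
    final Data data = Data.create();
    final ARXConfiguration config = ARXConfiguration.create();
    config.addCriterion(new KAnonymity(2));
    config.setMaxOutliers(1.2d);
    anonymizer.anonymize(data, config);
}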
/**
 * Creates a new NodeChecker instance.
 *
 * @param manager The manager
 * @param metric The metric
 * @param config The anonymization configuration
 * @param historyMaxSize The history max size
 * @param snapshotSizeDataset The history threshold
 * @param snapshotSizeSnapshot The history threshold replacement
 */
public NodeChecker(final DataManager manager,
                   final Metric<?> metric,
                   final ARXConfiguration config,
                   final int historyMaxSize,
                   final double snapshotSizeDataset,
                   final double snapshotSizeSnapshot) {

    // Initialize all operators
    this.metric = metric;
    this.config = config;
    data = manager.getDataQI();

    final int initialSize = (int) (manager.getDataQI().getDataLength() * 0.01d);
    final IntArrayDictionary dictionarySensValue;
    final IntArrayDictionary dictionarySensFreq;
    if ((config.getRequirements() & ARXConfiguration.REQUIREMENT_DISTRIBUTION) != 0) {
        dictionarySensValue = new IntArrayDictionary(initialSize);
        dictionarySensFreq = new IntArrayDictionary(initialSize);
    } else {
        // Just to allow bytecode instrumentation
        dictionarySensValue = new IntArrayDictionary(0);
        dictionarySensFreq = new IntArrayDictionary(0);
    }

    history = new History(manager.getDataQI().getArray().length,
                          historyMaxSize,
                          snapshotSizeDataset,
                          snapshotSizeSnapshot,
                          config,
                          dictionarySensValue,
                          dictionarySensFreq);

    stateMachine = new StateMachine(history);
    currentGroupify = new HashGroupify(initialSize, config);
    lastGroupify = new HashGroupify(initialSize, config);

    transformer = new Transformer(manager.getDataQI().getArray(),
                                  manager.getHierarchies(),
                                  manager.getDataSE().getArray(),
                                  config,
                                  dictionarySensValue,
                                  dictionarySensFreq);
}
@Override
public Data transformAndMarkOutliers(final Node node) {

    // Apply transition and groupify
    currentGroupify.clear();
    currentGroupify = transformer.apply(0L, node.getTransformation(), currentGroupify);

    // Determine outliers and set infoloss
    node.setAnonymous(currentGroupify.isAnonymous());
    if (!node.isChecked()) {
        node.setChecked();
        metric.evaluate(node, currentGroupify);
        node.setTagged();
    }

    // Find outliers
    if (config.getAbsoluteMaxOutliers() != 0) {
        currentGroupify.markOutliers(transformer.getBuffer());
    }

    // Return the buffer
    return getBuffer();
}
/**
 * Entry point.
 *
 * @param args the arguments
 * @throws IOException
 */
public static void main(String[] args) throws IOException {

    // Define data
    Data data = getData();

    // Define attribute types
    data.getDefinition().setAttributeType("age", getHierarchyAge());
    data.getDefinition().setAttributeType("zipcode", getHierarchyZipcode());
    data.getDefinition().setAttributeType("disease1", AttributeType.SENSITIVE_ATTRIBUTE);
    data.getDefinition().setAttributeType("disease2", AttributeType.SENSITIVE_ATTRIBUTE);

    // Create an instance of the anonymizer
    ARXAnonymizer anonymizer = new ARXAnonymizer();
    ARXConfiguration config = ARXConfiguration.create();
    config.addPrivacyModel(new KAnonymity(3));
    config.addPrivacyModel(new HierarchicalDistanceTCloseness("disease1", 0.6d, getHierarchyDisease()));
    config.addPrivacyModel(new RecursiveCLDiversity("disease2", 3d, 2));
    config.setMaxOutliers(0d);
    config.setQualityModel(Metric.createEntropyMetric());

    // Now anonymize
    ARXResult result = anonymizer.anonymize(data, config);

    // Print info
    printResult(result, data);

    // Process results
    if (result.getGlobalOptimum() != null) {
        System.out.println(" - Transformed data:");
        Iterator<String[]> transformed = result.getOutput(false).iterator();
        while (transformed.hasNext()) {
            System.out.print(" ");
            System.out.println(Arrays.toString(transformed.next()));
        }
    }
}
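/*
 * printResult(...) is referenced above but not shown in this excerpt. The sketch below only
 * hints at what such a helper might do (it is not the original implementation); it relies
 * solely on ARXResult.isResultAvailable() and getGlobalOptimum(), and the unused Data parameter
 * is kept to match the call above.
 */
private static void printResult(ARXResult result, Data data) {
    if (!result.isResultAvailable()) {
        System.out.println(" - No solution found");
        return;
    }
    // Print the generalization levels of the optimal transformation
    System.out.println(" - Optimal generalization: "
            + Arrays.toString(result.getGlobalOptimum().getTransformation()));
}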
/**
 * Returns the test cases.
 *
 * @return
 */
@Parameters(name = "{index}:[{0}]")
public static Collection<Object[]> cases() {
    return Arrays.asList(new Object[][] {
        /* 0 */
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, false))
              .addPrivacyModel(new EntropyLDiversity("occupation", 5, EntropyEstimator.GRASSBERGER)),
          "occupation", "./data/adult.csv", 216092.124036387, new int[] {1, 0, 1, 0, 3, 2, 2, 0}, false) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, false))
              .addPrivacyModel(new EntropyLDiversity("occupation", 100, EntropyEstimator.SHANNON)),
          "occupation", "./data/adult.csv", 0.0d, null, false) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, false))
              .addPrivacyModel(new EntropyLDiversity("occupation", 5, EntropyEstimator.GRASSBERGER)),
          "occupation", "./data/adult.csv", 324620.5269918692, new int[] {1, 1, 1, 1, 3, 2, 2, 1}, false) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.05d, Metric.createPrecomputedEntropyMetric(0.1d, false))
              .addPrivacyModel(new EntropyLDiversity("occupation", 3, EntropyEstimator.GRASSBERGER)),
          "occupation", "./data/adult.csv", 180347.4325366015, new int[] {0, 0, 1, 1, 2, 2, 2, 0}, false) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, false))
              .addPrivacyModel(new EntropyLDiversity("occupation", 5, EntropyEstimator.SHANNON)),
          "occupation", "./data/adult.csv", 228878.2039109517, new int[] {1, 0, 1, 1, 2, 2, 2, 1}, true) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.1d, Metric.createPrecomputedEntropyMetric(0.1d, false))
              .addPrivacyModel(new EntropyLDiversity("occupation", 100, EntropyEstimator.GRASSBERGER)),
          "occupation", "./data/adult.csv", 0.0d, null, true) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.04d, Metric.createDiscernabilityMetric(true))
              .addPrivacyModel(new EntropyLDiversity("RAMNTALL", 5, EntropyEstimator.GRASSBERGER)),
          "RAMNTALL", "./data/cup.csv", 1833435.0, new int[] {4, 0, 1, 0, 1, 3, 1}, false) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.03d, Metric.createDiscernabilityMetric(true))
              .addPrivacyModel(new EntropyLDiversity("RAMNTALL", 100, EntropyEstimator.GRASSBERGER)),
          "RAMNTALL", "./data/cup.csv", 4.5168281E7, new int[] {4, 4, 0, 0, 1, 3, 1}, false) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.0d, Metric.createDiscernabilityMetric(true))
              .addPrivacyModel(new EntropyLDiversity("RAMNTALL", 5)),
          "RAMNTALL", "./data/cup.csv", 3.01506905E8, new int[] {4, 4, 1, 1, 1, 4, 4}, false) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.0d, Metric.createDiscernabilityMetric(true))
              .addPrivacyModel(new EntropyLDiversity("RAMNTALL", 3)),
          "RAMNTALL", "./data/cup.csv", 9.2264547E7, new int[] {4, 4, 1, 0, 1, 4, 4}, false) },
        /* 10 */
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.04d, Metric.createDiscernabilityMetric(true))
              .addPrivacyModel(new EntropyLDiversity("RAMNTALL", 5, EntropyEstimator.SHANNON)),
          "RAMNTALL", "./data/cup.csv", 2823649.0, new int[] {4, 0, 0, 1, 1, 3, 1}, true) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.1d, Metric.createDiscernabilityMetric(true))
              .addPrivacyModel(new EntropyLDiversity("RAMNTALL", 100, EntropyEstimator.GRASSBERGER)),
          "RAMNTALL", "./data/cup.csv", 3.4459973E7, new int[] {5, 0, 0, 2, 1, 2, 1}, true) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, true))
              .addPrivacyModel(new EntropyLDiversity("EDUC", 5, EntropyEstimator.GRASSBERGER)),
          "EDUC", "./data/ihis.csv", 7735322.29514608, new int[] {0, 0, 0, 1, 3, 0, 0, 1}, false) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, true))
              .addPrivacyModel(new EntropyLDiversity("EDUC", 2, EntropyEstimator.GRASSBERGER)),
          "EDUC", "./data/ihis.csv", 5428093.534997522, new int[] {0, 0, 0, 0, 2, 0, 0, 1}, false) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, true))
              .addPrivacyModel(new EntropyLDiversity("EDUC", 5, EntropyEstimator.SHANNON)),
          "EDUC", "./data/ihis.csv", 1.2258628558792587E7, new int[] {0, 0, 0, 3, 3, 2, 0, 1}, false) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, true))
              .addPrivacyModel(new EntropyLDiversity("EDUC", 100, EntropyEstimator.GRASSBERGER)),
          "EDUC", "./data/ihis.csv", 0.0d, null, false) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, true))
              .addPrivacyModel(new EntropyLDiversity("EDUC", 5, EntropyEstimator.GRASSBERGER)),
          "EDUC", "./data/ihis.csv", 7735322.29514608, new int[] {0, 0, 0, 1, 3, 0, 0, 1}, true) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.02d, Metric.createPrecomputedEntropyMetric(0.1d, true))
              .addPrivacyModel(new EntropyLDiversity("EDUC", 3, EntropyEstimator.SHANNON)),
          "EDUC", "./data/ihis.csv", 7578152.206004559, new int[] {0, 0, 0, 2, 2, 0, 0, 1}, true) },
    });
}
/**
 * Returns the estimated prosecutor re-identification risk. If the data is anonymous and a
 * prosecutor risk threshold has been configured, the estimate is capped at that threshold
 * (1 otherwise).
 *
 * @return
 */
public double getEstimatedProsecutorRisk() {
    return Math.min(1.0d / (double) getHistogram().getHistogram()[0],
                    config != null && anonymous ? config.getRiskThresholdProsecutor() : 1d);
}
/**
 * Returns the estimated marketer re-identification risk. If the data is anonymous and a
 * marketer risk threshold has been configured, the estimate is capped at that threshold
 * (1 otherwise).
 *
 * @return
 */
public double getEstimatedMarketerRisk() {
    return Math.min(1.0d / getHistogram().getAvgClassSize(),
                    config != null && anonymous ? config.getRiskThresholdMarketer() : 1d);
}
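/*
 * A minimal sketch of the analogous journalist-risk accessor (not part of the original class).
 * It assumes that, on the sample alone, the journalist estimate falls back to the prosecutor
 * formula (smallest equivalence class), and that the configuration exposes
 * getRiskThresholdJournalist(); adjust to the risk model actually in use.
 */
public double getEstimatedJournalistRisk() {
    return Math.min(1.0d / (double) getHistogram().getHistogram()[0],
                    config != null && anonymous ? config.getRiskThresholdJournalist() : 1d);
}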
/**
 * Entry point.
 *
 * @param args the arguments
 * @throws IOException
 */
public static void main(String[] args) throws IOException {

    // Define data
    DefaultData data = Data.create();
    data.add("age", "gender", "zipcode");
    data.add("45", "female", "81675");
    data.add("34", "male", "81667");
    data.add("66", "male", "81925");
    data.add("70", "female", "81931");
    data.add("34", "female", "81931");
    data.add("70", "male", "81931");
    data.add("45", "male", "81931");

    // Define hierarchies
    DefaultHierarchy age = Hierarchy.create();
    age.add("34", "<50", "*");
    age.add("45", "<50", "*");
    age.add("66", ">=50", "*");
    age.add("70", ">=50", "*");

    DefaultHierarchy gender = Hierarchy.create();
    gender.add("male", "*");
    gender.add("female", "*");

    // Only excerpts for readability
    DefaultHierarchy zipcode = Hierarchy.create();
    zipcode.add("81667", "8166*", "816**", "81***", "8****", "*****");
    zipcode.add("81675", "8167*", "816**", "81***", "8****", "*****");
    zipcode.add("81925", "8192*", "819**", "81***", "8****", "*****");
    zipcode.add("81931", "8193*", "819**", "81***", "8****", "*****");

    data.getDefinition().setAttributeType("age", age);
    data.getDefinition().setAttributeType("gender", gender);
    data.getDefinition().setAttributeType("zipcode", zipcode);

    // Create an instance of the anonymizer
    ARXAnonymizer anonymizer = new ARXAnonymizer();
    ARXConfiguration config = ARXConfiguration.create();
    config.addCriterion(new KAnonymity(2));
    config.setMaxOutliers(0d);
    ARXResult result = anonymizer.anonymize(data, config);

    // Print info
    printResult(result, data);

    // Print input
    System.out.println(" - Input data:");
    Iterator<String[]> original = data.getHandle().iterator();
    while (original.hasNext()) {
        System.out.print(" ");
        System.out.println(Arrays.toString(original.next()));
    }

    // Print results
    System.out.println(" - Transformed data:");
    Iterator<String[]> transformed = result.getOutput(false).iterator();
    while (transformed.hasNext()) {
        System.out.print(" ");
        System.out.println(Arrays.toString(transformed.next()));
    }

    // Print frequencies
    StatisticsFrequencyDistribution distribution;
    System.out.println(" - Distribution of attribute 'age' in input:");
    distribution = data.getHandle().getStatistics().getFrequencyDistribution(0, false);
    System.out.println(" " + Arrays.toString(distribution.values));
    System.out.println(" " + Arrays.toString(distribution.frequency));

    // Print frequencies
    System.out.println(" - Distribution of attribute 'age' in output:");
    distribution = result.getOutput(false).getStatistics().getFrequencyDistribution(0, true);
    System.out.println(" " + Arrays.toString(distribution.values));
    System.out.println(" " + Arrays.toString(distribution.frequency));

    // Print contingency tables (columns 0 and 2, i.e. 'age' and 'zipcode')
    StatisticsContingencyTable contingency;
    System.out.println(" - Contingency of attribute 'age' and 'zipcode' in input:");
    contingency = data.getHandle().getStatistics().getContingencyTable(0, true, 2, true);
    System.out.println(" " + Arrays.toString(contingency.values1));
    System.out.println(" " + Arrays.toString(contingency.values2));
    while (contingency.iterator.hasNext()) {
        Entry e = contingency.iterator.next();
        System.out.println(" [" + e.value1 + ", " + e.value2 + ", " + e.frequency + "]");
    }

    // Print contingency tables
    System.out.println(" - Contingency of attribute 'age' and 'zipcode' in output:");
    contingency = result.getOutput(false).getStatistics().getContingencyTable(0, true, 2, true);
    System.out.println(" " + Arrays.toString(contingency.values1));
    System.out.println(" " + Arrays.toString(contingency.values2));
    while (contingency.iterator.hasNext()) {
        Entry e = contingency.iterator.next();
        System.out.println(" [" + e.value1 + ", " + e.value2 + ", " + e.frequency + "]");
    }
}
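/*
 * A small follow-up sketch (not part of the original example): a hypothetical helper that prints
 * the frequency distribution of an arbitrary column of a DataHandle, using the same statistics
 * API as above. The method name and placement are illustrative only.
 */
private static void printDistribution(DataHandle handle, int column) {
    StatisticsFrequencyDistribution distribution =
            handle.getStatistics().getFrequencyDistribution(column, true);
    System.out.println(" " + Arrays.toString(distribution.values));
    System.out.println(" " + Arrays.toString(distribution.frequency));
}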
/**
 * Returns the test cases.
 *
 * @return
 */
@Parameters(name = "{index}:[{0}]")
public static Collection<Object[]> cases() {
    return Arrays.asList(new Object[][] {
        /* 0 */
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, true))
              .addPrivacyModel(new KAnonymity(5)),
          "./data/adult.csv", 255559.85455731067, new int[] {1, 0, 1, 1, 3, 2, 2, 0, 1}, false) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, true))
              .addPrivacyModel(new KAnonymity(100)),
          "./data/adult.csv", 379417.3460570988, new int[] {1, 1, 1, 1, 3, 2, 2, 1, 1}, false) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, true))
              .addPrivacyModel(new KAnonymity(5)),
          "./data/adult.csv", 407289.5388925293, new int[] {1, 2, 1, 1, 3, 2, 2, 1, 1}, false) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, true))
              .addPrivacyModel(new KAnonymity(100)),
          "./data/adult.csv", 453196.8932458743, new int[] {0, 4, 1, 1, 3, 2, 2, 1, 1}, false) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, true))
              .addPrivacyModel(new KAnonymity(5)),
          "./data/adult.csv", 255559.85455731067, new int[] {1, 0, 1, 1, 3, 2, 2, 0, 1}, true) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, true))
              .addPrivacyModel(new KAnonymity(100)),
          "./data/adult.csv", 379417.3460570988, new int[] {1, 1, 1, 1, 3, 2, 2, 1, 1}, true) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, false))
              .addPrivacyModel(new KAnonymity(5)),
          "./data/cup.csv", 1764006.4033760305, new int[] {2, 4, 0, 1, 0, 4, 4, 4}, false) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, false))
              .addPrivacyModel(new KAnonymity(100)),
          "./data/cup.csv", 1994002.8308631124, new int[] {3, 4, 1, 1, 0, 4, 4, 4}, false) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, false))
              .addPrivacyModel(new KAnonymity(5)),
          "./data/cup.csv", 2445878.424834677, new int[] {4, 4, 1, 1, 1, 4, 4, 4}, false) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, false))
              .addPrivacyModel(new KAnonymity(100)),
          "./data/cup.csv", 2517471.5816586106, new int[] {5, 4, 1, 0, 1, 4, 4, 4}, false) },
        /* 10 */
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, false))
              .addPrivacyModel(new KAnonymity(5)),
          "./data/cup.csv", 1764006.4033760305, new int[] {2, 4, 0, 1, 0, 4, 4, 4}, true) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, false))
              .addPrivacyModel(new KAnonymity(100)),
          "./data/cup.csv", 2001343.4737485605, new int[] {3, 4, 1, 1, 0, 1, 2, 1}, true) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.04d, Metric.createDiscernabilityMetric(true))
              .addPrivacyModel(new KAnonymity(5)),
          "./data/fars.csv", 4469271.0, new int[] {0, 2, 2, 2, 1, 2, 1, 0}, false) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.04d, Metric.createDiscernabilityMetric(true))
              .addPrivacyModel(new KAnonymity(100)),
          "./data/fars.csv", 5.6052481E7, new int[] {0, 2, 3, 3, 1, 2, 2, 2}, false) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.0d, Metric.createDiscernabilityMetric(true))
              .addPrivacyModel(new KAnonymity(5)),
          "./data/fars.csv", 1.42377891E8, new int[] {1, 2, 3, 3, 1, 2, 1, 2}, false) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.0d, Metric.createDiscernabilityMetric(true))
              .addPrivacyModel(new KAnonymity(100)),
          "./data/fars.csv", 4.36925397E8, new int[] {5, 2, 3, 3, 1, 2, 0, 2}, false) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.04d, Metric.createDiscernabilityMetric(true))
              .addPrivacyModel(new KAnonymity(5)),
          "./data/fars.csv", 4469271.0, new int[] {0, 2, 2, 2, 1, 2, 1, 0}, true) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.04d, Metric.createDiscernabilityMetric(true))
              .addPrivacyModel(new KAnonymity(100)),
          "./data/fars.csv", 5.6052481E7, new int[] {0, 2, 3, 3, 1, 2, 2, 2}, true) },
    });
}
@Override
protected void initializeInternal(final DataDefinition definition,
                                  final Data input,
                                  final GeneralizationHierarchy[] ahierarchies,
                                  final ARXConfiguration config) {

    // Obtain dictionary
    final Dictionary dictionary = input.getDictionary();

    // Obtain research subset
    RowSet rSubset = null;
    if (config.containsCriterion(DPresence.class)) {
        Set<DPresence> crits = config.getCriteria(DPresence.class);
        if (crits.size() > 1) {
            throw new IllegalArgumentException("Only one d-presence criterion supported!");
        }
        for (DPresence dPresence : crits) {
            rSubset = dPresence.getSubset().getSet();
        }
    }

    // Create reference to the hierarchies
    final int[][] data = input.getArray();
    hierarchies = new int[data[0].length][][];
    for (int i = 0; i < ahierarchies.length; i++) {
        hierarchies[i] = ahierarchies[i].getArray(); // Column -> Id -> Level -> Output
    }

    // Initialize counts
    cardinalities = new int[data[0].length][][];
    for (int i = 0; i < cardinalities.length; i++) {
        // Column -> Id -> Level -> Count
        cardinalities[i] = new int[dictionary.getMapping()[i].length][ahierarchies[i].getArray()[0].length];
    }
    for (int i = 0; i < data.length; i++) {
        // Only use the rows contained in the research subset
        if (rSubset == null || rSubset.contains(i)) {
            final int[] row = data[i];
            for (int column = 0; column < row.length; column++) {
                cardinalities[column][row[column]][0]++;
            }
        }
    }

    // Create counts for other levels
    for (int column = 0; column < hierarchies.length; column++) {
        final int[][] hierarchy = hierarchies[column];
        for (int in = 0; in < hierarchy.length; in++) {
            final int cardinality = cardinalities[column][in][0];
            for (int level = 1; level < hierarchy[in].length; level++) {
                final int out = hierarchy[in][level];
                cardinalities[column][out][level] += cardinality;
            }
        }
    }

    // Create a cache for the results
    cache = new double[hierarchies.length][];
    for (int i = 0; i < cache.length; i++) {
        cache[i] = new double[ahierarchies[i].getArray()[0].length];
        Arrays.fill(cache[i], NA);
    }
}
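/*
 * To illustrate the cardinality pre-computation above, a small standalone sketch (the class name
 * and toy data are illustrative, not part of ARX): level-0 counts of a single dictionary-encoded
 * column are rolled up along one generalization hierarchy, mirroring the loops in
 * initializeInternal.
 */
public class CardinalityRollupSketch {

    public static void main(String[] args) {
        // Toy input column: the seven 'age' values from the example dataset, encoded as
        // 34->0, 45->1, 66->2, 70->3; generalized values get ids 4 ("<50"), 5 (">=50"), 6 ("*")
        int[] column = {1, 0, 2, 3, 0, 3, 1};

        // Id -> Level -> Output id (same layout as GeneralizationHierarchy.getArray())
        int[][] hierarchy = {
            {0, 4, 6}, // 34 -> <50  -> *
            {1, 4, 6}, // 45 -> <50  -> *
            {2, 5, 6}, // 66 -> >=50 -> *
            {3, 5, 6}  // 70 -> >=50 -> *
        };

        int numLevels = hierarchy[0].length;
        int numIds = 7; // ids 0..6, including generalized values
        int[][] cardinalities = new int[numIds][numLevels];

        // Level 0: count each raw value
        for (int value : column) {
            cardinalities[value][0]++;
        }

        // Higher levels: roll the level-0 counts up along the hierarchy
        for (int in = 0; in < hierarchy.length; in++) {
            int cardinality = cardinalities[in][0];
            for (int level = 1; level < hierarchy[in].length; level++) {
                cardinalities[hierarchy[in][level]][level] += cardinality;
            }
        }

        // Prints 4, 3 and 7: two 34s plus two 45s map to "<50", the rest to ">=50", all to "*"
        System.out.println("count(<50, level 1)  = " + cardinalities[4][1]);
        System.out.println("count(>=50, level 1) = " + cardinalities[5][1]);
        System.out.println("count(*, level 2)    = " + cardinalities[6][2]);
    }
}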
/**
 * Returns the test cases.
 *
 * @return
 * @throws IOException
 */
@Parameters(name = "{index}:[{0}]")
public static Collection<Object[]> cases() throws IOException {
    return Arrays.asList(new Object[][] {
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.05d, Metric.createPrecomputedEntropyMetric(0.1d, false))
              .addPrivacyModel(new KMap(3, 0.01d, ARXPopulationModel.create(Region.USA), CellSizeEstimator.ZERO_TRUNCATED_POISSON)),
          "occupation", "./data/adult.csv", 130804.5332092598, new int[] {0, 0, 1, 1, 0, 2, 2, 0}, false) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.05d, Metric.createPrecomputedEntropyMetric(0.1d, false))
              .addPrivacyModel(new KMap(1000, 0.01d, ARXPopulationModel.create(Region.USA), CellSizeEstimator.ZERO_TRUNCATED_POISSON)),
          "occupation", "./data/adult.csv", 151894.1394841501, new int[] {0, 0, 1, 1, 1, 2, 1, 0}, false) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.0d, Metric.createAECSMetric())
              .addPrivacyModel(new KMap(5, DataSubset.create(
                  Data.create("./data/adult.csv", StandardCharsets.UTF_8, ';'),
                  Data.create("./data/adult_subset.csv", StandardCharsets.UTF_8, ';')))),
          "occupation", "./data/adult.csv", 45.014925373134325, new int[] {1, 0, 1, 2, 3, 2, 2, 1}, false) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, true))
              .addPrivacyModel(new KMap(3, DataSubset.create(
                  Data.create("./data/adult.csv", StandardCharsets.UTF_8, ';'),
                  Data.create("./data/adult_subset.csv", StandardCharsets.UTF_8, ';')))),
          "occupation", "./data/adult.csv", 23387.494246375998, new int[] {0, 0, 1, 2, 3, 2, 2, 0}, false) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, false))
              .addPrivacyModel(new KMap(5, DataSubset.create(
                  Data.create("./data/adult.csv", StandardCharsets.UTF_8, ';'),
                  Data.create("./data/adult_subset.csv", StandardCharsets.UTF_8, ';')))),
          "occupation", "./data/adult.csv", 28551.7222913157, new int[] {1, 0, 1, 2, 3, 2, 2, 1}, false) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.05d, Metric.createAECSMetric())
              .addPrivacyModel(new KMap(20, DataSubset.create(
                  Data.create("./data/adult.csv", StandardCharsets.UTF_8, ';'),
                  Data.create("./data/adult_subset.csv", StandardCharsets.UTF_8, ';')))),
          "occupation", "./data/adult.csv", 11.424242424242424, new int[] {1, 0, 1, 1, 3, 2, 1, 0}, false) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.05d, Metric.createPrecomputedEntropyMetric(0.1d, true))
              .addPrivacyModel(new KMap(7, DataSubset.create(
                  Data.create("./data/adult.csv", StandardCharsets.UTF_8, ';'),
                  Data.create("./data/adult_subset.csv", StandardCharsets.UTF_8, ';')))),
          "occupation", "./data/adult.csv", 17075.7181747451, new int[] {0, 0, 1, 1, 2, 2, 2, 0}, false) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.05d, Metric.createPrecomputedEntropyMetric(0.1d, false))
              .addPrivacyModel(new KMap(3, DataSubset.create(
                  Data.create("./data/adult.csv", StandardCharsets.UTF_8, ';'),
                  Data.create("./data/adult_subset.csv", StandardCharsets.UTF_8, ';')))),
          "occupation", "./data/adult.csv", 15121.633326877098, new int[] {0, 0, 1, 1, 1, 2, 1, 0}, false) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.0d, Metric.createAECSMetric())
              .addPrivacyModel(new KMap(5, DataSubset.create(
                  Data.create("./data/adult.csv", StandardCharsets.UTF_8, ';'),
                  Data.create("./data/adult_subset.csv", StandardCharsets.UTF_8, ';')))),
          "occupation", "./data/adult.csv", 45.014925373134325, new int[] {1, 0, 1, 2, 3, 2, 2, 1}, false) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, true))
              .addPrivacyModel(new KMap(2, DataSubset.create(
                  Data.create("./data/adult.csv", StandardCharsets.UTF_8, ';'),
                  Data.create("./data/adult_subset.csv", StandardCharsets.UTF_8, ';')))),
          "occupation", "./data/adult.csv", 23108.1673304724, new int[] {1, 0, 1, 1, 3, 2, 2, 0}, false) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, false))
              .addPrivacyModel(new KMap(10, DataSubset.create(
                  Data.create("./data/adult.csv", StandardCharsets.UTF_8, ';'),
                  Data.create("./data/adult_subset.csv", StandardCharsets.UTF_8, ';')))),
          "occupation", "./data/adult.csv", 30238.2081484441, new int[] {0, 1, 1, 2, 3, 2, 2, 0}, false) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.05d, Metric.createAECSMetric())
              .addPrivacyModel(new KMap(10, DataSubset.create(
                  Data.create("./data/adult.csv", StandardCharsets.UTF_8, ';'),
                  Data.create("./data/adult_subset.csv", StandardCharsets.UTF_8, ';')))),
          "occupation", "./data/adult.csv", 7.215311004784689, new int[] {0, 0, 1, 1, 3, 2, 1, 0}, false) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.05d, Metric.createPrecomputedEntropyMetric(0.1d, true))
              .addPrivacyModel(new KMap(5, DataSubset.create(
                  Data.create("./data/adult.csv", StandardCharsets.UTF_8, ';'),
                  Data.create("./data/adult_subset.csv", StandardCharsets.UTF_8, ';')))),
          "occupation", "./data/adult.csv", 17053.8743069776, new int[] {0, 0, 1, 0, 2, 2, 2, 1}, false) },
        { new ARXAnonymizationTestCase(ARXConfiguration.create(0.05d, Metric.createPrecomputedEntropyMetric(0.1d, false))
              .addPrivacyModel(new KMap(3, DataSubset.create(
                  Data.create("./data/adult.csv", StandardCharsets.UTF_8, ';'),
                  Data.create("./data/adult_subset.csv", StandardCharsets.UTF_8, ';')))),
          "occupation", "./data/adult.csv", 15121.633326877098, new int[] {0, 0, 1, 1, 1, 2, 1, 0}, false) },
    });
}
/**
 * Entry point.
 *
 * @param args the arguments
 * @throws IOException
 */
public static void main(String[] args) throws IOException {

    // Define data
    DefaultData data = Data.create();
    data.add("zipcode", "age", "disease");
    data.add("47677", "29", "gastric ulcer");
    data.add("47602", "22", "gastritis");
    data.add("47678", "27", "stomach cancer");
    data.add("47905", "43", "gastritis");
    data.add("47909", "52", "flu");
    data.add("47906", "47", "bronchitis");
    data.add("47605", "30", "bronchitis");
    data.add("47673", "36", "pneumonia");
    data.add("47607", "32", "stomach cancer");

    // Define hierarchies
    DefaultHierarchy age = Hierarchy.create();
    age.add("29", "<=40", "*");
    age.add("22", "<=40", "*");
    age.add("27", "<=40", "*");
    age.add("43", ">40", "*");
    age.add("52", ">40", "*");
    age.add("47", ">40", "*");
    age.add("30", "<=40", "*");
    age.add("36", "<=40", "*");
    age.add("32", "<=40", "*");

    // Only excerpts for readability
    DefaultHierarchy zipcode = Hierarchy.create();
    zipcode.add("47677", "4767*", "476**", "47***", "4****", "*****");
    zipcode.add("47602", "4760*", "476**", "47***", "4****", "*****");
    zipcode.add("47678", "4767*", "476**", "47***", "4****", "*****");
    zipcode.add("47905", "4790*", "479**", "47***", "4****", "*****");
    zipcode.add("47909", "4790*", "479**", "47***", "4****", "*****");
    zipcode.add("47906", "4790*", "479**", "47***", "4****", "*****");
    zipcode.add("47605", "4760*", "476**", "47***", "4****", "*****");
    zipcode.add("47673", "4767*", "476**", "47***", "4****", "*****");
    zipcode.add("47607", "4760*", "476**", "47***", "4****", "*****");

    // Define sensitive value hierarchy
    DefaultHierarchy disease = Hierarchy.create();
    disease.add("flu", "respiratory infection", "vascular lung disease", "respiratory & digestive system disease");
    disease.add("pneumonia", "respiratory infection", "vascular lung disease", "respiratory & digestive system disease");
    disease.add("bronchitis", "respiratory infection", "vascular lung disease", "respiratory & digestive system disease");
    disease.add("pulmonary edema", "vascular lung disease", "vascular lung disease", "respiratory & digestive system disease");
    disease.add("pulmonary embolism", "vascular lung disease", "vascular lung disease", "respiratory & digestive system disease");
    disease.add("gastric ulcer", "stomach disease", "digestive system disease", "respiratory & digestive system disease");
    disease.add("stomach cancer", "stomach disease", "digestive system disease", "respiratory & digestive system disease");
    disease.add("gastritis", "stomach disease", "digestive system disease", "respiratory & digestive system disease");
    disease.add("colitis", "colon disease", "digestive system disease", "respiratory & digestive system disease");
    disease.add("colon cancer", "colon disease", "digestive system disease", "respiratory & digestive system disease");

    data.getDefinition().setAttributeType("age", age);
    data.getDefinition().setAttributeType("zipcode", zipcode);
    data.getDefinition().setAttributeType("disease", AttributeType.SENSITIVE_ATTRIBUTE);

    // Create an instance of the anonymizer
    ARXAnonymizer anonymizer = new ARXAnonymizer();
    ARXConfiguration config = ARXConfiguration.create();
    config.addCriterion(new KAnonymity(3));
    config.addCriterion(new HierarchicalDistanceTCloseness("disease", 0.6d, disease));
    config.setMaxOutliers(0d);
    config.setMetric(Metric.createEntropyMetric());

    // Now anonymize
    ARXResult result = anonymizer.anonymize(data, config);

    // Print info
    printResult(result, data);

    // Process results
    System.out.println(" - Transformed data:");
    Iterator<String[]> transformed = result.getOutput(false).iterator();
    while (transformed.hasNext()) {
        System.out.print(" ");
        System.out.println(Arrays.toString(transformed.next()));
    }
}
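/*
 * A short follow-up sketch (not part of the original example), assuming the DataHandle returned
 * by getOutput(false) offers save(String, char) in the ARX version at hand: write the transformed
 * data to a semicolon-separated CSV file. The helper name and file name are illustrative only.
 */
private static void saveResult(ARXResult result) throws IOException {
    // Assumed API: DataHandle.save(String path, char separator)
    result.getOutput(false).save("anonymized.csv", ';');
}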