/**
 * Checks the given node, evaluates the metric for it against the current
 * groupify and returns the value of the node's information loss.
 *
 * @param node the node to check and evaluate
 * @return the value of the information loss stored on the node
 */
@Override
@Deprecated
public double getInformationLoss(final Node node) {

    // Run the regular check first; it updates currentGroupify for this node
    check(node);

    // Evaluate unconditionally, as check() only evaluates anonymous nodes
    metric.evaluate(node, currentGroupify);

    return node.getInformationLoss().getValue();
}
@Override public void check(final Node node) { // Store snapshot from last check if (stateMachine.getLastNode() != null) { history.store( stateMachine.getLastNode(), currentGroupify, stateMachine.getLastTransition().snapshot); } // Transition final Transition transition = stateMachine.transition(node); // Switch groupifies final IHashGroupify temp = lastGroupify; lastGroupify = currentGroupify; currentGroupify = temp; currentGroupify.clear(); // Apply transition switch (transition.type) { case UNOPTIMIZED: currentGroupify = transformer.apply(transition.projection, node.getTransformation(), currentGroupify); break; case ROLLUP: currentGroupify = transformer.applyRollup( transition.projection, node.getTransformation(), lastGroupify, currentGroupify); break; case SNAPSHOT: currentGroupify = transformer.applySnapshot( transition.projection, node.getTransformation(), currentGroupify, transition.snapshot); break; } // Mark as checked node.setChecked(); // Propagate k-anonymity node.setKAnonymous(currentGroupify.isKAnonymous()); // Propagate anonymity and information loss if (currentGroupify.isAnonymous()) { node.setAnonymous(true); metric.evaluate(node, currentGroupify); } else { node.setInformationLoss(null); node.setAnonymous(false); } }
@Override @Deprecated public Data transform(final Node node) { // Apply transition and groupify currentGroupify.clear(); currentGroupify = transformer.apply(0L, node.getTransformation(), currentGroupify); // Determine outliers and set infoloss if (!node.isChecked()) { node.setChecked(); node.setAnonymous(currentGroupify.isAnonymous()); metric.evaluate(node, currentGroupify); node.setTagged(); } return getBuffer(); }
@Override public Data transformAndMarkOutliers(final Node node) { // Apply transition and groupify currentGroupify.clear(); currentGroupify = transformer.apply(0L, node.getTransformation(), currentGroupify); // Determine outliers and set infoloss node.setAnonymous(currentGroupify.isAnonymous()); if (!node.isChecked()) { node.setChecked(); metric.evaluate(node, currentGroupify); node.setTagged(); } // Find outliers if (config.getAbsoluteMaxOutliers() != 0) { currentGroupify.markOutliers(transformer.getBuffer()); } // Return the buffer return getBuffer(); }
/** * Entry point. * * @param args the arguments */ public static void main(String[] args) throws IOException { // Define data Data data = getData(); // Define attribute types data.getDefinition().setAttributeType("age", getHierarchyAge()); data.getDefinition().setAttributeType("zipcode", getHierarchyZipcode()); data.getDefinition().setAttributeType("disease1", AttributeType.SENSITIVE_ATTRIBUTE); data.getDefinition().setAttributeType("disease2", AttributeType.SENSITIVE_ATTRIBUTE); // Create an instance of the anonymizer ARXAnonymizer anonymizer = new ARXAnonymizer(); ARXConfiguration config = ARXConfiguration.create(); config.addPrivacyModel(new KAnonymity(3)); config.addPrivacyModel( new HierarchicalDistanceTCloseness("disease1", 0.6d, getHierarchyDisease())); config.addPrivacyModel(new RecursiveCLDiversity("disease2", 3d, 2)); config.setMaxOutliers(0d); config.setQualityModel(Metric.createEntropyMetric()); // Now anonymize ARXResult result = anonymizer.anonymize(data, config); // Print info printResult(result, data); // Process results if (result.getGlobalOptimum() != null) { System.out.println(" - Transformed data:"); Iterator<String[]> transformed = result.getOutput(false).iterator(); while (transformed.hasNext()) { System.out.print(" "); System.out.println(Arrays.toString(transformed.next())); } } }
/**
 * Returns the test cases. All cases use "occupation" as the sensitive
 * attribute and "./data/adult.csv" as the dataset.
 *
 * @return the parameter sets, one test case per entry
 * @throws IOException if creating the data for a subset-based model fails
 */
@Parameters(name = "{index}:[{0}]")
public static Collection<Object[]> cases() throws IOException {
    return Arrays.asList(new Object[][] {
        adultCase(ARXConfiguration.create(0.05d, Metric.createPrecomputedEntropyMetric(0.1d, false)), populationKMap(3), 130804.5332092598, new int[] {0, 0, 1, 1, 0, 2, 2, 0}),
        adultCase(ARXConfiguration.create(0.05d, Metric.createPrecomputedEntropyMetric(0.1d, false)), populationKMap(1000), 151894.1394841501, new int[] {0, 0, 1, 1, 1, 2, 1, 0}),
        adultCase(ARXConfiguration.create(0.0d, Metric.createAECSMetric()), subsetKMap(5), 45.014925373134325, new int[] {1, 0, 1, 2, 3, 2, 2, 1}),
        adultCase(ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, true)), subsetKMap(3), 23387.494246375998, new int[] {0, 0, 1, 2, 3, 2, 2, 0}),
        adultCase(ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, false)), subsetKMap(5), 28551.7222913157, new int[] {1, 0, 1, 2, 3, 2, 2, 1}),
        adultCase(ARXConfiguration.create(0.05d, Metric.createAECSMetric()), subsetKMap(20), 11.424242424242424, new int[] {1, 0, 1, 1, 3, 2, 1, 0}),
        adultCase(ARXConfiguration.create(0.05d, Metric.createPrecomputedEntropyMetric(0.1d, true)), subsetKMap(7), 17075.7181747451, new int[] {0, 0, 1, 1, 2, 2, 2, 0}),
        adultCase(ARXConfiguration.create(0.05d, Metric.createPrecomputedEntropyMetric(0.1d, false)), subsetKMap(3), 15121.633326877098, new int[] {0, 0, 1, 1, 1, 2, 1, 0}),
        adultCase(ARXConfiguration.create(0.0d, Metric.createAECSMetric()), subsetKMap(5), 45.014925373134325, new int[] {1, 0, 1, 2, 3, 2, 2, 1}),
        adultCase(ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, true)), subsetKMap(2), 23108.1673304724, new int[] {1, 0, 1, 1, 3, 2, 2, 0}),
        adultCase(ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, false)), subsetKMap(10), 30238.2081484441, new int[] {0, 1, 1, 2, 3, 2, 2, 0}),
        adultCase(ARXConfiguration.create(0.05d, Metric.createAECSMetric()), subsetKMap(10), 7.215311004784689, new int[] {0, 0, 1, 1, 3, 2, 1, 0}),
        adultCase(ARXConfiguration.create(0.05d, Metric.createPrecomputedEntropyMetric(0.1d, true)), subsetKMap(5), 17053.8743069776, new int[] {0, 0, 1, 0, 2, 2, 2, 1}),
        adultCase(ARXConfiguration.create(0.05d, Metric.createPrecomputedEntropyMetric(0.1d, false)), subsetKMap(3), 15121.633326877098, new int[] {0, 0, 1, 1, 1, 2, 1, 0}),
    });
}

/** Wraps one non-practical test case on the adult dataset into a parameter array. */
private static Object[] adultCase(ARXConfiguration config,
                                  KMap model,
                                  double expectedLoss,
                                  int[] expectedTransformation) {
    return new Object[] {
        new ARXAnonymizationTestCase(config.addPrivacyModel(model),
                                     "occupation",
                                     "./data/adult.csv",
                                     expectedLoss,
                                     expectedTransformation,
                                     false)
    };
}

/** Creates a k-map model based on the USA population model and the zero-truncated Poisson estimator. */
private static KMap populationKMap(int k) {
    return new KMap(k,
                    0.01d,
                    ARXPopulationModel.create(Region.USA),
                    CellSizeEstimator.ZERO_TRUNCATED_POISSON);
}

/** Creates a k-map model based on a data subset built from adult.csv and adult_subset.csv. */
private static KMap subsetKMap(int k) throws IOException {
    return new KMap(k,
                    DataSubset.create(Data.create("./data/adult.csv", StandardCharsets.UTF_8, ';'),
                                      Data.create("./data/adult_subset.csv", StandardCharsets.UTF_8, ';')));
}
/** * Update the view. * * @param part */ protected void doUpdate(ModelPart part) { // Check model if (model == null) { return; } // Obtain definition DataDefinition definition = model.getOutputDefinition(); if (definition == null) definition = model.getInputDefinition(); // Obtain relevant configuration objects; ModelConfiguration config = null; Metric<?> metric = null; if (model.getOutputConfig() != null) { config = model.getOutputConfig(); metric = config.getMetric(); // We don't need to update in many cases, if we are displaying an output configuration if (part == ModelPart.ATTRIBUTE_TYPE || part == ModelPart.METRIC || part == ModelPart.ATTRIBUTE_WEIGHT || part == ModelPart.GS_FACTOR || part == ModelPart.MAX_OUTLIERS || part == ModelPart.DATA_TYPE || part == ModelPart.FINANCIAL_MODEL) { return; } } else { config = model.getInputConfig(); // TODO: This is such an ugly hack metric = model.getMetricDescription().createInstance(model.getMetricConfiguration()); } // Check if (definition == null || config == null || model.getInputConfig().getInput() == null) { reset(); return; } // Obtain handle DataHandle data = model.getInputConfig().getInput().getHandle(); // Disable redrawing root.setRedraw(false); // Clear roots.clear(); // Print basic properties new Property( Resources.getMessage("PropertiesView.9"), new String[] {String.valueOf(data.getNumRows())}); // $NON-NLS-1$ new Property( Resources.getMessage("PropertiesView.10"), new String[] { SWTUtil.getPrettyString(config.getAllowedOutliers() * 100d) + Resources.getMessage("PropertiesView.11") }); //$NON-NLS-1$ //$NON-NLS-2$ // Utility measure Property m = new Property( Resources.getMessage("PropertiesView.114"), new String[] {metric.getDescription().getName()}); // $NON-NLS-1$ // Properties of the utility measure if (metric.getAggregateFunction() != null) { new Property( m, Resources.getMessage("PropertiesView.149"), new String[] {metric.getAggregateFunction().toString()}); // $NON-NLS-1$ } if 
(metric.isGSFactorSupported()) { new Property( m, Resources.getMessage("PropertiesView.151"), new String[] { SWTUtil.getPrettyString(metric.getGeneralizationSuppressionFactor()) }); //$NON-NLS-1$ new Property( m, Resources.getMessage("PropertiesView.152"), new String[] {SWTUtil.getPrettyString(metric.getGeneralizationFactor())}); // $NON-NLS-1$ new Property( m, Resources.getMessage("PropertiesView.153"), new String[] {SWTUtil.getPrettyString(metric.getSuppressionFactor())}); // $NON-NLS-1$ } new Property( m, Resources.getMessage("PropertiesView.155"), new String[] {SWTUtil.getPrettyString(metric.isMonotonic())}); // $NON-NLS-1$ new Property( m, Resources.getMessage("PropertiesView.156"), new String[] {SWTUtil.getPrettyString(metric.isWeighted())}); // $NON-NLS-1$ new Property( m, Resources.getMessage("PropertiesView.157"), new String[] {SWTUtil.getPrettyString(metric.isPrecomputed())}); // $NON-NLS-1$ new Property( m, Resources.getMessage("PropertiesView.158"), new String[] { SWTUtil.getPrettyString(metric.isAbleToHandleMicroaggregation()) }); //$NON-NLS-1$ // Financial configuration if (metric instanceof MetricSDNMPublisherPayout) { // Obtain for output data ARXFinancialConfiguration financial = ((MetricSDNMPublisherPayout) metric).getFinancialConfiguration(); // Obtain for input only. This is a bit ugly. 
if (financial == null) { financial = ARXFinancialConfiguration.create(); financial .setAdversaryCost(config.getAdversaryCost()) .setAdversaryGain(config.getAdversaryGain()) .setPublisherBenefit(config.getPublisherBenefit()) .setPublisherLoss(config.getPublisherLoss()); } // Render new Property( m, Resources.getMessage("PropertiesView.135"), new String[] {SWTUtil.getPrettyString(financial.getPublisherBenefit())}); // $NON-NLS-1$ new Property( m, Resources.getMessage("PropertiesView.136"), new String[] {SWTUtil.getPrettyString(financial.getPublisherLoss())}); // $NON-NLS-1$ new Property( m, Resources.getMessage("PropertiesView.137"), new String[] {SWTUtil.getPrettyString(financial.getAdversaryGain())}); // $NON-NLS-1$ new Property( m, Resources.getMessage("PropertiesView.138"), new String[] {SWTUtil.getPrettyString(financial.getAdversaryCost())}); // $NON-NLS-1$ if (((MetricSDNMPublisherPayout) metric).isProsecutorAttackerModel()) { new Property( m, Resources.getMessage("PropertiesView.139"), new String[] {Resources.getMessage("PropertiesView.160")}); // $NON-NLS-1$ //$NON-NLS-2$ } if (((MetricSDNMPublisherPayout) metric).isJournalistAttackerModel()) { new Property( m, Resources.getMessage("PropertiesView.139"), new String[] {Resources.getMessage("PropertiesView.161")}); // $NON-NLS-1$ //$NON-NLS-2$ } } // Attributes final Property attributes = new Property( Resources.getMessage("PropertiesView.12"), new String[] {String.valueOf(data.getNumColumns())}); // $NON-NLS-1$ // Print identifying attributes final Property identifying = new Property( attributes, Resources.getMessage("PropertiesView.13"), new String[] { String.valueOf(definition.getIdentifyingAttributes().size()) }); //$NON-NLS-1$ int index = 0; for (int i = 0; i < data.getNumColumns(); i++) { final String s = data.getAttributeName(i); if (definition.getIdentifyingAttributes().contains(s)) { final String[] values = new String[] { "", "", "", "", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ 
//$NON-NLS-5$ values[0] = s; values[1] = definition.getDataType(s).toString(); new Property( identifying, Resources.getMessage("PropertiesView.19") + (index++), values); //$NON-NLS-1$ } } // Print quasi-identifying attributes final Property quasiIdentifying = new Property( attributes, Resources.getMessage("PropertiesView.20"), new String[] { String.valueOf(definition.getQuasiIdentifyingAttributes().size()) }); //$NON-NLS-1$ index = 0; for (int i = 0; i < data.getNumColumns(); i++) { final String s = data.getAttributeName(i); if (definition.getQuasiIdentifyingAttributes().contains(s)) { final String[] values = new String[] { "", "", "", "", "", "", "", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$ //$NON-NLS-6$ // //$NON-NLS-7$ //$NON-NLS-8$ values[0] = s; if (definition.getHierarchy(s) != null) { DataType<?> type = definition.getDataType(s); values[1] = type.getDescription().getLabel(); if (type.getDescription().hasFormat() && ((DataTypeWithFormat) type).getFormat() != null) { values[2] = ((DataTypeWithFormat) type).getFormat(); } // Determine height of hierarchy int height = 0; String[][] hierarchy = definition.getHierarchy(s); if (hierarchy != null && hierarchy.length != 0 && hierarchy[0] != null) { height = hierarchy[0].length; } values[3] = String.valueOf(height); values[4] = String.valueOf(definition.getMinimumGeneralization(s)); values[5] = String.valueOf(definition.getMaximumGeneralization(s)); } if (definition.getMicroAggregationFunction(s) != null) { values[7] = definition.getMicroAggregationFunction(s).getLabel(); } values[6] = SWTUtil.getPrettyString(config.getAttributeWeight(s)); new Property( quasiIdentifying, Resources.getMessage("PropertiesView.26") + (index++), values); //$NON-NLS-1$ } } // Print sensitive attributes final Property sensitive = new Property( attributes, Resources.getMessage("PropertiesView.27"), new String[] { String.valueOf(definition.getSensitiveAttributes().size()) }); //$NON-NLS-1$ index = 0; for 
(int i = 0; i < data.getNumColumns(); i++) { final String s = data.getAttributeName(i); if (definition.getSensitiveAttributes().contains(s)) { final String[] values = new String[] { "", "", "", "", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$ values[0] = s; if (config.getHierarchy(s) != null && config.getHierarchy(s).getHierarchy() != null) { int height = 0; if (config.getHierarchy(s).getHierarchy().length > 0) { height = config.getHierarchy(s).getHierarchy()[0].length; } values[1] = definition.getDataType(s).toString(); values[2] = String.valueOf(height); } new Property( sensitive, Resources.getMessage("PropertiesView.33") + (index++), values); //$NON-NLS-1$ } } // Print insensitive attributes final Property insensitive = new Property( attributes, Resources.getMessage("PropertiesView.34"), new String[] { String.valueOf(definition.getInsensitiveAttributes().size()) }); //$NON-NLS-1$ index = 0; for (int i = 0; i < data.getNumColumns(); i++) { final String s = data.getAttributeName(i); if (definition.getInsensitiveAttributes().contains(s)) { final String[] values = new String[] { "", "", "", "", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$ values[0] = s; values[1] = definition.getDataType(s).toString(); new Property( insensitive, Resources.getMessage("PropertiesView.40") + (index++), values); //$NON-NLS-1$ } } // Refresh and initialize refresh(); // Redraw root.setRedraw(true); }
/**
 * Returns the test cases. Each case combines a configuration with a
 * k-anonymity model, a dataset, the expected information loss, the expected
 * transformation and the practical flag.
 *
 * @return the parameter sets, one test case per entry
 */
@Parameters(name = "{index}:[{0}]")
public static Collection<Object[]> cases() {
    return Arrays.asList(new Object[][] {
        /* 0 */
        kAnonymityCase(ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, true)), 5, "./data/adult.csv", 255559.85455731067, new int[] {1, 0, 1, 1, 3, 2, 2, 0, 1}, false),
        kAnonymityCase(ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, true)), 100, "./data/adult.csv", 379417.3460570988, new int[] {1, 1, 1, 1, 3, 2, 2, 1, 1}, false),
        kAnonymityCase(ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, true)), 5, "./data/adult.csv", 407289.5388925293, new int[] {1, 2, 1, 1, 3, 2, 2, 1, 1}, false),
        kAnonymityCase(ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, true)), 100, "./data/adult.csv", 453196.8932458743, new int[] {0, 4, 1, 1, 3, 2, 2, 1, 1}, false),
        kAnonymityCase(ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, true)), 5, "./data/adult.csv", 255559.85455731067, new int[] {1, 0, 1, 1, 3, 2, 2, 0, 1}, true),
        kAnonymityCase(ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, true)), 100, "./data/adult.csv", 379417.3460570988, new int[] {1, 1, 1, 1, 3, 2, 2, 1, 1}, true),
        kAnonymityCase(ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, false)), 5, "./data/cup.csv", 1764006.4033760305, new int[] {2, 4, 0, 1, 0, 4, 4, 4}, false),
        kAnonymityCase(ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, false)), 100, "./data/cup.csv", 1994002.8308631124, new int[] {3, 4, 1, 1, 0, 4, 4, 4}, false),
        kAnonymityCase(ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, false)), 5, "./data/cup.csv", 2445878.424834677, new int[] {4, 4, 1, 1, 1, 4, 4, 4}, false),
        kAnonymityCase(ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, false)), 100, "./data/cup.csv", 2517471.5816586106, new int[] {5, 4, 1, 0, 1, 4, 4, 4}, false),
        /* 10 */
        kAnonymityCase(ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, false)), 5, "./data/cup.csv", 1764006.4033760305, new int[] {2, 4, 0, 1, 0, 4, 4, 4}, true),
        kAnonymityCase(ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, false)), 100, "./data/cup.csv", 2001343.4737485605, new int[] {3, 4, 1, 1, 0, 1, 2, 1}, true),
        kAnonymityCase(ARXConfiguration.create(0.04d, Metric.createDiscernabilityMetric(true)), 5, "./data/fars.csv", 4469271.0, new int[] {0, 2, 2, 2, 1, 2, 1, 0}, false),
        kAnonymityCase(ARXConfiguration.create(0.04d, Metric.createDiscernabilityMetric(true)), 100, "./data/fars.csv", 5.6052481E7, new int[] {0, 2, 3, 3, 1, 2, 2, 2}, false),
        kAnonymityCase(ARXConfiguration.create(0.0d, Metric.createDiscernabilityMetric(true)), 5, "./data/fars.csv", 1.42377891E8, new int[] {1, 2, 3, 3, 1, 2, 1, 2}, false),
        kAnonymityCase(ARXConfiguration.create(0.0d, Metric.createDiscernabilityMetric(true)), 100, "./data/fars.csv", 4.36925397E8, new int[] {5, 2, 3, 3, 1, 2, 0, 2}, false),
        kAnonymityCase(ARXConfiguration.create(0.04d, Metric.createDiscernabilityMetric(true)), 5, "./data/fars.csv", 4469271.0, new int[] {0, 2, 2, 2, 1, 2, 1, 0}, true),
        kAnonymityCase(ARXConfiguration.create(0.04d, Metric.createDiscernabilityMetric(true)), 100, "./data/fars.csv", 5.6052481E7, new int[] {0, 2, 3, 3, 1, 2, 2, 2}, true),
    });
}

/** Adds a k-anonymity model to the configuration and wraps the resulting test case into a parameter array. */
private static Object[] kAnonymityCase(ARXConfiguration config,
                                       int k,
                                       String dataset,
                                       double expectedLoss,
                                       int[] expectedTransformation,
                                       boolean practical) {
    return new Object[] {
        new ARXAnonymizationTestCase(config.addPrivacyModel(new KAnonymity(k)),
                                     dataset,
                                     expectedLoss,
                                     expectedTransformation,
                                     practical)
    };
}
/**
 * Returns the test cases. Each case combines a configuration with an
 * entropy-l-diversity model, the sensitive attribute, a dataset, the
 * expected information loss, the expected transformation (or {@code null})
 * and the practical flag.
 *
 * @return the parameter sets, one test case per entry
 */
@Parameters(name = "{index}:[{0}]")
public static Collection<Object[]> cases() {
    return Arrays.asList(new Object[][] {
        /* 0 */
        lDiversityCase(ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, false)), new EntropyLDiversity("occupation", 5, EntropyEstimator.GRASSBERGER), "occupation", "./data/adult.csv", 216092.124036387, new int[] {1, 0, 1, 0, 3, 2, 2, 0}, false),
        lDiversityCase(ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, false)), new EntropyLDiversity("occupation", 100, EntropyEstimator.SHANNON), "occupation", "./data/adult.csv", 0.0d, null, false),
        lDiversityCase(ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, false)), new EntropyLDiversity("occupation", 5, EntropyEstimator.GRASSBERGER), "occupation", "./data/adult.csv", 324620.5269918692, new int[] {1, 1, 1, 1, 3, 2, 2, 1}, false),
        lDiversityCase(ARXConfiguration.create(0.05d, Metric.createPrecomputedEntropyMetric(0.1d, false)), new EntropyLDiversity("occupation", 3, EntropyEstimator.GRASSBERGER), "occupation", "./data/adult.csv", 180347.4325366015, new int[] {0, 0, 1, 1, 2, 2, 2, 0}, false),
        lDiversityCase(ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, false)), new EntropyLDiversity("occupation", 5, EntropyEstimator.SHANNON), "occupation", "./data/adult.csv", 228878.2039109517, new int[] {1, 0, 1, 1, 2, 2, 2, 1}, true),
        lDiversityCase(ARXConfiguration.create(0.1d, Metric.createPrecomputedEntropyMetric(0.1d, false)), new EntropyLDiversity("occupation", 100, EntropyEstimator.GRASSBERGER), "occupation", "./data/adult.csv", 0.0d, null, true),
        lDiversityCase(ARXConfiguration.create(0.04d, Metric.createDiscernabilityMetric(true)), new EntropyLDiversity("RAMNTALL", 5, EntropyEstimator.GRASSBERGER), "RAMNTALL", "./data/cup.csv", 1833435.0, new int[] {4, 0, 1, 0, 1, 3, 1}, false),
        lDiversityCase(ARXConfiguration.create(0.03d, Metric.createDiscernabilityMetric(true)), new EntropyLDiversity("RAMNTALL", 100, EntropyEstimator.GRASSBERGER), "RAMNTALL", "./data/cup.csv", 4.5168281E7, new int[] {4, 4, 0, 0, 1, 3, 1}, false),
        lDiversityCase(ARXConfiguration.create(0.0d, Metric.createDiscernabilityMetric(true)), new EntropyLDiversity("RAMNTALL", 5), "RAMNTALL", "./data/cup.csv", 3.01506905E8, new int[] {4, 4, 1, 1, 1, 4, 4}, false),
        lDiversityCase(ARXConfiguration.create(0.0d, Metric.createDiscernabilityMetric(true)), new EntropyLDiversity("RAMNTALL", 3), "RAMNTALL", "./data/cup.csv", 9.2264547E7, new int[] {4, 4, 1, 0, 1, 4, 4}, false),
        /* 10 */
        lDiversityCase(ARXConfiguration.create(0.04d, Metric.createDiscernabilityMetric(true)), new EntropyLDiversity("RAMNTALL", 5, EntropyEstimator.SHANNON), "RAMNTALL", "./data/cup.csv", 2823649.0, new int[] {4, 0, 0, 1, 1, 3, 1}, true),
        lDiversityCase(ARXConfiguration.create(0.1d, Metric.createDiscernabilityMetric(true)), new EntropyLDiversity("RAMNTALL", 100, EntropyEstimator.GRASSBERGER), "RAMNTALL", "./data/cup.csv", 3.4459973E7, new int[] {5, 0, 0, 2, 1, 2, 1}, true),
        lDiversityCase(ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, true)), new EntropyLDiversity("EDUC", 5, EntropyEstimator.GRASSBERGER), "EDUC", "./data/ihis.csv", 7735322.29514608, new int[] {0, 0, 0, 1, 3, 0, 0, 1}, false),
        lDiversityCase(ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, true)), new EntropyLDiversity("EDUC", 2, EntropyEstimator.GRASSBERGER), "EDUC", "./data/ihis.csv", 5428093.534997522, new int[] {0, 0, 0, 0, 2, 0, 0, 1}, false),
        lDiversityCase(ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, true)), new EntropyLDiversity("EDUC", 5, EntropyEstimator.SHANNON), "EDUC", "./data/ihis.csv", 1.2258628558792587E7, new int[] {0, 0, 0, 3, 3, 2, 0, 1}, false),
        lDiversityCase(ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, true)), new EntropyLDiversity("EDUC", 100, EntropyEstimator.GRASSBERGER), "EDUC", "./data/ihis.csv", 0.0d, null, false),
        lDiversityCase(ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, true)), new EntropyLDiversity("EDUC", 5, EntropyEstimator.GRASSBERGER), "EDUC", "./data/ihis.csv", 7735322.29514608, new int[] {0, 0, 0, 1, 3, 0, 0, 1}, true),
        lDiversityCase(ARXConfiguration.create(0.02d, Metric.createPrecomputedEntropyMetric(0.1d, true)), new EntropyLDiversity("EDUC", 3, EntropyEstimator.SHANNON), "EDUC", "./data/ihis.csv", 7578152.206004559, new int[] {0, 0, 0, 2, 2, 0, 0, 1}, true),
    });
}

/** Adds the given model to the configuration and wraps the resulting test case into a parameter array. */
private static Object[] lDiversityCase(ARXConfiguration config,
                                       EntropyLDiversity model,
                                       String sensitiveAttribute,
                                       String dataset,
                                       double expectedLoss,
                                       int[] expectedTransformation,
                                       boolean practical) {
    return new Object[] {
        new ARXAnonymizationTestCase(config.addPrivacyModel(model),
                                     sensitiveAttribute,
                                     dataset,
                                     expectedLoss,
                                     expectedTransformation,
                                     practical)
    };
}
/** * Entry point. * * @param args the arguments * @throws IOException */ public static void main(String[] args) throws IOException { // Define data DefaultData data = Data.create(); data.add("zipcode", "age", "disease"); data.add("47677", "29", "gastric ulcer"); data.add("47602", "22", "gastritis"); data.add("47678", "27", "stomach cancer"); data.add("47905", "43", "gastritis"); data.add("47909", "52", "flu"); data.add("47906", "47", "bronchitis"); data.add("47605", "30", "bronchitis"); data.add("47673", "36", "pneumonia"); data.add("47607", "32", "stomach cancer"); // Define hierarchies DefaultHierarchy age = Hierarchy.create(); age.add("29", "<=40", "*"); age.add("22", "<=40", "*"); age.add("27", "<=40", "*"); age.add("43", ">40", "*"); age.add("52", ">40", "*"); age.add("47", ">40", "*"); age.add("30", "<=40", "*"); age.add("36", "<=40", "*"); age.add("32", "<=40", "*"); // Only excerpts for readability DefaultHierarchy zipcode = Hierarchy.create(); zipcode.add("47677", "4767*", "476**", "47***", "4****", "*****"); zipcode.add("47602", "4760*", "476**", "47***", "4****", "*****"); zipcode.add("47678", "4767*", "476**", "47***", "4****", "*****"); zipcode.add("47905", "4790*", "479**", "47***", "4****", "*****"); zipcode.add("47909", "4790*", "479**", "47***", "4****", "*****"); zipcode.add("47906", "4790*", "479**", "47***", "4****", "*****"); zipcode.add("47605", "4760*", "476**", "47***", "4****", "*****"); zipcode.add("47673", "4767*", "476**", "47***", "4****", "*****"); zipcode.add("47607", "4760*", "476**", "47***", "4****", "*****"); // Define sensitive value hierarchy DefaultHierarchy disease = Hierarchy.create(); disease.add( "flu", "respiratory infection", "vascular lung disease", "respiratory & digestive system disease"); disease.add( "pneumonia", "respiratory infection", "vascular lung disease", "respiratory & digestive system disease"); disease.add( "bronchitis", "respiratory infection", "vascular lung disease", "respiratory & digestive system 
disease"); disease.add( "pulmonary edema", "vascular lung disease", "vascular lung disease", "respiratory & digestive system disease"); disease.add( "pulmonary embolism", "vascular lung disease", "vascular lung disease", "respiratory & digestive system disease"); disease.add( "gastric ulcer", "stomach disease", "digestive system disease", "respiratory & digestive system disease"); disease.add( "stomach cancer", "stomach disease", "digestive system disease", "respiratory & digestive system disease"); disease.add( "gastritis", "stomach disease", "digestive system disease", "respiratory & digestive system disease"); disease.add( "colitis", "colon disease", "digestive system disease", "respiratory & digestive system disease"); disease.add( "colon cancer", "colon disease", "digestive system disease", "respiratory & digestive system disease"); data.getDefinition().setAttributeType("age", age); data.getDefinition().setAttributeType("zipcode", zipcode); data.getDefinition().setAttributeType("disease", AttributeType.SENSITIVE_ATTRIBUTE); // Create an instance of the anonymizer ARXAnonymizer anonymizer = new ARXAnonymizer(); ARXConfiguration config = ARXConfiguration.create(); config.addCriterion(new KAnonymity(3)); config.addCriterion(new HierarchicalDistanceTCloseness("disease", 0.6d, disease)); config.setMaxOutliers(0d); config.setMetric(Metric.createEntropyMetric()); // Now anonymize ARXResult result = anonymizer.anonymize(data, config); // Print info printResult(result, data); // Process results System.out.println(" - Transformed data:"); Iterator<String[]> transformed = result.getOutput(false).iterator(); while (transformed.hasNext()) { System.out.print(" "); System.out.println(Arrays.toString(transformed.next())); } }