/**
 * Builds the generalization hierarchy used for the disease attribute.
 *
 * <p>Every row starts with a concrete disease and continues with three increasingly coarse
 * categories; all rows end in the same top-level category.
 *
 * @return the populated disease hierarchy
 */
private static Hierarchy getHierarchyDisease() {
    final String top = "respiratory & digestive system disease";
    final String lung = "vascular lung disease";
    final String digestive = "digestive system disease";

    DefaultHierarchy disease = Hierarchy.create();

    // Rows are added group by group; the order of the rows matches the flat listing
    for (String name : new String[] {"flu", "pneumonia", "bronchitis"}) {
        disease.add(name, "respiratory infection", lung, top);
    }
    for (String name : new String[] {"pulmonary edema", "pulmonary embolism"}) {
        disease.add(name, lung, lung, top);
    }
    for (String name : new String[] {"gastric ulcer", "stomach cancer", "gastritis"}) {
        disease.add(name, "stomach disease", digestive, top);
    }
    for (String name : new String[] {"colitis", "colon cancer"}) {
        disease.add(name, "colon disease", digestive, top);
    }
    return disease;
}
/**
 * Builds the generalization hierarchy used for the zipcode attribute.
 *
 * <p>Each row starts with a five-digit code, followed by the same code with one more trailing
 * character replaced by {@code *} per level, ending in {@code *****}.
 *
 * @return the populated zipcode hierarchy
 */
private static Hierarchy getHierarchyZipcode() {
    final String[] codes = {
        "47677", "47602", "47678", "47905", "47909", "47906", "47605", "47673", "47607"
    };

    DefaultHierarchy zipcode = Hierarchy.create();
    for (String code : codes) {
        final char[] masked = code.toCharArray();
        final String[] row = new String[masked.length + 1];
        row[0] = code;
        // Level n hides the last n characters of the code
        for (int level = 1; level <= masked.length; level++) {
            masked[masked.length - level] = '*';
            row[level] = new String(masked);
        }
        zipcode.add(row);
    }
    return zipcode;
}
/**
 * Builds the generalization hierarchy used for the age attribute.
 *
 * <p>Each row consists of the age itself, the bucket {@code <=40} or {@code >40}, and
 * {@code *} as the most general value.
 *
 * @return the populated age hierarchy
 */
private static Hierarchy getHierarchyAge() {
    final int[] ages = {29, 22, 27, 43, 52, 47, 30, 36, 32};

    DefaultHierarchy age = Hierarchy.create();
    for (int value : ages) {
        final String bucket = (value <= 40) ? "<=40" : ">40";
        age.add(String.valueOf(value), bucket, "*");
    }
    return age;
}
/**
 * Checks that anonymizing data whose "age" attribute has been assigned an empty hierarchy is
 * rejected with an {@link IllegalArgumentException}.
 *
 * <p>The test succeeds as soon as the exception is raised inside the guarded section and fails
 * if that section completes normally.
 *
 * @throws IOException if it is propagated from the calls made by this test
 */
@Test
public void testEmptyHierarchy() throws IOException {
    final ARXAnonymizer anonymizer = new ARXAnonymizer();

    // Assign a freshly created hierarchy, to which no rows have been added, to "age"
    final Data data = provider.getData();
    data.getDefinition().setAttributeType("age", Hierarchy.create());

    try {
        final ARXConfiguration config = ARXConfiguration.create();
        config.addCriterion(new KAnonymity(2));
        // NOTE(review): 1.2 looks out of range for an outlier fraction. If setMaxOutliers
        // itself rejects it with an IllegalArgumentException, this test passes without ever
        // reaching anonymize() -- confirm and consider using a valid value here.
        config.setMaxOutliers(1.2d);

        // Anonymize exactly the instance that was configured above rather than asking the
        // provider again, so the test does not depend on the provider returning the same object
        anonymizer.anonymize(data, config);
    } catch (final IllegalArgumentException e) {
        // Expected outcome
        return;
    }
    Assert.fail("Expected an IllegalArgumentException for an empty hierarchy");
}
/** * Returns a summary statistics object for the given attribute * * @param generalization * @param dataType * @param baseDataType * @param hierarchy * @return */ private <U, V> StatisticsSummaryOrdinal getSummaryStatisticsOrdinal( final int generalization, final DataType<U> dataType, final DataType<V> baseDataType, final Hierarchy hierarchy) { // TODO: It would be cleaner to return an ARXOrderedString for generalized variables // TODO: that have a suitable data type directly from the DataHandle if (generalization == 0 || !(dataType instanceof ARXString)) { return new StatisticsSummaryOrdinal(dataType); } else if (baseDataType instanceof ARXString) { return new StatisticsSummaryOrdinal(dataType); } else if (hierarchy == null) { return new StatisticsSummaryOrdinal(dataType); } else { final String[][] array = hierarchy.getHierarchy(); final Map<String, String> map = new HashMap<String, String>(); for (int i = 0; i < array.length; i++) { map.put(array[i][generalization], array[i][0]); } return new StatisticsSummaryOrdinal( new Comparator<String>() { public int compare(String o1, String o2) { V _o1 = null; try { _o1 = baseDataType.parse(map.get(o1)); } catch (Exception e) { // Nothing to do } V _o2 = null; try { _o2 = baseDataType.parse(map.get(o2)); } catch (Exception e) { // Nothing to do } try { return baseDataType.compare(_o1, _o2); } catch (Exception e) { return 0; } } }); } }
/** * Returns an ordered list of the distinct set of data items from the given column. This method * assumes that the order of string data items can (and should) be derived from the provided * hierarchy * * @param column The column * @param hierarchy The hierarchy, may be null * @return */ public String[] getDistinctValuesOrdered(int column, Hierarchy hierarchy) { // Reset stop flag interrupt = false; // Obtain list and data type final String[] list = getDistinctValues(column); final String attribute = handle.getAttributeName(column); final DataType<?> datatype = handle.getDataType(attribute); final int level = handle.getGeneralization(attribute); final String[][] _hierarchy = hierarchy != null ? hierarchy.getHierarchy() : null; // Sort by data type if (_hierarchy == null || level == 0) { sort(list, datatype, handle.getSuppressionString()); // Sort by hierarchy and data type } else { // Build order directly from the hierarchy final Map<String, Integer> order = new HashMap<String, Integer>(); int max = 0; // The order to use for the suppression string // Create base order Set<String> baseSet = new HashSet<String>(); DataType<?> baseType = handle.getBaseDataType(attribute); for (int i = 0; i < _hierarchy.length; i++) { String element = _hierarchy[i][0]; checkInterrupt(); // Make sure that only elements from the hierarchy // are added that are included in the data // TODO: Calling isValid is only a work-around if (baseType.isValid(element)) baseSet.add(element); } String[] baseArray = baseSet.toArray(new String[baseSet.size()]); sort(baseArray, handle.getBaseDataType(attribute), handle.getSuppressionString()); Map<String, Integer> baseOrder = new HashMap<String, Integer>(); for (int i = 0; i < baseArray.length; i++) { checkInterrupt(); baseOrder.put(baseArray[i], i); } // Build higher level order from base order for (int i = 0; i < _hierarchy.length; i++) { checkInterrupt(); if (!order.containsKey(_hierarchy[i][level])) { Integer position = 
baseOrder.get(_hierarchy[i][0]); if (position != null) { order.put(_hierarchy[i][level], position); max = Math.max(position, max) + 1; } } } // Add suppression string String supp = handle.getSuppressionString(); if (supp != null) order.put(supp, max); // Sort sort(list, order); } // Done return list; }
/** * Entry point. * * @param args the arguments */ public static void main(String[] args) throws IOException { // Define data DefaultData data = Data.create(); data.add("age", "gender", "zipcode"); data.add("45", "female", "81675"); data.add("34", "male", "81667"); data.add("66", "male", "81925"); data.add("70", "female", "81931"); data.add("34", "female", "81931"); data.add("70", "male", "81931"); data.add("45", "male", "81931"); // Define hierarchies DefaultHierarchy age = Hierarchy.create(); age.add("34", "<50", "*"); age.add("45", "<50", "*"); age.add("66", ">=50", "*"); age.add("70", ">=50", "*"); DefaultHierarchy gender = Hierarchy.create(); gender.add("male", "*"); gender.add("female", "*"); // Only excerpts for readability DefaultHierarchy zipcode = Hierarchy.create(); zipcode.add("81667", "8166*", "816**", "81***", "8****", "*****"); zipcode.add("81675", "8167*", "816**", "81***", "8****", "*****"); zipcode.add("81925", "8192*", "819**", "81***", "8****", "*****"); zipcode.add("81931", "8193*", "819**", "81***", "8****", "*****"); data.getDefinition().setAttributeType("age", age); data.getDefinition().setAttributeType("gender", gender); data.getDefinition().setAttributeType("zipcode", zipcode); // Create an instance of the anonymizer ARXAnonymizer anonymizer = new ARXAnonymizer(); ARXConfiguration config = ARXConfiguration.create(); config.addCriterion(new KAnonymity(2)); config.setMaxOutliers(0d); ARXResult result = anonymizer.anonymize(data, config); // Print info printResult(result, data); // Print input System.out.println(" - Input data:"); Iterator<String[]> original = data.getHandle().iterator(); while (original.hasNext()) { System.out.print(" "); System.out.println(Arrays.toString(original.next())); } // Print results System.out.println(" - Transformed data:"); Iterator<String[]> transformed = result.getOutput(false).iterator(); while (transformed.hasNext()) { System.out.print(" "); System.out.println(Arrays.toString(transformed.next())); } // 
Print frequencies StatisticsFrequencyDistribution distribution; System.out.println(" - Distribution of attribute 'age' in input:"); distribution = data.getHandle().getStatistics().getFrequencyDistribution(0, false); System.out.println(" " + Arrays.toString(distribution.values)); System.out.println(" " + Arrays.toString(distribution.frequency)); // Print frequencies System.out.println(" - Distribution of attribute 'age' in output:"); distribution = result.getOutput(false).getStatistics().getFrequencyDistribution(0, true); System.out.println(" " + Arrays.toString(distribution.values)); System.out.println(" " + Arrays.toString(distribution.frequency)); // Print contingency tables StatisticsContingencyTable contingency; System.out.println(" - Contingency of attribute 'gender' and 'zipcode' in input:"); contingency = data.getHandle().getStatistics().getContingencyTable(0, true, 2, true); System.out.println(" " + Arrays.toString(contingency.values1)); System.out.println(" " + Arrays.toString(contingency.values2)); while (contingency.iterator.hasNext()) { Entry e = contingency.iterator.next(); System.out.println(" [" + e.value1 + ", " + e.value2 + ", " + e.frequency + "]"); } // Print contingency tables System.out.println(" - Contingency of attribute 'gender' and 'zipcode' in output:"); contingency = result.getOutput(false).getStatistics().getContingencyTable(0, true, 2, true); System.out.println(" " + Arrays.toString(contingency.values1)); System.out.println(" " + Arrays.toString(contingency.values2)); while (contingency.iterator.hasNext()) { Entry e = contingency.iterator.next(); System.out.println(" [" + e.value1 + ", " + e.value2 + ", " + e.frequency + "]"); } }
/** * Entry point. * * @param args the arguments * @throws IOException */ public static void main(String[] args) throws IOException { // Define data DefaultData data = Data.create(); data.add("zipcode", "age", "disease"); data.add("47677", "29", "gastric ulcer"); data.add("47602", "22", "gastritis"); data.add("47678", "27", "stomach cancer"); data.add("47905", "43", "gastritis"); data.add("47909", "52", "flu"); data.add("47906", "47", "bronchitis"); data.add("47605", "30", "bronchitis"); data.add("47673", "36", "pneumonia"); data.add("47607", "32", "stomach cancer"); // Define hierarchies DefaultHierarchy age = Hierarchy.create(); age.add("29", "<=40", "*"); age.add("22", "<=40", "*"); age.add("27", "<=40", "*"); age.add("43", ">40", "*"); age.add("52", ">40", "*"); age.add("47", ">40", "*"); age.add("30", "<=40", "*"); age.add("36", "<=40", "*"); age.add("32", "<=40", "*"); // Only excerpts for readability DefaultHierarchy zipcode = Hierarchy.create(); zipcode.add("47677", "4767*", "476**", "47***", "4****", "*****"); zipcode.add("47602", "4760*", "476**", "47***", "4****", "*****"); zipcode.add("47678", "4767*", "476**", "47***", "4****", "*****"); zipcode.add("47905", "4790*", "479**", "47***", "4****", "*****"); zipcode.add("47909", "4790*", "479**", "47***", "4****", "*****"); zipcode.add("47906", "4790*", "479**", "47***", "4****", "*****"); zipcode.add("47605", "4760*", "476**", "47***", "4****", "*****"); zipcode.add("47673", "4767*", "476**", "47***", "4****", "*****"); zipcode.add("47607", "4760*", "476**", "47***", "4****", "*****"); // Define sensitive value hierarchy DefaultHierarchy disease = Hierarchy.create(); disease.add( "flu", "respiratory infection", "vascular lung disease", "respiratory & digestive system disease"); disease.add( "pneumonia", "respiratory infection", "vascular lung disease", "respiratory & digestive system disease"); disease.add( "bronchitis", "respiratory infection", "vascular lung disease", "respiratory & digestive system 
disease"); disease.add( "pulmonary edema", "vascular lung disease", "vascular lung disease", "respiratory & digestive system disease"); disease.add( "pulmonary embolism", "vascular lung disease", "vascular lung disease", "respiratory & digestive system disease"); disease.add( "gastric ulcer", "stomach disease", "digestive system disease", "respiratory & digestive system disease"); disease.add( "stomach cancer", "stomach disease", "digestive system disease", "respiratory & digestive system disease"); disease.add( "gastritis", "stomach disease", "digestive system disease", "respiratory & digestive system disease"); disease.add( "colitis", "colon disease", "digestive system disease", "respiratory & digestive system disease"); disease.add( "colon cancer", "colon disease", "digestive system disease", "respiratory & digestive system disease"); data.getDefinition().setAttributeType("age", age); data.getDefinition().setAttributeType("zipcode", zipcode); data.getDefinition().setAttributeType("disease", AttributeType.SENSITIVE_ATTRIBUTE); // Create an instance of the anonymizer ARXAnonymizer anonymizer = new ARXAnonymizer(); ARXConfiguration config = ARXConfiguration.create(); config.addCriterion(new KAnonymity(3)); config.addCriterion(new HierarchicalDistanceTCloseness("disease", 0.6d, disease)); config.setMaxOutliers(0d); config.setMetric(Metric.createEntropyMetric()); // Now anonymize ARXResult result = anonymizer.anonymize(data, config); // Print info printResult(result, data); // Process results System.out.println(" - Transformed data:"); Iterator<String[]> transformed = result.getOutput(false).iterator(); while (transformed.hasNext()) { System.out.print(" "); System.out.println(Arrays.toString(transformed.next())); } }