예제 #1
0
 /**
  * Builds the hierarchy for the disease attribute.
  *
  * <p>Every row starts with a disease name, followed by increasingly general categories, and ends
  * with the common top-level category shared by all rows.
  *
  * @return the populated disease hierarchy
  */
 private static Hierarchy getHierarchyDisease() {
   // Category labels that occur in more than one row
   final String infection = "respiratory infection";
   final String lung = "vascular lung disease";
   final String stomach = "stomach disease";
   final String colon = "colon disease";
   final String digestive = "digestive system disease";
   final String root = "respiratory & digestive system disease";

   // One row per disease, in the order in which the rows are added
   final String[][] rows = {
     {"flu", infection, lung, root},
     {"pneumonia", infection, lung, root},
     {"bronchitis", infection, lung, root},
     {"pulmonary edema", lung, lung, root},
     {"pulmonary embolism", lung, lung, root},
     {"gastric ulcer", stomach, digestive, root},
     {"stomach cancer", stomach, digestive, root},
     {"gastritis", stomach, digestive, root},
     {"colitis", colon, digestive, root},
     {"colon cancer", colon, digestive, root},
   };

   final DefaultHierarchy hierarchy = Hierarchy.create();
   for (final String[] row : rows) {
     hierarchy.add(row);
   }
   return hierarchy;
 }
예제 #2
0
 /**
  * Builds the hierarchy for the zipcode attribute.
  *
  * <p>Each row consists of the full zipcode followed by copies in which one more trailing digit
  * is replaced by {@code '*'} per level, down to a fully masked value.
  *
  * @return the populated zipcode hierarchy
  */
 private static Hierarchy getHierarchyZipcode() {
   final String[] codes = {
     "47677", "47602", "47678", "47905", "47909", "47906", "47605", "47673", "47607"
   };

   final DefaultHierarchy hierarchy = Hierarchy.create();
   for (final String code : codes) {
     // Level 0 is the code itself; level k masks the last k digits
     final String[] row = new String[code.length() + 1];
     row[0] = code;
     final StringBuilder masked = new StringBuilder(code);
     for (int level = 1; level < row.length; level++) {
       masked.setCharAt(code.length() - level, '*');
       row[level] = masked.toString();
     }
     hierarchy.add(row);
   }
   return hierarchy;
 }
예제 #3
0
 /**
  * Builds the hierarchy for the age attribute.
  *
  * <p>Each row consists of the age, its group ({@code "<=40"} or {@code ">40"}) and {@code "*"}.
  *
  * @return the populated age hierarchy
  */
 private static Hierarchy getHierarchyAge() {
   final String[] ages = {"29", "22", "27", "43", "52", "47", "30", "36", "32"};

   final DefaultHierarchy hierarchy = Hierarchy.create();
   for (final String value : ages) {
     // Derive the first generalization level from the numeric value
     final String group = Integer.parseInt(value) > 40 ? ">40" : "<=40";
     hierarchy.add(value, group, "*");
   }
   return hierarchy;
 }
예제 #4
0
  /**
   * Checks that an {@link IllegalArgumentException} is raised when the "age" attribute has been
   * assigned an empty hierarchy.
   *
   * <p>The test succeeds as soon as the exception is thrown anywhere inside the guarded section
   * (configuration set-up or anonymization) and fails otherwise.
   *
   * @throws IOException if the code under test reports an I/O problem
   */
  @Test
  public void testEmptyHierarchy() throws IOException {

    final ARXAnonymizer anonymizer = new ARXAnonymizer();
    final Data data = provider.getData();
    data.getDefinition().setAttributeType("age", Hierarchy.create());
    try {
      final ARXConfiguration config = ARXConfiguration.create();
      config.addCriterion(new KAnonymity(2));
      // NOTE(review): if setMaxOutliers itself rejects 1.2 with an IllegalArgumentException,
      // the test passes without ever reaching anonymize() - confirm this is intended.
      config.setMaxOutliers(1.2d);
      // Anonymize the very instance that was given the empty hierarchy above, so the test does
      // not depend on provider.getData() returning the same object on every call
      anonymizer.anonymize(data, config);

    } catch (final IllegalArgumentException e) {
      // Expected outcome
      return;
    }
    Assert.fail("Expected an IllegalArgumentException for the empty 'age' hierarchy");
  }
예제 #5
0
  /**
   * Creates the ordinal summary statistics object for an attribute.
   *
   * <p>The statistics object is built directly from {@code dataType} unless all of the following
   * hold: the attribute is generalized ({@code generalization != 0}), {@code dataType} is an
   * {@code ARXString}, {@code baseDataType} is not an {@code ARXString}, and a hierarchy is
   * available. In that case values are ordered by a comparator that maps each generalized value
   * back to a base-level value from the hierarchy and compares the parsed base values.
   *
   * @param generalization the generalization level of the attribute
   * @param dataType the data type of the (possibly generalized) attribute
   * @param baseDataType the data type of the attribute's base values
   * @param hierarchy the hierarchy, may be {@code null}
   * @return the summary statistics object
   */
  private <U, V> StatisticsSummaryOrdinal getSummaryStatisticsOrdinal(
      final int generalization,
      final DataType<U> dataType,
      final DataType<V> baseDataType,
      final Hierarchy hierarchy) {

    // TODO: It would be cleaner to return an ARXOrderedString for generalized variables
    // TODO: that have a suitable data type directly from the DataHandle
    final boolean orderByBaseValues =
        generalization != 0
            && dataType instanceof ARXString
            && !(baseDataType instanceof ARXString)
            && hierarchy != null;
    if (!orderByBaseValues) {
      return new StatisticsSummaryOrdinal(dataType);
    }

    // Map every value on the requested level to a base value; if several rows share the same
    // generalized value, the last row wins
    final Map<String, String> generalizedToBase = new HashMap<String, String>();
    for (final String[] row : hierarchy.getHierarchy()) {
      generalizedToBase.put(row[generalization], row[0]);
    }

    return new StatisticsSummaryOrdinal(
        new Comparator<String>() {
          @Override
          public int compare(String o1, String o2) {
            final V left = parseBase(o1);
            final V right = parseBase(o2);
            try {
              return baseDataType.compare(left, right);
            } catch (Exception e) {
              // Values that cannot be compared are treated as equal
              return 0;
            }
          }

          /** Parses the base value mapped to the given value, or yields null on any failure. */
          private V parseBase(String generalized) {
            try {
              return baseDataType.parse(generalizedToBase.get(generalized));
            } catch (Exception e) {
              // Nothing to do
              return null;
            }
          }
        });
  }
예제 #6
0
  /**
   * Returns the distinct data items of the given column as an ordered array.
   *
   * <p>Without a hierarchy, or when the attribute's generalization level is 0, the items are
   * sorted by the attribute's data type. Otherwise the order is derived from the hierarchy: the
   * valid base-level values are sorted by the base data type, each value on the current level
   * takes the position of the first hierarchy row that maps to it, and the suppression string
   * (if any) is placed after all of them.
   *
   * @param column The column
   * @param hierarchy The hierarchy, may be null
   * @return the ordered distinct values
   */
  public String[] getDistinctValuesOrdered(int column, Hierarchy hierarchy) {

    // Reset stop flag
    interrupt = false;

    // Collect values and metadata
    final String[] values = getDistinctValues(column);
    final String attribute = handle.getAttributeName(column);
    final DataType<?> type = handle.getDataType(attribute);
    final int level = handle.getGeneralization(attribute);
    final String[][] rows = (hierarchy == null) ? null : hierarchy.getHierarchy();

    // Simple case: sort by data type only
    if (rows == null || level == 0) {
      sort(values, type, handle.getSuppressionString());
      return values;
    }

    // Otherwise: derive the order from the hierarchy
    final Map<String, Integer> order = new HashMap<String, Integer>();
    int suppressionRank = 0; // The order to use for the suppression string

    // Collect the base-level values
    final Set<String> baseValues = new HashSet<String>();
    final DataType<?> baseType = handle.getBaseDataType(attribute);
    for (final String[] row : rows) {
      final String base = row[0];
      checkInterrupt();
      // Make sure that only elements from the hierarchy
      // are added that are included in the data
      // TODO: Calling isValid is only a work-around
      if (baseType.isValid(base)) {
        baseValues.add(base);
      }
    }

    // Rank the base-level values by their data type
    final String[] sortedBase = baseValues.toArray(new String[baseValues.size()]);
    sort(sortedBase, handle.getBaseDataType(attribute), handle.getSuppressionString());
    final Map<String, Integer> baseRank = new HashMap<String, Integer>();
    for (int rank = 0; rank < sortedBase.length; rank++) {
      checkInterrupt();
      baseRank.put(sortedBase[rank], rank);
    }

    // Lift the base ranks to the current level; the first row seen for a value decides
    for (final String[] row : rows) {
      checkInterrupt();
      final String generalized = row[level];
      if (order.containsKey(generalized)) {
        continue;
      }
      final Integer rank = baseRank.get(row[0]);
      if (rank == null) {
        continue;
      }
      order.put(generalized, rank);
      // Keep the suppression rank strictly above every rank assigned so far
      suppressionRank = Math.max(rank, suppressionRank) + 1;
    }

    // Add suppression string
    final String suppressed = handle.getSuppressionString();
    if (suppressed != null) {
      order.put(suppressed, suppressionRank);
    }

    // Sort and finish
    sort(values, order);
    return values;
  }
예제 #7
0
  /**
   * Entry point.
   *
   * <p>Builds a small data set with the attributes "age", "gender" and "zipcode", anonymizes it
   * with 2-anonymity, and prints the input, the transformed output, the frequency distribution
   * of "age" and the contingency table of "gender" and "zipcode" for both input and output.
   *
   * @param args the arguments
   * @throws IOException if anonymization or result handling reports an I/O problem
   */
  public static void main(String[] args) throws IOException {

    // Column indices, following the order of the header row added below
    final int ageColumn = 0;
    final int genderColumn = 1;
    final int zipcodeColumn = 2;

    // Define data
    DefaultData data = Data.create();
    data.add("age", "gender", "zipcode");
    data.add("45", "female", "81675");
    data.add("34", "male", "81667");
    data.add("66", "male", "81925");
    data.add("70", "female", "81931");
    data.add("34", "female", "81931");
    data.add("70", "male", "81931");
    data.add("45", "male", "81931");

    // Define hierarchies
    DefaultHierarchy age = Hierarchy.create();
    age.add("34", "<50", "*");
    age.add("45", "<50", "*");
    age.add("66", ">=50", "*");
    age.add("70", ">=50", "*");

    DefaultHierarchy gender = Hierarchy.create();
    gender.add("male", "*");
    gender.add("female", "*");

    // Only excerpts for readability
    DefaultHierarchy zipcode = Hierarchy.create();
    zipcode.add("81667", "8166*", "816**", "81***", "8****", "*****");
    zipcode.add("81675", "8167*", "816**", "81***", "8****", "*****");
    zipcode.add("81925", "8192*", "819**", "81***", "8****", "*****");
    zipcode.add("81931", "8193*", "819**", "81***", "8****", "*****");

    data.getDefinition().setAttributeType("age", age);
    data.getDefinition().setAttributeType("gender", gender);
    data.getDefinition().setAttributeType("zipcode", zipcode);

    // Create an instance of the anonymizer
    ARXAnonymizer anonymizer = new ARXAnonymizer();
    ARXConfiguration config = ARXConfiguration.create();
    config.addCriterion(new KAnonymity(2));
    config.setMaxOutliers(0d);

    ARXResult result = anonymizer.anonymize(data, config);

    // Print info
    printResult(result, data);

    // Print input
    System.out.println(" - Input data:");
    Iterator<String[]> original = data.getHandle().iterator();
    while (original.hasNext()) {
      System.out.print("   ");
      System.out.println(Arrays.toString(original.next()));
    }

    // Print results
    System.out.println(" - Transformed data:");
    Iterator<String[]> transformed = result.getOutput(false).iterator();
    while (transformed.hasNext()) {
      System.out.print("   ");
      System.out.println(Arrays.toString(transformed.next()));
    }

    // Print frequencies
    StatisticsFrequencyDistribution distribution;
    System.out.println(" - Distribution of attribute 'age' in input:");
    distribution = data.getHandle().getStatistics().getFrequencyDistribution(ageColumn, false);
    System.out.println("   " + Arrays.toString(distribution.values));
    System.out.println("   " + Arrays.toString(distribution.frequency));

    // Print frequencies
    System.out.println(" - Distribution of attribute 'age' in output:");
    distribution =
        result.getOutput(false).getStatistics().getFrequencyDistribution(ageColumn, true);
    System.out.println("   " + Arrays.toString(distribution.values));
    System.out.println("   " + Arrays.toString(distribution.frequency));

    // Print contingency tables. The first column passed is 'gender' so that the table matches
    // the printed label; passing column 0 here would tabulate 'age' instead.
    StatisticsContingencyTable contingency;
    System.out.println(" - Contingency of attribute 'gender' and 'zipcode' in input:");
    contingency =
        data.getHandle()
            .getStatistics()
            .getContingencyTable(genderColumn, true, zipcodeColumn, true);
    System.out.println("   " + Arrays.toString(contingency.values1));
    System.out.println("   " + Arrays.toString(contingency.values2));
    while (contingency.iterator.hasNext()) {
      Entry e = contingency.iterator.next();
      System.out.println("   [" + e.value1 + ", " + e.value2 + ", " + e.frequency + "]");
    }

    // Print contingency tables
    System.out.println(" - Contingency of attribute 'gender' and 'zipcode' in output:");
    contingency =
        result
            .getOutput(false)
            .getStatistics()
            .getContingencyTable(genderColumn, true, zipcodeColumn, true);
    System.out.println("   " + Arrays.toString(contingency.values1));
    System.out.println("   " + Arrays.toString(contingency.values2));
    while (contingency.iterator.hasNext()) {
      Entry e = contingency.iterator.next();
      System.out.println("   [" + e.value1 + ", " + e.value2 + ", " + e.frequency + "]");
    }
  }
예제 #8
0
파일: Example8.java 프로젝트: moizmhb/arx
  /**
   * Entry point.
   *
   * <p>Builds a small data set with the attributes "zipcode", "age" and "disease", assigns
   * hierarchies to "age" and "zipcode", marks "disease" as sensitive, anonymizes the data with
   * 3-anonymity plus hierarchical-distance t-closeness on "disease", and prints the outcome.
   *
   * @param args the arguments
   * @throws IOException
   */
  public static void main(String[] args) throws IOException {

    // Define data: the first row holds the attribute names referenced further below
    final String[][] records = {
      {"zipcode", "age", "disease"},
      {"47677", "29", "gastric ulcer"},
      {"47602", "22", "gastritis"},
      {"47678", "27", "stomach cancer"},
      {"47905", "43", "gastritis"},
      {"47909", "52", "flu"},
      {"47906", "47", "bronchitis"},
      {"47605", "30", "bronchitis"},
      {"47673", "36", "pneumonia"},
      {"47607", "32", "stomach cancer"},
    };
    DefaultData data = Data.create();
    for (final String[] record : records) {
      data.add(record);
    }

    // Define hierarchies
    final String[][] ageRows = {
      {"29", "<=40", "*"},
      {"22", "<=40", "*"},
      {"27", "<=40", "*"},
      {"43", ">40", "*"},
      {"52", ">40", "*"},
      {"47", ">40", "*"},
      {"30", "<=40", "*"},
      {"36", "<=40", "*"},
      {"32", "<=40", "*"},
    };
    DefaultHierarchy age = Hierarchy.create();
    for (final String[] row : ageRows) {
      age.add(row);
    }

    // Only excerpts for readability
    final String[][] zipcodeRows = {
      {"47677", "4767*", "476**", "47***", "4****", "*****"},
      {"47602", "4760*", "476**", "47***", "4****", "*****"},
      {"47678", "4767*", "476**", "47***", "4****", "*****"},
      {"47905", "4790*", "479**", "47***", "4****", "*****"},
      {"47909", "4790*", "479**", "47***", "4****", "*****"},
      {"47906", "4790*", "479**", "47***", "4****", "*****"},
      {"47605", "4760*", "476**", "47***", "4****", "*****"},
      {"47673", "4767*", "476**", "47***", "4****", "*****"},
      {"47607", "4760*", "476**", "47***", "4****", "*****"},
    };
    DefaultHierarchy zipcode = Hierarchy.create();
    for (final String[] row : zipcodeRows) {
      zipcode.add(row);
    }

    // Define sensitive value hierarchy; the labels below occur in more than one row
    final String infection = "respiratory infection";
    final String lung = "vascular lung disease";
    final String stomach = "stomach disease";
    final String colon = "colon disease";
    final String digestive = "digestive system disease";
    final String root = "respiratory & digestive system disease";
    final String[][] diseaseRows = {
      {"flu", infection, lung, root},
      {"pneumonia", infection, lung, root},
      {"bronchitis", infection, lung, root},
      {"pulmonary edema", lung, lung, root},
      {"pulmonary embolism", lung, lung, root},
      {"gastric ulcer", stomach, digestive, root},
      {"stomach cancer", stomach, digestive, root},
      {"gastritis", stomach, digestive, root},
      {"colitis", colon, digestive, root},
      {"colon cancer", colon, digestive, root},
    };
    DefaultHierarchy disease = Hierarchy.create();
    for (final String[] row : diseaseRows) {
      disease.add(row);
    }

    // Attach the attribute types to the data definition
    data.getDefinition().setAttributeType("age", age);
    data.getDefinition().setAttributeType("zipcode", zipcode);
    data.getDefinition().setAttributeType("disease", AttributeType.SENSITIVE_ATTRIBUTE);

    // Create an instance of the anonymizer and its configuration
    ARXAnonymizer anonymizer = new ARXAnonymizer();
    ARXConfiguration config = ARXConfiguration.create();
    config.addCriterion(new KAnonymity(3));
    config.addCriterion(new HierarchicalDistanceTCloseness("disease", 0.6d, disease));
    config.setMaxOutliers(0d);
    config.setMetric(Metric.createEntropyMetric());

    // Now anonymize
    ARXResult result = anonymizer.anonymize(data, config);

    // Print info
    printResult(result, data);

    // Process results
    System.out.println(" - Transformed data:");
    for (Iterator<String[]> rows = result.getOutput(false).iterator(); rows.hasNext(); ) {
      System.out.print("   ");
      System.out.println(Arrays.toString(rows.next()));
    }
  }