Пример #1
0
 /**
  * Checks the given node and returns the value of its information loss.
  *
  * <p>The node is first passed to {@code check(node)}, then the metric is evaluated for the node
  * against the current groupify, and finally the node's information loss is read back.
  *
  * @param node the node to check and evaluate
  * @return the value of {@code node.getInformationLoss()} after the evaluation
  * @deprecated marked as deprecated in the source; the intended replacement is not visible here
  */
 @Override
 @Deprecated
 public double getInformationLoss(final Node node) {
   check(node);
   metric.evaluate(node, currentGroupify);
   final double loss = node.getInformationLoss().getValue();
   return loss;
 }
Пример #2
0
  /**
   * Checks a node: applies its transformation, groups the result and records the outcome on the
   * node.
   *
   * <p>Steps, in order: store a snapshot of the previously checked node in the history, ask the
   * state machine for the transition to {@code node}, swap the two groupify buffers, apply the
   * transformation according to the transition type, and finally set the checked flag,
   * k-anonymity, anonymity and information loss on the node.
   *
   * @param node the node to check; it is modified in place
   */
  @Override
  public void check(final Node node) {

    // Store snapshot from last check
    // (must happen before the transition below, which replaces the state machine's last node)
    if (stateMachine.getLastNode() != null) {
      history.store(
          stateMachine.getLastNode(), currentGroupify, stateMachine.getLastTransition().snapshot);
    }

    // Transition
    final Transition transition = stateMachine.transition(node);

    // Switch groupifies
    // The previous result stays reachable as lastGroupify (read by the ROLLUP case);
    // the other buffer is cleared and reused for the new result.
    final IHashGroupify temp = lastGroupify;
    lastGroupify = currentGroupify;
    currentGroupify = temp;
    currentGroupify.clear();

    // Apply transition
    // NOTE(review): there is no default case; an unlisted transition type would leave
    // currentGroupify empty - confirm the enum has exactly these three values.
    switch (transition.type) {
      case UNOPTIMIZED:
        // Transform the data directly, without reusing earlier results
        currentGroupify =
            transformer.apply(transition.projection, node.getTransformation(), currentGroupify);
        break;
      case ROLLUP:
        // Build the new result from the groups of the previous check
        currentGroupify =
            transformer.applyRollup(
                transition.projection, node.getTransformation(), lastGroupify, currentGroupify);
        break;
      case SNAPSHOT:
        // Build the new result from the snapshot attached to the transition
        currentGroupify =
            transformer.applySnapshot(
                transition.projection,
                node.getTransformation(),
                currentGroupify,
                transition.snapshot);
        break;
    }

    // Mark as checked
    node.setChecked();

    // Propagate k-anonymity
    node.setKAnonymous(currentGroupify.isKAnonymous());

    // Propagate anonymity and information loss
    // The metric is only evaluated for anonymous nodes; otherwise the loss is reset to null.
    if (currentGroupify.isAnonymous()) {
      node.setAnonymous(true);
      metric.evaluate(node, currentGroupify);
    } else {
      node.setInformationLoss(null);
      node.setAnonymous(false);
    }
  }
Пример #3
0
  /**
   * Applies the node's transformation to the data and returns the buffer.
   *
   * <p>The current groupify is cleared and rebuilt for the transformation. If the node has not
   * been checked before, it is marked as checked, its anonymity flag is taken from the groupify,
   * the metric is evaluated for it and it is tagged. Nodes that were already checked are left
   * unchanged.
   *
   * @param node the node whose transformation is applied
   * @return the result of {@code getBuffer()}
   * @deprecated marked as deprecated in the source; the intended replacement is not visible here
   */
  @Override
  @Deprecated
  public Data transform(final Node node) {

    // Regroup the data under the node's transformation, reusing the current groupify
    currentGroupify.clear();
    currentGroupify = transformer.apply(0L, node.getTransformation(), currentGroupify);

    // Nothing to record for nodes that have been checked before
    if (node.isChecked()) {
      return getBuffer();
    }

    // First visit: record anonymity and information loss, then tag the node
    node.setChecked();
    node.setAnonymous(currentGroupify.isAnonymous());
    metric.evaluate(node, currentGroupify);
    node.setTagged();

    return getBuffer();
  }
Пример #4
0
  /**
   * Applies the node's transformation, marks outliers in the buffer and returns the buffer.
   *
   * <p>The node's anonymity flag is always refreshed from the rebuilt groupify. The checked flag,
   * the metric evaluation and the tag are only applied if the node has not been checked before.
   * Outliers are marked unless the configured absolute maximum number of outliers is zero.
   *
   * @param node the node whose transformation is applied
   * @return the result of {@code getBuffer()}
   */
  @Override
  public Data transformAndMarkOutliers(final Node node) {

    // Regroup the data under the node's transformation, reusing the current groupify
    currentGroupify.clear();
    currentGroupify = transformer.apply(0L, node.getTransformation(), currentGroupify);

    // Anonymity is updated on every call
    final boolean anonymous = currentGroupify.isAnonymous();
    node.setAnonymous(anonymous);

    // Information loss is only computed on the first visit
    final boolean firstVisit = !node.isChecked();
    if (firstVisit) {
      node.setChecked();
      metric.evaluate(node, currentGroupify);
      node.setTagged();
    }

    // Without an outlier allowance there is nothing to mark
    if (config.getAbsoluteMaxOutliers() == 0) {
      return getBuffer();
    }

    // Mark the outliers in the transformer's buffer
    currentGroupify.markOutliers(transformer.getBuffer());
    return getBuffer();
  }
Пример #5
0
  /**
   * Entry point of the example.
   *
   * <p>Loads the example data, declares two hierarchies and two sensitive attributes, anonymizes
   * the data with 3-anonymity, hierarchical-distance t-closeness on "disease1" and recursive
   * (c,l)-diversity on "disease2", prints the result information and, if a global optimum was
   * found, prints every row of the transformed data.
   *
   * @param args the arguments (not used)
   * @throws IOException declared by this method; which call raises it is not visible in this
   *     excerpt
   */
  public static void main(String[] args) throws IOException {

    // Input data
    Data data = getData();

    // Attribute types: two generalization hierarchies, two sensitive attributes
    data.getDefinition().setAttributeType("age", getHierarchyAge());
    data.getDefinition().setAttributeType("zipcode", getHierarchyZipcode());
    data.getDefinition().setAttributeType("disease1", AttributeType.SENSITIVE_ATTRIBUTE);
    data.getDefinition().setAttributeType("disease2", AttributeType.SENSITIVE_ATTRIBUTE);

    // Anonymizer and its configuration
    ARXAnonymizer anonymizer = new ARXAnonymizer();
    ARXConfiguration config = ARXConfiguration.create();
    config.addPrivacyModel(new KAnonymity(3));
    config.addPrivacyModel(
        new HierarchicalDistanceTCloseness("disease1", 0.6d, getHierarchyDisease()));
    config.addPrivacyModel(new RecursiveCLDiversity("disease2", 3d, 2));
    config.setMaxOutliers(0d);
    config.setQualityModel(Metric.createEntropyMetric());

    // Run the anonymization and report on it
    ARXResult result = anonymizer.anonymize(data, config);
    printResult(result, data);

    // Without a global optimum there is no output to show
    if (result.getGlobalOptimum() == null) {
      return;
    }

    // Print the transformed rows, one per line
    System.out.println(" - Transformed data:");
    for (Iterator<String[]> rows = result.getOutput(false).iterator(); rows.hasNext(); ) {
      System.out.println("   " + Arrays.toString(rows.next()));
    }
  }
 /**
  * Returns the test cases.
  *
  * <p>All cases run on "./data/adult.csv" with "occupation" as the second constructor argument
  * of the test case. The first two use a k-map model based on a population model, the remaining
  * ones a k-map model based on a data subset. The order of the cases (including entries that
  * appear twice) is significant because the test name contains the index.
  *
  * @return one single-element array per test case
  * @throws IOException declared for the subset-based cases, which create data from files
  */
 @Parameters(name = "{index}:[{0}]")
 public static Collection<Object[]> cases() throws IOException {
   return Arrays.asList(
       new Object[][] {
         populationCase(3, 130804.5332092598, new int[] {0, 0, 1, 1, 0, 2, 2, 0}),
         populationCase(1000, 151894.1394841501, new int[] {0, 0, 1, 1, 1, 2, 1, 0}),
         subsetCase(
             0.0d,
             Metric.createAECSMetric(),
             5,
             45.014925373134325,
             new int[] {1, 0, 1, 2, 3, 2, 2, 1}),
         subsetCase(
             0.0d,
             Metric.createPrecomputedEntropyMetric(0.1d, true),
             3,
             23387.494246375998,
             new int[] {0, 0, 1, 2, 3, 2, 2, 0}),
         subsetCase(
             0.0d,
             Metric.createPrecomputedEntropyMetric(0.1d, false),
             5,
             28551.7222913157,
             new int[] {1, 0, 1, 2, 3, 2, 2, 1}),
         subsetCase(
             0.05d,
             Metric.createAECSMetric(),
             20,
             11.424242424242424,
             new int[] {1, 0, 1, 1, 3, 2, 1, 0}),
         subsetCase(
             0.05d,
             Metric.createPrecomputedEntropyMetric(0.1d, true),
             7,
             17075.7181747451,
             new int[] {0, 0, 1, 1, 2, 2, 2, 0}),
         subsetCase(
             0.05d,
             Metric.createPrecomputedEntropyMetric(0.1d, false),
             3,
             15121.633326877098,
             new int[] {0, 0, 1, 1, 1, 2, 1, 0}),
         subsetCase(
             0.0d,
             Metric.createAECSMetric(),
             5,
             45.014925373134325,
             new int[] {1, 0, 1, 2, 3, 2, 2, 1}),
         subsetCase(
             0.0d,
             Metric.createPrecomputedEntropyMetric(0.1d, true),
             2,
             23108.1673304724,
             new int[] {1, 0, 1, 1, 3, 2, 2, 0}),
         subsetCase(
             0.0d,
             Metric.createPrecomputedEntropyMetric(0.1d, false),
             10,
             30238.2081484441,
             new int[] {0, 1, 1, 2, 3, 2, 2, 0}),
         subsetCase(
             0.05d,
             Metric.createAECSMetric(),
             10,
             7.215311004784689,
             new int[] {0, 0, 1, 1, 3, 2, 1, 0}),
         subsetCase(
             0.05d,
             Metric.createPrecomputedEntropyMetric(0.1d, true),
             5,
             17053.8743069776,
             new int[] {0, 0, 1, 0, 2, 2, 2, 1}),
         subsetCase(
             0.05d,
             Metric.createPrecomputedEntropyMetric(0.1d, false),
             3,
             15121.633326877098,
             new int[] {0, 0, 1, 1, 1, 2, 1, 0}),
       });
 }

 /**
  * Builds one parameter row for a k-map case that uses the USA population model with the
  * zero-truncated Poisson estimator, a suppression value of 0.05 and the precomputed entropy
  * metric.
  *
  * @param k the k parameter of the k-map model
  * @param expectedLoss the information loss passed to the test case
  * @param expectedTransformation the transformation passed to the test case
  * @return a single-element array holding the test case
  */
 private static Object[] populationCase(int k, double expectedLoss, int[] expectedTransformation) {
   return new Object[] {
     new ARXAnonymizationTestCase(
         ARXConfiguration.create(0.05d, Metric.createPrecomputedEntropyMetric(0.1d, false))
             .addPrivacyModel(
                 new KMap(
                     k,
                     0.01d,
                     ARXPopulationModel.create(Region.USA),
                     CellSizeEstimator.ZERO_TRUNCATED_POISSON)),
         "occupation",
         "./data/adult.csv",
         expectedLoss,
         expectedTransformation,
         false)
   };
 }

 /**
  * Builds one parameter row for a k-map case whose model is based on a data subset created from
  * "./data/adult.csv" and "./data/adult_subset.csv".
  *
  * @param suppression the first argument of {@code ARXConfiguration.create}
  * @param metric the metric of the configuration
  * @param k the k parameter of the k-map model
  * @param expectedLoss the information loss passed to the test case
  * @param expectedTransformation the transformation passed to the test case
  * @return a single-element array holding the test case
  * @throws IOException declared because the data objects are created from files
  */
 private static Object[] subsetCase(
     double suppression,
     Metric<?> metric,
     int k,
     double expectedLoss,
     int[] expectedTransformation)
     throws IOException {
   return new Object[] {
     new ARXAnonymizationTestCase(
         ARXConfiguration.create(suppression, metric)
             .addPrivacyModel(
                 new KMap(
                     k,
                     DataSubset.create(
                         Data.create("./data/adult.csv", StandardCharsets.UTF_8, ';'),
                         Data.create("./data/adult_subset.csv", StandardCharsets.UTF_8, ';')))),
         "occupation",
         "./data/adult.csv",
         expectedLoss,
         expectedTransformation,
         false)
   };
 }
Пример #7
0
  /**
   * Update the view.
   *
   * <p>Rebuilds the property tree from the model: number of rows, allowed outliers, the utility
   * measure with its properties (plus the financial configuration for publisher-payout metrics)
   * and the identifying, quasi-identifying, sensitive and insensitive attributes.
   *
   * <p>If the model has an output configuration, it is displayed and updates for several model
   * parts (attribute type, metric, attribute weight, GS factor, max outliers, data type, financial
   * model) are ignored. Otherwise the input configuration is displayed. If no definition,
   * configuration or input is available, the view is reset.
   *
   * <p>Redrawing of {@code root} is switched off while the tree is rebuilt and is always switched
   * back on afterwards, even if rebuilding fails with an exception.
   *
   * @param part the model part that triggered the update
   */
  protected void doUpdate(ModelPart part) {

    // Check model
    if (model == null) {
      return;
    }

    // Obtain definition, preferring the output definition
    DataDefinition definition = model.getOutputDefinition();
    if (definition == null) {
      definition = model.getInputDefinition();
    }

    // Obtain relevant configuration objects
    ModelConfiguration config = null;
    Metric<?> metric = null;
    if (model.getOutputConfig() != null) {
      config = model.getOutputConfig();
      metric = config.getMetric();

      // We don't need to update in many cases, if we are displaying an output configuration
      if (part == ModelPart.ATTRIBUTE_TYPE
          || part == ModelPart.METRIC
          || part == ModelPart.ATTRIBUTE_WEIGHT
          || part == ModelPart.GS_FACTOR
          || part == ModelPart.MAX_OUTLIERS
          || part == ModelPart.DATA_TYPE
          || part == ModelPart.FINANCIAL_MODEL) {
        return;
      }

    } else {
      config = model.getInputConfig();
      // TODO: This is such an ugly hack
      metric = model.getMetricDescription().createInstance(model.getMetricConfiguration());
    }

    // Check
    if (definition == null || config == null || model.getInputConfig().getInput() == null) {
      reset();
      return;
    }

    // Obtain handle
    DataHandle data = model.getInputConfig().getInput().getHandle();

    // Disable redrawing while the tree is rebuilt
    root.setRedraw(false);
    try {

      // Clear
      roots.clear();

      // Print basic properties
      new Property(
          Resources.getMessage("PropertiesView.9"), //$NON-NLS-1$
          new String[] {String.valueOf(data.getNumRows())});
      new Property(
          Resources.getMessage("PropertiesView.10"), //$NON-NLS-1$
          new String[] {
            SWTUtil.getPrettyString(config.getAllowedOutliers() * 100d)
                + Resources.getMessage("PropertiesView.11") //$NON-NLS-1$
          });

      // Utility measure
      Property m =
          new Property(
              Resources.getMessage("PropertiesView.114"), //$NON-NLS-1$
              new String[] {metric.getDescription().getName()});

      // Properties of the utility measure
      if (metric.getAggregateFunction() != null) {
        new Property(
            m,
            Resources.getMessage("PropertiesView.149"), //$NON-NLS-1$
            new String[] {metric.getAggregateFunction().toString()});
      }
      if (metric.isGSFactorSupported()) {
        new Property(
            m,
            Resources.getMessage("PropertiesView.151"), //$NON-NLS-1$
            new String[] {SWTUtil.getPrettyString(metric.getGeneralizationSuppressionFactor())});
        new Property(
            m,
            Resources.getMessage("PropertiesView.152"), //$NON-NLS-1$
            new String[] {SWTUtil.getPrettyString(metric.getGeneralizationFactor())});
        new Property(
            m,
            Resources.getMessage("PropertiesView.153"), //$NON-NLS-1$
            new String[] {SWTUtil.getPrettyString(metric.getSuppressionFactor())});
      }
      new Property(
          m,
          Resources.getMessage("PropertiesView.155"), //$NON-NLS-1$
          new String[] {SWTUtil.getPrettyString(metric.isMonotonic())});
      new Property(
          m,
          Resources.getMessage("PropertiesView.156"), //$NON-NLS-1$
          new String[] {SWTUtil.getPrettyString(metric.isWeighted())});
      new Property(
          m,
          Resources.getMessage("PropertiesView.157"), //$NON-NLS-1$
          new String[] {SWTUtil.getPrettyString(metric.isPrecomputed())});
      new Property(
          m,
          Resources.getMessage("PropertiesView.158"), //$NON-NLS-1$
          new String[] {SWTUtil.getPrettyString(metric.isAbleToHandleMicroaggregation())});

      // Financial configuration
      if (metric instanceof MetricSDNMPublisherPayout) {

        // Obtain for output data
        ARXFinancialConfiguration financial =
            ((MetricSDNMPublisherPayout) metric).getFinancialConfiguration();

        // Obtain for input only. This is a bit ugly.
        if (financial == null) {
          financial = ARXFinancialConfiguration.create();
          financial
              .setAdversaryCost(config.getAdversaryCost())
              .setAdversaryGain(config.getAdversaryGain())
              .setPublisherBenefit(config.getPublisherBenefit())
              .setPublisherLoss(config.getPublisherLoss());
        }

        // Render
        new Property(
            m,
            Resources.getMessage("PropertiesView.135"), //$NON-NLS-1$
            new String[] {SWTUtil.getPrettyString(financial.getPublisherBenefit())});
        new Property(
            m,
            Resources.getMessage("PropertiesView.136"), //$NON-NLS-1$
            new String[] {SWTUtil.getPrettyString(financial.getPublisherLoss())});
        new Property(
            m,
            Resources.getMessage("PropertiesView.137"), //$NON-NLS-1$
            new String[] {SWTUtil.getPrettyString(financial.getAdversaryGain())});
        new Property(
            m,
            Resources.getMessage("PropertiesView.138"), //$NON-NLS-1$
            new String[] {SWTUtil.getPrettyString(financial.getAdversaryCost())});
        if (((MetricSDNMPublisherPayout) metric).isProsecutorAttackerModel()) {
          new Property(
              m,
              Resources.getMessage("PropertiesView.139"), //$NON-NLS-1$
              new String[] {Resources.getMessage("PropertiesView.160")}); //$NON-NLS-1$
        }
        if (((MetricSDNMPublisherPayout) metric).isJournalistAttackerModel()) {
          new Property(
              m,
              Resources.getMessage("PropertiesView.139"), //$NON-NLS-1$
              new String[] {Resources.getMessage("PropertiesView.161")}); //$NON-NLS-1$
        }
      }

      // Attributes
      final Property attributes =
          new Property(
              Resources.getMessage("PropertiesView.12"), //$NON-NLS-1$
              new String[] {String.valueOf(data.getNumColumns())});

      // Print identifying attributes
      final Property identifying =
          new Property(
              attributes,
              Resources.getMessage("PropertiesView.13"), //$NON-NLS-1$
              new String[] {String.valueOf(definition.getIdentifyingAttributes().size())});
      int index = 0;
      for (int i = 0; i < data.getNumColumns(); i++) {
        final String s = data.getAttributeName(i);
        if (definition.getIdentifyingAttributes().contains(s)) {
          final String[] values =
              new String[] {"", "", "", "", ""}; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$
          values[0] = s;
          values[1] = definition.getDataType(s).toString();
          new Property(
              identifying,
              Resources.getMessage("PropertiesView.19") + (index++), //$NON-NLS-1$
              values);
        }
      }

      // Print quasi-identifying attributes
      final Property quasiIdentifying =
          new Property(
              attributes,
              Resources.getMessage("PropertiesView.20"), //$NON-NLS-1$
              new String[] {String.valueOf(definition.getQuasiIdentifyingAttributes().size())});
      index = 0;
      for (int i = 0; i < data.getNumColumns(); i++) {
        final String s = data.getAttributeName(i);
        if (definition.getQuasiIdentifyingAttributes().contains(s)) {
          final String[] values =
              new String[] {"", "", "", "", "", "", "", ""}; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$ //$NON-NLS-6$ //$NON-NLS-7$ //$NON-NLS-8$
          values[0] = s;
          if (definition.getHierarchy(s) != null) {
            DataType<?> type = definition.getDataType(s);
            values[1] = type.getDescription().getLabel();
            if (type.getDescription().hasFormat()
                && ((DataTypeWithFormat) type).getFormat() != null) {
              values[2] = ((DataTypeWithFormat) type).getFormat();
            }

            // Determine height of hierarchy
            int height = 0;
            String[][] hierarchy = definition.getHierarchy(s);
            if (hierarchy != null && hierarchy.length != 0 && hierarchy[0] != null) {
              height = hierarchy[0].length;
            }
            values[3] = String.valueOf(height);
            values[4] = String.valueOf(definition.getMinimumGeneralization(s));
            values[5] = String.valueOf(definition.getMaximumGeneralization(s));
          }
          if (definition.getMicroAggregationFunction(s) != null) {
            values[7] = definition.getMicroAggregationFunction(s).getLabel();
          }
          values[6] = SWTUtil.getPrettyString(config.getAttributeWeight(s));
          new Property(
              quasiIdentifying,
              Resources.getMessage("PropertiesView.26") + (index++), //$NON-NLS-1$
              values);
        }
      }

      // Print sensitive attributes
      final Property sensitive =
          new Property(
              attributes,
              Resources.getMessage("PropertiesView.27"), //$NON-NLS-1$
              new String[] {String.valueOf(definition.getSensitiveAttributes().size())});
      index = 0;
      for (int i = 0; i < data.getNumColumns(); i++) {
        final String s = data.getAttributeName(i);
        if (definition.getSensitiveAttributes().contains(s)) {
          final String[] values =
              new String[] {"", "", "", "", ""}; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$
          values[0] = s;
          // Data type and hierarchy height are only shown if the configuration has a hierarchy
          if (config.getHierarchy(s) != null && config.getHierarchy(s).getHierarchy() != null) {
            int height = 0;
            if (config.getHierarchy(s).getHierarchy().length > 0) {
              height = config.getHierarchy(s).getHierarchy()[0].length;
            }
            values[1] = definition.getDataType(s).toString();
            values[2] = String.valueOf(height);
          }
          new Property(
              sensitive,
              Resources.getMessage("PropertiesView.33") + (index++), //$NON-NLS-1$
              values);
        }
      }

      // Print insensitive attributes
      final Property insensitive =
          new Property(
              attributes,
              Resources.getMessage("PropertiesView.34"), //$NON-NLS-1$
              new String[] {String.valueOf(definition.getInsensitiveAttributes().size())});

      index = 0;
      for (int i = 0; i < data.getNumColumns(); i++) {
        final String s = data.getAttributeName(i);
        if (definition.getInsensitiveAttributes().contains(s)) {
          final String[] values =
              new String[] {"", "", "", "", ""}; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$
          values[0] = s;
          values[1] = definition.getDataType(s).toString();
          new Property(
              insensitive,
              Resources.getMessage("PropertiesView.40") + (index++), //$NON-NLS-1$
              values);
        }
      }

      // Refresh and initialize
      refresh();

    } finally {
      // Always re-enable redrawing: an exception while rebuilding the tree must not
      // leave root with redraw switched off
      root.setRedraw(true);
    }
  }
 /**
  * Returns the test cases.
  *
  * <p>Every case combines one metric with a single k-anonymity model (k = 5 or k = 100) on one
  * of the data sets adult, cup or fars. The order of the cases is significant because the test
  * name contains the index.
  *
  * @return one single-element array per test case
  */
 @Parameters(name = "{index}:[{0}]")
 public static Collection<Object[]> cases() {
   final String adult = "./data/adult.csv";
   final String cup = "./data/cup.csv";
   final String fars = "./data/fars.csv";
   return Arrays.asList(
       new Object[][] {
         /* 0 */
         kAnonymityCase(
             0.04d,
             Metric.createPrecomputedEntropyMetric(0.1d, true),
             5,
             adult,
             255559.85455731067,
             new int[] {1, 0, 1, 1, 3, 2, 2, 0, 1},
             false),
         kAnonymityCase(
             0.04d,
             Metric.createPrecomputedEntropyMetric(0.1d, true),
             100,
             adult,
             379417.3460570988,
             new int[] {1, 1, 1, 1, 3, 2, 2, 1, 1},
             false),
         kAnonymityCase(
             0.0d,
             Metric.createPrecomputedEntropyMetric(0.1d, true),
             5,
             adult,
             407289.5388925293,
             new int[] {1, 2, 1, 1, 3, 2, 2, 1, 1},
             false),
         kAnonymityCase(
             0.0d,
             Metric.createPrecomputedEntropyMetric(0.1d, true),
             100,
             adult,
             453196.8932458743,
             new int[] {0, 4, 1, 1, 3, 2, 2, 1, 1},
             false),
         kAnonymityCase(
             0.04d,
             Metric.createPrecomputedEntropyMetric(0.1d, true),
             5,
             adult,
             255559.85455731067,
             new int[] {1, 0, 1, 1, 3, 2, 2, 0, 1},
             true),
         kAnonymityCase(
             0.04d,
             Metric.createPrecomputedEntropyMetric(0.1d, true),
             100,
             adult,
             379417.3460570988,
             new int[] {1, 1, 1, 1, 3, 2, 2, 1, 1},
             true),
         kAnonymityCase(
             0.04d,
             Metric.createPrecomputedEntropyMetric(0.1d, false),
             5,
             cup,
             1764006.4033760305,
             new int[] {2, 4, 0, 1, 0, 4, 4, 4},
             false),
         kAnonymityCase(
             0.04d,
             Metric.createPrecomputedEntropyMetric(0.1d, false),
             100,
             cup,
             1994002.8308631124,
             new int[] {3, 4, 1, 1, 0, 4, 4, 4},
             false),
         kAnonymityCase(
             0.0d,
             Metric.createPrecomputedEntropyMetric(0.1d, false),
             5,
             cup,
             2445878.424834677,
             new int[] {4, 4, 1, 1, 1, 4, 4, 4},
             false),
         kAnonymityCase(
             0.0d,
             Metric.createPrecomputedEntropyMetric(0.1d, false),
             100,
             cup,
             2517471.5816586106,
             new int[] {5, 4, 1, 0, 1, 4, 4, 4},
             false),
         /* 10 */
         kAnonymityCase(
             0.04d,
             Metric.createPrecomputedEntropyMetric(0.1d, false),
             5,
             cup,
             1764006.4033760305,
             new int[] {2, 4, 0, 1, 0, 4, 4, 4},
             true),
         kAnonymityCase(
             0.04d,
             Metric.createPrecomputedEntropyMetric(0.1d, false),
             100,
             cup,
             2001343.4737485605,
             new int[] {3, 4, 1, 1, 0, 1, 2, 1},
             true),
         kAnonymityCase(
             0.04d,
             Metric.createDiscernabilityMetric(true),
             5,
             fars,
             4469271.0,
             new int[] {0, 2, 2, 2, 1, 2, 1, 0},
             false),
         kAnonymityCase(
             0.04d,
             Metric.createDiscernabilityMetric(true),
             100,
             fars,
             5.6052481E7,
             new int[] {0, 2, 3, 3, 1, 2, 2, 2},
             false),
         kAnonymityCase(
             0.0d,
             Metric.createDiscernabilityMetric(true),
             5,
             fars,
             1.42377891E8,
             new int[] {1, 2, 3, 3, 1, 2, 1, 2},
             false),
         kAnonymityCase(
             0.0d,
             Metric.createDiscernabilityMetric(true),
             100,
             fars,
             4.36925397E8,
             new int[] {5, 2, 3, 3, 1, 2, 0, 2},
             false),
         kAnonymityCase(
             0.04d,
             Metric.createDiscernabilityMetric(true),
             5,
             fars,
             4469271.0,
             new int[] {0, 2, 2, 2, 1, 2, 1, 0},
             true),
         kAnonymityCase(
             0.04d,
             Metric.createDiscernabilityMetric(true),
             100,
             fars,
             5.6052481E7,
             new int[] {0, 2, 3, 3, 1, 2, 2, 2},
             true),
       });
 }

 /**
  * Builds one parameter row: a test case whose configuration holds the given metric and a single
  * k-anonymity model.
  *
  * @param suppression the first argument of {@code ARXConfiguration.create}
  * @param metric the metric of the configuration
  * @param k the k parameter of the k-anonymity model
  * @param dataset path of the data set
  * @param expectedLoss the information loss passed to the test case
  * @param expectedTransformation the transformation passed to the test case
  * @param practical the last constructor argument of the test case
  * @return a single-element array holding the test case
  */
 private static Object[] kAnonymityCase(
     double suppression,
     Metric<?> metric,
     int k,
     String dataset,
     double expectedLoss,
     int[] expectedTransformation,
     boolean practical) {
   return new Object[] {
     new ARXAnonymizationTestCase(
         ARXConfiguration.create(suppression, metric).addPrivacyModel(new KAnonymity(k)),
         dataset,
         expectedLoss,
         expectedTransformation,
         practical)
   };
 }
 /**
  * Supplies the parameter sets for this test class.
  *
  * <p>Each entry is a one-element {@code Object[]} wrapping an {@code ARXAnonymizationTestCase}.
  * The table holds 18 cases, all using an {@code EntropyLDiversity} privacy model: entries 0-5
  * target "occupation" in {@code ./data/adult.csv}, entries 6-11 target "RAMNTALL" in
  * {@code ./data/cup.csv}, and entries 12-17 target "EDUC" in {@code ./data/ihis.csv}.
  *
  * <p>NOTE(review): the trailing {@code double}, {@code int[]} (or {@code null}) and
  * {@code boolean} arguments are presumably the expected information loss, the expected
  * transformation and a monotonicity switch; confirm against {@code ARXAnonymizationTestCase}.
  *
  * @return the parameter arrays, in declaration order
  */
 @Parameters(name = "{index}:[{0}]")
 public static Collection<Object[]> cases() {
   // Keep the table in a named local so the return statement stays trivial.
   final Object[][] testCases = {
     /* 0 */ {
       new ARXAnonymizationTestCase(
           ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, false))
               .addPrivacyModel(
                   new EntropyLDiversity("occupation", 5, EntropyEstimator.GRASSBERGER)),
           "occupation",
           "./data/adult.csv",
           216092.124036387,
           new int[] {1, 0, 1, 0, 3, 2, 2, 0},
           false)
     },
     /* 1 */ {
       new ARXAnonymizationTestCase(
           ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, false))
               .addPrivacyModel(
                   new EntropyLDiversity("occupation", 100, EntropyEstimator.SHANNON)),
           "occupation",
           "./data/adult.csv",
           0.0d,
           null,
           false)
     },
     /* 2 */ {
       new ARXAnonymizationTestCase(
           ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, false))
               .addPrivacyModel(
                   new EntropyLDiversity("occupation", 5, EntropyEstimator.GRASSBERGER)),
           "occupation",
           "./data/adult.csv",
           324620.5269918692,
           new int[] {1, 1, 1, 1, 3, 2, 2, 1},
           false)
     },
     /* 3 */ {
       new ARXAnonymizationTestCase(
           ARXConfiguration.create(0.05d, Metric.createPrecomputedEntropyMetric(0.1d, false))
               .addPrivacyModel(
                   new EntropyLDiversity("occupation", 3, EntropyEstimator.GRASSBERGER)),
           "occupation",
           "./data/adult.csv",
           180347.4325366015,
           new int[] {0, 0, 1, 1, 2, 2, 2, 0},
           false)
     },
     /* 4 */ {
       new ARXAnonymizationTestCase(
           ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, false))
               .addPrivacyModel(
                   new EntropyLDiversity("occupation", 5, EntropyEstimator.SHANNON)),
           "occupation",
           "./data/adult.csv",
           228878.2039109517,
           new int[] {1, 0, 1, 1, 2, 2, 2, 1},
           true)
     },
     /* 5 */ {
       new ARXAnonymizationTestCase(
           ARXConfiguration.create(0.1d, Metric.createPrecomputedEntropyMetric(0.1d, false))
               .addPrivacyModel(
                   new EntropyLDiversity("occupation", 100, EntropyEstimator.GRASSBERGER)),
           "occupation",
           "./data/adult.csv",
           0.0d,
           null,
           true)
     },
     /* 6 */ {
       new ARXAnonymizationTestCase(
           ARXConfiguration.create(0.04d, Metric.createDiscernabilityMetric(true))
               .addPrivacyModel(
                   new EntropyLDiversity("RAMNTALL", 5, EntropyEstimator.GRASSBERGER)),
           "RAMNTALL",
           "./data/cup.csv",
           1833435.0,
           new int[] {4, 0, 1, 0, 1, 3, 1},
           false)
     },
     /* 7 */ {
       new ARXAnonymizationTestCase(
           ARXConfiguration.create(0.03d, Metric.createDiscernabilityMetric(true))
               .addPrivacyModel(
                   new EntropyLDiversity("RAMNTALL", 100, EntropyEstimator.GRASSBERGER)),
           "RAMNTALL",
           "./data/cup.csv",
           4.5168281E7,
           new int[] {4, 4, 0, 0, 1, 3, 1},
           false)
     },
     /* 8 */ {
       new ARXAnonymizationTestCase(
           ARXConfiguration.create(0.0d, Metric.createDiscernabilityMetric(true))
               .addPrivacyModel(new EntropyLDiversity("RAMNTALL", 5)),
           "RAMNTALL",
           "./data/cup.csv",
           3.01506905E8,
           new int[] {4, 4, 1, 1, 1, 4, 4},
           false)
     },
     /* 9 */ {
       new ARXAnonymizationTestCase(
           ARXConfiguration.create(0.0d, Metric.createDiscernabilityMetric(true))
               .addPrivacyModel(new EntropyLDiversity("RAMNTALL", 3)),
           "RAMNTALL",
           "./data/cup.csv",
           9.2264547E7,
           new int[] {4, 4, 1, 0, 1, 4, 4},
           false)
     },
     /* 10 */ {
       new ARXAnonymizationTestCase(
           ARXConfiguration.create(0.04d, Metric.createDiscernabilityMetric(true))
               .addPrivacyModel(
                   new EntropyLDiversity("RAMNTALL", 5, EntropyEstimator.SHANNON)),
           "RAMNTALL",
           "./data/cup.csv",
           2823649.0,
           new int[] {4, 0, 0, 1, 1, 3, 1},
           true)
     },
     /* 11 */ {
       new ARXAnonymizationTestCase(
           ARXConfiguration.create(0.1d, Metric.createDiscernabilityMetric(true))
               .addPrivacyModel(
                   new EntropyLDiversity("RAMNTALL", 100, EntropyEstimator.GRASSBERGER)),
           "RAMNTALL",
           "./data/cup.csv",
           3.4459973E7,
           new int[] {5, 0, 0, 2, 1, 2, 1},
           true)
     },
     /* 12 */ {
       new ARXAnonymizationTestCase(
           ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, true))
               .addPrivacyModel(
                   new EntropyLDiversity("EDUC", 5, EntropyEstimator.GRASSBERGER)),
           "EDUC",
           "./data/ihis.csv",
           7735322.29514608,
           new int[] {0, 0, 0, 1, 3, 0, 0, 1},
           false)
     },
     /* 13 */ {
       new ARXAnonymizationTestCase(
           ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, true))
               .addPrivacyModel(
                   new EntropyLDiversity("EDUC", 2, EntropyEstimator.GRASSBERGER)),
           "EDUC",
           "./data/ihis.csv",
           5428093.534997522,
           new int[] {0, 0, 0, 0, 2, 0, 0, 1},
           false)
     },
     /* 14 */ {
       new ARXAnonymizationTestCase(
           ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, true))
               .addPrivacyModel(new EntropyLDiversity("EDUC", 5, EntropyEstimator.SHANNON)),
           "EDUC",
           "./data/ihis.csv",
           1.2258628558792587E7,
           new int[] {0, 0, 0, 3, 3, 2, 0, 1},
           false)
     },
     /* 15 */ {
       new ARXAnonymizationTestCase(
           ARXConfiguration.create(0.0d, Metric.createPrecomputedEntropyMetric(0.1d, true))
               .addPrivacyModel(
                   new EntropyLDiversity("EDUC", 100, EntropyEstimator.GRASSBERGER)),
           "EDUC",
           "./data/ihis.csv",
           0.0d,
           null,
           false)
     },
     /* 16 */ {
       new ARXAnonymizationTestCase(
           ARXConfiguration.create(0.04d, Metric.createPrecomputedEntropyMetric(0.1d, true))
               .addPrivacyModel(
                   new EntropyLDiversity("EDUC", 5, EntropyEstimator.GRASSBERGER)),
           "EDUC",
           "./data/ihis.csv",
           7735322.29514608,
           new int[] {0, 0, 0, 1, 3, 0, 0, 1},
           true)
     },
     /* 17 */ {
       new ARXAnonymizationTestCase(
           ARXConfiguration.create(0.02d, Metric.createPrecomputedEntropyMetric(0.1d, true))
               .addPrivacyModel(new EntropyLDiversity("EDUC", 3, EntropyEstimator.SHANNON)),
           "EDUC",
           "./data/ihis.csv",
           7578152.206004559,
           new int[] {0, 0, 0, 2, 2, 0, 0, 1},
           true)
     },
   };
   return Arrays.asList(testCases);
 }
Пример #10
0
  /**
   * Runs a small anonymization demo on an in-memory table and prints the outcome.
   *
   * <p>The method builds a nine-record table with the columns "zipcode", "age" and "disease",
   * registers hierarchies for "age" and "zipcode", and marks "disease" as a sensitive attribute.
   * It then anonymizes under {@code KAnonymity(3)} combined with
   * {@code HierarchicalDistanceTCloseness("disease", 0.6d, ...)}, calls {@code printResult}, and
   * finally writes every output row to standard output.
   *
   * @param args command-line arguments; not read by this method
   * @throws IOException declared by the signature; presumably raised by the anonymization or
   *     output calls (confirm against the library)
   */
  public static void main(String[] args) throws IOException {

    // Input table: header row first, then nine records
    DefaultData table = Data.create();
    table.add("zipcode", "age", "disease");
    table.add("47677", "29", "gastric ulcer");
    table.add("47602", "22", "gastritis");
    table.add("47678", "27", "stomach cancer");
    table.add("47905", "43", "gastritis");
    table.add("47909", "52", "flu");
    table.add("47906", "47", "bronchitis");
    table.add("47605", "30", "bronchitis");
    table.add("47673", "36", "pneumonia");
    table.add("47607", "32", "stomach cancer");

    // Zipcode hierarchy: one more trailing digit is masked per level (only excerpts, for
    // readability)
    DefaultHierarchy zipHierarchy = Hierarchy.create();
    zipHierarchy.add("47677", "4767*", "476**", "47***", "4****", "*****");
    zipHierarchy.add("47602", "4760*", "476**", "47***", "4****", "*****");
    zipHierarchy.add("47678", "4767*", "476**", "47***", "4****", "*****");
    zipHierarchy.add("47905", "4790*", "479**", "47***", "4****", "*****");
    zipHierarchy.add("47909", "4790*", "479**", "47***", "4****", "*****");
    zipHierarchy.add("47906", "4790*", "479**", "47***", "4****", "*****");
    zipHierarchy.add("47605", "4760*", "476**", "47***", "4****", "*****");
    zipHierarchy.add("47673", "4767*", "476**", "47***", "4****", "*****");
    zipHierarchy.add("47607", "4760*", "476**", "47***", "4****", "*****");

    // Age hierarchy: exact value, then a "<=40" / ">40" bucket, then "*"
    DefaultHierarchy ageHierarchy = Hierarchy.create();
    ageHierarchy.add("29", "<=40", "*");
    ageHierarchy.add("22", "<=40", "*");
    ageHierarchy.add("27", "<=40", "*");
    ageHierarchy.add("43", ">40", "*");
    ageHierarchy.add("52", ">40", "*");
    ageHierarchy.add("47", ">40", "*");
    ageHierarchy.add("30", "<=40", "*");
    ageHierarchy.add("36", "<=40", "*");
    ageHierarchy.add("32", "<=40", "*");

    // Hierarchy over the sensitive values; handed to the t-closeness criterion below
    DefaultHierarchy diseaseHierarchy = Hierarchy.create();
    diseaseHierarchy.add(
        "flu", "respiratory infection", "vascular lung disease",
        "respiratory & digestive system disease");
    diseaseHierarchy.add(
        "pneumonia", "respiratory infection", "vascular lung disease",
        "respiratory & digestive system disease");
    diseaseHierarchy.add(
        "bronchitis", "respiratory infection", "vascular lung disease",
        "respiratory & digestive system disease");
    diseaseHierarchy.add(
        "pulmonary edema", "vascular lung disease", "vascular lung disease",
        "respiratory & digestive system disease");
    diseaseHierarchy.add(
        "pulmonary embolism", "vascular lung disease", "vascular lung disease",
        "respiratory & digestive system disease");
    diseaseHierarchy.add(
        "gastric ulcer", "stomach disease", "digestive system disease",
        "respiratory & digestive system disease");
    diseaseHierarchy.add(
        "stomach cancer", "stomach disease", "digestive system disease",
        "respiratory & digestive system disease");
    diseaseHierarchy.add(
        "gastritis", "stomach disease", "digestive system disease",
        "respiratory & digestive system disease");
    diseaseHierarchy.add(
        "colitis", "colon disease", "digestive system disease",
        "respiratory & digestive system disease");
    diseaseHierarchy.add(
        "colon cancer", "colon disease", "digestive system disease",
        "respiratory & digestive system disease");

    // Attach the hierarchies to their columns and flag "disease" as sensitive
    table.getDefinition().setAttributeType("age", ageHierarchy);
    table.getDefinition().setAttributeType("zipcode", zipHierarchy);
    table.getDefinition().setAttributeType("disease", AttributeType.SENSITIVE_ATTRIBUTE);

    // Set up the anonymizer together with its criteria, outlier limit and metric
    ARXAnonymizer engine = new ARXAnonymizer();
    ARXConfiguration settings = ARXConfiguration.create();
    settings.addCriterion(new KAnonymity(3));
    settings.addCriterion(
        new HierarchicalDistanceTCloseness("disease", 0.6d, diseaseHierarchy));
    settings.setMaxOutliers(0d);
    settings.setMetric(Metric.createEntropyMetric());

    // Run the anonymization
    ARXResult outcome = engine.anonymize(table, settings);

    // Summary output
    printResult(outcome, table);

    // Dump every output row, indented by three spaces
    System.out.println(" - Transformed data:");
    for (Iterator<String[]> rows = outcome.getOutput(false).iterator(); rows.hasNext(); ) {
      System.out.print("   ");
      System.out.println(Arrays.toString(rows.next()));
    }
  }