Ejemplo n.º 1
0
  /**
   * Builds an {@code Instance} that carries a single numeric vector feature named "num".
   *
   * @param id identifier handed to the {@code Instance} constructor
   * @param value number stored as the only element of the feature's vector; it is unboxed, so
   *     passing {@code null} results in a {@code NullPointerException}
   * @return the newly created instance with the "num" feature attached
   */
  private Instance createInstance(String id, Double value) {
    Instance result = new Instance(id);
    NumericVectorFeature numeric = new NumericVectorFeature("num");

    // Wrap the scalar in a one-element vector, which is the shape setValue is given here.
    double[] vector = {value};
    numeric.setValue(vector);

    result.addFeature(numeric);
    return result;
  }
  /**
   * Demo entry point: builds a thirteen-instance data set of string features, clusters it with
   * {@code DPMeans}, and prints every resulting cluster to standard output.
   *
   * @param args command-line arguments; not used
   */
  public static void main(String[] args) {
    // One row per instance: {id, FEATURE_NAME1 value, FEATURE_NAME2 value}.
    // A null third element means the instance gets no FEATURE_NAME2 feature at all.
    final String[][] rows = {
      {"1", "jack black", "san diego"},
      {"2", "jack black", null},
      {"3", "jack", "san diego"},
      {"4", "jack l. black", "san diego"},
      {"5", "j. black", "san diego"},
      {"6", "j black", "new york"},
      {"7", "black", "new york"},
      {"8", "jackie black", null},
      {"9", "jack brown", null},
      {"10", "jackie green", null},
      {"11", "bob", "san fran"},
      {"12", "bobbie", "san fran"},
      {"13", "jackie", null},
    };

    DataSet ds = new DataSet();
    for (String[] row : rows) {
      ds.add(newStringInstance(row[0], row[1], row[2]));
    }

    DPMeans clusterer = new DPMeans(5, true);
    try {
      clusterer.registerFeatureType(
          FEATURE_NAME1, StringMedianCentroid.class, new EditDistance(0.5));
      clusterer.registerFeatureType(
          FEATURE_NAME2, StringMedianCentroid.class, new ExactTokenMatchDistance(0.5));
      clusterer.setThreshold(0.5);

      ClusterResult clusters = clusterer.doCluster(ds);
      for (Cluster c : clusters) {
        System.out.println(c.toString(true));
      }
    } finally {
      // Run terminate() even when registration or clustering throws.
      // NOTE(review): terminate() presumably releases resources held by the clusterer - confirm.
      clusterer.terminate();
    }
  }

  /**
   * Creates an instance with a FEATURE_NAME1 string feature and, optionally, a FEATURE_NAME2
   * string feature.
   *
   * @param id identifier handed to the {@code Instance} constructor
   * @param first value of the FEATURE_NAME1 feature; always added
   * @param second value of the FEATURE_NAME2 feature, or {@code null} to omit that feature
   * @return the populated instance
   */
  private static Instance newStringInstance(String id, String first, String second) {
    Instance inst = new Instance(id);

    StringFeature feature = new StringFeature(FEATURE_NAME1);
    feature.setValue(first);
    inst.addFeature(feature);

    if (second != null) {
      feature = new StringFeature(FEATURE_NAME2);
      feature.setValue(second);
      inst.addFeature(feature);
    }
    return inst;
  }