/**
 * Creates an instance that carries a single numeric vector feature named {@code "num"}.
 *
 * @param id    identifier handed to the {@code Instance} constructor
 * @param value number stored as the only element of the feature's vector; it is
 *              auto-unboxed, so a {@code null} argument raises a NullPointerException
 * @return the newly built instance with the {@code "num"} feature attached
 */
private Instance createInstance(String id, Double value) {
    final Instance result = new Instance(id);
    final NumericVectorFeature numeric = new NumericVectorFeature("num");

    // One-element vector holding the supplied value.
    final double[] vector = new double[1];
    vector[0] = value;
    numeric.setValue(vector);

    result.addFeature(numeric);
    return result;
}
/** @param args */ public static void main(String[] args) { DataSet ds = new DataSet(); Instance inst = new Instance("1"); StringFeature feature = new StringFeature(FEATURE_NAME1); feature.setValue("jack black"); inst.addFeature(feature); feature = new StringFeature(FEATURE_NAME2); feature.setValue("san diego"); inst.addFeature(feature); ds.add(inst); inst = new Instance("2"); feature = new StringFeature(FEATURE_NAME1); feature.setValue("jack black"); inst.addFeature(feature); ds.add(inst); inst = new Instance("3"); feature = new StringFeature(FEATURE_NAME1); feature.setValue("jack"); inst.addFeature(feature); feature = new StringFeature(FEATURE_NAME2); feature.setValue("san diego"); inst.addFeature(feature); ds.add(inst); inst = new Instance("4"); feature = new StringFeature(FEATURE_NAME1); feature.setValue("jack l. black"); inst.addFeature(feature); feature = new StringFeature(FEATURE_NAME2); feature.setValue("san diego"); inst.addFeature(feature); ds.add(inst); inst = new Instance("5"); feature = new StringFeature(FEATURE_NAME1); feature.setValue("j. 
black"); inst.addFeature(feature); feature = new StringFeature(FEATURE_NAME2); feature.setValue("san diego"); inst.addFeature(feature); ds.add(inst); inst = new Instance("6"); feature = new StringFeature(FEATURE_NAME1); feature.setValue("j black"); inst.addFeature(feature); feature = new StringFeature(FEATURE_NAME2); feature.setValue("new york"); inst.addFeature(feature); ds.add(inst); inst = new Instance("7"); feature = new StringFeature(FEATURE_NAME1); feature.setValue("black"); inst.addFeature(feature); feature = new StringFeature(FEATURE_NAME2); feature.setValue("new york"); inst.addFeature(feature); ds.add(inst); inst = new Instance("8"); feature = new StringFeature(FEATURE_NAME1); feature.setValue("jackie black"); inst.addFeature(feature); ds.add(inst); inst = new Instance("9"); feature = new StringFeature(FEATURE_NAME1); feature.setValue("jack brown"); inst.addFeature(feature); ds.add(inst); inst = new Instance("10"); feature = new StringFeature(FEATURE_NAME1); feature.setValue("jackie green"); inst.addFeature(feature); ds.add(inst); inst = new Instance("11"); feature = new StringFeature(FEATURE_NAME1); feature.setValue("bob"); inst.addFeature(feature); feature = new StringFeature(FEATURE_NAME2); feature.setValue("san fran"); inst.addFeature(feature); ds.add(inst); inst = new Instance("12"); feature = new StringFeature(FEATURE_NAME1); feature.setValue("bobbie"); inst.addFeature(feature); feature = new StringFeature(FEATURE_NAME2); feature.setValue("san fran"); inst.addFeature(feature); ds.add(inst); inst = new Instance("13"); feature = new StringFeature(FEATURE_NAME1); feature.setValue("jackie"); inst.addFeature(feature); ds.add(inst); DPMeans clusterer = new DPMeans(5, true); clusterer.registerFeatureType(FEATURE_NAME1, StringMedianCentroid.class, new EditDistance(0.5)); clusterer.registerFeatureType( FEATURE_NAME2, StringMedianCentroid.class, new ExactTokenMatchDistance(0.5)); clusterer.setThreshold(0.5); ClusterResult clusters = clusterer.doCluster(ds); 
for (Cluster c : clusters) { System.out.println(c.toString(true)); } clusterer.terminate(); }