@SuppressWarnings("unchecked") @Override public List<String> getClusterByCarrotVersion2(String query) { // TODO Auto-generated method stub List<String> strs = new ArrayList<String>(); final Controller controller = ControllerFactory.createPooling(); final Map<String, Object> luceneGlobalAttributes = new HashMap<String, Object>(); LuceneDocumentSourceDescriptor.attributeBuilder(luceneGlobalAttributes).directory(directory); SimpleFieldMapperDescriptor.attributeBuilder(luceneGlobalAttributes) .titleField(TITLE_FIELD) .contentField(CONTENTS_FIELD) .searchFields(Arrays.asList(new String[] {TITLE_FIELD, CONTENTS_FIELD})); controller.init( new HashMap<String, Object>(), new ProcessingComponentConfiguration( LuceneDocumentSource.class, "lucene", luceneGlobalAttributes)); final Map<String, Object> processingAttributes = Maps.newHashMap(); CommonAttributesDescriptor.attributeBuilder(processingAttributes).query(query); ProcessingResult process = controller.process( processingAttributes, "lucene", LingoClusteringAlgorithm.class.getName()); for (Cluster c : process.getClusters()) { strs.add(c.getLabel() + " >>>> " + c.getAllDocuments().size()); } return strs; }
/**
 * Computes the H value of a cluster from its per-partition document counts.
 *
 * <p>The counts are obtained from {@code getDocumentCountByPartition} for all documents of the
 * cluster, copied into a list, and handed to the list-based {@code calculateH} overload.
 *
 * @param cluster the cluster whose documents are counted per partition
 * @return whatever the list-based {@code calculateH} overload returns for those counts
 */
int calculateH(Cluster cluster) {
  final Map<Object, Integer> countsByPartition =
      getDocumentCountByPartition(cluster.getAllDocuments());
  // Only the counts matter here; the partition keys are dropped.
  final ArrayList<Integer> partitionCounts =
      new ArrayList<Integer>(countsByPartition.values());
  return calculateH(partitionCounts);
}
/**
 * Returns the cluster's H value divided by the worst-case H value.
 *
 * <p>The worst case is obtained from {@code calculateWorstCaseH}, given the total number of
 * partition assignments in the cluster (the summed sizes of every document's
 * {@link Document#PARTITIONS} collection) and {@code partitionCount}.
 *
 * @param cluster the cluster to evaluate
 * @param partitionCount the partition count passed through to {@code calculateWorstCaseH}
 * @return {@code calculateH(cluster) / worstCaseH}, or {@code 0} when the worst-case value is
 *     zero (which avoids dividing by zero)
 */
@SuppressWarnings("unchecked")
double calculate(Cluster cluster, int partitionCount) {
  int totalAssignments = 0;
  for (Document doc : cluster.getAllDocuments()) {
    // Unchecked cast: the PARTITIONS field is read as a collection of partition identifiers.
    final Collection<Object> partitions =
        (Collection<Object>) doc.getField(Document.PARTITIONS);
    totalAssignments += partitions.size();
  }

  final double worstCaseH = calculateWorstCaseH(totalAssignments, partitionCount);
  return worstCaseH == 0 ? 0 : calculateH(cluster) / worstCaseH;
}