/**
 * Runs the Lingo clustering algorithm over the documents that the Lucene document source
 * returns for {@code query} and summarizes every resulting cluster as one string.
 *
 * <p>A pooling controller is created for this call and initialized with a Lucene document
 * source that reads from {@code directory}, maps {@code TITLE_FIELD} and
 * {@code CONTENTS_FIELD} as title and content, and searches both fields. The controller is
 * disposed before the method returns, including when initialization or processing throws.
 *
 * @param query the query handed to the processing attributes
 * @return one entry per cluster of the processing result, formatted as
 *     {@code <label> >>>> <number of documents in the cluster>}
 */
@SuppressWarnings("unchecked")
@Override
public List<String> getClusterByCarrotVersion2(String query) {
  final List<String> clusterSummaries = new ArrayList<String>();

  // Init-time attributes of the Lucene document source: index location and field mapping.
  final Map<String, Object> luceneGlobalAttributes = new HashMap<String, Object>();
  LuceneDocumentSourceDescriptor.attributeBuilder(luceneGlobalAttributes).directory(directory);
  SimpleFieldMapperDescriptor.attributeBuilder(luceneGlobalAttributes)
      .titleField(TITLE_FIELD)
      .contentField(CONTENTS_FIELD)
      .searchFields(Arrays.asList(TITLE_FIELD, CONTENTS_FIELD));

  // Per-request attributes: only the query.
  final Map<String, Object> processingAttributes = new HashMap<String, Object>();
  CommonAttributesDescriptor.attributeBuilder(processingAttributes).query(query);

  // Created last so that nothing can throw between creation and the try block.
  final Controller controller = ControllerFactory.createPooling();
  try {
    controller.init(
        new HashMap<String, Object>(),
        new ProcessingComponentConfiguration(
            LuceneDocumentSource.class, "lucene", luceneGlobalAttributes));

    final ProcessingResult result =
        controller.process(
            processingAttributes, "lucene", LingoClusteringAlgorithm.class.getName());

    for (Cluster cluster : result.getClusters()) {
      clusterSummaries.add(cluster.getLabel() + " >>>> " + cluster.getAllDocuments().size());
    }
  } finally {
    // The controller is local to this call; without dispose() its pooled components
    // would never be released.
    controller.dispose();
  }
  return clusterSummaries;
}
Example #2
0
  /**
   * Computes H for a cluster by delegating to the list-based {@code calculateH} overload.
   *
   * <p>The per-partition document counts are obtained from
   * {@code getDocumentCountByPartition} for all documents of the cluster; only the counts
   * (the map's values) are passed on.
   *
   * @param cluster the cluster whose documents are counted per partition
   * @return the value returned by {@code calculateH} for the list of per-partition counts
   */
  int calculateH(Cluster cluster) {
    final Map<Object, Integer> countsPerPartition =
        getDocumentCountByPartition(cluster.getAllDocuments());

    // Copy the map's value view into a standalone list before handing it on.
    final ArrayList<Integer> partitionSizes =
        Lists.newArrayList(countsPerPartition.values());

    return calculateH(partitionSizes);
  }
Example #3
0
  /**
   * Returns the cluster's H value divided by the worst-case H.
   *
   * <p>The worst-case H is obtained from {@code calculateWorstCaseH} using the total number
   * of partition assignments in the cluster, i.e. the sum of the sizes of the
   * {@code Document.PARTITIONS} field over all of the cluster's documents, together with
   * {@code partitionCount}.
   *
   * @param cluster the cluster to evaluate
   * @param partitionCount passed through to {@code calculateWorstCaseH}
   * @return {@code 0} when the worst-case H is zero, otherwise
   *     {@code calculateH(cluster) / worstCaseH}
   */
  @SuppressWarnings("unchecked")
  double calculate(Cluster cluster, int partitionCount) {
    int totalAssignments = 0;
    for (Document doc : cluster.getAllDocuments()) {
      // The field value is cast without a check; hence the unchecked suppression.
      final Collection<Object> partitions =
          (Collection<Object>) doc.getField(Document.PARTITIONS);
      totalAssignments += partitions.size();
    }

    final double worstCaseH = calculateWorstCaseH(totalAssignments, partitionCount);

    // Guard against dividing by zero.
    if (worstCaseH == 0) {
      return 0;
    }
    return calculateH(cluster) / worstCaseH;
  }