@SuppressWarnings("unchecked") @Override public List<String> getClusterByCarrotVersion2(String query) { // TODO Auto-generated method stub List<String> strs = new ArrayList<String>(); final Controller controller = ControllerFactory.createPooling(); final Map<String, Object> luceneGlobalAttributes = new HashMap<String, Object>(); LuceneDocumentSourceDescriptor.attributeBuilder(luceneGlobalAttributes).directory(directory); SimpleFieldMapperDescriptor.attributeBuilder(luceneGlobalAttributes) .titleField(TITLE_FIELD) .contentField(CONTENTS_FIELD) .searchFields(Arrays.asList(new String[] {TITLE_FIELD, CONTENTS_FIELD})); controller.init( new HashMap<String, Object>(), new ProcessingComponentConfiguration( LuceneDocumentSource.class, "lucene", luceneGlobalAttributes)); final Map<String, Object> processingAttributes = Maps.newHashMap(); CommonAttributesDescriptor.attributeBuilder(processingAttributes).query(query); ProcessingResult process = controller.process( processingAttributes, "lucene", LingoClusteringAlgorithm.class.getName()); for (Cluster c : process.getClusters()) { strs.add(c.getLabel() + " >>>> " + c.getAllDocuments().size()); } return strs; }
/**
 * Command-line example: clusters the results of the query {@code "mining"} fetched from a
 * Lucene index and prints them through {@code ConsoleFormatter.displayResults}.
 *
 * @param args when exactly one argument is given it is used as the index path; otherwise
 *        the placeholder path below is used
 * @throws IOException if the index directory cannot be opened or closed
 */
public static void main(String[] args) throws IOException {
    String indexPath = "put your index path here or pass as the first argument";
    if (args.length == 1) {
        indexPath = args[0];
    }

    // Keep a reference to the opened directory so it can be closed once we are done.
    final FSDirectory indexDirectory = FSDirectory.open(new File(indexPath));
    try {
        /*
         * Prepare a map with component-specific attributes. Here, this map contains the
         * index location and the names of the fields used to fetch document title and
         * summary.
         */
        final Map<String, Object> luceneGlobalAttributes = new HashMap<String, Object>();
        LuceneDocumentSourceDescriptor.attributeBuilder(luceneGlobalAttributes)
            .directory(indexDirectory);

        /*
         * Specify fields providing data inside your Lucene index.
         * NOTE(review): the search field names differ from the title/content field names
         * above; they look like placeholders -- confirm against the actual index schema.
         */
        SimpleFieldMapperDescriptor.attributeBuilder(luceneGlobalAttributes)
            .titleField("title")
            .contentField("snippet")
            .searchFields(Arrays.asList(new String[] {"titleField", "fullContent"}));

        /*
         * Create a pooling controller and initialize it, passing the above attributes as
         * component-specific for Lucene. The global attributes map is empty. Note that we
         * provide an identifier ("lucene") for our specially-configured Lucene component;
         * we need to use this identifier when performing processing.
         */
        final Controller controller = ControllerFactory.createPooling();
        controller.init(
            new HashMap<String, Object>(),
            new ProcessingComponentConfiguration(
                LuceneDocumentSource.class, "lucene", luceneGlobalAttributes));
        try {
            /*
             * Perform processing.
             */
            String query = "mining";
            final Map<String, Object> processingAttributes = Maps.newHashMap();
            CommonAttributesDescriptor.attributeBuilder(processingAttributes).query(query);

            /*
             * We need to refer to the Lucene component by the identifier we set during
             * initialization. As we have not assigned any identifier to the
             * LingoClusteringAlgorithm we want to use, we can use its fully qualified
             * class name.
             */
            ProcessingResult process = controller.process(
                processingAttributes, "lucene", LingoClusteringAlgorithm.class.getName());

            ConsoleFormatter.displayResults(process);
        } finally {
            // Shut the controller down before closing the directory it was reading from.
            controller.dispose();
        }
    } finally {
        indexDirectory.close();
    }
}