@SuppressWarnings("unchecked") @Override public List<String> getClusterByCarrot2(String query) { // TODO Auto-generated method stub List<String> strs = new ArrayList<String>(); final Controller controller = ControllerFactory.createCachingPooling(IDocumentSource.class); final List<org.carrot2.core.Document> documents = Lists.newArrayList(); try { q = getParser().parse(QueryParserUtil.escape(query)); docs = getIndexSearcher().search(q, Integer.MAX_VALUE); hits = docs.scoreDocs; for (int i = 0; i < hits.length; i++) { Document doc = getIndexSearcher().doc(hits[i].doc); documents.add( new org.carrot2.core.Document( doc.get(CONTENTS_FIELD), doc.get(TITLE_FIELD), doc.get(USER_FIELD))); } final ProcessingResult byTopicClusters = controller.process(documents, query, LingoClusteringAlgorithm.class); final List<Cluster> clustersByTopic = byTopicClusters.getClusters(); final ProcessingResult byDomainClusters = controller.process(documents, query, ByUrlClusteringAlgorithm.class); final List<Cluster> clustersByDomain = byDomainClusters.getClusters(); for (Cluster c : clustersByDomain) { strs.add(c.getLabel()); } for (Cluster c : clustersByTopic) { strs.add(c.getLabel()); } } catch (Exception ex) { } return strs; }
@SuppressWarnings("unchecked") @Override public List<String> getClusterByCarrotVersion2(String query) { // TODO Auto-generated method stub List<String> strs = new ArrayList<String>(); final Controller controller = ControllerFactory.createPooling(); final Map<String, Object> luceneGlobalAttributes = new HashMap<String, Object>(); LuceneDocumentSourceDescriptor.attributeBuilder(luceneGlobalAttributes).directory(directory); SimpleFieldMapperDescriptor.attributeBuilder(luceneGlobalAttributes) .titleField(TITLE_FIELD) .contentField(CONTENTS_FIELD) .searchFields(Arrays.asList(new String[] {TITLE_FIELD, CONTENTS_FIELD})); controller.init( new HashMap<String, Object>(), new ProcessingComponentConfiguration( LuceneDocumentSource.class, "lucene", luceneGlobalAttributes)); final Map<String, Object> processingAttributes = Maps.newHashMap(); CommonAttributesDescriptor.attributeBuilder(processingAttributes).query(query); ProcessingResult process = controller.process( processingAttributes, "lucene", LingoClusteringAlgorithm.class.getName()); for (Cluster c : process.getClusters()) { strs.add(c.getLabel() + " >>>> " + c.getAllDocuments().size()); } return strs; }
/**
 * Issues a regular request followed by a request for {@code WebDocumentSource.QUERY_FAILURE} on
 * the same caching controller, and checks that the second one fails with a
 * {@code ProcessingException} whose cause mentions "Synthetic failure".
 */
@UsesExternalServices
@Test
public void testRequestIndependence() {
  @SuppressWarnings("unchecked")
  final Controller cachingController =
      ControllerFactory.createCachingPooling(org.carrot2.core.IDocumentSource.class);
  closeAfterTest(cachingController);

  final int requestedResults = 50;
  final Map<String, Object> requestAttributes = Maps.newHashMap();

  // First request: an ordinary query.
  CommonAttributesDescriptor.attributeBuilder(requestAttributes)
      .results(requestedResults)
      .query("data mining");
  cachingController.process(
      requestAttributes,
      org.carrot2.webapp.source.WebDocumentSource.class,
      LingoClusteringAlgorithm.class);

  // Second request: the same attribute map, wiped and refilled with the failing query.
  requestAttributes.clear();
  CommonAttributesDescriptor.attributeBuilder(requestAttributes)
      .results(requestedResults)
      .query(WebDocumentSource.QUERY_FAILURE);

  ProcessingException failure = null;
  try {
    cachingController.process(
        requestAttributes,
        org.carrot2.webapp.source.WebDocumentSource.class,
        LingoClusteringAlgorithm.class);
  } catch (ProcessingException e) {
    failure = e;
  }

  if (failure == null) {
    // The failing query went through without an exception.
    fail();
  }
  assertThat(failure.getCause().getMessage()).contains("Synthetic failure");
}
/**
 * Clusters documents fetched from a Lucene index and prints the result to the console.
 *
 * <p>The index location is taken from the single command-line argument if exactly one is given;
 * otherwise a placeholder path is used. If the path is not an existing directory, a message is
 * written to standard error and the method returns without doing any processing.
 *
 * @param args optionally, the path of the Lucene index directory as the only element
 * @throws IOException propagated from opening the index directory
 */
public static void main(String[] args) throws IOException {
  String indexPath = "put your index path here or pass as the first argument";
  if (args.length == 1) {
    indexPath = args[0];
  }

  // Sanity check: fail early with a readable message instead of an obscure failure later.
  final File indexDir = new File(indexPath);
  if (!indexDir.isDirectory()) {
    System.err.println("Index directory does not exist: " + indexPath);
    return;
  }

  /*
   * Prepare a map with component-specific attributes. This map contains the index location
   * and the names of the fields used to fetch the document title and summary.
   */
  final Map<String, Object> luceneGlobalAttributes = new HashMap<String, Object>();
  LuceneDocumentSourceDescriptor.attributeBuilder(luceneGlobalAttributes)
      .directory(FSDirectory.open(indexDir));

  /*
   * Specify fields providing data inside your Lucene index.
   * NOTE(review): the search fields ("titleField", "fullContent") differ from the title and
   * content fields ("title", "snippet"); adjust all of them to match your own index.
   */
  SimpleFieldMapperDescriptor.attributeBuilder(luceneGlobalAttributes)
      .titleField("title")
      .contentField("snippet")
      .searchFields(Arrays.asList("titleField", "fullContent"));

  /*
   * Create a pooling controller that will reuse processing component instances, but will not
   * perform any caching of results produced by components. Caching of documents from the
   * Lucene index is left to Lucene and the operating system caches.
   */
  final Controller controller = ControllerFactory.createPooling();
  try {
    /*
     * Initialize the controller, passing the above attributes as component-specific for
     * Lucene. The global attributes map is empty. The specially-configured Lucene component
     * is registered under the identifier "lucene", which must be used when processing.
     */
    controller.init(
        new HashMap<String, Object>(),
        new ProcessingComponentConfiguration(
            LuceneDocumentSource.class, "lucene", luceneGlobalAttributes));

    /*
     * Perform processing.
     */
    final String query = "mining";
    final Map<String, Object> processingAttributes = Maps.newHashMap();
    CommonAttributesDescriptor.attributeBuilder(processingAttributes).query(query);

    /*
     * The Lucene component is referred to by the identifier set during initialization. No
     * identifier was assigned to LingoClusteringAlgorithm, so we can use its fully qualified
     * class name.
     */
    final ProcessingResult process =
        controller.process(
            processingAttributes, "lucene", LingoClusteringAlgorithm.class.getName());

    ConsoleFormatter.displayResults(process);
  } finally {
    // Release the pooled components held by the controller.
    controller.dispose();
  }
}
/** Entry point. */ public static void main(String[] args) throws IOException { /* * We will use the CachingController for this example. Running * LuceneDocumentSource within the CachingController will let us open the index * once per component initialization and not once per query, which would be the * case with SimpleController. We will also use this opportunity to show how * component-specific attribute values can be passed during CachingComponent * initialization. */ /* * Create a caching controller that will reuse processing component instances, but * will not perform any caching of results produced by components. We will leave * caching of documents from Lucene index to Lucene and the operating system * caches. */ final Controller controller = ControllerFactory.createPooling(); /* * Prepare a map with component-specific attributes. Here, this map will contain * the index location and names of fields to be used to fetch document title and * summary. */ final Map<String, Object> luceneGlobalAttributes = new HashMap<String, Object>(); String indexPath = "put your index path here or pass as the first argument"; if (args.length == 1) { indexPath = args[0]; } // Sanity check. if (!new File(indexPath).isDirectory()) { System.err.println("Index directory does not exist: " + indexPath); return; } LuceneDocumentSourceDescriptor.attributeBuilder(luceneGlobalAttributes) .directory(FSDirectory.open(new File(indexPath))); /* * In ClusteringDataFromLucene we used a simple configuration of * LuceneDocumentSource whereby we only provided the names of Lucene fields to be * used for titles and summaries. If more advanced mapping of Lucene documents is * required, you can implement your own version of IFieldMapper as below. * * Note that we could also provide here an instance of the mapper rather than * its class. The differences are summarized below: * * > Class: Class has to have a no-parameter constructor. 
Instances of the * class will not be shared between processing threads, which means the * implementation does not have to be thread-safe. Recommended in most * situations unless the instances are expensive to create. * * > Instance: The provided instance will be shared across processing threads, * which means the implementation MUST be thread-safe. */ LuceneDocumentSourceDescriptor.attributeBuilder(luceneGlobalAttributes) .fieldMapper(new CustomFieldMapper()); /* * The Analyzer used by Lucene while searching can also be provided via factory * because it does not have a parameterless constructor. */ LuceneDocumentSourceDescriptor.attributeBuilder(luceneGlobalAttributes) .analyzer(StandardAnalyzerFactory.class); /* * Initialize the controller passing the above attributes as component-specific * for Lucene. The global attributes map will be empty. Note that we've provided * an identifier for our specially-configured Lucene component, we'll need to use * this identifier when performing processing. */ controller.init( new HashMap<String, Object>(), new ProcessingComponentConfiguration( LuceneDocumentSource.class, "lucene", luceneGlobalAttributes)); /* * Perform processing. */ final String query = "mining"; final Map<String, Object> processingAttributes = Maps.newHashMap(); CommonAttributesDescriptor.attributeBuilder(processingAttributes).query(query); /* * We need to refer to the Lucene component by its identifier we set during * initialization. As we've not assigned any identifier to the * LingoClusteringAlgorithm we want to use, we can its fully qualified class name. */ ProcessingResult process = controller.process( processingAttributes, "lucene", LingoClusteringAlgorithm.class.getName()); ConsoleFormatter.displayResults(process); }
@Test public void checkBasicAuthAccess() throws Throwable { final Server server = new Server(); final SelectChannelConnector connector = new SelectChannelConnector(); connector.setPort(/* any */ 0); connector.setReuseAddress(false); connector.setSoLingerTime(0); server.addConnector(connector); HashLoginService loginService = new HashLoginService(); loginService.putUser("username", new Password("userpass"), new String[] {"role1", "role2"}); final CountDownLatch latch = new CountDownLatch(1); WebAppContext wac = new WebAppContext(); wac.getSecurityHandler().setLoginService(loginService); wac.setContextPath("/"); connector.addLifeCycleListener( new ListenerAdapter() { public void lifeCycleStarted(LifeCycle lc) { System.out.println("Started on port: " + connector.getLocalPort()); latch.countDown(); } public void lifeCycleFailure(LifeCycle lc, Throwable t) { System.out.println("Failure: " + t); latch.countDown(); } }); wac.setParentLoaderPriority(true); URL resource = getClass().getResource("/auth/basic/kaczynski.xml"); assertThat(resource.toURI().getScheme()).isEqualTo("file"); File webapp = new File(resource.toURI()); webapp = webapp.getParentFile(); // /auth/basic webapp = webapp.getParentFile(); // /auth wac.setWar(webapp.getAbsolutePath()); wac.setClassLoader(Thread.currentThread().getContextClassLoader()); server.setHandler(wac); server.setStopAtShutdown(true); try { server.start(); latch.await(); System.setProperty(HttpAuthHub.USERNAME_PROPERTY, "username"); System.setProperty(HttpAuthHub.PASSWORD_PROPERTY, "userpass"); Controller c = ControllerFactory.createSimple(); try { Map<String, Object> attrs = new HashMap<String, Object>(); XmlDocumentSourceDescriptor.attributeBuilder(attrs) .xml( new URLResourceWithParams( new URL( "http://localhost:" + connector.getLocalPort() + "/basic/kaczynski.xml"))); ProcessingResult r = c.process(attrs, XmlDocumentSource.class); assertThat(r.getDocuments()).hasSize(50); } finally { c.dispose(); } } finally { server.stop(); } }
@SuppressForbidden(reason = "C2 integration (File API)") @Override protected void doStart() throws ElasticsearchException { try { Settings.Builder builder = Settings.builder(); Path pluginConfigPath = environment.configFile().resolve(ClusteringPlugin.PLUGIN_NAME); if (!Files.isDirectory(pluginConfigPath)) { Path srcConfig = Paths.get("src/main/config"); if (Files.isDirectory(srcConfig)) { // Allow running from within the IDE. pluginConfigPath = srcConfig; } else { throw new ElasticsearchException("Config folder missing: " + pluginConfigPath); } } else { logger.info("Configuration files at: {}", pluginConfigPath.toAbsolutePath()); } for (String configName : new String[] {"config.yml", "config.yaml", "config.json", "config.properties"}) { try { Path resolved = pluginConfigPath.resolve(configName); if (resolved != null && Files.exists(resolved)) { builder.loadFromPath(resolved); } } catch (NoClassDefFoundError e) { logger.warn("Could not parse: {}", e, configName); } } Settings c2Settings = builder.build(); // Parse suite descriptors with loggers turned off (shut them up a bit). 
final Path suitePath = pluginConfigPath.resolve(c2Settings.get(DEFAULT_SUITE_PROPERTY_NAME)); if (!Files.isRegularFile(suitePath)) { throw new ElasticsearchException( "Could not find algorithm suite: " + suitePath.toAbsolutePath().normalize()); } final ResourceLookup suiteLookup = new ResourceLookup(new DirLocator(suitePath.getParent().toFile())); final IResource suiteResource = suiteLookup.getFirst(suitePath.getFileName().toString()); final List<String> failed = Lists.newArrayList(); final ProcessingComponentSuite suite = LoggerUtils.quietCall( new Callable<ProcessingComponentSuite>() { public ProcessingComponentSuite call() throws Exception { ProcessingComponentSuite suite = ProcessingComponentSuite.deserialize(suiteResource, suiteLookup); for (ProcessingComponentDescriptor desc : suite.removeUnavailableComponents()) { failed.add(desc.getId()); if (isNoClassDefFound(desc.getInitializationFailure())) { logger.debug("Algorithm not available on classpath: {}", desc.getId()); } else { logger.warn( "Algorithm initialization failed: {}", desc.getInitializationFailure(), desc.getId()); } } return suite; } }, Logger.getLogger(ProcessingComponentDescriptor.class), Logger.getLogger(ReflectionUtils.class)); algorithms = Lists.newArrayList(); for (ProcessingComponentDescriptor descriptor : suite.getAlgorithms()) { algorithms.add(descriptor.getId()); } algorithms = Collections.unmodifiableList(algorithms); if (!algorithms.isEmpty()) { logger.info("Available clustering components: {}", Joiner.on(", ").join(algorithms)); } if (!failed.isEmpty()) { logger.info("Unavailable clustering components: {}", Joiner.on(", ").join(failed)); } final Path resourcesPath = pluginConfigPath .resolve(c2Settings.get(DEFAULT_RESOURCES_PROPERTY_NAME, ".")) .toAbsolutePath() .normalize(); logger.info("Lexical resources dir: {}", resourcesPath); final ResourceLookup resourceLookup = new ResourceLookup( new DirLocator(resourcesPath.toFile()), new 
ClassLoaderLocator(ControllerSingleton.class.getClassLoader())); // Change the default resource lookup to include the configured location. Map<String, Object> c2SettingsAsMap = Maps.newHashMap(); DefaultLexicalDataFactoryDescriptor.attributeBuilder(c2SettingsAsMap) .resourceLookup(resourceLookup); c2SettingsAsMap.putAll(c2Settings.getAsMap()); // Set up the license for Lingo3G, if it's available. Path lingo3gLicense = scanForLingo3GLicense(environment, pluginConfigPath); if (lingo3gLicense != null && Files.isReadable(lingo3gLicense)) { c2SettingsAsMap.put("license", new FileResource(lingo3gLicense.toFile())); } else if (algorithms.contains("lingo3g")) { logger.warn( "Lingo3G is on classpath, but no licenses have been found. Check out the documentation."); } // Create component pool. Integer poolSize = c2Settings.getAsInt(DEFAULT_COMPONENT_SIZE_PROPERTY_NAME, 0); if (poolSize > 0) { controller = ControllerFactory.createPooling(poolSize); } else { controller = ControllerFactory.createPooling(); } controller.init(c2SettingsAsMap, suite.getComponentConfigurations()); } catch (Exception e) { throw new ElasticsearchException("Could not start Carrot2 controller.", e); } if (algorithms == null || algorithms.isEmpty()) { throw new ElasticsearchException( "No registered/ available clustering algorithms? Check the logs, it's odd."); } }