@Override public DiscoveryResult<MavenHostedRepository> discoverLocalContent( final MavenHostedRepository mavenRepository) throws IOException { final DiscoveryResult<MavenHostedRepository> discoveryResult = new DiscoveryResult<MavenHostedRepository>(mavenRepository); final WalkerContext context = new DefaultWalkerContext( mavenRepository, new ResourceStoreRequest("/"), new DefaultStoreWalkerFilter(), true); final PrefixCollectorProcessor prefixCollectorProcessor = new PrefixCollectorProcessor(); context.getProcessors().add(prefixCollectorProcessor); try { walker.walk(context); final ParentOMatic parentOMatic = prefixCollectorProcessor.getParentOMatic(); if (parentOMatic.getRoot().isLeaf()) { // tree is basically empty, so make the list too discoveryResult.recordSuccess( ID, "Repository crawled successfully (is empty)", new ArrayListPrefixSource(Collections.<String>emptyList())); } else { discoveryResult.recordSuccess( ID, "Repository crawled successfully", new ArrayListPrefixSource(getAllLeafPaths(parentOMatic, config.getLocalScrapeDepth()))); } } catch (WalkerException e) { if (e.getWalkerContext().getStopCause() != null) { discoveryResult.recordError(ID, e.getWalkerContext().getStopCause()); } else { discoveryResult.recordError(ID, e); } } return discoveryResult; }
@Override protected List<String> diveIn(final ScrapeContext context, final Page page) throws IOException { // we use the great and all-mighty ParentOMatic final ParentOMatic parentOMatic = new ParentOMatic(); diveIn(context, page, 0, parentOMatic, parentOMatic.getRoot()); // Special case: scraped with 0 entry, we consider this as an error // Remote repo empty? Why are you proxying it? Or worse, some scrape // exotic index page and we end up with 0 entries by mistake? if (parentOMatic.getRoot().isLeaf()) { context.stop( "Remote recognized as " + getTargetedServer() + ", but scraped 0 entries. This is considered a failure."); return null; } final List<String> entries = parentOMatic.getAllLeafPaths(); return entries; }