/**
   * Walks the given hosted repository starting at the root path ({@code "/"}) and derives a
   * prefix list from the file paths collected during the walk.
   *
   * <p>When the collected tree has no entries below its root, success is recorded with an empty
   * prefix list. Otherwise success is recorded with the leaf paths of the tree, obtained through
   * {@link #getAllLeafPaths} using {@code config.getLocalScrapeDepth()} as depth limit. A
   * {@code WalkerException} is not propagated: it is recorded on the result as an error, using
   * the walker context's stop cause when one is set and the exception itself otherwise.
   *
   * @param mavenRepository the hosted repository to crawl
   * @return the discovery result carrying either the recorded success or the recorded error
   * @throws IOException declared by the method signature
   */
  @Override
  public DiscoveryResult<MavenHostedRepository> discoverLocalContent(
      final MavenHostedRepository mavenRepository) throws IOException {
    final DiscoveryResult<MavenHostedRepository> result =
        new DiscoveryResult<MavenHostedRepository>(mavenRepository);

    // set up a walk over the whole repository with the prefix collector attached
    final ResourceStoreRequest rootRequest = new ResourceStoreRequest("/");
    final WalkerContext walkerContext =
        new DefaultWalkerContext(
            mavenRepository, rootRequest, new DefaultStoreWalkerFilter(), true);
    final PrefixCollectorProcessor collector = new PrefixCollectorProcessor();
    walkerContext.getProcessors().add(collector);

    try {
      walker.walk(walkerContext);
      final ParentOMatic tree = collector.getParentOMatic();
      if (!tree.getRoot().isLeaf()) {
        // something was collected: publish the (depth limited) leaf paths
        result.recordSuccess(
            ID,
            "Repository crawled successfully",
            new ArrayListPrefixSource(getAllLeafPaths(tree, config.getLocalScrapeDepth())));
        return result;
      }
      // root without children means nothing was collected, so the list is empty as well
      result.recordSuccess(
          ID,
          "Repository crawled successfully (is empty)",
          new ArrayListPrefixSource(Collections.<String>emptyList()));
    } catch (WalkerException e) {
      final WalkerContext failedContext = e.getWalkerContext();
      if (failedContext.getStopCause() == null) {
        result.recordError(ID, e);
      } else {
        // prefer the cause the walker was stopped with over the wrapping exception
        result.recordError(ID, failedContext.getStopCause());
      }
    }
    return result;
  }
 /**
  * Returns the leaf paths of the given tree, optionally limiting its depth first.
  *
  * <p>Unless {@code maxDepth} equals {@link Integer#MAX_VALUE}, {@code cutNodesDeeperThan} is
  * invoked on the passed-in tree before the leaf paths are read.
  *
  * @param parentOMatic the tree to read the leaf paths from
  * @param maxDepth the depth to cut the tree at; {@link Integer#MAX_VALUE} skips the cut
  * @return the leaf paths as reported by the tree
  */
 protected List<String> getAllLeafPaths(final ParentOMatic parentOMatic, final int maxDepth) {
   final boolean depthLimited = maxDepth != Integer.MAX_VALUE;
   if (depthLimited) {
     // trim the tree before collecting
     parentOMatic.cutNodesDeeperThan(maxDepth);
   }
   return parentOMatic.getAllLeafPaths();
 }
 /**
  * Adds the path of each visited file item to the collecting tree; items that are not
  * {@code StorageFileItem} instances are skipped.
  *
  * @param context the walker context (not used by this method)
  * @param item the item currently being visited
  * @throws Exception declared by the method signature
  */
 @Override
 public void processItem(final WalkerContext context, final StorageItem item) throws Exception {
   // cancelation check comes first, before any item is looked at
   CancelableUtil.checkInterruption();
   if (!(item instanceof StorageFileItem)) {
     return;
   }
   parentOMatic.addPath(item.getPath());
 }
 /**
  * Scrapes the remote starting from the given page and returns the leaf paths that were found.
  *
  * <p>The recursive overload is started at depth 0 with a fresh {@code ParentOMatic}. If nothing
  * ends up below the tree root, the scrape is treated as failed: the context is stopped with an
  * explanatory message and {@code null} is returned.
  *
  * @param context the scrape context, stopped when zero entries were scraped
  * @param page the page to start scraping from
  * @return the leaf paths of the collected tree, or {@code null} when zero entries were scraped
  * @throws IOException if the recursive scrape fails with an I/O problem
  */
 @Override
 protected List<String> diveIn(final ScrapeContext context, final Page page) throws IOException {
   final ParentOMatic tree = new ParentOMatic();
   diveIn(context, page, 0, tree, tree.getRoot());
   if (!tree.getRoot().isLeaf()) {
     return tree.getAllLeafPaths();
   }
   // Zero scraped entries is handled as an error: either the remote really is empty (so why
   // proxy it?), or an unusual index page was misread and yielded nothing by mistake.
   context.stop(
       "Remote recognized as "
           + getTargetedServer()
           + ", but scraped 0 entries. This is considered a failure.");
   return null;
 }
Exemple #5
0
  /**
   * Basic scenario: a handful of paths is added and marked, then the resulting tree and its
   * marked paths are printed and verified.
   *
   * @throws Exception on any failure
   */
  @Test
  public void exampleCase() throws Exception {
    stringBuilder = new StringBuilder();
    final ParentOMatic tree = new ParentOMatic();
    // insertion order is kept exactly as listed
    final String[] paths = {
      "/foo/bam/car2",
      "/foo/baz",
      "/foo/baz/foo",
      "/foo/bar",
      "/foo/bar/car1",
      "/foo/bar/car3",
    };

    print("Example case");
    print("");
    for (final String path : paths) {
      tree.addAndMarkPath(path);
    }
    print(tree.dump());
    print("");
    print("Maven MD recreate would run against paths:");
    printListPerLine(tree.getMarkedPaths());
    doAssert();
  }
 /**
  * Recursively scrapes an index page: every anchor that qualifies as a deeper repository link is
  * added to the tree, and links whose absolute URL ends with {@code "/"} are followed as long as
  * the scrape depth of the context permits.
  *
  * <p>Anchors whose text starts with {@code "."} are skipped. A sub-page is fetched only when the
  * next depth is still below {@code context.getScrapeDepth()}, which saves a remote request that
  * the entry check of the recursive call would reject anyway.
  *
  * @param context the scrape context supplying the depth limit
  * @param page the already fetched page to process
  * @param currentDepth the depth of {@code page}; the initial call passes 0
  * @param parentOMatic the tree collecting the discovered paths
  * @param currentNode the tree node that corresponds to {@code page}
  * @throws IOException if fetching a sub-page fails
  * @throws UnexpectedPageResponse if a sub-page answers with a status code other than 200; the
  *     exception carries the URL and status line of that sub-page
  */
 protected void diveIn(
     final ScrapeContext context,
     final Page page,
     final int currentDepth,
     final ParentOMatic parentOMatic,
     final Node<Payload> currentNode)
     throws IOException {
   // entry protection
   if (currentDepth >= context.getScrapeDepth()) {
     return;
   }
   // cancelation
   CancelableUtil.checkInterruption();
   getLogger().debug("Processing page response from URL {}", page.getUrl());
   final Elements elements = page.getDocument().getElementsByTag("a");
   final List<String> pathElements = currentNode.getPathElements();
   final String currentPath = currentNode.getPath();
   for (Element element : elements) {
     if (!isDeeperRepoLink(context, pathElements, element)) {
       continue;
     }
     final String linkText = element.text();
     if (linkText.startsWith(".")) {
       // skip hidden paths
       continue;
     }
     final Node<Payload> newSibling = parentOMatic.addPath(currentPath + "/" + linkText);
     if (!element.absUrl("href").endsWith("/")) {
       // no trailing slash on the link: recorded in the tree, but not followed
       continue;
     }
     // "cut" recursion preemptively to save remote fetch (and then stop recursion due to depth)
     final int siblingDepth = currentDepth + 1;
     if (siblingDepth >= context.getScrapeDepth()) {
       continue;
     }
     maySleepBeforeSubsequentFetch();
     final String newSiblingEncodedUrl =
         getRemoteUrlForRepositoryPath(context, newSibling.getPathElements()) + "/";
     final Page siblingPage = Page.getPageFor(context, newSiblingEncodedUrl);
     if (siblingPage.getHttpResponse().getStatusLine().getStatusCode() != 200) {
       // we do expect strictly 200 here; report the page that actually failed (the sub-page),
       // not the parent page, which was fetched successfully
       throw new UnexpectedPageResponse(
           siblingPage.getUrl(), siblingPage.getHttpResponse().getStatusLine());
     }
     diveIn(context, siblingPage, siblingDepth, parentOMatic, newSibling);
   }
 }
Exemple #7
0
  /**
   * "Peter's case": Peter implemented this and found that snapshot removal (the main work) took
   * 3 minutes while all the "bookkeeping" took 20 minutes. The repository is of a "generated"
   * kind, with snapshot removals spread out evenly: three artifacts with three versions each.
   *
   * @throws Exception on any failure
   */
  @Test
  public void petersCase() throws Exception {
    stringBuilder = new StringBuilder();
    final ParentOMatic tree = new ParentOMatic();

    print("Peter's case");
    print("");
    // yields /g1/a1/v1 .. /g1/a3/v3, artifact by artifact and version by version
    for (int artifact = 1; artifact <= 3; artifact++) {
      for (int version = 1; version <= 3; version++) {
        tree.addAndMarkPath("/g1/a" + artifact + "/v" + version);
      }
    }
    print(tree.dump());
    print("");
    print("Maven MD recreate would run against paths:");
    printListPerLine(tree.getMarkedPaths());
    doAssert();
  }