Beispiel #1
0
  /**
   * Handles the "retrieve" action: queries a connected peer for its remote-crawl RSS feed
   * and stacks every accepted URL onto the local crawl stacker.
   *
   * @param header the HTTP request header (not read by this handler)
   * @param post request parameters, may be null; recognized keys: "retrieve", "peer"
   * @param env the server environment; cast to {@code Switchboard}
   * @return the template replacement properties (host list filled by {@code listHosts})
   */
  public static serverObjects respond(
      final RequestHeader header, final serverObjects post, final serverSwitch env) {
    final Switchboard sb = (Switchboard) env;
    // return variable that accumulates replacements
    final serverObjects prop = new serverObjects();

    if (post != null && post.containsKey("retrieve")) {
      final String peerhash = post.get("peer", null);
      final yacySeed seed = (peerhash == null) ? null : sb.peers.getConnected(peerhash);
      final RSSFeed feed =
          (seed == null) ? null : yacyClient.queryRemoteCrawlURLs(sb.peers, seed, 20, 60000);
      if (feed != null) {
        for (final Hit item : feed) {
          // put url on remote crawl stack
          DigestURI url;
          try {
            url = new DigestURI(item.getLink());
          } catch (final MalformedURLException e) {
            // BUGFIX: previously url was set to null and the item still flowed into
            // urlInAcceptedDomain / enqueueEntry, risking a null URL on the crawl stack.
            // Skip unparsable links explicitly instead.
            env.getLog()
                .logWarning(
                    "crawlOrder: Rejected URL '" + item.getLink() + "': malformed URL");
            continue;
          }
          final Date loaddate = item.getPubDate();
          final DigestURI referrer = null; // referrer needed!
          final String urlRejectReason = sb.crawlStacker.urlInAcceptedDomain(url);
          if (urlRejectReason == null) {
            // stack url
            if (sb.getLog().isFinest())
              sb.getLog().logFinest("crawlOrder: stack: url='" + url + "'");
            sb.crawlStacker.enqueueEntry(
                new Request(
                    // NOTE(review): getBytes() uses the platform charset; peer hashes
                    // appear to be ASCII, but confirm and prefer an explicit charset.
                    peerhash.getBytes(),
                    url,
                    (referrer == null) ? null : referrer.hash(),
                    "REMOTE-CRAWLING",
                    loaddate,
                    sb.crawler.defaultRemoteProfile.handle(),
                    0,
                    0,
                    0,
                    item.getSize()));
          } else {
            env.getLog()
                .logWarning(
                    "crawlOrder: Rejected URL '" + urlToString(url) + "': " + urlRejectReason);
          }
        }
      }
    }

    listHosts(sb, prop);

    // return rewrite properties
    return prop;
  }
Beispiel #2
0
 /**
  * Renders a URL for log output, tolerating a missing value.
  *
  * @param url the URL to render; may be null
  * @return the normalized form of the URL, or the literal string "null" when absent
  */
 private static String urlToString(final DigestURI url) {
   if (url == null) {
     return "null";
   }
   return url.toNormalform(true, false);
 }