示例#1
0
  /**
   * Periodic resource check: when free disk space or free memory drop below the
   * HIGH threshold, crawls are paused and index receive is disabled; once both
   * resources are back at HIGH, an automatically disabled index receive is
   * re-enabled.
   */
  public void resourceObserverJob() {
    MemoryControl.setDHTMbyte(getMinFreeMemory());

    normalizedDiskFree = getNormalizedDiskFree();
    normalizedMemoryFree = getNormalizedMemoryFree();

    final boolean diskBelowHigh = normalizedDiskFree.compareTo(Space.HIGH) < 0;
    final boolean memoryBelowHigh = normalizedMemoryFree.compareTo(Space.HIGH) < 0;

    if (!diskBelowHigh && !memoryBelowHigh) {
      // resources are fine again; undo an automatic index-receive shutdown, if any
      if (sb.getConfigBool(
          SwitchboardConstants.INDEX_RECEIVE_AUTODISABLED, false)) { // we were wrong!
        log.logInfo("enabling index receive");
        sb.setConfig(SwitchboardConstants.INDEX_RECEIVE_ALLOW, true);
        sb.peers.mySeed().setFlagAcceptRemoteIndex(true);
        sb.setConfig(SwitchboardConstants.INDEX_RECEIVE_AUTODISABLED, false);
      }
      log.logInfo("resources ok");
      return;
    }

    if (diskBelowHigh) { // pause crawls
      if (!sb.crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL)) {
        log.logInfo("pausing local crawls");
        sb.pauseCrawlJob(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL);
      }
      if (!sb.crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL)) {
        log.logInfo("pausing remote triggered crawls");
        sb.pauseCrawlJob(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL);
      }
    }

    // index receive is switched off when disk space is really LOW or memory is tight
    if ((normalizedDiskFree == Space.LOW || memoryBelowHigh)
        && sb.getConfigBool(SwitchboardConstants.INDEX_RECEIVE_ALLOW, false)) {
      log.logInfo("disabling index receive");
      sb.setConfig(SwitchboardConstants.INDEX_RECEIVE_ALLOW, false);
      sb.peers.mySeed().setFlagAcceptRemoteIndex(false);
      sb.setConfig(SwitchboardConstants.INDEX_RECEIVE_AUTODISABLED, true);
    }
  }
示例#2
0
  /**
   * Imports all links found in a bookmark (HTML) stream into the bookmarks
   * database. Each anchor becomes a bookmark whose title is the anchor's
   * {@code name} property (falling back to the URL when empty), tagged with the
   * given default tags.
   *
   * @param db target bookmarks database
   * @param baseURL base URL used by the scraper to resolve relative links
   * @param input reader over the bookmark HTML; not closed by this method
   * @param tag comma/space separated list of default tags for every imported bookmark
   * @param importPublic whether the imported bookmarks are marked public
   * @return the number of bookmarks that were saved
   */
  private static int importFromBookmarks(
      BookmarksDB db,
      final DigestURI baseURL,
      final InputStreamReader input,
      final String tag,
      final boolean importPublic) {

    int importCount = 0;

    Map<MultiProtocolURI, Properties> links = new HashMap<MultiProtocolURI, Properties>();
    final Set<String> tags = ListManager.string2set(tag); // this allows multiple default tags
    try {
      // load the links: scrape the input HTML for anchors
      final ContentScraper scraper = new ContentScraper(baseURL);
      final Writer writer = new TransformerWriter(null, null, scraper, null, false);
      try {
        FileUtils.copy(input, writer);
      } finally {
        // close in finally so the writer is not leaked when copy() throws
        writer.close();
      }
      links = scraper.getAnchors();
    } catch (final IOException e) {
      // best effort: an unreadable input simply yields zero imported bookmarks
      Log.logWarning(
          "BOOKMARKS", "error during load of links: " + e.getClass() + " " + e.getMessage());
    }
    for (final Entry<MultiProtocolURI, Properties> link : links.entrySet()) {
      final MultiProtocolURI url = link.getKey();
      String title = link.getValue().getProperty("name", "");
      Log.logInfo("BOOKMARKS", "importing link: " + url);
      if ("".equals(title)) { // cannot be displayed
        title = url.toString();
      }
      final Bookmark bm = db.new Bookmark(url.toString());
      bm.setProperty(Bookmark.BOOKMARK_TITLE, title);
      bm.setTags(tags);
      bm.setPublic(importPublic);
      db.saveBookmark(bm);

      importCount++;
    }

    return importCount;
  }
示例#3
0
 /**
  * Creates a resource observer bound to the given switchboard and resolves the
  * data path that is used for disc space measurement.
  */
 public ResourceObserver(final Switchboard switchboard) {
   this.sb = switchboard;
   this.path = switchboard.getDataPath(SwitchboardConstants.INDEX_PRIMARY_PATH, "");
   log.logInfo("path for disc space measurement: " + this.path);
 }
示例#4
0
  /**
   * Initializes the crawl switchboard for the given network: loads the active
   * and passive crawl-profile databases from {@code queuesRoot}, removes active
   * profiles whose URL filters are not valid regular expressions, and runs
   * {@code initActiveCrawlProfiles()}.
   *
   * @param networkName name of the network; must be non-empty — an empty or
   *     null name terminates the process (NOTE(review): exits with status 0,
   *     which conventionally signals success — confirm this is intended)
   * @param log logger for all initialization messages
   * @param queuesRoot directory containing the crawl-profile database files;
   *     created (including parents) if missing
   */
  public CrawlSwitchboard(final String networkName, final Log log, final File queuesRoot) {

    log.logInfo("Initializing Word Index for the network '" + networkName + "'.");

    // a missing network name is fatal: nothing below makes sense without it
    if (networkName == null || networkName.length() == 0) {
      log.logSevere("no network name given - shutting down");
      System.exit(0);
    }
    this.log = log;
    // cache keyed by raw byte[] handles, so an explicit byte-order comparator is required
    this.profilesActiveCrawlsCache =
        Collections.synchronizedMap(new TreeMap<byte[], CrawlProfile>(Base64Order.enhancedCoder));

    // make crawl profiles database and default profiles
    this.queuesRoot = queuesRoot;
    this.queuesRoot.mkdirs(); // NOTE(review): return value ignored; a failed mkdir surfaces later
    this.log.logConfig("Initializing Crawl Profiles");

    // load the active crawl profiles; profiles whose must-match / must-not-match
    // URL filters are not valid regular expressions are removed right away
    final File profilesActiveFile = new File(queuesRoot, DBFILE_ACTIVE_CRAWL_PROFILES);
    this.profilesActiveCrawls = loadFromDB(profilesActiveFile);
    for (final byte[] handle : this.profilesActiveCrawls.keySet()) {
      CrawlProfile p;
      try {
        p = new CrawlProfile(this.profilesActiveCrawls.get(handle));
      } catch (final IOException e) {
        p = null; // unreadable entry: skipped silently below
      } catch (final RowSpaceExceededException e) {
        p = null; // skipped silently as well
      }
      if (p == null) continue;
      if (!RegexHelper.isValidRegex(p.get(CrawlProfile.FILTER_URL_MUSTMATCH))) {
        removeActive(handle);
        Log.logWarning(
            "CrawlProfiles",
            "removed Profile "
                + p.handle()
                + ": "
                + p.name()
                + " from active crawls since "
                + CrawlProfile.FILTER_URL_MUSTMATCH
                + " is no valid regular expression: "
                + p.get(CrawlProfile.FILTER_URL_MUSTMATCH));
      } else if (!RegexHelper.isValidRegex(p.get(CrawlProfile.FILTER_URL_MUSTNOTMATCH))) {
        removeActive(handle);
        Log.logWarning(
            "CrawlProfiles",
            "removed Profile "
                + p.handle()
                + ": "
                + p.name()
                + " from active crawls since "
                + CrawlProfile.FILTER_URL_MUSTNOTMATCH
                + " is no valid regular expression: "
                + p.get(CrawlProfile.FILTER_URL_MUSTNOTMATCH));
      } else {
        Log.logInfo("CrawlProfiles", "loaded Profile " + p.handle() + ": " + p.name());
      }
    }
    initActiveCrawlProfiles(); // presumably (re)creates the default profiles — TODO confirm
    log.logInfo(
        "Loaded active crawl profiles from file "
            + profilesActiveFile.getName()
            + ", "
            + this.profilesActiveCrawls.size()
            + " entries");

    // load the passive crawl profiles; entries are parsed here only to log them,
    // and unreadable entries are skipped (NOTE(review): parse failures are
    // swallowed without any log message — confirm this is intended)
    final File profilesPassiveFile = new File(queuesRoot, DBFILE_PASSIVE_CRAWL_PROFILES);
    this.profilesPassiveCrawls = loadFromDB(profilesPassiveFile);
    for (final byte[] handle : this.profilesPassiveCrawls.keySet()) {
      CrawlProfile p;
      try {
        p = new CrawlProfile(this.profilesPassiveCrawls.get(handle));
        Log.logInfo("CrawlProfiles", "loaded Profile " + p.handle() + ": " + p.name());
      } catch (final IOException e) {
        continue;
      } catch (final RowSpaceExceededException e) {
        continue;
      }
    }
    // NOTE(review): the trailing length()/1024 value is logged without a unit label (kB?)
    log.logInfo(
        "Loaded passive crawl profiles from file "
            + profilesPassiveFile.getName()
            + ", "
            + this.profilesPassiveCrawls.size()
            + " entries"
            + ", "
            + profilesPassiveFile.length() / 1024);
  }