/** checks the resources and pauses crawls if necessary */ public void resourceObserverJob() { MemoryControl.setDHTMbyte(getMinFreeMemory()); normalizedDiskFree = getNormalizedDiskFree(); normalizedMemoryFree = getNormalizedMemoryFree(); if (normalizedDiskFree.compareTo(Space.HIGH) < 0 || normalizedMemoryFree.compareTo(Space.HIGH) < 0) { if (normalizedDiskFree.compareTo(Space.HIGH) < 0) { // pause crawls if (!sb.crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL)) { log.logInfo("pausing local crawls"); sb.pauseCrawlJob(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL); } if (!sb.crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL)) { log.logInfo("pausing remote triggered crawls"); sb.pauseCrawlJob(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL); } } if ((normalizedDiskFree == Space.LOW || normalizedMemoryFree.compareTo(Space.HIGH) < 0) && sb.getConfigBool(SwitchboardConstants.INDEX_RECEIVE_ALLOW, false)) { log.logInfo("disabling index receive"); sb.setConfig(SwitchboardConstants.INDEX_RECEIVE_ALLOW, false); sb.peers.mySeed().setFlagAcceptRemoteIndex(false); sb.setConfig(SwitchboardConstants.INDEX_RECEIVE_AUTODISABLED, true); } } else { if (sb.getConfigBool( SwitchboardConstants.INDEX_RECEIVE_AUTODISABLED, false)) { // we were wrong! log.logInfo("enabling index receive"); sb.setConfig(SwitchboardConstants.INDEX_RECEIVE_ALLOW, true); sb.peers.mySeed().setFlagAcceptRemoteIndex(true); sb.setConfig(SwitchboardConstants.INDEX_RECEIVE_AUTODISABLED, false); } log.logInfo("resources ok"); } }
private static int importFromBookmarks( BookmarksDB db, final DigestURI baseURL, final InputStreamReader input, final String tag, final boolean importPublic) { int importCount = 0; Map<MultiProtocolURI, Properties> links = new HashMap<MultiProtocolURI, Properties>(); String title; MultiProtocolURI url; Bookmark bm; final Set<String> tags = ListManager.string2set(tag); // this allow multiple default tags try { // load the links final ContentScraper scraper = new ContentScraper(baseURL); // OutputStream os = new htmlFilterOutputStream(null, scraper, null, false); final Writer writer = new TransformerWriter(null, null, scraper, null, false); FileUtils.copy(input, writer); writer.close(); links = scraper.getAnchors(); } catch (final IOException e) { Log.logWarning( "BOOKMARKS", "error during load of links: " + e.getClass() + " " + e.getMessage()); } for (final Entry<MultiProtocolURI, Properties> link : links.entrySet()) { url = link.getKey(); title = link.getValue().getProperty("name", ""); Log.logInfo("BOOKMARKS", "links.get(url)"); if ("".equals(title)) { // cannot be displayed title = url.toString(); } bm = db.new Bookmark(url.toString()); bm.setProperty(Bookmark.BOOKMARK_TITLE, title); bm.setTags(tags); bm.setPublic(importPublic); db.saveBookmark(bm); importCount++; } return importCount; }
public ResourceObserver(final Switchboard sb) { this.sb = sb; this.path = sb.getDataPath(SwitchboardConstants.INDEX_PRIMARY_PATH, ""); log.logInfo("path for disc space measurement: " + this.path); }
/**
 * Initializes the crawl switchboard for the given network: creates the queues
 * directory, loads the active and passive crawl-profile databases from disk,
 * drops active profiles whose URL filters are not valid regular expressions,
 * and installs the default active crawl profiles.
 *
 * @param networkName name of the network; the process is terminated if empty
 * @param log logger used for all initialization messages
 * @param queuesRoot directory holding the crawl-profile database files
 */
public CrawlSwitchboard(final String networkName, final Log log, final File queuesRoot) {
    log.logInfo("Initializing Word Index for the network '" + networkName + "'.");
    if (networkName == null || networkName.length() == 0) {
        log.logSevere("no network name given - shutting down");
        // NOTE(review): exits with status 0 despite being an error path — confirm intended
        System.exit(0);
    }
    this.log = log;
    // cache keyed by raw profile handles; Base64Order supplies byte[] ordering,
    // since byte[] has no value-based equals/compareTo of its own
    this.profilesActiveCrawlsCache =
        Collections.synchronizedMap(new TreeMap<byte[], CrawlProfile>(Base64Order.enhancedCoder));

    // make crawl profiles database and default profiles
    this.queuesRoot = queuesRoot;
    this.queuesRoot.mkdirs();
    this.log.logConfig("Initializing Crawl Profiles");

    final File profilesActiveFile = new File(queuesRoot, DBFILE_ACTIVE_CRAWL_PROFILES);
    this.profilesActiveCrawls = loadFromDB(profilesActiveFile);
    // validate every active profile; profiles that fail to load or carry an
    // invalid URL filter regex are removed from the active set
    for (final byte[] handle : this.profilesActiveCrawls.keySet()) {
        CrawlProfile p;
        try {
            p = new CrawlProfile(this.profilesActiveCrawls.get(handle));
        } catch (final IOException e) {
            p = null; // unreadable entry: skip silently
        } catch (final RowSpaceExceededException e) {
            p = null; // unreadable entry: skip silently
        }
        if (p == null) continue;
        // NOTE(review): removeActive(handle) is called while iterating keySet();
        // safe only if loadFromDB returns a map whose key iteration tolerates
        // removal — verify, otherwise this can throw ConcurrentModificationException
        if (!RegexHelper.isValidRegex(p.get(CrawlProfile.FILTER_URL_MUSTMATCH))) {
            removeActive(handle);
            Log.logWarning(
                "CrawlProfiles",
                "removed Profile "
                    + p.handle()
                    + ": "
                    + p.name()
                    + " from active crawls since "
                    + CrawlProfile.FILTER_URL_MUSTMATCH
                    + " is no valid regular expression: "
                    + p.get(CrawlProfile.FILTER_URL_MUSTMATCH));
        } else if (!RegexHelper.isValidRegex(p.get(CrawlProfile.FILTER_URL_MUSTNOTMATCH))) {
            removeActive(handle);
            Log.logWarning(
                "CrawlProfiles",
                "removed Profile "
                    + p.handle()
                    + ": "
                    + p.name()
                    + " from active crawls since "
                    + CrawlProfile.FILTER_URL_MUSTNOTMATCH
                    + " is no valid regular expression: "
                    + p.get(CrawlProfile.FILTER_URL_MUSTNOTMATCH));
        } else {
            Log.logInfo("CrawlProfiles", "loaded Profile " + p.handle() + ": " + p.name());
        }
    }
    initActiveCrawlProfiles();
    log.logInfo(
        "Loaded active crawl profiles from file "
            + profilesActiveFile.getName()
            + ", "
            + this.profilesActiveCrawls.size()
            + " entries");

    final File profilesPassiveFile = new File(queuesRoot, DBFILE_PASSIVE_CRAWL_PROFILES);
    this.profilesPassiveCrawls = loadFromDB(profilesPassiveFile);
    // passive profiles are only logged, never validated or removed here
    for (final byte[] handle : this.profilesPassiveCrawls.keySet()) {
        CrawlProfile p;
        try {
            p = new CrawlProfile(this.profilesPassiveCrawls.get(handle));
            Log.logInfo("CrawlProfiles", "loaded Profile " + p.handle() + ": " + p.name());
        } catch (final IOException e) {
            continue; // unreadable entry: skip
        } catch (final RowSpaceExceededException e) {
            continue; // unreadable entry: skip
        }
    }
    log.logInfo(
        "Loaded passive crawl profiles from file "
            + profilesPassiveFile.getName()
            + ", "
            + this.profilesPassiveCrawls.size()
            + " entries"
            + ", "
            + profilesPassiveFile.length() / 1024);
}