private static int importFromBookmarks(
        BookmarksDB db,
        final DigestURI baseURL,
        final InputStreamReader input,
        final String tag,
        final boolean importPublic) {

    int importCount = 0;
    Map<MultiProtocolURI, Properties> links = new HashMap<MultiProtocolURI, Properties>();
    String title;
    MultiProtocolURI url;
    Bookmark bm;
    final Set<String> tags = ListManager.string2set(tag); // this allows multiple default tags

    try {
        // load the links by scraping the input HTML
        final ContentScraper scraper = new ContentScraper(baseURL);
        // OutputStream os = new htmlFilterOutputStream(null, scraper, null, false);
        final Writer writer = new TransformerWriter(null, null, scraper, null, false);
        FileUtils.copy(input, writer);
        writer.close();
        links = scraper.getAnchors();
    } catch (final IOException e) {
        Log.logWarning("BOOKMARKS", "error during load of links: " + e.getClass() + " " + e.getMessage());
    }

    for (final Entry<MultiProtocolURI, Properties> link : links.entrySet()) {
        url = link.getKey();
        title = link.getValue().getProperty("name", "");
        Log.logInfo("BOOKMARKS", "links.get(url): " + url);
        if ("".equals(title)) { // no anchor text that could be displayed, fall back to the URL
            title = url.toString();
        }
        bm = db.new Bookmark(url.toString());
        bm.setProperty(Bookmark.BOOKMARK_TITLE, title);
        bm.setTags(tags);
        bm.setPublic(importPublic);
        db.saveBookmark(bm);
        importCount++;
    }
    return importCount;
}
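/*
 * A minimal usage sketch for importFromBookmarks, assuming it sits in the same
 * class (it is private static) and that an exported bookmarks HTML file and an
 * opened BookmarksDB are available. The file, charset, base URL and tag value
 * are illustrative; DigestURI is assumed to offer a String constructor that
 * throws MalformedURLException.
 */
public static int importBookmarksFile(final BookmarksDB db, final File bookmarksHtml) {
    try {
        final DigestURI base = new DigestURI("http://localhost/bookmarks.html"); // hypothetical base URL for relative links
        final InputStreamReader reader =
                new InputStreamReader(new FileInputStream(bookmarksHtml), "UTF-8");
        try {
            // import all anchors as private bookmarks tagged "imported"
            return importFromBookmarks(db, base, reader, "imported", false);
        } finally {
            reader.close();
        }
    } catch (final IOException e) {
        Log.logWarning("BOOKMARKS", "import failed: " + e.getMessage());
        return 0;
    }
}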
/**
 * Returns the free disk space on the volume of this.path, normalized to a
 * coarse level.
 *
 * @return
 *         <ul>
 *         <li><code>HIGH</code> if enough disk space is available</li>
 *         <li><code>MEDIUM</code> if free space has dropped below the configured minimum</li>
 *         <li><code>LOW</code> if free space has dropped below the hard limit</li>
 *         </ul>
 */
private Space getNormalizedDiskFree() {
    final long currentSpace = getUsableSpace(this.path);
    // if the free space cannot be determined (0 or negative), assume there is enough
    if (currentSpace < 1L) return Space.HIGH;

    Space ret = Space.HIGH;
    if (currentSpace < getMinFreeDiskSpace()) {
        log.logWarning("Volume " + this.path.toString() + ": free space ("
                + (currentSpace / 1024 / 1024) + " MB) is too low (< "
                + (getMinFreeDiskSpace() / 1024 / 1024) + " MB)");
        ret = Space.MEDIUM;
    }
    if (currentSpace < getMinFreeDiskSpace_hardlimit()) {
        ret = Space.LOW;
    }
    return ret;
}
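/*
 * A sketch of how a caller might react to the normalized level, assuming the
 * Space enum has the three constants used above. The reduceCacheUsage() and
 * pauseCrawls() callbacks are hypothetical placeholders for whatever throttling
 * the host application actually performs.
 */
private void checkDiskSpace() {
    switch (getNormalizedDiskFree()) {
        case HIGH:
            // enough space: nothing to do
            break;
        case MEDIUM:
            // below the soft limit: reduce disk usage but keep running
            reduceCacheUsage();
            break;
        case LOW:
            // below the hard limit: stop writing new data
            pauseCrawls();
            break;
    }
}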
public CrawlSwitchboard(final String networkName, final Log log, final File queuesRoot) {

    log.logInfo("Initializing Word Index for the network '" + networkName + "'.");
    if (networkName == null || networkName.length() == 0) {
        log.logSevere("no network name given - shutting down");
        System.exit(0);
    }
    this.log = log;
    this.profilesActiveCrawlsCache =
            Collections.synchronizedMap(new TreeMap<byte[], CrawlProfile>(Base64Order.enhancedCoder));

    // make crawl profiles database and default profiles
    this.queuesRoot = queuesRoot;
    this.queuesRoot.mkdirs();
    this.log.logConfig("Initializing Crawl Profiles");

    final File profilesActiveFile = new File(queuesRoot, DBFILE_ACTIVE_CRAWL_PROFILES);
    this.profilesActiveCrawls = loadFromDB(profilesActiveFile);
    for (final byte[] handle : this.profilesActiveCrawls.keySet()) {
        CrawlProfile p;
        try {
            p = new CrawlProfile(this.profilesActiveCrawls.get(handle));
        } catch (final IOException e) {
            p = null;
        } catch (final RowSpaceExceededException e) {
            p = null;
        }
        if (p == null) continue;
        // drop active profiles whose URL filters are not valid regular expressions
        if (!RegexHelper.isValidRegex(p.get(CrawlProfile.FILTER_URL_MUSTMATCH))) {
            removeActive(handle);
            Log.logWarning("CrawlProfiles", "removed Profile " + p.handle() + ": " + p.name()
                    + " from active crawls since " + CrawlProfile.FILTER_URL_MUSTMATCH
                    + " is not a valid regular expression: " + p.get(CrawlProfile.FILTER_URL_MUSTMATCH));
        } else if (!RegexHelper.isValidRegex(p.get(CrawlProfile.FILTER_URL_MUSTNOTMATCH))) {
            removeActive(handle);
            Log.logWarning("CrawlProfiles", "removed Profile " + p.handle() + ": " + p.name()
                    + " from active crawls since " + CrawlProfile.FILTER_URL_MUSTNOTMATCH
                    + " is not a valid regular expression: " + p.get(CrawlProfile.FILTER_URL_MUSTNOTMATCH));
        } else {
            Log.logInfo("CrawlProfiles", "loaded Profile " + p.handle() + ": " + p.name());
        }
    }
    initActiveCrawlProfiles();
    log.logInfo("Loaded active crawl profiles from file " + profilesActiveFile.getName()
            + ", " + this.profilesActiveCrawls.size() + " entries");

    final File profilesPassiveFile = new File(queuesRoot, DBFILE_PASSIVE_CRAWL_PROFILES);
    this.profilesPassiveCrawls = loadFromDB(profilesPassiveFile);
    for (final byte[] handle : this.profilesPassiveCrawls.keySet()) {
        CrawlProfile p;
        try {
            p = new CrawlProfile(this.profilesPassiveCrawls.get(handle));
            Log.logInfo("CrawlProfiles", "loaded Profile " + p.handle() + ": " + p.name());
        } catch (final IOException e) {
            continue;
        } catch (final RowSpaceExceededException e) {
            continue;
        }
    }
    log.logInfo("Loaded passive crawl profiles from file " + profilesPassiveFile.getName()
            + ", " + this.profilesPassiveCrawls.size() + " entries"
            + ", " + profilesPassiveFile.length() / 1024 + " KB");
}
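/*
 * Construction sketch, assuming DBFILE_ACTIVE_CRAWL_PROFILES and
 * DBFILE_PASSIVE_CRAWL_PROFILES name heap files created inside queuesRoot and
 * that Log offers a String constructor. The network name and directory below
 * are illustrative values only.
 */
final Log crawlLog = new Log("CRAWLER");                  // assumed Log(String) constructor
final File crawlQueuesRoot = new File("DATA/QUEUES");     // hypothetical data directory
final CrawlSwitchboard crawlSwitchboard =
        new CrawlSwitchboard("freeworld", crawlLog, crawlQueuesRoot);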