/**
 * Changes existing entries in a blacklist.
 *
 * <p>The old entries are removed via {@code removeEntries}; each new entry is then appended to
 * the blacklist file as {@code host/path} and added to {@code Switchboard.urlBlacklist} for every
 * supported type whose list set contains this blacklist. An entry without a {@code '/'} is
 * treated as a host with the path {@code ".*"}.
 *
 * @param blacklistToUse The blacklist which contains the entries.
 * @param supportedBlacklistTypes Types of blacklists which the entries are to be changed in.
 * @param oldEntry Entries to be changed.
 * @param newEntry Changed entries.
 * @return The number of new entries ({@code newEntry.length}).
 */
private static int alterEntries(
        final String blacklistToUse,
        final String[] supportedBlacklistTypes,
        final String[] oldEntry,
        final String[] newEntry) {
    removeEntries(blacklistToUse, supportedBlacklistTypes, oldEntry);

    PrintWriter pw = null;
    try {
        // open the blacklist file in append mode
        pw = new PrintWriter(new FileWriter(new File(ListManager.listsPath, blacklistToUse), true));
        for (final String n : newEntry) {
            final int pos = n.indexOf('/');
            final String host;
            final String path;
            if (pos < 0) {
                host = n;
                path = ".*";
            } else {
                host = n.substring(0, pos);
                path = n.substring(pos + 1);
            }
            pw.println(host + "/" + path);
            for (final String s : supportedBlacklistTypes) {
                if (ListManager.listSetContains(s + ".BlackLists", blacklistToUse)) {
                    Switchboard.urlBlacklist.add(s, host, path);
                }
            }
            SearchEventCache.cleanupEvents(true);
        }
    } catch (final IOException e) {
        Log.logSevere("BLACKLIST-CLEANER", "error on writing altered entries to blacklist", e);
    } finally {
        // close in finally so the file handle is released even if the blacklist engine throws
        if (pw != null) {
            pw.close();
        }
    }
    return newEntry.length;
}
private static int importFromBookmarks( BookmarksDB db, final DigestURI baseURL, final InputStreamReader input, final String tag, final boolean importPublic) { int importCount = 0; Map<MultiProtocolURI, Properties> links = new HashMap<MultiProtocolURI, Properties>(); String title; MultiProtocolURI url; Bookmark bm; final Set<String> tags = ListManager.string2set(tag); // this allow multiple default tags try { // load the links final ContentScraper scraper = new ContentScraper(baseURL); // OutputStream os = new htmlFilterOutputStream(null, scraper, null, false); final Writer writer = new TransformerWriter(null, null, scraper, null, false); FileUtils.copy(input, writer); writer.close(); links = scraper.getAnchors(); } catch (final IOException e) { Log.logWarning( "BOOKMARKS", "error during load of links: " + e.getClass() + " " + e.getMessage()); } for (final Entry<MultiProtocolURI, Properties> link : links.entrySet()) { url = link.getKey(); title = link.getValue().getProperty("name", ""); Log.logInfo("BOOKMARKS", "links.get(url)"); if ("".equals(title)) { // cannot be displayed title = url.toString(); } bm = db.new Bookmark(url.toString()); bm.setProperty(Bookmark.BOOKMARK_TITLE, title); bm.setTags(tags); bm.setPublic(importPublic); db.saveBookmark(bm); importCount++; } return importCount; }
/**
 * Loads crawl profiles from a DB file.
 *
 * <p>If the heap cannot be opened, the exception is logged, the file is deleted and a second
 * attempt is made with the same parameters.
 *
 * @param file DB file
 * @return crawl profile data, or {@code null} if the second attempt fails as well
 */
private MapHeap loadFromDB(final File file) {
    MapHeap ret;
    try {
        ret = new MapHeap(file, Word.commonHashLength, NaturalOrder.naturalOrder, 1024 * 64, 500, ' ');
    } catch (final IOException e) {
        // log once, then discard the unreadable file and start with a fresh heap
        Log.logException(e);
        FileUtils.deletedelete(file);
        try {
            ret = new MapHeap(file, Word.commonHashLength, NaturalOrder.naturalOrder, 1024 * 64, 500, ' ');
        } catch (final IOException e1) {
            Log.logException(e1);
            ret = null;
        }
    }
    return ret;
}
/**
 * Discards all active crawl profiles: clears the profile cache, deletes the active-profiles DB
 * file if it exists, opens a new heap on that file and finally calls
 * {@code initActiveCrawlProfiles()}. If the heap cannot be opened, the exception is logged and
 * the heap reference is set to {@code null}.
 */
private void resetProfiles() {
    this.profilesActiveCrawlsCache.clear();

    final File activeProfilesFile = new File(this.queuesRoot, DBFILE_ACTIVE_CRAWL_PROFILES);
    if (activeProfilesFile.exists()) {
        FileUtils.deletedelete(activeProfilesFile);
    }

    MapHeap freshHeap = null;
    try {
        freshHeap =
                new MapHeap(
                        activeProfilesFile,
                        Word.commonHashLength,
                        NaturalOrder.naturalOrder,
                        1024 * 64,
                        500,
                        ' ');
    } catch (final IOException openFailure) {
        Log.logException(openFailure);
    }
    this.profilesActiveCrawls = freshHeap;

    initActiveCrawlProfiles();
}
/** checks the resources and pauses crawls if necessary */ public void resourceObserverJob() { MemoryControl.setDHTMbyte(getMinFreeMemory()); normalizedDiskFree = getNormalizedDiskFree(); normalizedMemoryFree = getNormalizedMemoryFree(); if (normalizedDiskFree.compareTo(Space.HIGH) < 0 || normalizedMemoryFree.compareTo(Space.HIGH) < 0) { if (normalizedDiskFree.compareTo(Space.HIGH) < 0) { // pause crawls if (!sb.crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL)) { log.logInfo("pausing local crawls"); sb.pauseCrawlJob(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL); } if (!sb.crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL)) { log.logInfo("pausing remote triggered crawls"); sb.pauseCrawlJob(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL); } } if ((normalizedDiskFree == Space.LOW || normalizedMemoryFree.compareTo(Space.HIGH) < 0) && sb.getConfigBool(SwitchboardConstants.INDEX_RECEIVE_ALLOW, false)) { log.logInfo("disabling index receive"); sb.setConfig(SwitchboardConstants.INDEX_RECEIVE_ALLOW, false); sb.peers.mySeed().setFlagAcceptRemoteIndex(false); sb.setConfig(SwitchboardConstants.INDEX_RECEIVE_AUTODISABLED, true); } } else { if (sb.getConfigBool( SwitchboardConstants.INDEX_RECEIVE_AUTODISABLED, false)) { // we were wrong! log.logInfo("enabling index receive"); sb.setConfig(SwitchboardConstants.INDEX_RECEIVE_ALLOW, true); sb.peers.mySeed().setFlagAcceptRemoteIndex(true); sb.setConfig(SwitchboardConstants.INDEX_RECEIVE_AUTODISABLED, false); } log.logInfo("resources ok"); } }
/** * Removes existing entries from a blacklist. * * @param blacklistToUse The blacklist which contains the * @param supportedBlacklistTypes Types of blacklists which the entry is to changed in. * @param entries Array of entries to be deleted. * @return Length of the list of entries to be removed. */ private static int removeEntries( final String blacklistToUse, final String[] supportedBlacklistTypes, final String[] entries) { // load blacklist data from file final List<String> list = FileUtils.getListArray(new File(ListManager.listsPath, blacklistToUse)); boolean listChanged = false; // delete the old entry from file for (final String entry : entries) { String s = entry; if (list != null) { // get rid of escape characters which make it impossible to // properly use contains() if (s.contains("\\\\")) { s = s.replaceAll(Pattern.quote("\\\\"), Matcher.quoteReplacement("\\")); } if (list.contains(s)) { listChanged = list.remove(s); } } // remove the entry from the running blacklist engine for (final String supportedBlacklistType : supportedBlacklistTypes) { if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists", blacklistToUse)) { final String host = (s.indexOf('/') == -1) ? s : s.substring(0, s.indexOf('/')); final String path = (s.indexOf('/') == -1) ? ".*" : s.substring(s.indexOf('/') + 1); try { Switchboard.urlBlacklist.remove(supportedBlacklistType, host, path); } catch (final RuntimeException e) { Log.logSevere("BLACKLIST-CLEANER", e.getMessage() + ": " + host + "/" + path); } } } SearchEventCache.cleanupEvents(true); } if (listChanged) { FileUtils.writeList( new File(ListManager.listsPath, blacklistToUse), list.toArray(new String[list.size()])); } return entries.length; }
/**
 * Classifies the usable disk space of the observed path.
 *
 * @return
 *     <ul>
 *       <li><code>HIGH</code> if the usable space is at least the minimum free disk space, or if
 *           the reported usable space is below 1 byte
 *       <li><code>MEDIUM</code> if the usable space is below the minimum free disk space (a
 *           warning is logged)
 *       <li><code>LOW</code> if the usable space is below the hard limit
 *     </ul>
 */
private Space getNormalizedDiskFree() {
    final long usableBytes = getUsableSpace(this.path);
    if (usableBytes < 1L) {
        return Space.HIGH;
    }

    Space level = Space.HIGH;
    if (usableBytes < getMinFreeDiskSpace()) {
        final StringBuilder warning = new StringBuilder("Volume ");
        warning.append(this.path.toString());
        warning.append(": free space (").append(usableBytes / 1024 / 1024);
        warning.append(" MB) is too low (< ").append(getMinFreeDiskSpace() / 1024 / 1024);
        warning.append(" MB)");
        log.logWarning(warning.toString());
        level = Space.MEDIUM;
    }

    // the hard limit overrides whatever was decided above
    return (usableBytes < getMinFreeDiskSpace_hardlimit()) ? Space.LOW : level;
}
public static serverObjects respond( final RequestHeader header, final serverObjects post, final serverSwitch env) { final Switchboard sb = (Switchboard) env; final serverObjects prop = new serverObjects(); final String EXT = header.get("EXT", ""); final boolean html = EXT.equals("html"); final boolean xml = EXT.equals("xml"); String table = (post == null) ? null : post.get("table"); if (post == null || (!post.containsKey("commitrow") && table != null && !sb.tables.hasHeap(table))) table = null; prop.put("showtable", 0); prop.put("tablecount", sb.tables.size()); // apply deletion requests if (table != null && post != null && post.containsKey("deletetable")) { sb.tables.clear(table); table = null; } if (table == null) { // list all tables that we know int c = 0; for (final String name : sb.tables) { try { if (html) { prop.putHTML("showtable_tables_" + c + "_table", name); } if (xml) { prop.putXML("showtable_tables_" + c + "_table", name); } prop.put("showtable_tables_" + c + "_num", sb.tables.size(name)); c++; } catch (final IOException e) { } } prop.put("showtable_tables", c); prop.put("tablecount", c); return prop; } final boolean showpk = post.containsKey("pk"); final String selectKey = post.containsKey("selectKey") ? post.get("selectKey") : null; final String selectValue = (selectKey != null && post.containsKey("selectValue")) ? post.get("selectValue") : null; final String counts = post.get("count", null); int maxcount = (counts == null || counts.equals("all")) ? Integer.MAX_VALUE : post.getInt("count", 10); final String pattern = post.get("search", ""); final Pattern matcher = (pattern.length() == 0 || pattern.equals(".*")) ? 
null : Pattern.compile(".*" + pattern + ".*"); if (post.containsKey("deleterows")) { for (final Map.Entry<String, String> entry : post.entrySet()) { if (entry.getValue().startsWith("pk_")) try { sb.tables.delete(table, entry.getValue().substring(3).getBytes()); } catch (final IOException e) { Log.logException(e); } } } if (post.containsKey("commitrow")) { final String pk = post.get("pk"); final Map<String, byte[]> map = new HashMap<String, byte[]>(); for (final Map.Entry<String, String> entry : post.entrySet()) { if (entry.getKey().startsWith("col_")) { map.put(entry.getKey().substring(4), entry.getValue().getBytes()); } } try { if (pk == null || pk.length() == 0) { sb.tables.insert(table, map); } else { sb.tables.update(table, pk.getBytes(), map); } } catch (final IOException e) { Log.logException(e); } catch (final RowSpaceExceededException e) { Log.logException(e); } } // generate table prop.put("showtable", 1); prop.put("showtable_table", table); // insert the columns ArrayList<String> columns = null; try { columns = sb.tables.columns(table); } catch (final IOException e) { Log.logException(e); columns = new ArrayList<String>(); } // if a row attribute is given // then order the columns according to the given order final String[] row = post.get("row", "").split(","); for (int i = 0; i < row.length; i++) { if (columns.contains(row[i])) { columns.remove(row[i]); if (i < columns.size()) columns.add(i, row[i]); } } prop.put("showtable_showpk", showpk ? 
1 : 0); for (int i = 0; i < columns.size(); i++) { prop.putHTML("showtable_columns_" + i + "_header", columns.get(i)); } prop.put("showtable_columns", columns.size()); // insert all rows try { maxcount = Math.min(maxcount, sb.tables.size(table)); } catch (final IOException e) { Log.logException(e); maxcount = 0; } int count = 0; try { final Iterator<Tables.Row> plainIterator = sb.tables.iterator(table, matcher); final Iterator<Tables.Row> mapIterator = sb.tables.orderByPK(plainIterator, maxcount).iterator(); Tables.Row trow; boolean dark = true; String cellName, cellValue; rowloop: while ((mapIterator.hasNext()) && (count < maxcount)) { trow = mapIterator.next(); if (row == null) continue; prop.put("showtable_list_" + count + "_dark", ((dark) ? 1 : 0)); dark = !dark; prop.put("showtable_list_" + count + "_showpk", showpk ? 1 : 0); prop.put("showtable_list_" + count + "_showpk_pk", UTF8.String(trow.getPK())); prop.put("showtable_list_" + count + "_count", count); for (int i = 0; i < columns.size(); i++) { cellName = columns.get(i); if (trow.containsKey(cellName)) { cellValue = UTF8.String(trow.get(cellName)); if (selectKey != null && cellName.equals(selectKey) && !cellValue.matches(selectValue)) continue rowloop; } else { cellValue = ""; } if (html) { prop.putHTML("showtable_list_" + count + "_columns_" + i + "_column", cellName); prop.putHTML("showtable_list_" + count + "_columns_" + i + "_cell", cellValue); } if (xml) { prop.putXML("showtable_list_" + count + "_columns_" + i + "_column", cellName); prop.putXML("showtable_list_" + count + "_columns_" + i + "_cell", cellValue); } } prop.put("showtable_list_" + count + "_columns", columns.size()); count++; } } catch (final IOException e) { Log.logException(e); } prop.put("showtable_list", count); prop.put("showtable_num", count); // return rewrite properties return prop; }
public ResourceObserver(final Switchboard sb) { this.sb = sb; this.path = sb.getDataPath(SwitchboardConstants.INDEX_PRIMARY_PATH, ""); log.logInfo("path for disc space measurement: " + this.path); }
public static serverObjects respond( final RequestHeader header, final serverObjects post, final serverSwitch env) { // return variable that accumulates replacements final serverObjects prop = new serverObjects(); final Switchboard sb = (Switchboard) env; // set if this should be visible if (yacyBuildProperties.isPkgManager()) { prop.put("candeploy", "2"); return prop; } else if (OS.canExecUnix || OS.isWindows) { // we can deploy a new system with (i.e.) // cd DATA/RELEASE;tar xfz $1;cp -Rf yacy/* ../../;rm -Rf yacy prop.put("candeploy", "1"); } else { prop.put("candeploy", "0"); } prop.put("candeploy_configCommit", "0"); prop.put("candeploy_autoUpdate", "0"); prop.put("candeploy_downloadsAvailable", "0"); if (post != null) { // check if update is supposed to be installed and a release is defined if (post.containsKey("update") && !post.get("releaseinstall", "").isEmpty()) { prop.put("forwardToSteering", "1"); prop.putHTML("forwardToSteering_release", post.get("releaseinstall", "")); prop.put("deploys", "1"); prop.put("candeploy", "2"); // display nothing else return prop; } if (post.containsKey("downloadRelease")) { // download a release final String release = post.get("releasedownload", ""); if (!release.isEmpty()) { try { yacyRelease versionToDownload = new yacyRelease(new DigestURI(release)); // replace this version with version which contains public key final yacyRelease.DevAndMainVersions allReleases = yacyRelease.allReleases(false, false); final Set<yacyRelease> mostReleases = versionToDownload.isMainRelease() ? 
allReleases.main : allReleases.dev; for (final yacyRelease rel : mostReleases) { if (rel.equals(versionToDownload)) { versionToDownload = rel; break; } } versionToDownload.downloadRelease(); } catch (final IOException e) { // TODO Auto-generated catch block Log.logException(e); } } } if (post.containsKey("checkRelease")) { yacyRelease.allReleases(true, false); } if (post.containsKey("deleteRelease")) { final String release = post.get("releaseinstall", ""); if (!release.isEmpty()) { try { FileUtils.deletedelete(new File(sb.releasePath, release)); FileUtils.deletedelete(new File(sb.releasePath, release + ".sig")); } catch (final NullPointerException e) { sb.getLog() .logSevere( "AUTO-UPDATE: could not delete release " + release + ": " + e.getMessage()); } } } if (post.containsKey("autoUpdate")) { final yacyRelease updateVersion = yacyRelease.rulebasedUpdateInfo(true); if (updateVersion == null) { prop.put("candeploy_autoUpdate", "2"); // no more recent release found } else { // there is a version that is more recent. 
Load it and re-start with it sb.getLog() .logInfo("AUTO-UPDATE: downloading more recent release " + updateVersion.getUrl()); final File downloaded = updateVersion.downloadRelease(); prop.putHTML("candeploy_autoUpdate_downloadedRelease", updateVersion.getName()); final boolean devenvironment = new File(sb.getAppPath(), ".svn").exists(); if (devenvironment) { sb.getLog() .logInfo("AUTO-UPDATE: omitting update because this is a development environment"); prop.put("candeploy_autoUpdate", "3"); } else if ((downloaded == null) || (!downloaded.exists()) || (downloaded.length() == 0)) { sb.getLog() .logInfo( "AUTO-UPDATE: omitting update because download failed (file cannot be found, is too small or signature was bad)"); prop.put("candeploy_autoUpdate", "4"); } else { yacyRelease.deployRelease(downloaded); sb.terminate(10, "manual release update to " + downloaded.getName()); sb.getLog().logInfo("AUTO-UPDATE: deploy and restart initiated"); prop.put("candeploy_autoUpdate", "1"); } } } if (post.containsKey("configSubmit")) { prop.put("candeploy_configCommit", "1"); sb.setConfig( "update.process", ("manual".equals(post.get("updateMode", "manual"))) ? "manual" : "auto"); sb.setConfig("update.cycle", Math.max(12, post.getLong("cycle", 168))); sb.setConfig("update.blacklist", post.get("blacklist", "")); sb.setConfig( "update.concept", ("any".equals(post.get("releaseType", "any"))) ? "any" : "main"); sb.setConfig( "update.onlySignedFiles", (post.getBoolean("onlySignedFiles", false)) ? 
"1" : "0"); } } // version information final String versionstring = yacyBuildProperties.getVersion() + "/" + yacyBuildProperties.getSVNRevision(); prop.putHTML("candeploy_versionpp", versionstring); final boolean devenvironment = new File(sb.getAppPath(), ".svn").exists(); float thisVersion = Float.parseFloat(yacyBuildProperties.getVersion()); // cut off the SVN Rev in the Version try { thisVersion = (float) (Math.round(thisVersion * 1000.0) / 1000.0); } catch (final NumberFormatException e) { } // list downloaded releases final File[] downloadedFiles = sb.releasePath.listFiles(); // list can be null if RELEASE directory has been deleted manually final int downloadedFilesNum = (downloadedFiles == null) ? 0 : downloadedFiles.length; prop.put( "candeploy_deployenabled", (downloadedFilesNum == 0) ? "0" : ((devenvironment) ? "1" : "2")); // prevent that a developer-version is over-deployed final NavigableSet<yacyRelease> downloadedReleases = new TreeSet<yacyRelease>(); for (final File downloaded : downloadedFiles) { try { final yacyRelease release = new yacyRelease(downloaded); downloadedReleases.add(release); } catch (final RuntimeException e) { // not a valid release // can be also a restart- or deploy-file final File invalid = downloaded; if (!(invalid.getName().endsWith(".bat") || invalid.getName().endsWith(".sh") || invalid .getName() .endsWith(".sig"))) { // Windows & Linux don't like deleted scripts while execution! invalid.deleteOnExit(); } } } // latest downloaded release final yacyVersion dflt = (downloadedReleases.isEmpty()) ? null : downloadedReleases.last(); // check if there are any downloaded releases and if there are enable the update buttons prop.put("candeploy_downloadsAvailable", (downloadedReleases.isEmpty()) ? "0" : "1"); prop.put( "candeploy_deployenabled_buttonsActive", (downloadedReleases.isEmpty() || devenvironment) ? 
"0" : "1"); int relcount = 0; for (final yacyRelease release : downloadedReleases) { prop.put( "candeploy_downloadedreleases_" + relcount + "_name", ((release.isMainRelease()) ? "main" : "dev") + " " + release.getReleaseNr() + "/" + release.getSvn()); prop.put( "candeploy_downloadedreleases_" + relcount + "_signature", (release.getSignatureFile().exists() ? "1" : "0")); prop.putHTML("candeploy_downloadedreleases_" + relcount + "_file", release.getName()); prop.put( "candeploy_downloadedreleases_" + relcount + "_selected", (release == dflt) ? "1" : "0"); relcount++; } prop.put("candeploy_downloadedreleases", relcount); // list remotely available releases final yacyRelease.DevAndMainVersions releasess = yacyRelease.allReleases(false, false); relcount = 0; final ArrayList<yacyRelease> rlist = new ArrayList<yacyRelease>(); final Set<yacyRelease> remoteDevReleases = releasess.dev; remoteDevReleases.removeAll(downloadedReleases); for (final yacyRelease release : remoteDevReleases) { rlist.add(release); } final Set<yacyRelease> remoteMainReleases = releasess.main; remoteMainReleases.removeAll(downloadedReleases); for (final yacyRelease release : remoteMainReleases) { rlist.add(release); } yacyRelease release; for (int i = rlist.size() - 1; i >= 0; i--) { release = rlist.get(i); prop.put( "candeploy_availreleases_" + relcount + "_name", ((release.isMainRelease()) ? "main" : "dev") + " " + release.getReleaseNr() + "/" + release.getSvn()); prop.put("candeploy_availreleases_" + relcount + "_url", release.getUrl().toString()); prop.put( "candeploy_availreleases_" + relcount + "_signatures", (release.getPublicKey() != null ? "1" : "0")); prop.put("candeploy_availreleases_" + relcount + "_selected", (relcount == 0) ? "1" : "0"); relcount++; } prop.put("candeploy_availreleases", relcount); // properties for automated system update prop.put( "candeploy_manualUpdateChecked", ("manual".equals(sb.getConfig("update.process", "manual"))) ? 
"1" : "0"); prop.put( "candeploy_autoUpdateChecked", ("auto".equals(sb.getConfig("update.process", "manual"))) ? "1" : "0"); prop.put("candeploy_cycle", sb.getConfigLong("update.cycle", 168)); prop.putHTML("candeploy_blacklist", sb.getConfig("update.blacklist", "")); prop.put( "candeploy_releaseTypeMainChecked", ("any".equals(sb.getConfig("update.concept", "any"))) ? "0" : "1"); prop.put( "candeploy_releaseTypeAnyChecked", ("any".equals(sb.getConfig("update.concept", "any"))) ? "1" : "0"); prop.put("candeploy_lastlookup", (sb.getConfigLong("update.time.lookup", 0) == 0) ? "0" : "1"); prop.put( "candeploy_lastlookup_time", new Date(sb.getConfigLong("update.time.lookup", 0)).toString()); prop.put( "candeploy_lastdownload", (sb.getConfigLong("update.time.download", 0) == 0) ? "0" : "1"); prop.put( "candeploy_lastdownload_time", new Date(sb.getConfigLong("update.time.download", 0)).toString()); prop.put("candeploy_lastdeploy", (sb.getConfigLong("update.time.deploy", 0) == 0) ? "0" : "1"); prop.put( "candeploy_lastdeploy_time", new Date(sb.getConfigLong("update.time.deploy", 0)).toString()); prop.put( "candeploy_onlySignedFiles", ("1".equals(sb.getConfig("update.onlySignedFiles", "1"))) ? "1" : "0"); /* if ((adminaccess) && (yacyVersion.latestRelease >= (thisVersion+0.01))) { // only new Versions(not new SVN) if ((yacyVersion.latestMainRelease != null) || (yacyVersion.latestDevRelease != null)) { prop.put("hintVersionDownload", 1); } else if ((post != null) && (post.containsKey("aquirerelease"))) { yacyVersion.aquireLatestReleaseInfo(); prop.put("hintVersionDownload", 1); } else { prop.put("hintVersionAvailable", 1); } } prop.put("hintVersionAvailable", 1); // for testing prop.putASIS("hintVersionDownload_versionResMain", (yacyVersion.latestMainRelease == null) ? "-" : yacyVersion.latestMainRelease.toAnchor()); prop.putASIS("hintVersionDownload_versionResDev", (yacyVersion.latestDevRelease == null) ? 
"-" : yacyVersion.latestDevRelease.toAnchor()); prop.put("hintVersionAvailable_latestVersion", Float.toString(yacyVersion.latestRelease)); */ return prop; }
public CrawlSwitchboard(final String networkName, final Log log, final File queuesRoot) { log.logInfo("Initializing Word Index for the network '" + networkName + "'."); if (networkName == null || networkName.length() == 0) { log.logSevere("no network name given - shutting down"); System.exit(0); } this.log = log; this.profilesActiveCrawlsCache = Collections.synchronizedMap(new TreeMap<byte[], CrawlProfile>(Base64Order.enhancedCoder)); // make crawl profiles database and default profiles this.queuesRoot = queuesRoot; this.queuesRoot.mkdirs(); this.log.logConfig("Initializing Crawl Profiles"); final File profilesActiveFile = new File(queuesRoot, DBFILE_ACTIVE_CRAWL_PROFILES); this.profilesActiveCrawls = loadFromDB(profilesActiveFile); for (final byte[] handle : this.profilesActiveCrawls.keySet()) { CrawlProfile p; try { p = new CrawlProfile(this.profilesActiveCrawls.get(handle)); } catch (final IOException e) { p = null; } catch (final RowSpaceExceededException e) { p = null; } if (p == null) continue; if (!RegexHelper.isValidRegex(p.get(CrawlProfile.FILTER_URL_MUSTMATCH))) { removeActive(handle); Log.logWarning( "CrawlProfiles", "removed Profile " + p.handle() + ": " + p.name() + " from active crawls since " + CrawlProfile.FILTER_URL_MUSTMATCH + " is no valid regular expression: " + p.get(CrawlProfile.FILTER_URL_MUSTMATCH)); } else if (!RegexHelper.isValidRegex(p.get(CrawlProfile.FILTER_URL_MUSTNOTMATCH))) { removeActive(handle); Log.logWarning( "CrawlProfiles", "removed Profile " + p.handle() + ": " + p.name() + " from active crawls since " + CrawlProfile.FILTER_URL_MUSTNOTMATCH + " is no valid regular expression: " + p.get(CrawlProfile.FILTER_URL_MUSTNOTMATCH)); } else { Log.logInfo("CrawlProfiles", "loaded Profile " + p.handle() + ": " + p.name()); } } initActiveCrawlProfiles(); log.logInfo( "Loaded active crawl profiles from file " + profilesActiveFile.getName() + ", " + this.profilesActiveCrawls.size() + " entries"); final File profilesPassiveFile = new 
File(queuesRoot, DBFILE_PASSIVE_CRAWL_PROFILES); this.profilesPassiveCrawls = loadFromDB(profilesPassiveFile); for (final byte[] handle : this.profilesPassiveCrawls.keySet()) { CrawlProfile p; try { p = new CrawlProfile(this.profilesPassiveCrawls.get(handle)); Log.logInfo("CrawlProfiles", "loaded Profile " + p.handle() + ": " + p.name()); } catch (final IOException e) { continue; } catch (final RowSpaceExceededException e) { continue; } } log.logInfo( "Loaded passive crawl profiles from file " + profilesPassiveFile.getName() + ", " + this.profilesPassiveCrawls.size() + " entries" + ", " + profilesPassiveFile.length() / 1024); }