/**
 * Changes existing entries in a blacklist: first removes the old entries
 * (via {@link #removeEntries(String, String[], String[])}), then appends
 * the new entries to the blacklist file and registers them with the
 * running blacklist engine.
 *
 * @param blacklistToUse The blacklist which contains the entries.
 * @param supportedBlacklistTypes Types of blacklists which the entries are to be changed in.
 * @param oldEntry Entries to be replaced.
 * @param newEntry Replacement entries, each of the form {@code host} or {@code host/path}.
 * @return Number of new entries.
 */
private static int alterEntries(
        final String blacklistToUse,
        final String[] supportedBlacklistTypes,
        final String[] oldEntry,
        final String[] newEntry) {
    removeEntries(blacklistToUse, supportedBlacklistTypes, oldEntry);
    PrintWriter pw = null;
    try {
        // open the blacklist file in append mode and add the new entries
        pw = new PrintWriter(new FileWriter(new File(ListManager.listsPath, blacklistToUse), true));
        String host, path;
        for (final String n : newEntry) {
            // split "host/path"; an entry without '/' matches every path (".*")
            final int pos = n.indexOf('/');
            if (pos < 0) {
                host = n;
                path = ".*";
            } else {
                host = n.substring(0, pos);
                path = n.substring(pos + 1);
            }
            pw.println(host + "/" + path);
            // also register the entry with every active blacklist engine
            // that uses this list
            for (final String s : supportedBlacklistTypes) {
                if (ListManager.listSetContains(s + ".BlackLists", blacklistToUse)) {
                    Switchboard.urlBlacklist.add(s, host, path);
                }
            }
            // invalidate cached search results, they may now contain
            // blacklisted URLs
            SearchEventCache.cleanupEvents(true);
        }
    } catch (final IOException e) {
        Log.logSevere("BLACKLIST-CLEANER", "error on writing altered entries to blacklist", e);
    } finally {
        // close in finally so the writer is released even when writing
        // throws (the original closed inside try and leaked on error)
        if (pw != null) {
            pw.close();
        }
    }
    return newEntry.length;
}
/** * Removes existing entries from a blacklist. * * @param blacklistToUse The blacklist which contains the * @param supportedBlacklistTypes Types of blacklists which the entry is to changed in. * @param entries Array of entries to be deleted. * @return Length of the list of entries to be removed. */ private static int removeEntries( final String blacklistToUse, final String[] supportedBlacklistTypes, final String[] entries) { // load blacklist data from file final List<String> list = FileUtils.getListArray(new File(ListManager.listsPath, blacklistToUse)); boolean listChanged = false; // delete the old entry from file for (final String entry : entries) { String s = entry; if (list != null) { // get rid of escape characters which make it impossible to // properly use contains() if (s.contains("\\\\")) { s = s.replaceAll(Pattern.quote("\\\\"), Matcher.quoteReplacement("\\")); } if (list.contains(s)) { listChanged = list.remove(s); } } // remove the entry from the running blacklist engine for (final String supportedBlacklistType : supportedBlacklistTypes) { if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists", blacklistToUse)) { final String host = (s.indexOf('/') == -1) ? s : s.substring(0, s.indexOf('/')); final String path = (s.indexOf('/') == -1) ? ".*" : s.substring(s.indexOf('/') + 1); try { Switchboard.urlBlacklist.remove(supportedBlacklistType, host, path); } catch (final RuntimeException e) { Log.logSevere("BLACKLIST-CLEANER", e.getMessage() + ": " + host + "/" + path); } } } SearchEventCache.cleanupEvents(true); } if (listChanged) { FileUtils.writeList( new File(ListManager.listsPath, blacklistToUse), list.toArray(new String[list.size()])); } return entries.length; }
/**
 * Initializes the crawl switchboard for the given network: opens the
 * active and passive crawl-profile databases under {@code queuesRoot},
 * validates the URL filter regexes of all active profiles (dropping
 * invalid ones), and installs the default profiles.
 *
 * @param networkName name of the network; must be non-null and non-empty,
 *        otherwise the process is terminated
 * @param log logger used by this switchboard
 * @param queuesRoot directory holding the crawl-profile databases
 *        (created if missing)
 */
public CrawlSwitchboard(final String networkName, final Log log, final File queuesRoot) {
    log.logInfo("Initializing Word Index for the network '" + networkName + "'.");

    // a missing network name is fatal
    // NOTE(review): exits with status 0 even though this is an error
    // condition — presumably intentional, but confirm
    if (networkName == null || networkName.length() == 0) {
        log.logSevere("no network name given - shutting down");
        System.exit(0);
    }
    this.log = log;
    // cache keyed by profile handle bytes; TreeMap needs the explicit
    // Base64Order comparator because byte[] has no natural ordering
    this.profilesActiveCrawlsCache = Collections.synchronizedMap(new TreeMap<byte[], CrawlProfile>(Base64Order.enhancedCoder));

    // make crawl profiles database and default profiles
    this.queuesRoot = queuesRoot;
    this.queuesRoot.mkdirs();
    this.log.logConfig("Initializing Crawl Profiles");

    final File profilesActiveFile = new File(queuesRoot, DBFILE_ACTIVE_CRAWL_PROFILES);
    this.profilesActiveCrawls = loadFromDB(profilesActiveFile);
    // validate every loaded active profile; profiles that fail to load or
    // carry an invalid URL filter regex are removed from the active set
    for (final byte[] handle : this.profilesActiveCrawls.keySet()) {
        CrawlProfile p;
        try {
            p = new CrawlProfile(this.profilesActiveCrawls.get(handle));
        } catch (final IOException e) {
            p = null;
        } catch (final RowSpaceExceededException e) {
            p = null;
        }
        // unreadable profile entries are silently skipped (not removed)
        if (p == null) continue;
        if (!RegexHelper.isValidRegex(p.get(CrawlProfile.FILTER_URL_MUSTMATCH))) {
            removeActive(handle);
            Log.logWarning(
                    "CrawlProfiles",
                    "removed Profile "
                            + p.handle()
                            + ": "
                            + p.name()
                            + " from active crawls since "
                            + CrawlProfile.FILTER_URL_MUSTMATCH
                            + " is no valid regular expression: "
                            + p.get(CrawlProfile.FILTER_URL_MUSTMATCH));
        } else if (!RegexHelper.isValidRegex(p.get(CrawlProfile.FILTER_URL_MUSTNOTMATCH))) {
            removeActive(handle);
            Log.logWarning(
                    "CrawlProfiles",
                    "removed Profile "
                            + p.handle()
                            + ": "
                            + p.name()
                            + " from active crawls since "
                            + CrawlProfile.FILTER_URL_MUSTNOTMATCH
                            + " is no valid regular expression: "
                            + p.get(CrawlProfile.FILTER_URL_MUSTNOTMATCH));
        } else {
            Log.logInfo("CrawlProfiles", "loaded Profile " + p.handle() + ": " + p.name());
        }
    }
    // (re)create the built-in default profiles
    initActiveCrawlProfiles();
    log.logInfo(
            "Loaded active crawl profiles from file "
                    + profilesActiveFile.getName()
                    + ", "
                    + this.profilesActiveCrawls.size()
                    + " entries");

    final File profilesPassiveFile = new File(queuesRoot, DBFILE_PASSIVE_CRAWL_PROFILES);
    this.profilesPassiveCrawls = loadFromDB(profilesPassiveFile);
    // passive profiles are only logged, never validated or removed;
    // entries that fail to load are skipped
    for (final byte[] handle : this.profilesPassiveCrawls.keySet()) {
        CrawlProfile p;
        try {
            p = new CrawlProfile(this.profilesPassiveCrawls.get(handle));
            Log.logInfo("CrawlProfiles", "loaded Profile " + p.handle() + ": " + p.name());
        } catch (final IOException e) {
            continue;
        } catch (final RowSpaceExceededException e) {
            continue;
        }
    }
    log.logInfo(
            "Loaded passive crawl profiles from file "
                    + profilesPassiveFile.getName()
                    + ", "
                    + this.profilesPassiveCrawls.size()
                    + " entries"
                    + ", "
                    + profilesPassiveFile.length() / 1024);
}