Пример #1
0
 /**
  * Replaces existing entries of a blacklist with new ones.
  *
  * <p>The old entries are removed first (see {@code removeEntries}). Afterwards every new entry
  * is appended to the blacklist file and added to the running blacklist engine for each supported
  * blacklist type whose list set contains this blacklist.
  *
  * @param blacklistToUse Name of the blacklist file inside {@code ListManager.listsPath}.
  * @param supportedBlacklistTypes Types of blacklists in which the entries are to be changed.
  * @param oldEntry Entries to be replaced.
  * @param newEntry Replacement entries, either {@code host} or {@code host/path}; an entry
  *     without a path is stored with the path {@code .*}.
  * @return The number of new entries.
  */
 private static int alterEntries(
     final String blacklistToUse,
     final String[] supportedBlacklistTypes,
     final String[] oldEntry,
     final String[] newEntry) {
   removeEntries(blacklistToUse, supportedBlacklistTypes, oldEntry);
   PrintWriter pw = null;
   try {
     // open in append mode: the remaining entries of the file must be kept
     pw = new PrintWriter(new FileWriter(new File(ListManager.listsPath, blacklistToUse), true));
     for (final String n : newEntry) {
       // split the entry into host and path at the first slash
       final int pos = n.indexOf('/');
       final String host;
       final String path;
       if (pos < 0) {
         host = n;
         path = ".*";
       } else {
         host = n.substring(0, pos);
         path = n.substring(pos + 1);
       }
       pw.println(host + "/" + path);

       // add the entry to the running blacklist engine
       for (final String s : supportedBlacklistTypes) {
         if (ListManager.listSetContains(s + ".BlackLists", blacklistToUse)) {
           Switchboard.urlBlacklist.add(s, host, path);
         }
       }
       SearchEventCache.cleanupEvents(true);
     }
     // PrintWriter never throws on write failures, it only records them
     if (pw.checkError()) {
       Log.logSevere("BLACKLIST-CLEANER", "error on writing altered entries to blacklist");
     }
   } catch (final IOException e) {
     Log.logSevere("BLACKLIST-CLEANER", "error on writing altered entries to blacklist", e);
   } finally {
     // close the file even if the blacklist engine throws while entries are added
     if (pw != null) {
       pw.close();
     }
   }
   return newEntry.length;
 }
Пример #2
0
  /**
   * Removes existing entries from a blacklist.
   *
   * <p>Each entry is deleted from the blacklist file (if the file could be loaded) and removed
   * from the running blacklist engine for every supported blacklist type whose list set contains
   * this blacklist. The file is only rewritten if at least one entry was actually deleted.
   *
   * @param blacklistToUse Name of the blacklist file inside {@code ListManager.listsPath} which
   *     contains the entries.
   * @param supportedBlacklistTypes Types of blacklists from which the entries are to be removed.
   * @param entries Array of entries to be deleted, either {@code host} or {@code host/path}.
   * @return Length of the list of entries to be removed.
   */
  private static int removeEntries(
      final String blacklistToUse, final String[] supportedBlacklistTypes, final String[] entries) {
    // load blacklist data from file
    final List<String> list =
        FileUtils.getListArray(new File(ListManager.listsPath, blacklistToUse));

    boolean listChanged = false;

    for (final String entry : entries) {
      String s = entry;

      // delete the old entry from the file content
      if (list != null) {

        // get rid of escape characters (doubled backslashes) which make it
        // impossible to find the entry in the list
        if (s.contains("\\\\")) {
          s = s.replace("\\\\", "\\");
        }

        // remove() reports whether the entry was present, no contains() needed
        if (list.remove(s)) {
          listChanged = true;
        }
      }

      // split once per entry; an entry without a slash matches every path
      final int slash = s.indexOf('/');
      final String host = (slash == -1) ? s : s.substring(0, slash);
      final String path = (slash == -1) ? ".*" : s.substring(slash + 1);

      // remove the entry from the running blacklist engine
      for (final String supportedBlacklistType : supportedBlacklistTypes) {
        if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists", blacklistToUse)) {
          try {
            Switchboard.urlBlacklist.remove(supportedBlacklistType, host, path);
          } catch (final RuntimeException e) {
            // keep going with the other types, but do not lose the stack trace
            Log.logSevere("BLACKLIST-CLEANER", e.getMessage() + ": " + host + "/" + path, e);
          }
        }
      }
      SearchEventCache.cleanupEvents(true);
    }
    if (listChanged) {
      FileUtils.writeList(
          new File(ListManager.listsPath, blacklistToUse), list.toArray(new String[list.size()]));
    }
    return entries.length;
  }
Пример #3
0
  /**
   * Sets up the crawl profile storage below the given queues directory.
   *
   * <p>The constructor creates the queues directory if necessary, loads the active and the passive
   * crawl profiles from their database files and logs every profile that could be read. Active
   * profiles whose must-match or must-not-match URL filter is not a valid regular expression are
   * removed via {@code removeActive}. If no network name is given the JVM is terminated.
   *
   * @param networkName name of the network; must not be {@code null} or empty
   * @param log logger used for the start-up messages of this instance
   * @param queuesRoot directory which holds the crawl profile database files
   */
  public CrawlSwitchboard(final String networkName, final Log log, final File queuesRoot) {

    log.logInfo("Initializing Word Index for the network '" + networkName + "'.");

    final boolean networkNameMissing = networkName == null || networkName.isEmpty();
    if (networkNameMissing) {
      log.logSevere("no network name given - shutting down");
      System.exit(0);
    }
    this.log = log;
    this.profilesActiveCrawlsCache =
        Collections.synchronizedMap(new TreeMap<byte[], CrawlProfile>(Base64Order.enhancedCoder));

    // prepare the directory for the crawl profile databases
    this.queuesRoot = queuesRoot;
    this.queuesRoot.mkdirs();
    this.log.logConfig("Initializing Crawl Profiles");

    // active profiles: load, validate the URL filters, drop broken profiles
    final File profilesActiveFile = new File(queuesRoot, DBFILE_ACTIVE_CRAWL_PROFILES);
    this.profilesActiveCrawls = loadFromDB(profilesActiveFile);
    // NOTE(review): removeActive() is called while the key set is being iterated; whether that
    // is safe depends on the map returned by loadFromDB() and on removeActive() - confirm
    for (final byte[] key : this.profilesActiveCrawls.keySet()) {
      CrawlProfile profile;
      try {
        profile = new CrawlProfile(this.profilesActiveCrawls.get(key));
      } catch (final IOException e) {
        // profile cannot be read, skip it
        continue;
      } catch (final RowSpaceExceededException e) {
        // profile cannot be read, skip it
        continue;
      }

      if (!RegexHelper.isValidRegex(profile.get(CrawlProfile.FILTER_URL_MUSTMATCH))) {
        removeActive(key);
        final String message =
            "removed Profile " + profile.handle() + ": " + profile.name()
                + " from active crawls since " + CrawlProfile.FILTER_URL_MUSTMATCH
                + " is no valid regular expression: "
                + profile.get(CrawlProfile.FILTER_URL_MUSTMATCH);
        Log.logWarning("CrawlProfiles", message);
        continue;
      }

      if (!RegexHelper.isValidRegex(profile.get(CrawlProfile.FILTER_URL_MUSTNOTMATCH))) {
        removeActive(key);
        final String message =
            "removed Profile " + profile.handle() + ": " + profile.name()
                + " from active crawls since " + CrawlProfile.FILTER_URL_MUSTNOTMATCH
                + " is no valid regular expression: "
                + profile.get(CrawlProfile.FILTER_URL_MUSTNOTMATCH);
        Log.logWarning("CrawlProfiles", message);
        continue;
      }

      Log.logInfo("CrawlProfiles", "loaded Profile " + profile.handle() + ": " + profile.name());
    }
    initActiveCrawlProfiles();
    log.logInfo(
        "Loaded active crawl profiles from file " + profilesActiveFile.getName()
            + ", " + this.profilesActiveCrawls.size() + " entries");

    // passive profiles: only load and report them
    final File profilesPassiveFile = new File(queuesRoot, DBFILE_PASSIVE_CRAWL_PROFILES);
    this.profilesPassiveCrawls = loadFromDB(profilesPassiveFile);
    for (final byte[] key : this.profilesPassiveCrawls.keySet()) {
      try {
        final CrawlProfile profile = new CrawlProfile(this.profilesPassiveCrawls.get(key));
        Log.logInfo("CrawlProfiles", "loaded Profile " + profile.handle() + ": " + profile.name());
      } catch (final IOException e) {
        // profile cannot be read, go on with the next one
        continue;
      } catch (final RowSpaceExceededException e) {
        // profile cannot be read, go on with the next one
        continue;
      }
    }
    log.logInfo(
        "Loaded passive crawl profiles from file " + profilesPassiveFile.getName()
            + ", " + this.profilesPassiveCrawls.size() + " entries"
            + ", " + (profilesPassiveFile.length() / 1024));
  }