Example #1
 /**
  * Return the value associated with the first pattern that the string matches, or the specified
  * default value if none, considering only patterns whose associated value is less than or equal
  * to maxPri.
  */
 public int getMatch(String str, int dfault, int maxPri) {
   Perl5Matcher matcher = RegexpUtil.getMatcher();
   for (Map.Entry<Pattern, Integer> ent : patternMap.entrySet()) {
     if (ent.getValue() <= maxPri) {
       Pattern pat = ent.getKey();
       if (matcher.contains(str, pat)) {
         log.debug2("getMatch(" + str + "): " + ent.getValue());
         return ent.getValue();
       }
     }
   }
   log.debug2("getMatch(" + str + "): default: " + dfault);
   return dfault;
 }
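
// A minimal, self-contained sketch of the same priority-filtered lookup as
// getMatch() above, using java.util.regex in place of the ORO Perl5Matcher
// this code depends on. PriorityPatternMap and its methods are illustrative
// names, not LOCKSS API.
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.regex.Pattern;

public class PriorityPatternMap {
  // LinkedHashMap preserves insertion order, so "first matching pattern"
  // is well defined, as it is for the ordered map in the original.
  private final Map<Pattern, Integer> patternMap = new LinkedHashMap<>();

  public void put(String regex, int priority) {
    patternMap.put(Pattern.compile(regex), priority);
  }

  /** Value of the first matching pattern whose value is <= maxPri, else dfault. */
  public int getMatch(String str, int dfault, int maxPri) {
    for (Map.Entry<Pattern, Integer> ent : patternMap.entrySet()) {
      // find() is the substring-match analogue of Perl5Matcher.contains()
      if (ent.getValue() <= maxPri && ent.getKey().matcher(str).find()) {
        return ent.getValue();
      }
    }
    return dfault;
  }
}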
 protected void initFeatureVersions() throws PluginException.InvalidDefinition {
   if (definitionMap.containsKey(KEY_PLUGIN_FEATURE_VERSION_MAP)) {
     Map<Plugin.Feature, String> map = new HashMap<Plugin.Feature, String>();
     Map<String, String> spec =
         (Map<String, String>) definitionMap.getMap(KEY_PLUGIN_FEATURE_VERSION_MAP);
     log.debug2("features: " + spec);
     for (Map.Entry<String, String> ent : spec.entrySet()) {
       try {
         // Prefix version string with feature name to create separate
         // namespace for each feature
         String key = ent.getKey();
         map.put(Plugin.Feature.valueOf(key), key + "_" + ent.getValue());
       } catch (RuntimeException e) {
         log.warning(
             getPluginName()
                 + " set unknown feature: "
                 + ent.getKey()
                 + " to version "
                 + ent.getValue(),
             e);
         throw new PluginException.InvalidDefinition("Unknown feature: " + ent.getKey(), e);
       }
     }
     featureVersion = map;
   } else {
     featureVersion = null;
   }
 }
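
// The namespacing trick in initFeatureVersions() (prefix each version string
// with its feature name) shown in isolation. The Feature enum here is a
// hypothetical stand-in for Plugin.Feature; an unknown key makes valueOf()
// throw IllegalArgumentException, the RuntimeException the original catches.
import java.util.HashMap;
import java.util.Map;

class FeatureVersions {
  enum Feature { Metadata, Poll, Substance }  // illustrative values only

  static Map<Feature, String> namespaceVersions(Map<String, String> spec) {
    Map<Feature, String> map = new HashMap<>();
    for (Map.Entry<String, String> ent : spec.entrySet()) {
      // Prefixing with the feature name keeps version "2" for Metadata
      // distinct from version "2" for Poll.
      map.put(Feature.valueOf(ent.getKey()), ent.getKey() + "_" + ent.getValue());
    }
    return map;
  }
}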
 void stopThread() {
   if (sizeCalcThread != null) {
     log.debug2("Stopping thread");
     sizeCalcThread.stopSizeCalc();
     sizeCalcThread = null;
   }
 }
  /** Create a LockssKeyStore from a config subtree */
  LockssKeyStore createLockssKeyStore(Configuration config) {
    log.debug2("Creating LockssKeyStore from config: " + config);
    String name = config.get(KEYSTORE_PARAM_NAME);
    LockssKeyStore lk = new LockssKeyStore(name);

    String file = config.get(KEYSTORE_PARAM_FILE);
    String resource = config.get(KEYSTORE_PARAM_RESOURCE);
    String url = config.get(KEYSTORE_PARAM_URL);

    if (!StringUtil.isNullString(file)) {
      lk.setLocation(file, LocationType.File);
    } else if (!StringUtil.isNullString(resource)) {
      lk.setLocation(resource, LocationType.Resource);
    } else if (!StringUtil.isNullString(url)) {
      lk.setLocation(url, LocationType.Url);
    }

    lk.setType(config.get(KEYSTORE_PARAM_TYPE, defaultKeyStoreType));
    lk.setProvider(config.get(KEYSTORE_PARAM_PROVIDER, defaultKeyStoreProvider));
    lk.setPassword(config.get(KEYSTORE_PARAM_PASSWORD));
    lk.setKeyPassword(config.get(KEYSTORE_PARAM_KEY_PASSWORD));
    lk.setKeyPasswordFile(config.get(KEYSTORE_PARAM_KEY_PASSWORD_FILE));
    lk.setMayCreate(config.getBoolean(KEYSTORE_PARAM_CREATE, DEFAULT_CREATE));
    return lk;
  }
 /** Enqueue a size calculation for the node */
 public void queueSizeCalc(RepositoryNode node) {
   synchronized (sizeCalcQueue) {
     if (sizeCalcQueue.add(node)) {
       log.debug2("Queue size calc: " + node);
       startOrKickThread();
     }
   }
 }
Beispiel #6
0
  /**
   * Remove the next char from the ring and return it.
   *
   * @return next char from the ring
   */
  public char remove() {
    if (size == 0) {
      throw new IndexOutOfBoundsException("remove() called on empty CharRing");
    }
    if (isTrace) {
      logger.debug2("Removing head from " + toString());
    }
    char returnKar = chars[head];
    head = (head + 1) % capacity;
    size--;

    if (isTrace) {
      logger.debug2("Returning " + returnKar);
    }

    return returnKar;
  }
 void startOrKickThread() {
   if (sizeCalcThread == null) {
     log.debug2("Starting thread");
     sizeCalcThread = new SizeCalcThread();
     sizeCalcThread.start();
     sizeCalcThread.waitRunning();
   }
   sizeCalcSem.give();
 }
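
// A rough java.util.concurrent equivalent of the lazy-start-then-kick
// pattern in startOrKickThread(): create the worker on first use, then
// release a semaphore permit to wake it. Sketch only; the worker body and
// all names are illustrative, and the original's waitRunning() handshake
// is omitted.
import java.util.concurrent.Semaphore;

class SizeCalcKicker {
  private final Semaphore sem = new Semaphore(0);
  private Thread worker;

  synchronized void startOrKick() {
    if (worker == null) {
      worker = new Thread(() -> {
        try {
          while (true) {
            sem.acquire();   // block until kicked
            // ... recompute queued sizes here ...
          }
        } catch (InterruptedException e) {
          // treat interrupt as a stop request
        }
      });
      worker.start();
    }
    sem.release();           // wake the worker
  }
}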
 static LocalRepository getLocalRepository(String repoRoot) {
   synchronized (localRepositories) {
     LocalRepository localRepo = (LocalRepository) localRepositories.get(repoRoot);
     if (localRepo == null) {
       logger.debug2("Creating LocalRepository(" + repoRoot + ")");
       localRepo = new LocalRepository(repoRoot);
       localRepositories.put(repoRoot, localRepo);
     }
     return localRepo;
   }
 }
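
// On a modern JDK the synchronized get-or-create cache in
// getLocalRepository() collapses to computeIfAbsent on a ConcurrentHashMap.
// Sketch only: the factory function stands in for new LocalRepository(root).
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Function;

class LocalRepoCache<R> {
  private final Map<String, R> repos = new ConcurrentHashMap<>();
  private final Function<String, R> factory;

  LocalRepoCache(Function<String, R> factory) {
    this.factory = factory;
  }

  R getLocalRepository(String repoRoot) {
    // Atomically creates the repository on first use, like the
    // synchronized block above.
    return repos.computeIfAbsent(repoRoot, factory);
  }
}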
Example #9
 /**
  * Add kar to the end of this ring.
  *
  * @param kar char to add to the ring
  */
 public void add(char kar) throws RingFullException {
   if (size == capacity) {
     throw new RingFullException("Array is full");
   }
   if (isTrace) {
     logger.debug2("Adding " + kar + " to " + toString());
   }
   chars[tail] = kar;
   tail = (tail + 1) % capacity;
   size++;
 }
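
// Both CharRing methods reduce to the classic head/tail-with-wraparound
// arithmetic. This self-contained miniature (not the LOCKSS class) shows
// just that core.
public class MiniCharRing {
  private final char[] chars;
  private int head, tail, size;

  public MiniCharRing(int capacity) { chars = new char[capacity]; }

  public void add(char kar) {
    if (size == chars.length) throw new IllegalStateException("ring full");
    chars[tail] = kar;
    tail = (tail + 1) % chars.length;  // wrap around
    size++;
  }

  public char remove() {
    if (size == 0) throw new IndexOutOfBoundsException("ring empty");
    char c = chars[head];
    head = (head + 1) % chars.length;  // wrap around
    size--;
    return c;
  }
}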
 protected PermissionCheckerFactory getPermissionCheckerFactory() {
   if (permissionCheckerFact == null) {
     String permissionCheckerFactoryClass =
         definitionMap.getString(DefinableArchivalUnit.KEY_AU_PERMISSION_CHECKER_FACTORY, null);
     if (permissionCheckerFactoryClass != null) {
       permissionCheckerFact =
           (PermissionCheckerFactory)
               newAuxClass(permissionCheckerFactoryClass, PermissionCheckerFactory.class);
       log.debug2("Loaded PermissionCheckerFactory: " + permissionCheckerFact);
     }
   }
   return permissionCheckerFact;
 }
 /**
  * Checks the consistency of the node, and continues with its children if it's consistent.
  *
  * @param node RepositoryNodeImpl the node to check
  */
 private void recurseConsistencyCheck(RepositoryNodeImpl node) {
   logger.debug2("Checking node '" + node.getNodeUrl() + "'...");
   // check consistency at each node
   // correct/deactivate as necessary
   // 'checkNodeConsistency()' will repair if possible
   if (node.checkNodeConsistency()) {
     logger.debug3("Node consistent; recursing on children...");
     List children = node.getNodeList(null, false);
     Iterator iter = children.iterator();
     while (iter.hasNext()) {
       RepositoryNodeImpl child = (RepositoryNodeImpl) iter.next();
       recurseConsistencyCheck(child);
     }
   } else {
     logger.debug3("Node inconsistent; deactivating...");
     deactivateInconsistentNode(node);
   }
 }
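
// The generic shape of the check-then-recurse traversal in
// recurseConsistencyCheck(), over a hypothetical node interface: descend
// only into subtrees whose root passed (and possibly repaired) its check,
// and prune the rest.
import java.util.List;

interface CheckableNode {
  boolean checkConsistency();        // repairs in place where possible
  List<CheckableNode> children();
  void deactivate();
}

class ConsistencyWalker {
  static void recurseConsistencyCheck(CheckableNode node) {
    if (node.checkConsistency()) {
      for (CheckableNode child : node.children()) {
        recurseConsistencyCheck(child);   // descend only into consistent nodes
      }
    } else {
      node.deactivate();                  // prune the inconsistent subtree
    }
  }
}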
  public void loadAuConfigDescrs(Configuration config) throws ConfigurationException {
    super.loadAuConfigDescrs(config);
    this.m_registryUrl = config.get(ConfigParamDescr.BASE_URL.getKey());
    // Now we can construct a valid CC permission checker.
    m_permissionCheckers =
        //       ListUtil.list(new CreativeCommonsPermissionChecker(m_registryUrl));
        ListUtil.list(new CreativeCommonsPermissionChecker());

    paramMap.putLong(
        KEY_AU_NEW_CONTENT_CRAWL_INTERVAL,
        CurrentConfig.getTimeIntervalParam(
            PARAM_REGISTRY_CRAWL_INTERVAL, DEFAULT_REGISTRY_CRAWL_INTERVAL));
    if (log.isDebug2()) {
      log.debug2(
          "Setting Registry AU recrawl interval to "
              + StringUtil.timeIntervalToString(
                  paramMap.getLong(KEY_AU_NEW_CONTENT_CRAWL_INTERVAL)));
    }
  }
Example #13
 private void writeFiles() {
   PlatformUtil platutil = PlatformUtil.getInstance();
   CuIterator iter = AuUtil.getCuIterator(au);
   int errs = 0;
   CachedUrl curCu = null;
   CachedUrl nextCu = getNextCu(iter);
   while (nextCu != null) {
     curCu = nextCu;
     nextCu = getNextCu(iter);
     if (excludeDirNodes && nextCu != null && isDirOf(curCu, nextCu)) {
       continue;
     }
     CachedUrl[] cuVersions =
         curCu.getCuVersions(maxVersions > 0 ? maxVersions : Integer.MAX_VALUE);
     for (CachedUrl cu : cuVersions) {
       try {
         log.debug2("Exporting " + cu.getUrl());
         writeCu(cu);
       } catch (IOException e) {
         if (platutil.isDiskFullError(e)) {
           recordError("Disk full, can't write export file.");
           isDiskFull = true;
           return;
         }
       } catch (Exception e) {
         // XXX Would like to differentiate between errors opening or
         // reading CU, which shouldn't cause abort, and errors writing
         // to export file, which should.
         recordError("Unable to copy " + cu.getUrl(), e);
         if (errs++ >= maxErrors) {
           recordError("Aborting after " + errs + " errors");
           return;
         }
       }
     }
   }
 }
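
// The per-entry error budget in writeFiles() (keep going on individual
// failures, abort once errs exceeds maxErrors) is a reusable idiom. A
// generic sketch with hypothetical names; checked exceptions and the
// disk-full special case are elided.
import java.util.List;
import java.util.function.Consumer;

class ErrorBudget {
  /** Apply write to each item; give up once more than maxErrors items fail. */
  static <T> boolean exportAll(List<T> items, Consumer<T> write, int maxErrors) {
    int errs = 0;
    for (T item : items) {
      try {
        write.accept(item);
      } catch (RuntimeException e) {
        if (errs++ >= maxErrors) {
          return false;   // abort after too many per-item failures
        }
      }
    }
    return true;
  }
}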
 /**
  * Factory method to create new LockssRepository instances.
  *
  * @param au the {@link ArchivalUnit}
  * @return the new LockssRepository instance
  */
 public static LockssRepository createNewLockssRepository(ArchivalUnit au) {
   String root = getRepositoryRoot(au);
   if (root == null || root.equals("null")) {
     logger.error("No repository dir set in config");
     throw new LockssRepository.RepositoryStateException("No repository dir set in config");
   }
   String auDir = LockssRepositoryImpl.mapAuToFileLocation(root, au);
   if (logger.isDebug2()) {
     logger.debug2("repo: " + auDir + ", au: " + au.getName());
   }
   staticCacheLocation = extendCacheLocation(root);
   LockssRepositoryImpl repo = new LockssRepositoryImpl(auDir);
   Plugin plugin = au.getPlugin();
   if (plugin != null) {
     LockssDaemon daemon = plugin.getDaemon();
     if (daemon != null) {
       RepositoryManager mgr = daemon.getRepositoryManager();
       if (mgr != null) {
         mgr.setRepositoryForPath(auDir, repo);
       }
     }
   }
   return repo;
 }
Example #15
  /** Explode the archive into its constituent elements */
  public void explode() throws CacheException {
    CachedUrl cachedUrl = null;
    int goodEntries = 0;
    int badEntries = 0;
    int ignoredEntries = 0;
    int entriesBetweenSleep = 0;
    ArchiveReader arcReader = null;

    logger.info(
        (storeArchive ? "Storing" : "Fetching") + " WARC file: " + origUrl + " will explode");
    try {
      if (storeArchive) {
        UrlCacher uc = au.makeUrlCacher(new UrlData(arcStream, arcProps, fetchUrl));
        BitSet bs = new BitSet();
        bs.set(UrlCacher.DONT_CLOSE_INPUT_STREAM_FLAG);
        uc.setFetchFlags(bs);
        uc.storeContent();
        archiveData.resetInputStream();
        arcStream = archiveData.input;
      }
      // Wrap it in an ArchiveReader
      logger.debug3("About to wrap stream");
      arcReader = wrapStream(fetchUrl, arcStream);
      logger.debug3("wrapStream() returns " + (arcReader == null ? "null" : "non-null"));
      // Explode it
      if (arcReader == null) {
        throw new CacheException.ExploderException("no WarcReader for " + origUrl);
      }
      ArchivalUnit au = crawlFacade.getAu();
      Set stemSet = new HashSet();
      logger.debug("Exploding " + fetchUrl);
      // Iterate through the elements in the WARC file, except the first
      Iterator i = arcReader.iterator();
      // Skip first record
      for (i.next(); i.hasNext(); ) {
        // XXX probably not necessary
        helper.pokeWDog();
        if ((++entriesBetweenSleep % sleepAfter) == 0) {
          long pauseTime =
              CurrentConfig.getTimeIntervalParam(PARAM_RETRY_PAUSE, DEFAULT_RETRY_PAUSE);
          Deadline pause = Deadline.in(pauseTime);
          logger.debug3("Sleeping for " + StringUtil.timeIntervalToString(pauseTime));
          while (!pause.expired()) {
            try {
              pause.sleep();
            } catch (InterruptedException ie) {
              // no action
            }
          }
        }
        ArchiveRecord element = (ArchiveRecord) i.next();
        // Each element is a URL to be cached in a suitable AU
        ArchiveRecordHeader elementHeader = element.getHeader();
        String elementUrl = elementHeader.getUrl();
        String elementMimeType = elementHeader.getMimetype();
        long elementLength = elementHeader.getLength();
        logger.debug2("WARC url " + elementUrl + " mime " + elementMimeType);
        if (elementUrl.startsWith("http:")) {
          ArchiveEntry ae =
              new ArchiveEntry(
                  elementUrl,
                  elementLength,
                  0, // XXX need to convert getDate string to long
                  element, // ArchiveRecord extends InputStream
                  this,
                  fetchUrl);
          ae.setHeaderFields(makeCIProperties(elementHeader));
          long bytesStored = elementLength;
          logger.debug3("ArchiveEntry: " + ae.getName() + " bytes " + bytesStored);
          try {
            helper.process(ae);
          } catch (PluginException ex) {
            throw new CacheException.ExploderException("helper.process() threw", ex);
          }
          if (ae.getBaseUrl() != null) {
            if (ae.getRestOfUrl() != null && ae.getHeaderFields() != null) {
              storeEntry(ae);
              handleAddText(ae);
              goodEntries++;
              crawlFacade.getCrawlerStatus().addContentBytesFetched(bytesStored);
            } else {
              ignoredEntries++;
            }
          } else {
            badEntries++;
            logger.debug2("Can't map " + elementUrl + " from " + archiveUrl);
          }
        }
      }
    } catch (IOException ex) {
      throw new CacheException.ExploderException(ex);
    } finally {
      if (arcReader != null) {
        try {
          arcReader.close();
          arcReader = null;
        } catch (IOException ex) {
          throw new CacheException.ExploderException(ex);
        }
      }
      if (cachedUrl != null) {
        cachedUrl.release();
      }
      IOUtil.safeClose(arcStream);
    }
    if (badEntries == 0 && goodEntries > 0) {
      // Make it look like a new crawl finished on each AU to which
      // URLs were added.
      for (Iterator it = touchedAus.iterator(); it.hasNext(); ) {
        ArchivalUnit au = (ArchivalUnit) it.next();
        logger.debug3(archiveUrl + " touching " + au.toString());
        AuUtil.getDaemon(au).getNodeManager(au).newContentCrawlFinished();
      }
    } else {
      ArchivalUnit au = crawlFacade.getAu();
      String msg = archiveUrl + ": " + badEntries + "/" + goodEntries + " bad entries";
      throw new CacheException.UnretryableException(msg);
    }
  }
  /** Explode the archive into its constituent elements */
  public void explode() throws CacheException {
    int goodEntries = 0;
    int badEntries = 0;
    int entriesBetweenSleep = 0;
    ArchiveReader arcReader = null;

    logger.debug(
        (storeArchive ? "Storing" : "Fetching") + " WARC file: " + origUrl + " will explode");
    try {
      // Wrap it in an ArchiveReader
      logger.debug3("About to wrap stream");
      arcReader = wrapStream(fetchUrl, arcStream);
      logger.debug3("wrapStream() returns " + (arcReader == null ? "null" : "non-null"));
      // Explode it
      if (arcReader == null) {
        throw new CacheException.ExploderException("no WarcReader for " + origUrl);
      }
      ArchivalUnit au = crawlFacade.getAu();
      logger.debug("Exploding " + fetchUrl);
      // Iterate through the elements in the WARC file, except the first
      Iterator<ArchiveRecord> iter = arcReader.iterator();
      // Skip first record
      if (iter.hasNext()) iter.next();
      while (iter.hasNext()) {
        helper.pokeWDog();
        // check need to pause
        handlePause(++entriesBetweenSleep);
        // handle each element in the archive
        ArchiveRecord element = iter.next();
        // Each element is a URL to be cached in our AU
        ArchiveRecordHeader elementHeader = element.getHeader();
        String elementUrl = elementHeader.getUrl();
        String elementMimeType = elementHeader.getMimetype();
        long elementLength = elementHeader.getLength();
        long elementDate;
        try {
          elementDate = ArchiveUtils.parse14DigitDate(elementHeader.getDate()).getTime();
        } catch (ParseException e) {
          elementDate = 0;
        }
        logger.debug2("WARC url " + elementUrl + " mime " + elementMimeType);
        // add check to determine if this is a url which should be cached
        if (au.shouldBeCached(elementUrl) && elementUrl.startsWith("http:")) {
          ArchiveEntry ae =
              new ArchiveEntry(
                  elementUrl,
                  elementLength,
                  elementDate,
                  element, // ArchiveRecord extends InputStream
                  this,
                  fetchUrl);
          ae.setHeaderFields(makeCIProperties(elementHeader));
          long bytesStored = elementLength;
          logger.debug3("ArchiveEntry: " + ae.getName() + " bytes " + bytesStored);
          try {
            helper.process(ae);
          } catch (PluginException ex) {
            throw new CacheException.ExploderException("helper.process() threw", ex);
          }
          if (ae.getBaseUrl() != null) {
            if (ae.getRestOfUrl() != null && ae.getHeaderFields() != null) {
              storeEntry(ae);
              handleAddText(ae);
              goodEntries++;
              // XXX this needs to use the correct depth; how?
              CrawlUrlData cud = new CrawlUrlData(elementUrl, 0);
              crawlFacade.addToParseQueue(cud);
              crawlFacade.getCrawlerStatus().addContentBytesFetched(bytesStored);
            }
          } else {
            badEntries++;
            logger.debug2("Can't map " + elementUrl + " from " + archiveUrl);
          }
        }
      }
    } catch (IOException ex) {
      throw new CacheException.ExploderException(ex);
    } finally {
      if (arcReader != null) {
        try {
          arcReader.close();
        } catch (IOException ex) {
          throw new CacheException.ExploderException(ex);
        }
      }
      IOUtil.safeClose(arcStream);
    }
    // report failed fetches
    if (badEntries != 0) {
      String msg = archiveUrl + ": " + badEntries + "/" + goodEntries + " bad entries";
      throw new CacheException.UnretryableException(msg);
    }
  }
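
// handlePause() is not shown in this excerpt. Judging from the inline
// Deadline loop in the earlier explode(), it plausibly looks something
// like the following; this is a guess under stated assumptions, not the
// actual LOCKSS code, and sleepAfter/pauseMs are illustrative names.
class Pauser {
  private final int sleepAfter;   // entries between pauses; must be > 0
  private final long pauseMs;     // analogous to PARAM_RETRY_PAUSE

  Pauser(int sleepAfter, long pauseMs) {
    this.sleepAfter = sleepAfter;
    this.pauseMs = pauseMs;
  }

  /** Sleep briefly after every sleepAfter-th entry. */
  void handlePause(int entriesSoFar) {
    if (entriesSoFar % sleepAfter == 0) {
      try {
        Thread.sleep(pauseMs);
      } catch (InterruptedException ie) {
        Thread.currentThread().interrupt();  // preserve interrupt status
      }
    }
  }
}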
Example #17
  /**
   * Get or create TdbTitle for the specified properties and TdbAu.
   *
   * @param props the properties
   * @param au the TdbAu
   * @return the corresponding TdbTitle
   */
  private TdbTitle getTdbTitle(Properties props, TdbAu au) {
    TdbTitle title = null;

    // get publisher name
    String publisherNameFromProps = getTdbPublisherName(props, au);

    // get the title name
    String titleNameFromProps = getTdbTitleName(props, au);

    // get the title ID
    String titleIdFromProps = getTdbTitleId(props, au);

    String titleId = titleIdFromProps;
    if (titleId == null) {
      // generate a titleId if one not specified, using the
      // hash code of the combined title name and publisher names
      int hash = (titleNameFromProps + publisherNameFromProps).hashCode();
      titleId = (hash < 0) ? ("id:1" + (-hash)) : ("id:0" + hash);
    }

    // get publisher specified by property name
    TdbPublisher publisher = tdbPublisherMap.get(publisherNameFromProps);
    if (publisher != null) {
      // find title from publisher
      title = publisher.getTdbTitleById(titleId);
      if (title != null) {
        // warn that title name is different
        if (!title.getName().equals(titleNameFromProps)) {
          logger.warning(
              "Title for au \""
                  + au.getName()
                  + "\": \""
                  + titleNameFromProps
                  + "\" is different than existing title \""
                  + title.getName()
                  + "\" for id "
                  + titleId
                  + " -- using existing title.");
        }
        return title;
      }
    }

    if (publisher == null) {
      // warn of missing publisher name
      if (publisherNameFromProps.startsWith(UNKNOWN_PUBLISHER_PREFIX)) {
        logger.warning(
            "Publisher missing for au \""
                + au.getName()
                + "\" -- using \""
                + publisherNameFromProps
                + "\"");
      }

      // create new publisher for specified publisher name
      publisher = new TdbPublisher(publisherNameFromProps);
      tdbPublisherMap.put(publisherNameFromProps, publisher);
    }

    // warn of missing title name and/or id
    if (titleNameFromProps.startsWith(UNKNOWN_TITLE_PREFIX)) {
      logger.warning(
          "Title missing for au \"" + au.getName() + "\" -- using \"" + titleNameFromProps + "\"");
    }
    if (titleIdFromProps == null) {
      logger.debug2("Title ID missing for au \"" + au.getName() + "\" -- using " + titleId);
    }

    // create title and add to publisher
    title = new TdbTitle(titleNameFromProps, titleId);
    try {
      publisher.addTdbTitle(title);
    } catch (TdbException ex) {
      // shouldn't happen: title already exists in publisher
      logger.error(ex.getMessage(), ex);
    }

    return title;
  }
  protected void initResultMap() throws PluginException.InvalidDefinition {
    HttpResultMap hResultMap = new HttpResultMap();
    // XXX Currently this only allows a CacheResultHandler class to
    // initialize the result map.  Instead, don't use a CacheResultMap
    // directly, use either the plugin's CacheResultHandler, if specified,
    // or a default one that wraps the CacheResultMap

    String handler_class = definitionMap.getString(KEY_EXCEPTION_HANDLER, null);
    if (handler_class != null) {
      try {
        resultHandler = (CacheResultHandler) newAuxClass(handler_class, CacheResultHandler.class);
        resultHandler.init(hResultMap);
      } catch (Exception ex) {
        throw new PluginException.InvalidDefinition(
            mapName + " has invalid Exception handler: " + handler_class, ex);
      } catch (LinkageError le) {
        throw new PluginException.InvalidDefinition(
            mapName + " has invalid Exception handler: " + handler_class, le);
      }
    } else {
      // Expect a list of mappings from either result code or exception
      // name to CacheException name
      Collection<String> mappings = definitionMap.getCollection(KEY_EXCEPTION_LIST, null);
      if (mappings != null) {
        // add each entry
        for (String entry : mappings) {
          if (log.isDebug2()) {
            log.debug2("initMap(" + entry + ")");
          }
          String first;
          String ceName;
          try {
            List<String> pair = StringUtil.breakAt(entry, '=', 2, true, true);
            first = pair.get(0);
            ceName = pair.get(1);
          } catch (Exception ex) {
            throw new PluginException.InvalidDefinition(
                "Invalid syntax: " + entry + " in " + mapName);
          }
          Object val;

          // Value should be either a CacheException or CacheResultHandler
          // class name.
          PluginFetchEventResponse resp =
              (PluginFetchEventResponse) newAuxClass(ceName, PluginFetchEventResponse.class, null);
          if (resp instanceof CacheException) {
            val = resp.getClass();
          } else if (resp instanceof CacheResultHandler) {
            val = WrapperUtil.wrap((CacheResultHandler) resp, CacheResultHandler.class);
          } else {
            throw new PluginException.InvalidDefinition(
                "Second arg not a "
                    + "CacheException or "
                    + "CacheResultHandler class: "
                    + entry
                    + ", in "
                    + mapName);
          }
          try {
            int code = Integer.parseInt(first);
            // If parseable as an integer, it's a result code.
            hResultMap.storeMapEntry(code, val);
          } catch (NumberFormatException e) {
            try {
              Class eClass = Class.forName(first);
              // If a class name, it should be an exception class
              if (Exception.class.isAssignableFrom(eClass)) {
                hResultMap.storeMapEntry(eClass, val);
              } else {
                throw new PluginException.InvalidDefinition(
                    "First arg not an " + "Exception class: " + entry + ", in " + mapName);
              }
            } catch (Exception ex) {
              throw new PluginException.InvalidDefinition(
                  "First arg not a " + "number or class: " + entry + ", in " + mapName);
            } catch (LinkageError le) {
              throw new PluginException.InvalidDefinition("Can't load " + first, le);
            }
          }
        }
      }
    }
    resultMap = hResultMap;
  }
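
// The mapping entries handled by initResultMap() have the shape
// "<HTTP result code or exception class>=<CacheException or handler class>".
// A minimal parser for just the left-hand-side dispatch: numeric keys are
// result codes, anything else is treated as an exception class name.
// Illustrative names only; class loading and wrapping are elided.
import java.util.Map;

class ResultMapEntries {
  static void parseEntry(String entry, Map<Object, String> map) {
    String[] pair = entry.split("=", 2);
    if (pair.length != 2) {
      throw new IllegalArgumentException("Invalid syntax: " + entry);
    }
    String first = pair[0].trim();
    String ceName = pair[1].trim();
    try {
      map.put(Integer.parseInt(first), ceName);  // numeric result code
    } catch (NumberFormatException e) {
      map.put(first, ceName);                    // exception class name
    }
  }
}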