/**
 * Return the value associated with the first pattern that the string matches,
 * or the specified default value if none, considering only patterns whose
 * associated value is less than or equal to maxPri.
 */
public int getMatch(String str, int dfault, int maxPri) {
  Perl5Matcher matcher = RegexpUtil.getMatcher();
  for (Map.Entry<Pattern, Integer> ent : patternMap.entrySet()) {
    if (ent.getValue() <= maxPri) {
      Pattern pat = ent.getKey();
      if (matcher.contains(str, pat)) {
        log.debug2("getMatch(" + str + "): " + ent.getValue());
        return ent.getValue();
      }
    }
  }
  log.debug2("getMatch(" + str + "): default: " + dfault);
  return dfault;
}
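// Illustrative sketch (not part of the LOCKSS API): the same "first match wins,
// subject to a priority ceiling" lookup as getMatch() above, written against
// java.util.regex instead of the ORO Perl5Matcher. The class name
// PriorityPatternMap and its fields are hypothetical.
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.regex.Pattern;

class PriorityPatternMap {
  // Insertion order determines which pattern counts as "first"
  private final Map<Pattern, Integer> patternMap = new LinkedHashMap<>();

  void put(String regex, int priority) {
    patternMap.put(Pattern.compile(regex), priority);
  }

  int getMatch(String str, int dfault, int maxPri) {
    for (Map.Entry<Pattern, Integer> ent : patternMap.entrySet()) {
      // Only consider patterns whose value is within the priority ceiling
      if (ent.getValue() <= maxPri && ent.getKey().matcher(str).find()) {
        return ent.getValue();
      }
    }
    return dfault;
  }
}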
protected void initFeatureVersions() throws PluginException.InvalidDefinition {
  if (definitionMap.containsKey(KEY_PLUGIN_FEATURE_VERSION_MAP)) {
    Map<Plugin.Feature, String> map = new HashMap<Plugin.Feature, String>();
    Map<String, String> spec =
        (Map<String, String>) definitionMap.getMap(KEY_PLUGIN_FEATURE_VERSION_MAP);
    log.debug2("features: " + spec);
    for (Map.Entry<String, String> ent : spec.entrySet()) {
      try {
        // Prefix version string with feature name to create separate
        // namespace for each feature
        String key = ent.getKey();
        map.put(Plugin.Feature.valueOf(key), key + "_" + ent.getValue());
      } catch (RuntimeException e) {
        log.warning(
            getPluginName()
                + " set unknown feature: "
                + ent.getKey()
                + " to version "
                + ent.getValue(),
            e);
        throw new PluginException.InvalidDefinition("Unknown feature: " + ent.getKey(), e);
      }
    }
    featureVersion = map;
  } else {
    featureVersion = null;
  }
}
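// Minimal sketch (assumed names, not LOCKSS code) of why the version string is
// prefixed with the feature name above: two features that both declare version
// "2" end up with distinct stored values, so version strings are only
// comparable within one feature's namespace.
import java.util.HashMap;
import java.util.Map;

class FeatureVersionSketch {
  enum Feature { Poll, Metadata }

  public static void main(String[] args) {
    Map<String, String> spec = new HashMap<>();
    spec.put("Poll", "2");
    spec.put("Metadata", "2");

    Map<Feature, String> featureVersion = new HashMap<>();
    for (Map.Entry<String, String> ent : spec.entrySet()) {
      String key = ent.getKey();
      // valueOf throws IllegalArgumentException (a RuntimeException)
      // for a feature name the enum does not define
      featureVersion.put(Feature.valueOf(key), key + "_" + ent.getValue());
    }
    // Prints the namespaced versions, e.g. {Poll=Poll_2, Metadata=Metadata_2}
    // (iteration order may vary)
    System.out.println(featureVersion);
  }
}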
void stopThread() {
  if (sizeCalcThread != null) {
    log.debug2("Stopping thread");
    sizeCalcThread.stopSizeCalc();
    sizeCalcThread = null;
  }
}
/** Create a LockssKeyStore from a config subtree */
LockssKeyStore createLockssKeyStore(Configuration config) {
  log.debug2("Creating LockssKeyStore from config: " + config);
  String name = config.get(KEYSTORE_PARAM_NAME);
  LockssKeyStore lk = new LockssKeyStore(name);

  String file = config.get(KEYSTORE_PARAM_FILE);
  String resource = config.get(KEYSTORE_PARAM_RESOURCE);
  String url = config.get(KEYSTORE_PARAM_URL);
  if (!StringUtil.isNullString(file)) {
    lk.setLocation(file, LocationType.File);
  } else if (!StringUtil.isNullString(resource)) {
    lk.setLocation(resource, LocationType.Resource);
  } else if (!StringUtil.isNullString(url)) {
    lk.setLocation(url, LocationType.Url);
  }

  lk.setType(config.get(KEYSTORE_PARAM_TYPE, defaultKeyStoreType));
  lk.setProvider(config.get(KEYSTORE_PARAM_PROVIDER, defaultKeyStoreProvider));
  lk.setPassword(config.get(KEYSTORE_PARAM_PASSWORD));
  lk.setKeyPassword(config.get(KEYSTORE_PARAM_KEY_PASSWORD));
  lk.setKeyPasswordFile(config.get(KEYSTORE_PARAM_KEY_PASSWORD_FILE));
  lk.setMayCreate(config.getBoolean(KEYSTORE_PARAM_CREATE, DEFAULT_CREATE));
  return lk;
}
/** Enqueue a size calculation for the node */
public void queueSizeCalc(RepositoryNode node) {
  synchronized (sizeCalcQueue) {
    if (sizeCalcQueue.add(node)) {
      log.debug2("Queue size calc: " + node);
      startOrKickThread();
    }
  }
}
/**
 * Remove the next char from the ring and return it
 *
 * @return next char from the ring
 */
public char remove() {
  if (size == 0) {
    throw new IndexOutOfBoundsException("remove() called on empty CharRing");
  }
  if (isTrace) {
    logger.debug2("Removing head from " + toString());
  }
  char returnKar = chars[head];
  head = (head + 1) % capacity;
  size--;
  if (isTrace) {
    logger.debug2("Returning " + returnKar);
  }
  return returnKar;
}
void startOrKickThread() {
  if (sizeCalcThread == null) {
    log.debug2("Starting thread");
    sizeCalcThread = new SizeCalcThread();
    sizeCalcThread.start();
    sizeCalcThread.waitRunning();
  }
  sizeCalcSem.give();
}
static LocalRepository getLocalRepository(String repoRoot) {
  synchronized (localRepositories) {
    LocalRepository localRepo = (LocalRepository) localRepositories.get(repoRoot);
    if (localRepo == null) {
      logger.debug2("Creating LocalRepository(" + repoRoot + ")");
      localRepo = new LocalRepository(repoRoot);
      localRepositories.put(repoRoot, localRepo);
    }
    return localRepo;
  }
}
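// Illustrative alternative (an assumption, not the LOCKSS implementation): the
// get-or-create caching that getLocalRepository() does with an explicit
// synchronized block can also be expressed with ConcurrentHashMap.computeIfAbsent,
// which likewise guarantees one shared instance per key under concurrency.
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.function.Function;

class RepoCacheSketch<R> {
  private final ConcurrentMap<String, R> cache = new ConcurrentHashMap<>();
  private final Function<String, R> factory;

  RepoCacheSketch(Function<String, R> factory) {
    this.factory = factory;
  }

  R getOrCreate(String repoRoot) {
    // computeIfAbsent is atomic: concurrent callers for the same root
    // see a single shared instance, created at most once
    return cache.computeIfAbsent(repoRoot, factory);
  }
}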
/**
 * Add kar to the end of this ring
 *
 * @param kar char to add to ring
 */
public void add(char kar) throws RingFullException {
  if (size == capacity) {
    throw new RingFullException("Array is full");
  }
  if (isTrace) {
    logger.debug2("Adding " + kar + " to " + toString());
  }
  chars[tail] = kar;
  tail = (tail + 1) % capacity;
  size++;
}
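// Self-contained sketch of the ring-buffer invariant used by remove() and add()
// above: head and tail advance modulo capacity, and size alone distinguishes
// empty from full. Class and field names here are illustrative, not the actual
// CharRing implementation.
class MiniCharRing {
  private final char[] chars;
  private int head = 0, tail = 0, size = 0;

  MiniCharRing(int capacity) {
    chars = new char[capacity];
  }

  void add(char kar) {
    if (size == chars.length) throw new IllegalStateException("ring is full");
    chars[tail] = kar;
    tail = (tail + 1) % chars.length; // wrap around
    size++;
  }

  char remove() {
    if (size == 0) throw new IndexOutOfBoundsException("ring is empty");
    char c = chars[head];
    head = (head + 1) % chars.length; // wrap around
    size--;
    return c;
  }

  public static void main(String[] args) {
    MiniCharRing ring = new MiniCharRing(2);
    ring.add('a');
    ring.add('b');
    System.out.println(ring.remove()); // a
    ring.add('c');                     // reuses the slot 'a' vacated
    System.out.println(ring.remove()); // b
    System.out.println(ring.remove()); // c
  }
}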
protected PermissionCheckerFactory getPermissionCheckerFactory() {
  if (permissionCheckerFact == null) {
    String permissionCheckerFactoryClass =
        definitionMap.getString(DefinableArchivalUnit.KEY_AU_PERMISSION_CHECKER_FACTORY, null);
    if (permissionCheckerFactoryClass != null) {
      permissionCheckerFact =
          (PermissionCheckerFactory)
              newAuxClass(permissionCheckerFactoryClass, PermissionCheckerFactory.class);
      log.debug2("Loaded PermissionCheckerFactory: " + permissionCheckerFact);
    }
  }
  return permissionCheckerFact;
}
/**
 * Checks the consistency of the node, and continues with its children if it's consistent.
 *
 * @param node RepositoryNodeImpl the node to check
 */
private void recurseConsistencyCheck(RepositoryNodeImpl node) {
  logger.debug2("Checking node '" + node.getNodeUrl() + "'...");
  // check consistency at each node
  // correct/deactivate as necessary
  // 'checkNodeConsistency()' will repair if possible
  if (node.checkNodeConsistency()) {
    logger.debug3("Node consistent; recursing on children...");
    List children = node.getNodeList(null, false);
    Iterator iter = children.iterator();
    while (iter.hasNext()) {
      RepositoryNodeImpl child = (RepositoryNodeImpl) iter.next();
      recurseConsistencyCheck(child);
    }
  } else {
    logger.debug3("Node inconsistent; deactivating...");
    deactivateInconsistentNode(node);
  }
}
public void loadAuConfigDescrs(Configuration config) throws ConfigurationException {
  super.loadAuConfigDescrs(config);
  this.m_registryUrl = config.get(ConfigParamDescr.BASE_URL.getKey());
  // Now we can construct a valid CC permission checker.
  m_permissionCheckers =
      // ListUtil.list(new CreativeCommonsPermissionChecker(m_registryUrl));
      ListUtil.list(new CreativeCommonsPermissionChecker());

  paramMap.putLong(
      KEY_AU_NEW_CONTENT_CRAWL_INTERVAL,
      CurrentConfig.getTimeIntervalParam(
          PARAM_REGISTRY_CRAWL_INTERVAL, DEFAULT_REGISTRY_CRAWL_INTERVAL));
  if (log.isDebug2()) {
    log.debug2(
        "Setting Registry AU recrawl interval to "
            + StringUtil.timeIntervalToString(
                paramMap.getLong(KEY_AU_NEW_CONTENT_CRAWL_INTERVAL)));
  }
}
private void writeFiles() {
  PlatformUtil platutil = PlatformUtil.getInstance();
  CuIterator iter = AuUtil.getCuIterator(au);
  int errs = 0;
  CachedUrl curCu = null;
  CachedUrl nextCu = getNextCu(iter);
  while (nextCu != null) {
    curCu = nextCu;
    nextCu = getNextCu(iter);
    if (excludeDirNodes && nextCu != null && isDirOf(curCu, nextCu)) {
      continue;
    }
    CachedUrl[] cuVersions =
        curCu.getCuVersions(maxVersions > 0 ? maxVersions : Integer.MAX_VALUE);
    for (CachedUrl cu : cuVersions) {
      try {
        log.debug2("Exporting " + cu.getUrl());
        writeCu(cu);
      } catch (IOException e) {
        if (platutil.isDiskFullError(e)) {
          recordError("Disk full, can't write export file.");
          isDiskFull = true;
          return;
        }
      } catch (Exception e) {
        // XXX Would like to differentiate between errors opening or
        // reading CU, which shouldn't cause abort, and errors writing
        // to export file, which should.
        recordError("Unable to copy " + cu.getUrl(), e);
        if (errs++ >= maxErrors) {
          recordError("Aborting after " + errs + " errors");
          return;
        }
      }
    }
  }
}
/**
 * Factory method to create new LockssRepository instances.
 *
 * @param au the {@link ArchivalUnit}
 * @return the new LockssRepository instance
 */
public static LockssRepository createNewLockssRepository(ArchivalUnit au) {
  String root = getRepositoryRoot(au);
  if (root == null || root.equals("null")) {
    logger.error("No repository dir set in config");
    throw new LockssRepository.RepositoryStateException("No repository dir set in config");
  }
  String auDir = LockssRepositoryImpl.mapAuToFileLocation(root, au);
  if (logger.isDebug2()) {
    logger.debug2("repo: " + auDir + ", au: " + au.getName());
  }
  staticCacheLocation = extendCacheLocation(root);
  LockssRepositoryImpl repo = new LockssRepositoryImpl(auDir);
  Plugin plugin = au.getPlugin();
  if (plugin != null) {
    LockssDaemon daemon = plugin.getDaemon();
    if (daemon != null) {
      RepositoryManager mgr = daemon.getRepositoryManager();
      if (mgr != null) {
        mgr.setRepositoryForPath(auDir, repo);
      }
    }
  }
  return repo;
}
/** Explode the archive into its constituent elements */
public void explode() throws CacheException {
  CachedUrl cachedUrl = null;
  int goodEntries = 0;
  int badEntries = 0;
  int ignoredEntries = 0;
  int entriesBetweenSleep = 0;
  ArchiveReader arcReader = null;

  logger.info(
      (storeArchive ? "Storing" : "Fetching") + " WARC file: " + origUrl + " will explode");
  try {
    if (storeArchive) {
      UrlCacher uc = au.makeUrlCacher(new UrlData(arcStream, arcProps, fetchUrl));
      BitSet bs = new BitSet();
      bs.set(UrlCacher.DONT_CLOSE_INPUT_STREAM_FLAG);
      uc.setFetchFlags(bs);
      uc.storeContent();
      archiveData.resetInputStream();
      arcStream = archiveData.input;
    }
    // Wrap it in an ArchiveReader
    logger.debug3("About to wrap stream");
    arcReader = wrapStream(fetchUrl, arcStream);
    logger.debug3("wrapStream() returns " + (arcReader == null ? "null" : "non-null"));
    // Explode it
    if (arcReader == null) {
      throw new CacheException.ExploderException("no WarcReader for " + origUrl);
    }
    ArchivalUnit au = crawlFacade.getAu();
    Set stemSet = new HashSet();
    logger.debug("Exploding " + fetchUrl);
    // Iterate through the elements in the WARC file, except the first
    Iterator i = arcReader.iterator();
    // Skip first record
    for (i.next(); i.hasNext(); ) {
      // XXX probably not necessary
      helper.pokeWDog();
      if ((++entriesBetweenSleep % sleepAfter) == 0) {
        long pauseTime =
            CurrentConfig.getTimeIntervalParam(PARAM_RETRY_PAUSE, DEFAULT_RETRY_PAUSE);
        Deadline pause = Deadline.in(pauseTime);
        logger.debug3("Sleeping for " + StringUtil.timeIntervalToString(pauseTime));
        while (!pause.expired()) {
          try {
            pause.sleep();
          } catch (InterruptedException ie) {
            // no action
          }
        }
      }
      ArchiveRecord element = (ArchiveRecord) i.next();
      // Each element is a URL to be cached in a suitable AU
      ArchiveRecordHeader elementHeader = element.getHeader();
      String elementUrl = elementHeader.getUrl();
      String elementMimeType = elementHeader.getMimetype();
      long elementLength = elementHeader.getLength();
      logger.debug2("WARC url " + elementUrl + " mime " + elementMimeType);
      if (elementUrl.startsWith("http:")) {
        ArchiveEntry ae =
            new ArchiveEntry(
                elementUrl,
                elementLength,
                0, // XXX need to convert getDate string to long
                element, // ArchiveRecord extends InputStream
                this,
                fetchUrl);
        ae.setHeaderFields(makeCIProperties(elementHeader));
        long bytesStored = elementLength;
        logger.debug3("ArchiveEntry: " + ae.getName() + " bytes " + bytesStored);
        try {
          helper.process(ae);
        } catch (PluginException ex) {
          throw new CacheException.ExploderException("helper.process() threw", ex);
        }
        if (ae.getBaseUrl() != null) {
          if (ae.getRestOfUrl() != null && ae.getHeaderFields() != null) {
            storeEntry(ae);
            handleAddText(ae);
            goodEntries++;
            crawlFacade.getCrawlerStatus().addContentBytesFetched(bytesStored);
          } else {
            ignoredEntries++;
          }
        } else {
          badEntries++;
          logger.debug2("Can't map " + elementUrl + " from " + archiveUrl);
        }
      }
    }
  } catch (IOException ex) {
    throw new CacheException.ExploderException(ex);
  } finally {
    if (arcReader != null) {
      try {
        arcReader.close();
        arcReader = null;
      } catch (IOException ex) {
        throw new CacheException.ExploderException(ex);
      }
    }
    if (cachedUrl != null) {
      cachedUrl.release();
    }
    IOUtil.safeClose(arcStream);
  }
  if (badEntries == 0 && goodEntries > 0) {
    // Make it look like a new crawl finished on each AU to which
    // URLs were added.
    for (Iterator it = touchedAus.iterator(); it.hasNext(); ) {
      ArchivalUnit au = (ArchivalUnit) it.next();
      logger.debug3(archiveUrl + " touching " + au.toString());
      AuUtil.getDaemon(au).getNodeManager(au).newContentCrawlFinished();
    }
  } else {
    ArchivalUnit au = crawlFacade.getAu();
    String msg = archiveUrl + ": " + badEntries + "/" + goodEntries + " bad entries";
    throw new CacheException.UnretryableException(msg);
  }
}
/** Explode the archive into its constituent elements */
public void explode() throws CacheException {
  int goodEntries = 0;
  int badEntries = 0;
  int entriesBetweenSleep = 0;
  ArchiveReader arcReader = null;

  logger.debug(
      (storeArchive ? "Storing" : "Fetching") + " WARC file: " + origUrl + " will explode");
  try {
    // Wrap it in an ArchiveReader
    logger.debug3("About to wrap stream");
    arcReader = wrapStream(fetchUrl, arcStream);
    logger.debug3("wrapStream() returns " + (arcReader == null ? "null" : "non-null"));
    // Explode it
    if (arcReader == null) {
      throw new CacheException.ExploderException("no WarcReader for " + origUrl);
    }
    ArchivalUnit au = crawlFacade.getAu();
    logger.debug("Exploding " + fetchUrl);
    // Iterate through the elements in the WARC file, except the first
    Iterator<ArchiveRecord> iter = arcReader.iterator();
    // Skip first record
    if (iter.hasNext()) iter.next();
    while (iter.hasNext()) {
      helper.pokeWDog();
      // check need to pause
      handlePause(++entriesBetweenSleep);
      // handle each element in the archive
      ArchiveRecord element = iter.next();
      // Each element is a URL to be cached in our AU
      ArchiveRecordHeader elementHeader = element.getHeader();
      String elementUrl = elementHeader.getUrl();
      String elementMimeType = elementHeader.getMimetype();
      long elementLength = elementHeader.getLength();
      long elementDate;
      try {
        elementDate = ArchiveUtils.parse14DigitDate(elementHeader.getDate()).getTime();
      } catch (ParseException e) {
        elementDate = 0;
      }
      logger.debug2("WARC url " + elementUrl + " mime " + elementMimeType);
      // add check to determine if this is a url which should be cached
      if (au.shouldBeCached(elementUrl) && elementUrl.startsWith("http:")) {
        ArchiveEntry ae =
            new ArchiveEntry(
                elementUrl,
                elementLength,
                elementDate,
                element, // ArchiveRecord extends InputStream
                this,
                fetchUrl);
        ae.setHeaderFields(makeCIProperties(elementHeader));
        long bytesStored = elementLength;
        logger.debug3("ArchiveEntry: " + ae.getName() + " bytes " + bytesStored);
        try {
          helper.process(ae);
        } catch (PluginException ex) {
          throw new CacheException.ExploderException("helper.process() threw", ex);
        }
        if (ae.getBaseUrl() != null) {
          if (ae.getRestOfUrl() != null && ae.getHeaderFields() != null) {
            storeEntry(ae);
            handleAddText(ae);
            goodEntries++;
            // this needs to use the correct depth ? how
            CrawlUrlData cud = new CrawlUrlData(elementUrl, 0);
            crawlFacade.addToParseQueue(cud);
            crawlFacade.getCrawlerStatus().addContentBytesFetched(bytesStored);
          }
        } else {
          badEntries++;
          logger.debug2("Can't map " + elementUrl + " from " + archiveUrl);
        }
      }
    }
  } catch (IOException ex) {
    throw new CacheException.ExploderException(ex);
  } finally {
    if (arcReader != null) {
      try {
        arcReader.close();
      } catch (IOException ex) {
        throw new CacheException.ExploderException(ex);
      }
    }
    IOUtil.safeClose(arcStream);
  }
  // report failed fetches
  if (badEntries != 0) {
    String msg = archiveUrl + ": " + badEntries + "/" + goodEntries + " bad entries";
    throw new CacheException.UnretryableException(msg);
  }
}
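// Sketch (an assumption for illustration only, not the actual ArchiveUtils
// code): WARC/ARC headers carry record dates as a 14-digit yyyyMMddHHmmss
// string, which is what parse14DigitDate() above converts to a long. The same
// conversion with only the JDK looks roughly like this, with the same "0 on
// parse failure" fallback the exploder uses.
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.TimeZone;

class WarcDateSketch {
  static long parse14DigitDate(String date) {
    SimpleDateFormat fmt = new SimpleDateFormat("yyyyMMddHHmmss");
    fmt.setTimeZone(TimeZone.getTimeZone("GMT")); // archive timestamps are GMT
    try {
      return fmt.parse(date).getTime();
    } catch (ParseException e) {
      return 0; // unparseable date falls back to epoch, as above
    }
  }

  public static void main(String[] args) {
    System.out.println(parse14DigitDate("20240101120000")); // ms since the epoch
  }
}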
/**
 * Get or create TdbTitle for the specified properties and TdbAu.
 *
 * @param props the properties
 * @param au the TdbAu
 * @return the corresponding TdbTitle
 */
private TdbTitle getTdbTitle(Properties props, TdbAu au) {
  TdbTitle title = null;

  // get publisher name
  String publisherNameFromProps = getTdbPublisherName(props, au);

  // get the title name
  String titleNameFromProps = getTdbTitleName(props, au);

  // get the title ID
  String titleIdFromProps = getTdbTitleId(props, au);

  String titleId = titleIdFromProps;
  if (titleId == null) {
    // generate a titleId if one not specified, using the
    // hash code of the combined title name and publisher names
    int hash = (titleNameFromProps + publisherNameFromProps).hashCode();
    titleId = (hash < 0) ? ("id:1" + (-hash)) : ("id:0" + hash);
  }

  // get publisher specified by property name
  TdbPublisher publisher = tdbPublisherMap.get(publisherNameFromProps);
  if (publisher != null) {
    // find title from publisher
    title = publisher.getTdbTitleById(titleId);
    if (title != null) {
      // warn that title name is different
      if (!title.getName().equals(titleNameFromProps)) {
        logger.warning(
            "Title for au \""
                + au.getName()
                + "\": \""
                + titleNameFromProps
                + "\" is different than existing title \""
                + title.getName()
                + "\" for id "
                + titleId
                + " -- using existing title.");
      }
      return title;
    }
  }

  if (publisher == null) {
    // warn of missing publisher name
    if (publisherNameFromProps.startsWith(UNKNOWN_PUBLISHER_PREFIX)) {
      logger.warning(
          "Publisher missing for au \""
              + au.getName()
              + "\" -- using \""
              + publisherNameFromProps
              + "\"");
    }
    // create new publisher for specified publisher name
    publisher = new TdbPublisher(publisherNameFromProps);
    tdbPublisherMap.put(publisherNameFromProps, publisher);
  }

  // warn of missing title name and/or id
  if (titleNameFromProps.startsWith(UNKNOWN_TITLE_PREFIX)) {
    logger.warning(
        "Title missing for au \"" + au.getName() + "\" -- using \"" + titleNameFromProps + "\"");
  }
  if (titleIdFromProps == null) {
    logger.debug2("Title ID missing for au \"" + au.getName() + "\" -- using " + titleId);
  }

  // create title and add to publisher
  title = new TdbTitle(titleNameFromProps, titleId);
  try {
    publisher.addTdbTitle(title);
  } catch (TdbException ex) {
    // shouldn't happen: title already exists in publisher
    logger.error(ex.getMessage(), ex);
  }
  return title;
}
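// Worked example (illustrative only, hypothetical class and inputs) of the
// synthetic titleId scheme above: the sign of the combined hash is folded into
// the prefix digit ("id:0" for a non-negative hash, "id:1" for a negated
// negative one), so the numeric part of the id is always non-negative and the
// result is stable for a given title/publisher name pair.
class TitleIdSketch {
  static String makeTitleId(String titleName, String publisherName) {
    int hash = (titleName + publisherName).hashCode();
    return (hash < 0) ? ("id:1" + (-hash)) : ("id:0" + hash);
  }

  public static void main(String[] args) {
    // Prints something like "id:0123456789" or "id:1987654321",
    // identical across runs for the same name pair
    System.out.println(makeTitleId("Journal of Examples", "Example Publisher"));
  }
}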
protected void initResultMap() throws PluginException.InvalidDefinition {
  HttpResultMap hResultMap = new HttpResultMap();
  // XXX Currently this only allows a CacheResultHandler class to
  // initialize the result map.  Instead, don't use a CacheResultMap
  // directly, use either the plugin's CacheResultHandler, if specified,
  // or a default one that wraps the CacheResultMap

  String handler_class = null;
  handler_class = definitionMap.getString(KEY_EXCEPTION_HANDLER, null);
  if (handler_class != null) {
    try {
      resultHandler = (CacheResultHandler) newAuxClass(handler_class, CacheResultHandler.class);
      resultHandler.init(hResultMap);
    } catch (Exception ex) {
      throw new PluginException.InvalidDefinition(
          mapName + " has invalid Exception handler: " + handler_class, ex);
    } catch (LinkageError le) {
      throw new PluginException.InvalidDefinition(
          mapName + " has invalid Exception handler: " + handler_class, le);
    }
  } else {
    // Expect a list of mappings from either result code or exception
    // name to CacheException name
    Collection<String> mappings = definitionMap.getCollection(KEY_EXCEPTION_LIST, null);
    if (mappings != null) {
      // add each entry
      for (String entry : mappings) {
        if (log.isDebug2()) {
          log.debug2("initMap(" + entry + ")");
        }
        String first;
        String ceName;
        try {
          List<String> pair = StringUtil.breakAt(entry, '=', 2, true, true);
          first = pair.get(0);
          ceName = pair.get(1);
        } catch (Exception ex) {
          throw new PluginException.InvalidDefinition(
              "Invalid syntax: " + entry + " in " + mapName);
        }
        Object val;
        // Value should be either a CacheException or CacheResultHandler
        // class name.
        PluginFetchEventResponse resp =
            (PluginFetchEventResponse) newAuxClass(ceName, PluginFetchEventResponse.class, null);
        if (resp instanceof CacheException) {
          val = resp.getClass();
        } else if (resp instanceof CacheResultHandler) {
          val = WrapperUtil.wrap((CacheResultHandler) resp, CacheResultHandler.class);
        } else {
          throw new PluginException.InvalidDefinition(
              "Second arg not a CacheException or CacheResultHandler class: "
                  + entry
                  + ", in "
                  + mapName);
        }
        try {
          int code = Integer.parseInt(first);
          // If parseable as an integer, it's a result code.
          hResultMap.storeMapEntry(code, val);
        } catch (NumberFormatException e) {
          try {
            Class eClass = Class.forName(first);
            // If a class name, it should be an exception class
            if (Exception.class.isAssignableFrom(eClass)) {
              hResultMap.storeMapEntry(eClass, val);
            } else {
              throw new PluginException.InvalidDefinition(
                  "First arg not an Exception class: " + entry + ", in " + mapName);
            }
          } catch (Exception ex) {
            throw new PluginException.InvalidDefinition(
                "First arg not a number or class: " + entry + ", in " + mapName);
          } catch (LinkageError le) {
            throw new PluginException.InvalidDefinition("Can't load " + first, le);
          }
        }
      }
    }
  }
  resultMap = hResultMap;
}
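// Illustrative sketch (hypothetical entries and class names, not taken from a
// real plugin) of the KEY_EXCEPTION_LIST entry format that initResultMap()
// parses: each entry is "lhs=rhs", where the left side is either an HTTP result
// code or an exception class name, and the right side names the class the
// result is mapped to. Integer.parseInt distinguishes the two cases, as above.
import java.util.Arrays;
import java.util.List;

class ExceptionListSketch {
  public static void main(String[] args) {
    List<String> mappings = Arrays.asList(
        "404=org.example.MyDeadLinkException",            // numeric code -> class
        "java.io.IOException=org.example.MyRetryHandler"  // exception class -> class
    );
    for (String entry : mappings) {
      String[] pair = entry.split("=", 2);
      String first = pair[0];
      String target = pair[1];
      try {
        int code = Integer.parseInt(first);
        System.out.println("result code " + code + " -> " + target);
      } catch (NumberFormatException e) {
        System.out.println("exception class " + first + " -> " + target);
      }
    }
  }
}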