/** * Expunge an entry from memMap while updating diskMap. * * @param entry a SoftEntry<V> obtained from refQueuePoll() */ private synchronized void pageOutStaleEntry(SoftEntry<V> entry) { PhantomEntry<V> phantom = entry.phantom; // Still in memMap? if not, was paged-out by earlier direct access // before placed into reference-queue; just return if (memMap.get(phantom.key) != entry) { // NOTE: intentional identity compare return; } // recover hidden value V phantomValue = phantom.doctoredGet(); // Expected value present? (should be; only clear is at end of // this method, after entry removal from memMap) if (phantomValue == null) { logger.log(Level.WARNING, "unexpected null phantomValue", new Exception()); return; // nothing to do } // given instance entry still in memMap; // we have the key and phantom Value, // the diskMap can be updated. diskMap.put(phantom.key, phantomValue); // unchecked cast expungeStatsDiskPut.incrementAndGet(); // remove memMap entry boolean removed = memMap.remove(phantom.key, entry); if (!removed) { logger.log(Level.WARNING, "expunge memMap.remove() ineffective", new Exception()); } phantom.clear(); // truly allows GC of unreferenced V object }
/** * Copies entries from an existing environment db to a new one. If historyMap is not provided, * only logs the entries that would have been copied. * * @param sourceDir existing environment database directory * @param historyMap new environment db (or null for a dry run) * @return number of records * @throws DatabaseException */ private static int copyPersistEnv(File sourceDir, StoredSortedMap<String, Map> historyMap) throws DatabaseException { int count = 0; // open the source env history DB, copying entries to target env EnhancedEnvironment sourceEnv = setupCopyEnvironment(sourceDir, true); StoredClassCatalog sourceClassCatalog = sourceEnv.getClassCatalog(); DatabaseConfig historyDbConfig = HISTORY_DB_CONFIG.toDatabaseConfig(); historyDbConfig.setReadOnly(true); Database sourceHistoryDB = sourceEnv.openDatabase(null, URI_HISTORY_DBNAME, historyDbConfig); StoredSortedMap<String, Map> sourceHistoryMap = new StoredSortedMap<String, Map>( sourceHistoryDB, new StringBinding(), new SerialBinding<Map>(sourceClassCatalog, Map.class), true); Iterator<Entry<String, Map>> iter = sourceHistoryMap.entrySet().iterator(); while (iter.hasNext()) { Entry<String, Map> item = iter.next(); if (logger.isLoggable(Level.FINE)) { logger.fine(item.getKey() + " " + new JSONObject(item.getValue())); } if (historyMap != null) { historyMap.put(item.getKey(), item.getValue()); } count++; } StoredIterator.close(iter); sourceHistoryDB.close(); sourceEnv.close(); return count; }
/** * Populates an environment db from a persist log. If historyMap is not provided, only logs the * entries that would have been populated. * * @param persistLogReader persist log * @param historyMap new environment db (or null for a dry run) * @return number of records * @throws UnsupportedEncodingException * @throws DatabaseException */ private static int populatePersistEnvFromLog( BufferedReader persistLogReader, StoredSortedMap<String, Map> historyMap) throws UnsupportedEncodingException, DatabaseException { int count = 0; Iterator<String> iter = new LineReadingIterator(persistLogReader); while (iter.hasNext()) { String line = iter.next(); if (line.length() == 0) { continue; } String[] splits = line.split(" "); if (splits.length != 2) { logger.severe("bad line has " + splits.length + " fields (should be 2): " + line); continue; } Map alist; try { alist = (Map) SerializationUtils.deserialize(Base64.decodeBase64(splits[1].getBytes("UTF-8"))); } catch (Exception e) { logger.severe("caught exception " + e + " deserializing line: " + line); continue; } if (logger.isLoggable(Level.FINE)) { logger.fine(splits[0] + " " + ArchiveUtils.prettyString(alist)); } if (historyMap != null) try { historyMap.put(splits[0], alist); } catch (Exception e) { logger.log( Level.SEVERE, "caught exception after loading " + count + " urls from the persist log (perhaps crawl was stopped by user?)", e); IOUtils.closeQuietly(persistLogReader); // seems to finish most cleanly when we return rather than throw something return count; } count++; } IOUtils.closeQuietly(persistLogReader); return count; }