/** * Return a SnapshotManifest instance with the information already loaded in-memory. * SnapshotManifest manifest = SnapshotManifest.open(...) HTableDescriptor htd = * manifest.getTableDescriptor() for (SnapshotRegionManifest regionManifest: * manifest.getRegionManifests()) hri = regionManifest.getRegionInfo() for * (regionManifest.getFamilyFiles()) ... */ public static SnapshotManifest open( final Configuration conf, final FileSystem fs, final Path workingDir, final SnapshotDescription desc) throws IOException { SnapshotManifest manifest = new SnapshotManifest(conf, fs, workingDir, desc, null); manifest.load(); return manifest; }
/** * @param snapshot descriptor of the snapshot to take * @param masterServices master services provider */ public TakeSnapshotHandler(SnapshotDescription snapshot, final MasterServices masterServices) { super(masterServices, EventType.C_M_SNAPSHOT_TABLE); assert snapshot != null : "SnapshotDescription must not be nul1"; assert masterServices != null : "MasterServices must not be nul1"; this.master = masterServices; this.snapshot = snapshot; this.snapshotTable = TableName.valueOf(snapshot.getTable()); this.conf = this.master.getConfiguration(); this.fs = this.master.getMasterFileSystem().getFileSystem(); this.rootDir = this.master.getMasterFileSystem().getRootDir(); this.snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir); this.workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir); this.monitor = new ForeignExceptionDispatcher(snapshot.getName()); this.snapshotManifest = SnapshotManifest.create(conf, fs, workingDir, snapshot, monitor); this.tableLockManager = master.getTableLockManager(); this.tableLock = this.tableLockManager.writeLock(snapshotTable, EventType.C_M_SNAPSHOT_TABLE.toString()); // prepare the verify this.verifier = new MasterSnapshotVerifier(masterServices, snapshot, rootDir); // update the running tasks this.status = TaskMonitor.get() .createStatus("Taking " + snapshot.getType() + " snapshot on table: " + snapshotTable); }
/** Take a snapshot of the specified disabled region */ protected void snapshotDisabledRegion(final HRegionInfo regionInfo) throws IOException { snapshotManifest.addRegion(FSUtils.getTableDir(rootDir, snapshotTable), regionInfo); monitor.rethrowException(); status.setStatus( "Completed referencing HFiles for offline region " + regionInfo.toString() + " of table: " + snapshotTable); }
// TODO consider parallelizing these operations since they are independent. Right now its just // easier to keep them serial though @Override public void snapshotRegions(List<Pair<HRegionInfo, ServerName>> regionsAndLocations) throws IOException, KeeperException { try { timeoutInjector.start(); // 1. get all the regions hosting this table. // extract each pair to separate lists Set<HRegionInfo> regions = new HashSet<HRegionInfo>(); for (Pair<HRegionInfo, ServerName> p : regionsAndLocations) { regions.add(p.getFirst()); } // 2. for each region, write all the info to disk String msg = "Starting to write region info and WALs for regions for offline snapshot:" + ClientSnapshotDescriptionUtils.toString(snapshot); LOG.info(msg); status.setStatus(msg); ThreadPoolExecutor exec = SnapshotManifest.createExecutor(conf, "DisabledTableSnapshot"); try { ModifyRegionUtils.editRegions( exec, regions, new ModifyRegionUtils.RegionEditTask() { @Override public void editRegion(final HRegionInfo regionInfo) throws IOException { snapshotManifest.addRegion(FSUtils.getTableDir(rootDir, snapshotTable), regionInfo); } }); } finally { exec.shutdown(); } } catch (Exception e) { // make sure we capture the exception to propagate back to the client later String reason = "Failed snapshot " + ClientSnapshotDescriptionUtils.toString(snapshot) + " due to exception:" + e.getMessage(); ForeignException ee = new ForeignException(reason, e); monitor.receive(ee); status.abort("Snapshot of table: " + snapshotTable + " failed because " + e.getMessage()); } finally { LOG.debug( "Marking snapshot" + ClientSnapshotDescriptionUtils.toString(snapshot) + " as finished."); // 3. mark the timer as finished - even if we got an exception, we don't need to time the // operation any further timeoutInjector.complete(); } }
/** * Restore or Clone the specified snapshot * * @param reqSnapshot * @param nonceGroup unique value to prevent duplicated RPC * @param nonce unique value to prevent duplicated RPC * @throws IOException */ public long restoreOrCloneSnapshot( SnapshotDescription reqSnapshot, final long nonceGroup, final long nonce) throws IOException { FileSystem fs = master.getMasterFileSystem().getFileSystem(); Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(reqSnapshot, rootDir); // check if the snapshot exists if (!fs.exists(snapshotDir)) { LOG.error("A Snapshot named '" + reqSnapshot.getName() + "' does not exist."); throw new SnapshotDoesNotExistException(ProtobufUtil.createSnapshotDesc(reqSnapshot)); } // Get snapshot info from file system. The reqSnapshot is a "fake" snapshotInfo with // just the snapshot "name" and table name to restore. It does not contains the "real" snapshot // information. SnapshotDescription snapshot = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir); SnapshotManifest manifest = SnapshotManifest.open(master.getConfiguration(), fs, snapshotDir, snapshot); HTableDescriptor snapshotTableDesc = manifest.getTableDescriptor(); TableName tableName = TableName.valueOf(reqSnapshot.getTable()); // stop tracking "abandoned" handlers cleanupSentinels(); // Verify snapshot validity SnapshotReferenceUtil.verifySnapshot(master.getConfiguration(), fs, manifest); // Execute the restore/clone operation long procId; if (MetaTableAccessor.tableExists(master.getConnection(), tableName)) { procId = restoreSnapshot(reqSnapshot, tableName, snapshot, snapshotTableDesc, nonceGroup, nonce); } else { procId = cloneSnapshot(reqSnapshot, tableName, snapshot, snapshotTableDesc, nonceGroup, nonce); } return procId; }
/** * Execute the core common portions of taking a snapshot. The {@link #snapshotRegions(List)} call * should get implemented for each snapshot flavor. */ @Override @edu.umd.cs.findbugs.annotations.SuppressWarnings( value = "REC_CATCH_EXCEPTION", justification = "Intentional") public void process() { String msg = "Running " + snapshot.getType() + " table snapshot " + snapshot.getName() + " " + eventType + " on table " + snapshotTable; LOG.info(msg); status.setStatus(msg); try { // If regions move after this meta scan, the region specific snapshot should fail, triggering // an external exception that gets captured here. // write down the snapshot info in the working directory SnapshotDescriptionUtils.writeSnapshotInfo(snapshot, workingDir, fs); snapshotManifest.addTableDescriptor(this.htd); monitor.rethrowException(); List<Pair<HRegionInfo, ServerName>> regionsAndLocations; if (TableName.META_TABLE_NAME.equals(snapshotTable)) { regionsAndLocations = new MetaTableLocator().getMetaRegionsAndLocations(server.getZooKeeper()); } else { regionsAndLocations = MetaTableAccessor.getTableRegionsAndLocations( server.getConnection(), snapshotTable, false); } // run the snapshot snapshotRegions(regionsAndLocations); monitor.rethrowException(); // extract each pair to separate lists Set<String> serverNames = new HashSet<String>(); for (Pair<HRegionInfo, ServerName> p : regionsAndLocations) { if (p != null && p.getFirst() != null && p.getSecond() != null) { HRegionInfo hri = p.getFirst(); if (hri.isOffline() && (hri.isSplit() || hri.isSplitParent())) continue; serverNames.add(p.getSecond().toString()); } } // flush the in-memory state, and write the single manifest status.setStatus("Consolidate snapshot: " + snapshot.getName()); snapshotManifest.consolidate(); // verify the snapshot is valid status.setStatus("Verifying snapshot: " + snapshot.getName()); verifier.verifySnapshot(this.workingDir, serverNames); // complete the snapshot, atomically moving from tmp to .snapshot dir. completeSnapshot(this.snapshotDir, this.workingDir, this.fs); msg = "Snapshot " + snapshot.getName() + " of table " + snapshotTable + " completed"; status.markComplete(msg); LOG.info(msg); metricsSnapshot.addSnapshot(status.getCompletionTimestamp() - status.getStartTime()); } catch (Exception e) { // FindBugs: REC_CATCH_EXCEPTION status.abort( "Failed to complete snapshot " + snapshot.getName() + " on table " + snapshotTable + " because " + e.getMessage()); String reason = "Failed taking snapshot " + ClientSnapshotDescriptionUtils.toString(snapshot) + " due to exception:" + e.getMessage(); LOG.error(reason, e); ForeignException ee = new ForeignException(reason, e); monitor.receive(ee); // need to mark this completed to close off and allow cleanup to happen. cancel(reason); } finally { LOG.debug("Launching cleanup of working dir:" + workingDir); try { // if the working dir is still present, the snapshot has failed. it is present we delete // it. if (fs.exists(workingDir) && !this.fs.delete(workingDir, true)) { LOG.error("Couldn't delete snapshot working directory:" + workingDir); } } catch (IOException e) { LOG.error("Couldn't delete snapshot working directory:" + workingDir); } releaseTableLock(); } }