public void close(Reporter reporter) throws IOException { for (Integer shard : lps.keySet()) { String lpDir = localManager.localTmpDir("" + shard); LOG.info("Closing LP for shard " + shard + " at " + lpDir); lps.get(shard).close(); LOG.info("Closed LP for shard " + shard + " at " + lpDir); progress(); String remoteDir = args.outputDirHdfs + "/" + shard; // Do all this stuff to ensure that S3 actually does delete int deleteAttempt = 4; while (fileSystem.exists(new Path(remoteDir)) && deleteAttempt > 0) { LOG.info("Deleting existing shard " + shard + " at " + remoteDir); fileSystem.delete(new Path(remoteDir), true); --deleteAttempt; } if (fileSystem.exists(new Path(remoteDir)) && deleteAttempt == 0) { throw new IOException( "Failed to delete shard " + shard + " at " + remoteDir + " after " + deleteAttempt + " attempts!"); } else { LOG.info("Deleted existing shard " + shard + " at " + remoteDir); } LOG.info("Copying " + lpDir + " to " + remoteDir); fileSystem.copyFromLocalFile(new Path(lpDir), new Path(remoteDir)); LOG.info("Copied " + lpDir + " to " + remoteDir); progress(); } localManager.cleanup(); }
/**
 * Builds a record writer bound to the given job arguments.
 *
 * <p>Obtains the filesystem for {@code args.outputDirHdfs} via {@code Utils.getFS} and
 * creates the {@code LocalElephantManager} from that filesystem, {@code args.spec}, and
 * the temp directories derived from {@code conf}.
 *
 * @param conf configuration used to obtain the filesystem and the local temp directories
 * @param args job arguments; {@code outputDirHdfs} and {@code spec} are read here
 * @param progressable stored for later use by this writer
 * @throws IOException declared for failures while setting up the filesystem or manager
 */
public ElephantRecordWriter(Configuration conf, Args args, Progressable progressable)
    throws IOException {
  this.args = args;
  this.progressable = progressable;
  this.fileSystem = Utils.getFS(args.outputDirHdfs, conf);
  this.localManager =
      new LocalElephantManager(
          this.fileSystem, args.spec, LocalElephantManager.getTmpDirs(conf));
}
/**
 * Counts one more written record and, on every 25000th record, logs how long the last
 * batch took, resets the checkpoint timestamp, and notifies the local manager of progress.
 */
public void bumpProgress() {
  numWritten += 1;

  // Nothing more to do until a full batch has been written.
  if (numWritten % 25000 != 0) {
    return;
  }

  final long checkpointTime = System.currentTimeMillis();
  final long elapsedMs = checkpointTime - lastCheckpoint;
  lastCheckpoint = checkpointTime;

  LOG.info("Wrote last 25000 records in " + elapsedMs + " ms");
  localManager.progress();
}
/**
 * Returns the persistence for the given shard, opening it on first use.
 *
 * <p>If the shard is already present in {@code lps}, the cached instance is returned.
 * Otherwise the shard is fetched through {@code localManager.downloadRemoteShard}, opened
 * for append via the spec's coordinator, cached in {@code lps}, and progress is reported.
 *
 * @param shardIdx index of the shard to retrieve
 * @return the persistence associated with {@code shardIdx}
 * @throws IOException if downloading or opening the shard fails
 */
private Persistence retrieveShard(int shardIdx) throws IOException {
  // Cache hit: reuse the persistence opened earlier for this shard.
  if (lps.containsKey(shardIdx)) {
    return lps.get(shardIdx);
  }

  String localShardPath = localManager.downloadRemoteShard(String.valueOf(shardIdx), null);
  Coordinator coordinator = args.spec.getCoordinator();
  Persistence opened =
      coordinator.openPersistenceForAppend(localShardPath, args.spec.getPersistenceOptions());

  lps.put(shardIdx, opened);
  progress();
  return opened;
}