/**
 * Delete the specified snapshot
 *
 * @param snapshot
 * @throws SnapshotDoesNotExistException If the specified snapshot does not exist.
 * @throws IOException For filesystem IOExceptions
 */
public void deleteSnapshot(SnapshotDescription snapshot)
    throws SnapshotDoesNotExistException, IOException {
  // check to see if it is completed
  if (!isSnapshotCompleted(snapshot)) {
    throw new SnapshotDoesNotExistException(ProtobufUtil.createSnapshotDesc(snapshot));
  }

  String snapshotName = snapshot.getName();
  // first create the snapshot description and check to see if it exists
  FileSystem fs = master.getMasterFileSystem().getFileSystem();
  Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir);
  // Get snapshot info from file system. The one passed as parameter is a "fake" snapshotInfo with
  // just the "name" and it does not contain the "real" snapshot information
  snapshot = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);

  // call coproc pre hook
  MasterCoprocessorHost cpHost = master.getMasterCoprocessorHost();
  if (cpHost != null) {
    cpHost.preDeleteSnapshot(snapshot);
  }

  LOG.debug("Deleting snapshot: " + snapshotName);
  // delete the existing snapshot
  if (!fs.delete(snapshotDir, true)) {
    throw new HBaseSnapshotException("Failed to delete snapshot directory: " + snapshotDir);
  }

  // call coproc post hook
  if (cpHost != null) {
    cpHost.postDeleteSnapshot(snapshot);
  }
}
private void createDirIfNotExist(Path path) throws IOException {
  if (!fs.exists(path)) {
    if (!fs.mkdirs(path)) {
      throw new IOException("Unable to create: " + path);
    }
  }
}
public HdfsDirectory(Path hdfsDirPath, LockFactory lockFactory, Configuration configuration)
    throws IOException {
  super(lockFactory);
  this.hdfsDirPath = hdfsDirPath;
  this.configuration = configuration;
  fileSystem = FileSystem.get(hdfsDirPath.toUri(), configuration);
  fileContext = FileContext.getFileContext(hdfsDirPath.toUri(), configuration);

  if (fileSystem instanceof DistributedFileSystem) {
    // Make sure dfs is not in safe mode
    while (((DistributedFileSystem) fileSystem).setSafeMode(SafeModeAction.SAFEMODE_GET, true)) {
      LOG.warn("The NameNode is in SafeMode - Solr will wait 5 seconds and try again.");
      try {
        Thread.sleep(5000);
      } catch (InterruptedException e) {
        Thread.interrupted();
        // continue
      }
    }
  }

  try {
    if (!fileSystem.exists(hdfsDirPath)) {
      boolean success = fileSystem.mkdirs(hdfsDirPath);
      if (!success) {
        throw new RuntimeException("Could not create directory: " + hdfsDirPath);
      }
    }
  } catch (Exception e) {
    org.apache.solr.common.util.IOUtils.closeQuietly(fileSystem);
    throw new RuntimeException("Problem creating directory: " + hdfsDirPath, e);
  }
}
private void setUnreadableBySuperuserXattrib(Path p) throws IOException {
  if (fs.getScheme().toLowerCase().contains("hdfs")
      && intermediateEncryptionEnabled
      && !fs.getXAttrs(p).containsKey(UNREADABLE_BY_SUPERUSER_XATTRIB)) {
    fs.setXAttr(p, UNREADABLE_BY_SUPERUSER_XATTRIB, null, EnumSet.of(XAttrSetFlag.CREATE));
  }
}
@Override
public void setup(Reducer<IntWritable, Text, NullWritable, NullWritable>.Context context) {
  Configuration conf = context.getConfiguration();
  FileSystem fs;
  try {
    fs = FileSystem.get(conf);
  } catch (Exception e) {
    throw new RuntimeException("Error opening the FileSystem!");
  }

  RetrievalEnvironment env = null;
  try {
    env = new RetrievalEnvironment(conf.get(Constants.IndexPath), fs);
  } catch (IOException e) {
    throw new RuntimeException("Unable to create RetrievalEnvironment!");
  }

  collectionDocumentCount = env.readCollectionDocumentCount();

  try {
    out = fs.create(new Path(env.getTermDocVectorsForwardIndex()), true);
    out.writeInt(env.readDocnoOffset());
    out.writeInt(collectionDocumentCount);
  } catch (Exception e) {
    throw new RuntimeException("Error in creating files!");
  }
}
public static void run(Configuration conf, Path input, String outputFile)
    throws IOException, InstantiationException, IllegalAccessException {
  Writer writer;
  if (outputFile == null) {
    writer = new OutputStreamWriter(System.out);
  } else {
    writer = new OutputStreamWriter(
        new FileOutputStream(new File(outputFile)), Charset.forName("UTF-8"));
  }

  try {
    FileSystem fs = input.getFileSystem(conf);
    for (FileStatus fst : fs.listStatus(input, new DataPathFilter())) {
      Path dataPath = fst.getPath();
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, dataPath, conf);
      try {
        Text key = reader.getKeyClass().asSubclass(Text.class).newInstance();
        DocumentMapping value = new DocumentMapping();
        while (reader.next(key, value)) {
          String docId = value.getDocId();
          writer.write(docId + "\t" + key + "\n");
        }
      } finally {
        reader.close();
      }
    }
  } finally {
    writer.close();
  }
}
/*
 * Returns the tap for inferences
 */
private Map<String, Tap> getInferencesTap(Scheme scheme) {
  Map<String, Tap> inferencesTap = new HashMap<String, Tap>();
  try {
    String path = null;
    FileSystem fs = FileSystem.get(mConfiguration.hadoopConfiguration);
    if (mConfiguration.doPredicateIndexing) {
      LiteralFields headStream = ruleStreams.getHeadStream();
      path = distributedFileSystemManager.getInferencesPath(headStream);
      if (fs.exists(new Path(path))) {
        inferencesTap.put(headStream.getId().toString(), new Hfs(scheme, path));
      }
      for (LiteralFields fields : ruleStreams.getBodyStreams()) {
        path = distributedFileSystemManager.getInferencesPath(fields);
        if (fs.exists(new Path(path))) {
          inferencesTap.put(fields.getId().toString(), new Hfs(scheme, path));
        }
      }
    } else {
      path = distributedFileSystemManager.getInferencesPath();
      if (fs.exists(new Path(path))) {
        inferencesTap.put("main", new Hfs(scheme, path));
      }
    }
  } catch (IOException e) {
    logger.error("io exception", e);
    throw new RuntimeException("io exception", e);
  }
  return inferencesTap;
}
public void store(Path output) throws IOException {
  FileSystem fs = output.getFileSystem(HadoopUtils.createConfiguration());
  fs.delete(output, true);
  OutputStream os = fs.create(output, true);
  store(os);
  os.close();
}
@BeforeClass
public static void setUp() throws Exception {
  Configuration conf = new Configuration();
  conf.set("hadoop.security.auth_to_local", "RULE:[2:$1]");
  dfsCluster = new MiniDFSCluster(conf, numSlaves, true, null);
  jConf = new JobConf(conf);
  mrCluster = new MiniMRCluster(
      0, 0, numSlaves, dfsCluster.getFileSystem().getUri().toString(), 1, null, null, null, jConf);

  createTokenFileJson();
  verifySecretKeysInJSONFile();
  NameNodeAdapter.getDtSecretManager(dfsCluster.getNamesystem()).startThreads();
  FileSystem fs = dfsCluster.getFileSystem();

  p1 = new Path("file1");
  p2 = new Path("file2");
  p1 = fs.makeQualified(p1);
}
/**
 * set up input file which has the list of input files.
 *
 * @return boolean
 * @throws IOException
 */
private boolean setup() throws IOException {
  estimateSavings();

  final String randomId = getRandomId();
  JobClient jClient = new JobClient(jobconf);
  Path jobdir = new Path(jClient.getSystemDir(), NAME + "_" + randomId);

  LOG.info(JOB_DIR_LABEL + "=" + jobdir);
  jobconf.set(JOB_DIR_LABEL, jobdir.toString());
  Path log = new Path(jobdir, "_logs");

  // The control file should have small size blocks. This helps
  // in spreading out the load from mappers that will be spawned.
  jobconf.setInt("dfs.blocks.size", OP_LIST_BLOCK_SIZE);

  FileOutputFormat.setOutputPath(jobconf, log);
  LOG.info("log=" + log);

  // create operation list
  FileSystem fs = jobdir.getFileSystem(jobconf);
  Path opList = new Path(jobdir, "_" + OP_LIST_LABEL);
  jobconf.set(OP_LIST_LABEL, opList.toString());
  int opCount = 0, synCount = 0;
  SequenceFile.Writer opWriter = null;

  try {
    opWriter = SequenceFile.createWriter(
        fs, jobconf, opList, Text.class, PolicyInfo.class, SequenceFile.CompressionType.NONE);
    for (RaidPolicyPathPair p : raidPolicyPathPairList) {
      // If a large set of files are Raided for the first time, files
      // in the same directory that tend to have the same size will end up
      // with the same map. This shuffle mixes things up, allowing a better
      // mix of files.
      java.util.Collections.shuffle(p.srcPaths);
      for (FileStatus st : p.srcPaths) {
        opWriter.append(new Text(st.getPath().toString()), p.policy);
        opCount++;
        if (++synCount > SYNC_FILE_MAX) {
          opWriter.sync();
          synCount = 0;
        }
      }
    }
  } finally {
    if (opWriter != null) {
      opWriter.close();
    }
    fs.setReplication(opList, OP_LIST_REPLICATION); // increase replication for control file
  }
  raidPolicyPathPairList.clear();

  jobconf.setInt(OP_COUNT_LABEL, opCount);
  LOG.info("Number of files=" + opCount);
  jobconf.setNumMapTasks(
      getMapCount(opCount, new JobClient(jobconf).getClusterStatus().getTaskTrackers()));
  LOG.info("jobName= " + jobName + " numMapTasks=" + jobconf.getNumMapTasks());
  return opCount != 0;
}
@Test
public void testRestrictedRead() throws IOException {
  FileSystemPartitionView<TestRecord> partition0 =
      partitioned.getPartitionView(URI.create("id_hash=0"));
  FileSystemPartitionView<TestRecord> partition1 =
      partitioned.getPartitionView(URI.create("id_hash=1"));
  FileSystemPartitionView<TestRecord> partition2 =
      partitioned.getPartitionView(URI.create("id_hash=2"));
  FileSystemPartitionView<TestRecord> partition3 =
      partitioned.getPartitionView(URI.create("id_hash=3"));

  int count0 = DatasetTestUtilities.materialize(partition0).size();
  int total = DatasetTestUtilities.materialize(partitioned).size();
  Assert.assertTrue("Should read some records", count0 > 0);
  Assert.assertTrue("Should not read the entire dataset", count0 < total);

  // move other partitions so they match the partition0 constraint
  FileSystem local = LocalFileSystem.getInstance();
  local.rename(new Path(partition1.getLocation()), new Path(partitioned.getDirectory(), "0"));
  local.rename(
      new Path(partition2.getLocation()), new Path(partitioned.getDirectory(), "hash=0"));
  local.rename(
      new Path(partition3.getLocation()), new Path(partitioned.getDirectory(), "id_hash=00"));

  int newCount0 = DatasetTestUtilities.materialize(partition0).size();
  Assert.assertEquals("Should match original count", count0, newCount0);

  int countByConstraints =
      DatasetTestUtilities.materialize(partition0.toConstraintsView()).size();
  Assert.assertEquals("Should match total count", total, countByConstraints);
}
public void testInFlow() throws Exception {
  FileSystem.get(new Configuration()).delete(new Path("/tmp/input"), true);
  FileSystem.get(new Configuration()).delete(new Path("/tmp/output"), true);

  Hfs inTap = new Hfs(new ProtobufScheme("value", Example.Person.class), "/tmp/input");
  TupleEntryCollector collector = inTap.openForWrite(new HadoopFlowProcess());
  collector.add(new TupleEntry(new Fields("value"), new Tuple(BRYAN.build())));
  collector.add(new TupleEntry(new Fields("value"), new Tuple(LUCAS.build())));
  collector.close();

  Pipe inPipe = new Pipe("in");
  Pipe p = new Each(
      inPipe,
      new Fields("value"),
      new ExpandProto(Example.Person.class),
      new Fields("id", "name", "email", "position"));

  Hfs sink = new Hfs(new TextLine(), "/tmp/output");
  new HadoopFlowConnector().connect(inTap, sink, p).complete();

  TupleEntryIterator iter = sink.openForRead(new HadoopFlowProcess());
  List<Tuple> results = new ArrayList<Tuple>();
  while (iter.hasNext()) {
    results.add(iter.next().getTupleCopy());
  }
  assertEquals(2, results.size());
  assertEquals(
      new Tuple(0, 1, "bryan", "*****@*****.**", Example.Person.Position.CEO.getNumber())
          .toString(),
      results.get(0).toString());
  assertEquals(new Tuple(25, 2, "lucas", null, null).toString(), results.get(1).toString());
}
/**
 * Creates the directory if it does not already exist.
 *
 * @param directory the directory to create
 * @param conf Hadoop Configuration
 * @throws java.io.IOException if the HDFS operation fails
 */
public static void makeDirectoryIfNotExists(String directory, Configuration conf)
    throws IOException {
  FileSystem fileSystem = FileSystem.get(conf);
  if (!isExist(conf, directory) && !isDirectory(fileSystem, directory)) {
    fileSystem.mkdirs(new Path(directory));
  }
}
/**
 * Moves the specified files to another directory on HDFS.
 *
 * @param conf Hadoop Configuration
 * @param delayFiles list of files to move
 * @param targetDirectory target directory
 * @throws java.io.IOException if the files cannot be moved
 */
public static void moveFilesToDirectory(
    Configuration conf, List<String> delayFiles, String targetDirectory) throws IOException {
  for (String path : delayFiles) {
    String filename = FileUtils.getFilename(path);
    String delayedFilePrefix = filename.split("-")[0];
    String outputHead = delayedFilePrefix.replaceAll("delay", "");
    String outputMiddle = delayedFilePrefix.substring(0, 5); // todo
    String outputTail = filename.replaceAll(delayedFilePrefix, "");

    System.out.println(
        "Acceleration Dir " + targetDirectory + "/" + outputHead + "_" + outputMiddle + outputTail);
    makeDirectoryIfNotExists(targetDirectory, conf);

    FileSystem fileSystem = FileSystem.get(conf);
    fileSystem.rename(
        new Path(path),
        new Path(targetDirectory + "/" + outputHead + "_" + outputMiddle + outputTail));

    System.out.println("\t Moved: '" + path + "' --> '" + targetDirectory + "'");
  }
}
/** Obtain the owner of the log dir. This is determined by checking the job's log directory. */
static String obtainLogDirOwner(TaskAttemptID taskid) throws IOException {
  Configuration conf = new Configuration();
  FileSystem raw = FileSystem.getLocal(conf).getRaw();
  Path jobLogDir = new Path(getJobDir(taskid.getJobID()).getAbsolutePath());
  FileStatus jobStat = raw.getFileStatus(jobLogDir);
  return jobStat.getOwner();
}
@Test
public void testGetTokensForViewFS() throws IOException, URISyntaxException {
  Configuration conf = new Configuration(jConf);
  FileSystem dfs = dfsCluster.getFileSystem();
  String serviceName = dfs.getCanonicalServiceName();

  Path p1 = new Path("/mount1");
  Path p2 = new Path("/mount2");
  p1 = dfs.makeQualified(p1);
  p2 = dfs.makeQualified(p2);

  conf.set("fs.viewfs.mounttable.default.link./dir1", p1.toString());
  conf.set("fs.viewfs.mounttable.default.link./dir2", p2.toString());
  Credentials credentials = new Credentials();
  Path lp1 = new Path("viewfs:///dir1");
  Path lp2 = new Path("viewfs:///dir2");
  Path[] paths = new Path[2];
  paths[0] = lp1;
  paths[1] = lp2;
  TokenCache.obtainTokensForNamenodesInternal(credentials, paths, conf);

  Collection<Token<? extends TokenIdentifier>> tns = credentials.getAllTokens();
  assertEquals("number of tokens is not 1", 1, tns.size());

  boolean found = false;
  for (Token<? extends TokenIdentifier> tt : tns) {
    System.out.println("token=" + tt);
    if (tt.getKind().equals(DelegationTokenIdentifier.HDFS_DELEGATION_KIND)
        && tt.getService().equals(new Text(serviceName))) {
      found = true;
    }
  }
  // assert after scanning all tokens so an unrelated token seen first does not fail the check
  assertTrue("didn't find token for [" + lp1 + ", " + lp2 + "]", found);
}
/**
 * Create log directory for the given attempt. This involves creating the following and setting
 * proper permissions for the new directories <br>
 * {hadoop.log.dir}/userlogs/<jobid> <br>
 * {hadoop.log.dir}/userlogs/<jobid>/<attempt-id-as-symlink> <br>
 * {one of the mapred-local-dirs}/userlogs/<jobid> <br>
 * {one of the mapred-local-dirs}/userlogs/<jobid>/<attempt-id>
 *
 * @param taskID attempt-id for which log dir is to be created
 * @param isCleanup Is this attempt a cleanup attempt ?
 * @param localDirs mapred local directories
 * @throws IOException
 */
public static void createTaskAttemptLogDir(
    TaskAttemptID taskID, boolean isCleanup, String[] localDirs) throws IOException {
  String cleanupSuffix = isCleanup ? ".cleanup" : "";
  String strAttemptLogDir = getTaskAttemptLogDir(taskID, cleanupSuffix, localDirs);
  File attemptLogDir = new File(strAttemptLogDir);
  if (!attemptLogDir.mkdirs()) {
    throw new IOException("Creation of " + attemptLogDir + " failed.");
  }

  String strLinkAttemptLogDir =
      getJobDir(taskID.getJobID()).getAbsolutePath()
          + File.separatorChar
          + taskID.toString()
          + cleanupSuffix;
  if (FileUtil.symLink(strAttemptLogDir, strLinkAttemptLogDir) != 0) {
    throw new IOException(
        "Creation of symlink from " + strLinkAttemptLogDir + " to " + strAttemptLogDir + " failed.");
  }

  FileSystem localFs = FileSystem.getLocal(new Configuration());
  localFs.setPermission(new Path(attemptLogDir.getPath()), new FsPermission((short) 0700));
}
static {
  try {
    // call newInstance() instead of using a shared instance from a cache
    // to avoid accidentally having it closed by someone else
    FileSystem fs = FileSystem.newInstance(FileSystem.getDefaultUri(CONF), CONF);
    if (!(fs instanceof DistributedFileSystem)) {
      String error = "Cannot connect to HDFS. "
          + CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY
          + "(" + CONF.get(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY) + ")"
          + " might be set incorrectly";
      throw new RuntimeException(error);
    }
    DFS = (DistributedFileSystem) fs;
  } catch (IOException e) {
    throw new RuntimeException("couldn't retrieve FileSystem:\n" + e.getMessage(), e);
  }

  SUPPORTS_VOLUME_ID = CONF.getBoolean(
      DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED,
      DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED_DEFAULT);
}
private double[] getSparkModelInfoFromHDFS(Path location, Configuration conf) throws Exception {
  FileSystem fileSystem = FileSystem.get(location.toUri(), conf);
  FileStatus[] files = fileSystem.listStatus(location);
  if (files == null) {
    throw new Exception("Couldn't find Spark Truck ML weights at: " + location);
  }

  ArrayList<Double> modelInfo = new ArrayList<Double>();
  for (FileStatus file : files) {
    if (file.getPath().getName().startsWith("_")) {
      continue;
    }
    InputStream stream = fileSystem.open(file.getPath());
    StringWriter writer = new StringWriter();
    IOUtils.copy(stream, writer, "UTF-8");
    String raw = writer.toString();
    for (String str : raw.split("\n")) {
      modelInfo.add(Double.valueOf(str));
    }
  }
  return Doubles.toArray(modelInfo);
}
private void writeIndexDescriptors(ETwinIndexDescriptor ETwinIndexDescriptor) throws IOException {
  Configuration conf = getConf();
  FileSystem fs = (new Path(IndexConfig.index.get()).getFileSystem(conf));
  FileStatus[] fileStats = fs.globStatus(new Path(IndexConfig.index.get(), "*"));

  // We write one indexDescriptor per generated index segment.
  // Something to consider: right now it's a straight-up serialized Thrift object.
  // Would it be better to do the LzoBase64Line thing, so that we can apply our tools?
  // or extend the tools?
  for (int i = 0; i < fileStats.length; i++) {
    ETwinIndexDescriptor.setIndexPart(i);
    FileStatus stat = fileStats[i];
    Path idxPath =
        new Path(stat.getPath().getParent(), "_" + stat.getPath().getName() + ".indexmeta");
    FSDataOutputStream os = fs.create(idxPath, true);
    @SuppressWarnings("unchecked")
    ThriftWritable<ETwinIndexDescriptor> writable =
        (ThriftWritable<ETwinIndexDescriptor>) ThriftWritable.newInstance(ETwinIndexDescriptor.getClass());
    writable.set(ETwinIndexDescriptor);
    writable.write(os);
    os.close();
  }
}
public QseqRecordReader(Configuration conf, FileSplit split) throws IOException {
  setConf(conf);
  file = split.getPath();
  start = split.getStart();
  end = start + split.getLength();

  FileSystem fs = file.getFileSystem(conf);
  FSDataInputStream fileIn = fs.open(file);

  CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
  CompressionCodec codec = codecFactory.getCodec(file);

  if (codec == null) { // no codec. Uncompressed file.
    positionAtFirstRecord(fileIn);
    inputStream = fileIn;
  } else { // compressed file
    if (start != 0) {
      throw new RuntimeException(
          "Start position for compressed file is not 0! (found " + start + ")");
    }
    inputStream = codec.createInputStream(fileIn);
    end = Long.MAX_VALUE; // read until the end of the file
  }

  lineReader = new LineReader(inputStream);
}
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
  FileSystem fileSystem = FileSystem.get(configuration);
  if (fileSystem.isDirectory(split.getPath())) {
    return false;
  }
  if (fileProcessed) {
    return false;
  }

  int fileLength = (int) split.getLength();
  byte[] result = new byte[fileLength];

  FSDataInputStream inputStream = null;
  try {
    inputStream = fileSystem.open(split.getPath());
    IOUtils.readFully(inputStream, result, 0, fileLength);
    currentValue.set(result, 0, fileLength);
  } finally {
    IOUtils.closeStream(inputStream);
  }
  fileProcessed = true;
  return true;
}
@Test
public void test() throws Exception {
  Connector c = getConnector();
  // make a table
  String tableName = getUniqueNames(1)[0];
  c.tableOperations().create(tableName);
  // write to it
  BatchWriter bw = c.createBatchWriter(tableName, new BatchWriterConfig());
  Mutation m = new Mutation("row");
  m.put("cf", "cq", "value");
  bw.addMutation(m);
  bw.close();
  // create a fake _tmp file in its directory
  String id = c.tableOperations().tableIdMap().get(tableName);
  FileSystem fs = getCluster().getFileSystem();
  Path tmp = new Path("/accumulo/tables/" + id + "/default_tablet/junk.rf_tmp");
  fs.create(tmp).close();
  for (ProcessReference tserver : getCluster().getProcesses().get(ServerType.TABLET_SERVER)) {
    getCluster().killProcess(ServerType.TABLET_SERVER, tserver);
  }
  getCluster().start();
  Scanner scanner = c.createScanner(tableName, Authorizations.EMPTY);
  FunctionalTestUtils.count(scanner);
  assertFalse(fs.exists(tmp));
}
public Path write(Message... messages) throws Exception {
  synchronized (WriteUsingMR.class) {
    outputPath = TestUtils.someTemporaryFilePath();
    Path inputPath = TestUtils.someTemporaryFilePath();
    FileSystem fileSystem = inputPath.getFileSystem(conf);
    fileSystem.create(inputPath);
    inputMessages = Collections.unmodifiableList(Arrays.asList(messages));

    final Job job = new Job(conf, "write");

    // input not really used
    TextInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(TextInputFormat.class);

    job.setMapperClass(WritingMapper.class);
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(ProtoParquetOutputFormat.class);
    ProtoParquetOutputFormat.setOutputPath(job, outputPath);
    ProtoParquetOutputFormat.setProtobufClass(job, TestUtils.inferRecordsClass(messages));

    waitForJob(job);
    inputMessages = null;
    return outputPath;
  }
}
public boolean refresh(final Path path) throws IOException {
  try (FileSystem fs = path.getFileSystem(new Configuration())) {
    if (_fileStatus.isPresent()) {
      Optional<FileStatus> oldStatus = this._fileStatus;
      try {
        Optional<FileStatus> newStatus = Optional.of(fs.getFileStatus(path));
        this.exists = newStatus.isPresent();
        return (oldStatus.isPresent() != this._fileStatus.isPresent()
            || oldStatus.get().getModificationTime() != newStatus.get().getModificationTime()
            || oldStatus.get().isDirectory() != newStatus.get().isDirectory()
            || oldStatus.get().getLen() != newStatus.get().getLen());
      } catch (FileNotFoundException e) {
        _fileStatus = Optional.absent();
        this.exists = false;
        return true;
      }
    } else {
      if (path.getFileSystem(new Configuration()).exists(path)) {
        _fileStatus = Optional.of(fs.getFileStatus(path));
        return true;
      } else {
        return false;
      }
    }
  }
}
public static void readHiveResult(String path, OutputStreamWriter outStream, Configuration conf)
    throws IOException {
  FileSystem fs = FileSystem.get(conf);
  Path dir = new Path(path);
  if (!fs.exists(dir)) {
    throw new IOException("Cannot find path: " + path);
  }
  FileStatus[] filelist = fs.listStatus(dir);

  Long bytesRead = 0L;
  long maxsize = 1024L * 1024 * 1024 * 10;

  for (FileStatus f : filelist) {
    if (!f.isDir() && !f.getPath().getName().startsWith("_")) {
      FSDataInputStream in = fs.open(f.getPath());
      BufferedReader bf = new BufferedReader(new InputStreamReader(in));
      String line;
      while ((line = bf.readLine()) != null) {
        bytesRead += line.getBytes().length;
        outStream.write(line.replaceAll("\001", ",").replaceAll("\t", ","));
        outStream.write("\r\n");
        if (bytesRead >= maxsize) {
          bf.close();
          in.close();
          return;
        }
      }
      bf.close();
      in.close();
    }
  }
  return;
}
@Test
public void testNonDefaultFS() throws IOException {
  FileSystem fs = cluster.getFileSystem();
  Configuration conf = fs.getConf();
  conf.set(DFSConfigKeys.FS_DEFAULT_NAME_KEY, fs.getUri().toString());
  TestTrash.trashNonDefaultFS(conf);
}
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext taskAttemptContext)
    throws IOException {
  context = taskAttemptContext;
  FileSplit fileSplit = (FileSplit) genericSplit;
  lzoFile = fileSplit.getPath();
  // The LzoSplitInputFormat is not splittable, so the split length is the whole file.
  totalFileSize = fileSplit.getLength();

  // Jump through some hoops to create the lzo codec.
  Configuration conf = CompatibilityUtil.getConfiguration(context);
  CompressionCodecFactory factory = new CompressionCodecFactory(conf);
  CompressionCodec codec = factory.getCodec(lzoFile);
  ((Configurable) codec).setConf(conf);

  LzopDecompressor lzopDecompressor = (LzopDecompressor) codec.createDecompressor();

  FileSystem fs = lzoFile.getFileSystem(conf);
  rawInputStream = fs.open(lzoFile);

  // Creating the LzopInputStream here just reads the lzo header for us, nothing more.
  // We do the rest of our input directly off of the raw stream.
  codec.createInputStream(rawInputStream, lzopDecompressor);

  // This must be called AFTER createInputStream is called, because createInputStream
  // is what reads the header, which has the checksum information. Otherwise getChecksumsCount
  // erroneously returns zero, and all block offsets will be wrong.
  numCompressedChecksums = lzopDecompressor.getCompressedChecksumsCount();
  numDecompressedChecksums = lzopDecompressor.getDecompressedChecksumsCount();
}
@Test(timeout = 2000)
public void testCleanupQueueClosesFilesystem()
    throws IOException, InterruptedException, NoSuchFieldException, IllegalAccessException {
  Configuration conf = new Configuration();
  File file = new File("afile.txt");
  file.createNewFile();
  Path path = new Path(file.getAbsoluteFile().toURI());

  FileSystem.get(conf);
  Assert.assertEquals(1, getFileSystemCacheSize());

  // With UGI, should close FileSystem
  CleanupQueue cleanupQueue = new CleanupQueue();
  PathDeletionContext context =
      new PathDeletionContext(path, conf, UserGroupInformation.getLoginUser(), null, null);
  cleanupQueue.addToQueue(context);

  while (getFileSystemCacheSize() > 0) {
    Thread.sleep(100);
  }

  file.createNewFile();
  FileSystem.get(conf);
  Assert.assertEquals(1, getFileSystemCacheSize());

  // Without UGI, should not close FileSystem
  context = new PathDeletionContext(path, conf);
  cleanupQueue.addToQueue(context);

  while (file.exists()) {
    Thread.sleep(100);
  }
  Assert.assertEquals(1, getFileSystemCacheSize());
}
/**
 * Check duplicated tweet IDs in <b>tweetIdDir</b>, and output the duplicates to stdout.
 *
 * @param tweetIdDir
 * @throws Exception
 */
public static void checkTidDuplicates(String tweetIdDir) throws Exception {
  // First change path strings to URI strings starting with 'file:' or 'hdfs:'
  tweetIdDir = MultiFileFolderWriter.getUriStrForPath(tweetIdDir);

  Set<String> tidSet = new HashSet<String>();
  Configuration conf = HBaseConfiguration.create();
  FileSystem fs = FileSystem.get(new URI(tweetIdDir), conf);
  int dupCount = 0;

  for (FileStatus srcFileStatus : fs.listStatus(new Path(tweetIdDir))) {
    String srcFileName = srcFileStatus.getPath().getName();
    if (srcFileName.endsWith(".txt") && srcFileName.contains("tweetIds")) {
      BufferedReader brTid =
          new BufferedReader(new InputStreamReader(fs.open(srcFileStatus.getPath())));
      String tid = brTid.readLine();
      while (tid != null) {
        if (tidSet.contains(tid)) {
          System.out.println("Duplicated tweet ID: " + tid);
          dupCount++;
        } else {
          tidSet.add(tid);
        }
        tid = brTid.readLine();
      }
      brTid.close();
    }
  }
  System.out.println(
      "Number of unique tweet IDs: " + tidSet.size() + ", number of duplicates: " + dupCount);
}