@BeforeClass public static void init() throws Exception { conf = new HdfsConfiguration(); conf.setBoolean(DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, true); conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_ACLS_ENABLED_KEY, true); initCluster(true); }
private static void setupConf(Configuration conf) {
  // enable snapshot support
  conf.setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true);
  // disable the ui
  conf.setInt("hbase.regionserver.info.port", -1);
  // change the flush size to a small amount, regulating number of store files
  conf.setInt("hbase.hregion.memstore.flush.size", 25000);
  // make sure we get a compaction when doing a load, but keep around
  // some files in the store
  conf.setInt("hbase.hstore.compaction.min", 10);
  conf.setInt("hbase.hstore.compactionThreshold", 10);
  // block writes if we get to 12 store files
  conf.setInt("hbase.hstore.blockingStoreFiles", 12);
  conf.setInt("hbase.regionserver.msginterval", 100);
  conf.setBoolean("hbase.master.enabletable.roundrobin", true);
  // Avoid potentially aggressive splitting which would cause snapshot to fail
  conf.set(
      HConstants.HBASE_REGION_SPLIT_POLICY_KEY, ConstantSizeRegionSplitPolicy.class.getName());
  // Execute cleaner frequently to induce failures
  conf.setInt("hbase.master.cleaner.interval", CLEANER_INTERVAL);
  conf.setInt("hbase.master.hfilecleaner.plugins.snapshot.period", CLEANER_INTERVAL);
  // Effectively disable TimeToLiveHFileCleaner. Don't want to fully disable it because that
  // would trigger races between creating the directory containing back references and
  // the back reference itself.
  conf.setInt("hbase.master.hfilecleaner.ttl", CLEANER_INTERVAL);
}
@Override protected synchronized void startInternal() throws Exception { // create filesystem only now, as part of service-start. By this time, RM is // authenticated with kerberos so we are good to create a file-system // handle. fsConf = new Configuration(getConfig()); fsConf.setBoolean("dfs.client.retry.policy.enabled", true); String retryPolicy = fsConf.get( YarnConfiguration.FS_RM_STATE_STORE_RETRY_POLICY_SPEC, YarnConfiguration.DEFAULT_FS_RM_STATE_STORE_RETRY_POLICY_SPEC); fsConf.set("dfs.client.retry.policy.spec", retryPolicy); String scheme = fsWorkingPath.toUri().getScheme(); if (scheme == null) { scheme = FileSystem.getDefaultUri(fsConf).getScheme(); } if (scheme != null) { String disableCacheName = String.format("fs.%s.impl.disable.cache", scheme); fsConf.setBoolean(disableCacheName, true); } fs = fsWorkingPath.getFileSystem(fsConf); mkdirsWithRetries(rmDTSecretManagerRoot); mkdirsWithRetries(rmAppRoot); mkdirsWithRetries(amrmTokenSecretManagerRoot); mkdirsWithRetries(reservationRoot); }
/**
 * Merge all the partial {@link org.apache.mahout.math.RandomAccessSparseVector}s into the
 * complete Document {@link org.apache.mahout.math.RandomAccessSparseVector}
 *
 * @param partialVectorPaths input directory of the vectors in {@link
 *     org.apache.hadoop.io.SequenceFile} format
 * @param output output directory where the merged document vectors are to be created
 * @param baseConf job configuration
 * @param normPower The normalization value. Must be greater than or equal to 0 or equal to {@link
 *     #NO_NORMALIZING}
 * @param logNormalize whether to use log normalization
 * @param dimension cardinality of the output vectors
 * @param sequentialAccess output vectors should be optimized for sequential access
 * @param namedVector output vectors should be named, retaining key (doc id) as a label
 * @param numReducers The number of reducers to spawn
 */
public static void mergePartialVectors(
    Iterable<Path> partialVectorPaths,
    Path output,
    Configuration baseConf,
    float normPower,
    boolean logNormalize,
    int dimension,
    boolean sequentialAccess,
    boolean namedVector,
    int numReducers)
    throws IOException, InterruptedException, ClassNotFoundException {
  Preconditions.checkArgument(
      normPower == NO_NORMALIZING || normPower >= 0,
      "If specified, normPower must be nonnegative",
      normPower);
  Preconditions.checkArgument(
      normPower == NO_NORMALIZING
          || (normPower > 1 && !Double.isInfinite(normPower))
          || !logNormalize,
      "normPower must be > 1 and not infinite if log normalization is chosen",
      normPower);
  Configuration conf = new Configuration(baseConf);
  // this conf parameter needs to be set to enable serialization of conf values
  conf.set(
      "io.serializations",
      "org.apache.hadoop.io.serializer.JavaSerialization,"
          + "org.apache.hadoop.io.serializer.WritableSerialization");
  conf.setBoolean(SEQUENTIAL_ACCESS, sequentialAccess);
  conf.setBoolean(NAMED_VECTOR, namedVector);
  conf.setInt(DIMENSION, dimension);
  conf.setFloat(NORMALIZATION_POWER, normPower);
  conf.setBoolean(LOG_NORMALIZE, logNormalize);
  Job job = new Job(conf);
  job.setJobName("PartialVectorMerger::MergePartialVectors");
  job.setJarByClass(PartialVectorMerger.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(VectorWritable.class);
  FileInputFormat.setInputPaths(job, getCommaSeparatedPaths(partialVectorPaths));
  FileOutputFormat.setOutputPath(job, output);
  job.setMapperClass(Mapper.class);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setReducerClass(PartialVectorMergeReducer.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setNumReduceTasks(numReducers);
  HadoopUtil.delete(conf, output);
  boolean succeeded = job.waitForCompletion(true);
  if (!succeeded) {
    throw new IllegalStateException("Job failed!");
  }
}
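// Illustrative usage sketch (not from the original source): how a caller might invoke
// mergePartialVectors above. The paths and numeric settings are hypothetical, and the import
// for PartialVectorMerger is omitted because its package differs across Mahout versions.
import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

public class MergePartialVectorsDriver {
  public static void main(String[] args) throws Exception {
    Configuration baseConf = new Configuration();
    Iterable<Path> partials =
        Arrays.asList(new Path("/tmp/partial-vectors-0"), new Path("/tmp/partial-vectors-1"));
    PartialVectorMerger.mergePartialVectors(
        partials,
        new Path("/tmp/tf-vectors"), // output directory for the merged vectors (hypothetical)
        baseConf,
        2.0f,   // normPower: L2 normalization; pass NO_NORMALIZING to skip normalization
        false,  // logNormalize
        10000,  // dimension: cardinality of the output vectors
        true,   // sequentialAccess
        true,   // namedVector
        1);     // numReducers
  }
}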
@BeforeTest public void setupDriver() throws Exception { conf = new Configuration(); conf.set(CubeQueryConfUtil.DRIVER_SUPPORTED_STORAGES, "C1"); conf.setBoolean(CubeQueryConfUtil.DISABLE_AUTO_JOINS, false); conf.setBoolean(CubeQueryConfUtil.ENABLE_SELECT_TO_GROUPBY, true); conf.setBoolean(CubeQueryConfUtil.ENABLE_GROUP_BY_TO_SELECT, true); conf.setBoolean(CubeQueryConfUtil.DISABLE_AGGREGATE_RESOLVER, false); }
/**
 * If ramDiskReplicaCapacity is >= 0, then RAM_DISK capacity is artificially capped to that many
 * block replicas. If ramDiskReplicaCapacity < 0 then it is ignored.
 */
protected final void startUpCluster(
    boolean hasTransientStorage,
    final int ramDiskReplicaCapacity,
    final boolean useSCR,
    final boolean useLegacyBlockReaderLocal)
    throws IOException {
  Configuration conf = new Configuration();
  conf.setLong(DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
  conf.setInt(
      DFS_NAMENODE_LAZY_PERSIST_FILE_SCRUB_INTERVAL_SEC, LAZY_WRITE_FILE_SCRUBBER_INTERVAL_SEC);
  conf.setLong(DFS_HEARTBEAT_INTERVAL_KEY, HEARTBEAT_INTERVAL_SEC);
  conf.setInt(DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, HEARTBEAT_RECHECK_INTERVAL_MSEC);
  conf.setInt(DFS_DATANODE_LAZY_WRITER_INTERVAL_SEC, LAZY_WRITER_INTERVAL_SEC);
  conf.setInt(DFS_DATANODE_RAM_DISK_LOW_WATERMARK_BYTES, EVICTION_LOW_WATERMARK * BLOCK_SIZE);
  if (useSCR) {
    conf.setBoolean(DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true);
    // Do not share a client context across tests.
    conf.set(DFS_CLIENT_CONTEXT, UUID.randomUUID().toString());
    if (useLegacyBlockReaderLocal) {
      conf.setBoolean(DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL, true);
      conf.set(
          DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY,
          UserGroupInformation.getCurrentUser().getShortUserName());
    } else {
      sockDir = new TemporarySocketDirectory();
      conf.set(
          DFS_DOMAIN_SOCKET_PATH_KEY,
          new File(sockDir.getDir(), this.getClass().getSimpleName() + "._PORT.sock")
              .getAbsolutePath());
    }
  }
  long[] capacities = null;
  if (hasTransientStorage && ramDiskReplicaCapacity >= 0) {
    // Convert replica count to byte count, add some delta for .meta and
    // VERSION files.
    long ramDiskStorageLimit = ((long) ramDiskReplicaCapacity * BLOCK_SIZE) + (BLOCK_SIZE - 1);
    capacities = new long[] {ramDiskStorageLimit, -1};
  }
  cluster =
      new MiniDFSCluster.Builder(conf)
          .numDataNodes(REPL_FACTOR)
          .storageCapacities(capacities)
          .storageTypes(hasTransientStorage ? new StorageType[] {RAM_DISK, DEFAULT} : null)
          .build();
  fs = cluster.getFileSystem();
  client = fs.getClient();
  try {
    jmx = initJMX();
  } catch (Exception e) {
    fail("Failed to initialize JMX for testing: " + e);
  }
  LOG.info("Cluster startup complete");
}
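// Illustrative sketch (not from the original source): a test in a subclass might drive
// startUpCluster above like this. The test name and the trailing comment are hypothetical;
// only the startUpCluster signature comes from the method above.
@Test
public void testRamDiskReplicaPlacement() throws IOException {
  // transient storage on, no artificial RAM_DISK replica cap,
  // short-circuit reads off, legacy local block reader off
  startUpCluster(true, -1, false, false);
  // ... create LAZY_PERSIST files and assert on replica storage types ...
}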
/**
 * Run the job
 *
 * @param input the input pathname String
 * @param output the output pathname String
 * @param catFile the file containing the Wikipedia categories
 * @param exactMatchOnly if true, then the Wikipedia category must match exactly instead of simply
 *     containing the category string
 * @param all if true select all categories
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
public static void runJob(
    String input, String output, String catFile, boolean exactMatchOnly, boolean all)
    throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = new Configuration();
  conf.set("xmlinput.start", "<page>");
  conf.set("xmlinput.end", "</page>");
  conf.setBoolean("exact.match.only", exactMatchOnly);
  conf.setBoolean("all.files", all);
  conf.set(
      "io.serializations",
      "org.apache.hadoop.io.serializer.JavaSerialization,"
          + "org.apache.hadoop.io.serializer.WritableSerialization");
  Job job = new Job(conf);
  if (WikipediaToSequenceFile.log.isInfoEnabled()) {
    log.info(
        "Input: " + input + " Out: " + output + " Categories: " + catFile + " All Files: " + all);
  }
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  FileInputFormat.setInputPaths(job, new Path(input));
  Path outPath = new Path(output);
  FileOutputFormat.setOutputPath(job, outPath);
  job.setMapperClass(WikipediaMapper.class);
  job.setInputFormatClass(XmlInputFormat.class);
  job.setReducerClass(Reducer.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setJarByClass(WikipediaToSequenceFile.class);
  /*
   * conf.set("mapred.compress.map.output", "true"); conf.set("mapred.map.output.compression.type",
   * "BLOCK"); conf.set("mapred.output.compress", "true"); conf.set("mapred.output.compression.type",
   * "BLOCK"); conf.set("mapred.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
   */
  HadoopUtil.overwriteOutput(outPath);
  Set<String> categories = new HashSet<String>();
  if (catFile.length() > 0) {
    for (String line : new FileLineIterable(new File(catFile))) {
      categories.add(line.trim().toLowerCase(Locale.ENGLISH));
    }
  }
  DefaultStringifier<Set<String>> setStringifier =
      new DefaultStringifier<Set<String>>(conf, GenericsUtil.getClass(categories));
  String categoriesStr = setStringifier.toString(categories);
  // Set the categories on the job's own configuration: the Job constructor above copied conf,
  // so setting this on conf at this point would not reach the mappers.
  job.getConfiguration().set("wikipedia.categories", categoriesStr);
  job.waitForCompletion(true);
}
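// Illustrative usage sketch (not from the original source): invoking runJob above, assuming it
// lives in WikipediaToSequenceFile as job.setJarByClass(WikipediaToSequenceFile.class) suggests.
// All paths are placeholders, and the import for WikipediaToSequenceFile is omitted because its
// package differs across Mahout versions.
public class WikipediaConversionDriver {
  public static void main(String[] args) throws Exception {
    WikipediaToSequenceFile.runJob(
        "/tmp/wikipedia/chunks",   // input: directory of Wikipedia XML chunks
        "/tmp/wikipedia/seqfiles", // output: directory for the <Text, Text> SequenceFiles
        "/tmp/categories.txt",     // catFile: categories of interest, one per line
        false,                     // exactMatchOnly: substring match on the category is enough
        false);                    // all: false restricts output to pages in the listed categories
  }
}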
/**
 * Create a partial tfidf vector using a chunk of features from the input vectors. The input
 * vectors have to be in the {@link SequenceFile} format
 *
 * @param input input directory of the vectors in {@link SequenceFile} format
 * @param baseConf job configuration
 * @param featureCount Number of unique features in the dataset
 * @param vectorCount Number of vectors in the dataset
 * @param minDf The minimum document frequency. Default 1
 * @param maxDF The max percentage of vectors for the DF. Can be used to remove really high
 *     frequency features. Expressed as an integer between 0 and 100. Default 99
 * @param dictionaryFilePath location of the chunk of features and their ids
 * @param output output directory where the partial vectors have to be created
 * @param sequentialAccess output vectors should be optimized for sequential access
 * @param namedVector output vectors should be named, retaining key (doc id) as a label
 */
private static void makePartialVectors(
    Path input,
    Configuration baseConf,
    Long featureCount,
    Long vectorCount,
    int minDf,
    long maxDF,
    Path dictionaryFilePath,
    Path output,
    boolean sequentialAccess,
    boolean namedVector)
    throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = new Configuration(baseConf);
  // this conf parameter needs to be set to enable serialization of conf values
  conf.set(
      "io.serializations",
      "org.apache.hadoop.io.serializer.JavaSerialization,"
          + "org.apache.hadoop.io.serializer.WritableSerialization");
  conf.setLong(FEATURE_COUNT, featureCount);
  conf.setLong(VECTOR_COUNT, vectorCount);
  conf.setInt(MIN_DF, minDf);
  conf.setLong(MAX_DF, maxDF);
  conf.setBoolean(PartialVectorMerger.SEQUENTIAL_ACCESS, sequentialAccess);
  conf.setBoolean(PartialVectorMerger.NAMED_VECTOR, namedVector);
  DistributedCache.addCacheFile(dictionaryFilePath.toUri(), conf);
  Job job = new Job(conf);
  job.setJobName(
      ": MakePartialVectors: input-folder: "
          + input
          + ", dictionary-file: "
          + dictionaryFilePath.toString());
  job.setJarByClass(TFIDFConverter.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(VectorWritable.class);
  FileInputFormat.setInputPaths(job, input);
  FileOutputFormat.setOutputPath(job, output);
  job.setMapperClass(Mapper.class);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setReducerClass(TFIDFPartialVectorReducer.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  HadoopUtil.delete(conf, output);
  boolean succeeded = job.waitForCompletion(true);
  if (!succeeded) {
    throw new IllegalStateException("Job failed!");
  }
}
@BeforeTest public void setupDriver() throws Exception { conf = new Configuration(); conf.set(CubeQueryConfUtil.DRIVER_SUPPORTED_STORAGES, "C1,C2"); conf.setBoolean(CubeQueryConfUtil.DISABLE_AUTO_JOINS, false); conf.setBoolean(CubeQueryConfUtil.ENABLE_SELECT_TO_GROUPBY, true); conf.setBoolean(CubeQueryConfUtil.ENABLE_GROUP_BY_TO_SELECT, true); conf.setBoolean(CubeQueryConfUtil.DISABLE_AGGREGATE_RESOLVER, false); conf.setClass( CubeQueryConfUtil.TIME_RANGE_WRITER_CLASS, BetweenTimeRangeWriter.class.asSubclass(TimeRangeWriter.class), TimeRangeWriter.class); }
private void mySetup(int stripeLength) throws Exception {
  if (System.getProperty("hadoop.log.dir") == null) {
    String base = new File(".").getAbsolutePath();
    System.setProperty("hadoop.log.dir", new Path(base).toString() + "/logs");
  }
  new File(TEST_DIR).mkdirs(); // Make sure data directory exists
  conf = new Configuration();
  conf.set("raid.config.file", CONFIG_FILE);
  conf.setBoolean("raid.config.reload", true);
  conf.setLong("raid.config.reload.interval", RELOAD_INTERVAL);
  // scan all policies once every 5 seconds
  conf.setLong("raid.policy.rescan.interval", 5000);
  // do not use map-reduce cluster for Raiding
  conf.set("raid.classname", "org.apache.hadoop.raid.LocalRaidNode");
  conf.set("raid.server.address", "localhost:" + MiniDFSCluster.getFreePort());
  conf.set("mapred.raid.http.address", "localhost:0");
  Utils.loadTestCodecs(
      conf, stripeLength, stripeLength, 1, 3, "/destraid", "/destraidrs", false, true);
  conf.setBoolean("dfs.permissions", false);
  // Make sure initial repl is smaller than NUM_DATANODES
  conf.setInt(RaidNode.RAID_PARITY_INITIAL_REPL_KEY, 1);
  dfsCluster = new MiniDFSCluster(conf, NUM_DATANODES, true, null);
  dfsCluster.waitActive();
  fileSys = dfsCluster.getFileSystem();
  namenode = fileSys.getUri().toString();
  FileSystem.setDefaultUri(conf, namenode);
  mr = new MiniMRCluster(4, namenode, 3);
  jobTrackerName = "localhost:" + mr.getJobTrackerPort();
  hftp = "hftp://localhost.localdomain:" + dfsCluster.getNameNodePort();
  FileSystem.setDefaultUri(conf, namenode);
  conf.set("mapred.job.tracker", jobTrackerName);
  conf.set(RaidNode.RAID_CHECKSUM_STORE_CLASS_KEY, "org.apache.hadoop.raid.LocalChecksumStore");
  conf.setBoolean(RaidNode.RAID_CHECKSUM_STORE_REQUIRED_KEY, true);
  conf.set(LocalChecksumStore.LOCAL_CHECK_STORE_DIR_KEY, CHECKSUM_STORE_DIR);
  conf.set(RaidNode.RAID_STRIPE_STORE_CLASS_KEY, "org.apache.hadoop.raid.LocalStripeStore");
  conf.set(LocalStripeStore.LOCAL_STRIPE_STORE_DIR_KEY, STRIPE_STORE_DIR);
  ConfigBuilder cb = new ConfigBuilder(CONFIG_FILE);
  cb.addPolicy("RaidTest1", "/user/dhruba/raidtest", 1, 1);
  cb.addPolicy("RaidTest2", "/user/dhruba/raidtestrs", 1, 1, "rs");
  cb.persist();
}
@Test(timeout = 120000) public void testFadviseSkippedForSmallReads() throws Exception { // start a cluster LOG.info("testFadviseSkippedForSmallReads"); tracker.clear(); Configuration conf = new HdfsConfiguration(); conf.setBoolean(DFSConfigKeys.DFS_DATANODE_DROP_CACHE_BEHIND_READS_KEY, true); conf.setBoolean(DFSConfigKeys.DFS_DATANODE_DROP_CACHE_BEHIND_WRITES_KEY, true); MiniDFSCluster cluster = null; String TEST_PATH = "/test"; int TEST_PATH_LEN = MAX_TEST_FILE_LEN; FSDataInputStream fis = null; try { cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build(); cluster.waitActive(); FileSystem fs = cluster.getFileSystem(); // create new file createHdfsFile(fs, new Path(TEST_PATH), TEST_PATH_LEN, null); // Since the DataNode was configured with drop-behind, and we didn't // specify any policy, we should have done drop-behind. ExtendedBlock block = cluster .getNameNode() .getRpcServer() .getBlockLocations(TEST_PATH, 0, Long.MAX_VALUE) .get(0) .getBlock(); String fadvisedFileName = cluster.getBlockFile(0, block).getName(); Stats stats = tracker.getStats(fadvisedFileName); stats.assertDroppedInRange(0, TEST_PATH_LEN - WRITE_PACKET_SIZE); stats.clear(); stats.assertNotDroppedInRange(0, TEST_PATH_LEN); // read file fis = fs.open(new Path(TEST_PATH)); byte buf[] = new byte[17]; fis.readFully(4096, buf, 0, buf.length); // we should not have dropped anything because of the small read. stats = tracker.getStats(fadvisedFileName); stats.assertNotDroppedInRange(0, TEST_PATH_LEN - WRITE_PACKET_SIZE); } finally { IOUtils.cleanup(null, fis); if (cluster != null) { cluster.shutdown(); } } }
static { try { defaultConf = new Configuration(false); defaultConf.set("fs.defaultFS", "file:///"); defaultConf.setBoolean(TezConfiguration.TEZ_LOCAL_MODE, true); defaultConf.setBoolean(TezConfiguration.TEZ_AM_SPECULATION_ENABLED, true); defaultConf.setFloat(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MIN_SRC_FRACTION, 1); defaultConf.setFloat(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MAX_SRC_FRACTION, 1); localFs = FileSystem.getLocal(defaultConf); String stagingDir = "target" + Path.SEPARATOR + TestSpeculation.class.getName() + "-tmpDir"; defaultConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDir); } catch (IOException e) { throw new RuntimeException("init failure", e); } }
private void testRMDownForJobStatusBeforeGetAMReport(Configuration conf, int noOfRetries)
    throws IOException {
  conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_FRAMEWORK_NAME);
  conf.setBoolean(MRJobConfig.JOB_AM_ACCESS_DISABLED, !isAMReachableFromClient);
  MRClientProtocol historyServerProxy = mock(MRClientProtocol.class);
  ResourceMgrDelegate rmDelegate = mock(ResourceMgrDelegate.class);
  try {
    when(rmDelegate.getApplicationReport(jobId.getAppId()))
        .thenThrow(
            new java.lang.reflect.UndeclaredThrowableException(
                new IOException("Connection refused1")))
        .thenThrow(
            new java.lang.reflect.UndeclaredThrowableException(
                new IOException("Connection refused2")))
        .thenThrow(
            new java.lang.reflect.UndeclaredThrowableException(
                new IOException("Connection refused3")));
    ClientServiceDelegate clientServiceDelegate =
        new ClientServiceDelegate(conf, rmDelegate, oldJobId, historyServerProxy);
    try {
      clientServiceDelegate.getJobStatus(oldJobId);
      Assert.fail("It should throw an exception after retries");
    } catch (IOException e) {
      System.out.println("Failed to get job status, and e=" + e.toString());
    }
    verify(rmDelegate, times(noOfRetries)).getApplicationReport(any(ApplicationId.class));
  } catch (YarnException e) {
    throw new IOException(e);
  }
}
@Test
public void testRMDownRestoreForJobStatusBeforeGetAMReport() throws IOException {
  Configuration conf = new YarnConfiguration();
  conf.setInt(MRJobConfig.MR_CLIENT_MAX_RETRIES, 3);
  conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_FRAMEWORK_NAME);
  conf.setBoolean(MRJobConfig.JOB_AM_ACCESS_DISABLED, !isAMReachableFromClient);
  MRClientProtocol historyServerProxy = mock(MRClientProtocol.class);
  when(historyServerProxy.getJobReport(any(GetJobReportRequest.class)))
      .thenReturn(getJobReportResponse());
  ResourceMgrDelegate rmDelegate = mock(ResourceMgrDelegate.class);
  try {
    when(rmDelegate.getApplicationReport(jobId.getAppId()))
        .thenThrow(
            new java.lang.reflect.UndeclaredThrowableException(
                new IOException("Connection refused1")))
        .thenThrow(
            new java.lang.reflect.UndeclaredThrowableException(
                new IOException("Connection refused2")))
        .thenReturn(getFinishedApplicationReport());
    ClientServiceDelegate clientServiceDelegate =
        new ClientServiceDelegate(conf, rmDelegate, oldJobId, historyServerProxy);
    JobStatus jobStatus = clientServiceDelegate.getJobStatus(oldJobId);
    verify(rmDelegate, times(3)).getApplicationReport(any(ApplicationId.class));
    Assert.assertNotNull(jobStatus);
  } catch (YarnException e) {
    throw new IOException(e);
  }
}
@BeforeClass public static void clusterSetupAtBegining() throws IOException, LoginException, URISyntaxException { SupportsBlocks = true; CONF.setBoolean(DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, true); cluster = new MiniDFSCluster.Builder(CONF) .nnTopology(MiniDFSNNTopology.simpleFederatedTopology(2)) .numDataNodes(2) .build(); cluster.waitClusterUp(); fHdfs = cluster.getFileSystem(0); fHdfs2 = cluster.getFileSystem(1); fHdfs .getConf() .set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, FsConstants.VIEWFS_URI.toString()); fHdfs2 .getConf() .set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, FsConstants.VIEWFS_URI.toString()); defaultWorkingDirectory = fHdfs.makeQualified( new Path("/user/" + UserGroupInformation.getCurrentUser().getShortUserName())); defaultWorkingDirectory2 = fHdfs2.makeQualified( new Path("/user/" + UserGroupInformation.getCurrentUser().getShortUserName())); fHdfs.mkdirs(defaultWorkingDirectory); fHdfs2.mkdirs(defaultWorkingDirectory2); }
/** @throws java.lang.Exception */ @BeforeClass public static void setUpBeforeClass() throws Exception { TEST_UTIL.startMiniZKCluster(); Configuration conf = TEST_UTIL.getConfiguration(); conf.setBoolean(HConstants.REPLICATION_ENABLE_KEY, HConstants.REPLICATION_ENABLE_DEFAULT); admin = new ReplicationAdmin(conf); }
@Test public void testFsLockFairness() throws IOException, InterruptedException { Configuration conf = new Configuration(); FSEditLog fsEditLog = Mockito.mock(FSEditLog.class); FSImage fsImage = Mockito.mock(FSImage.class); Mockito.when(fsImage.getEditLog()).thenReturn(fsEditLog); conf.setBoolean("dfs.namenode.fslock.fair", true); FSNamesystem fsNamesystem = new FSNamesystem(conf, fsImage); assertTrue(fsNamesystem.getFsLockForTests().isFair()); conf.setBoolean("dfs.namenode.fslock.fair", false); fsNamesystem = new FSNamesystem(conf, fsImage); assertFalse(fsNamesystem.getFsLockForTests().isFair()); }
@Test
// The first attempt fails and the second attempt succeeds, so the job succeeds.
public void testFailTask() throws Exception {
  MRApp app = new MockFirstFailingAttemptMRApp(1, 0);
  Configuration conf = new Configuration();
  // this test requires two task attempts, but uberization overrides max to 1
  conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
  Job job = app.submit(conf);
  app.waitForState(job, JobState.SUCCEEDED);
  Map<TaskId, Task> tasks = job.getTasks();
  Assert.assertEquals("Num tasks is not correct", 1, tasks.size());
  Task task = tasks.values().iterator().next();
  Assert.assertEquals(
      "Task state not correct", TaskState.SUCCEEDED, task.getReport().getTaskState());
  Map<TaskAttemptId, TaskAttempt> attempts = tasks.values().iterator().next().getAttempts();
  Assert.assertEquals("Num attempts is not correct", 2, attempts.size());
  // one attempt must have failed
  // and the other must have succeeded
  Iterator<TaskAttempt> it = attempts.values().iterator();
  Assert.assertEquals(
      "Attempt state not correct",
      TaskAttemptState.FAILED,
      it.next().getReport().getTaskAttemptState());
  Assert.assertEquals(
      "Attempt state not correct",
      TaskAttemptState.SUCCEEDED,
      it.next().getReport().getTaskAttemptState());
}
@Test // All Task attempts are timed out, leading to Job failure public void testTimedOutTask() throws Exception { MRApp app = new TimeOutTaskMRApp(1, 0); Configuration conf = new Configuration(); int maxAttempts = 2; conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, maxAttempts); // disable uberization (requires entire job to be reattempted, so max for // subtask attempts is overridden to 1) conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false); Job job = app.submit(conf); app.waitForState(job, JobState.FAILED); Map<TaskId, Task> tasks = job.getTasks(); Assert.assertEquals("Num tasks is not correct", 1, tasks.size()); Task task = tasks.values().iterator().next(); Assert.assertEquals( "Task state not correct", TaskState.FAILED, task.getReport().getTaskState()); Map<TaskAttemptId, TaskAttempt> attempts = tasks.values().iterator().next().getAttempts(); Assert.assertEquals("Num attempts is not correct", maxAttempts, attempts.size()); for (TaskAttempt attempt : attempts.values()) { Assert.assertEquals( "Attempt state not correct", TaskAttemptState.FAILED, attempt.getReport().getTaskAttemptState()); } }
private static final void setParameter( final Configuration config, final Class<?> scope, final Object val, final ParameterEnum configItem) { if (val != null) { if (val instanceof Long) { config.setLong( GeoWaveConfiguratorBase.enumToConfKey(scope, configItem.self()), ((Long) val)); } else if (val instanceof Double) { config.setDouble( GeoWaveConfiguratorBase.enumToConfKey(scope, configItem.self()), ((Double) val)); } else if (val instanceof Boolean) { config.setBoolean( GeoWaveConfiguratorBase.enumToConfKey(scope, configItem.self()), ((Boolean) val)); } else if (val instanceof Integer) { config.setInt( GeoWaveConfiguratorBase.enumToConfKey(scope, configItem.self()), ((Integer) val)); } else if (val instanceof Class) { config.setClass( GeoWaveConfiguratorBase.enumToConfKey(scope, configItem.self()), ((Class) val), ((Class) val)); } else if (val instanceof byte[]) { config.set( GeoWaveConfiguratorBase.enumToConfKey(scope, configItem.self()), ByteArrayUtils.byteArrayToString((byte[]) val)); } else { config.set(GeoWaveConfiguratorBase.enumToConfKey(scope, configItem.self()), val.toString()); } } }
/** * Create a map-only Hadoop Job out of the passed in parameters. Does not set the * Job name. * * @see #getCustomJobName(String, org.apache.hadoop.mapreduce.JobContext, Class, Class) */ @SuppressWarnings("rawtypes") public static Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat, Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey, Class<? extends Writable> mapperValue, Class<? extends OutputFormat> outputFormat, Configuration conf) throws IOException { //Job job = new Job(new Configuration(conf)); Job job = Job.getInstance(conf); Configuration jobConf = job.getConfiguration(); if (mapper.equals(Mapper.class)) { throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer"); } job.setJarByClass(mapper); job.setInputFormatClass(inputFormat); jobConf.set("mapred.input.dir", inputPath.toString()); job.setMapperClass(mapper); job.setMapOutputKeyClass(mapperKey); job.setMapOutputValueClass(mapperValue); job.setOutputKeyClass(mapperKey); job.setOutputValueClass(mapperValue); jobConf.setBoolean("mapred.compress.map.output", true); job.setNumReduceTasks(0); job.setOutputFormatClass(outputFormat); jobConf.set("mapred.output.dir", outputPath.toString()); return job; }
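// Illustrative usage sketch (not from the original source): the map-only prepareJob helper above
// might be driven like this. It assumes prepareJob is statically imported from the surrounding
// utility class (whose name is not shown here); the mapper and paths are placeholders.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

public class PrepareJobExample {

  // A stand-in mapper: prepareJob rejects Mapper.class itself because it uses the mapper class
  // to locate the job jar.
  public static class PassThroughMapper extends Mapper<Text, Text, Text, Text> {}

  public static void main(String[] args) throws Exception {
    Job job =
        prepareJob(
            new Path("/tmp/in"),
            new Path("/tmp/out"),
            SequenceFileInputFormat.class,
            PassThroughMapper.class,
            Text.class,
            Text.class,
            SequenceFileOutputFormat.class,
            new Configuration());
    job.setJobName("example-map-only-job");
    job.waitForCompletion(true);
  }
}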
public void testFsShutdownHook() throws Exception { final Set<FileSystem> closed = Collections.synchronizedSet(new HashSet<FileSystem>()); Configuration conf = new Configuration(); Configuration confNoAuto = new Configuration(); conf.setClass("fs.test.impl", TestShutdownFileSystem.class, FileSystem.class); confNoAuto.setClass("fs.test.impl", TestShutdownFileSystem.class, FileSystem.class); confNoAuto.setBoolean("fs.automatic.close", false); TestShutdownFileSystem fsWithAuto = (TestShutdownFileSystem) (new Path("test://a/").getFileSystem(conf)); TestShutdownFileSystem fsWithoutAuto = (TestShutdownFileSystem) (new Path("test://b/").getFileSystem(confNoAuto)); fsWithAuto.setClosedSet(closed); fsWithoutAuto.setClosedSet(closed); // Different URIs should result in different FS instances assertNotSame(fsWithAuto, fsWithoutAuto); FileSystem.CACHE.closeAll(true); assertEquals(1, closed.size()); assertTrue(closed.contains(fsWithAuto)); closed.clear(); FileSystem.closeAll(); assertEquals(1, closed.size()); assertTrue(closed.contains(fsWithoutAuto)); }
@Test(timeout = 60000) public void testSymlinkHdfsDisable() throws Exception { Configuration conf = new HdfsConfiguration(); // disable symlink resolution conf.setBoolean(CommonConfigurationKeys.FS_CLIENT_RESOLVE_REMOTE_SYMLINKS_KEY, false); // spin up minicluster, get dfs and filecontext MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build(); DistributedFileSystem dfs = cluster.getFileSystem(); FileContext fc = FileContext.getFileContext(cluster.getURI(0), conf); // Create test files/links FileContextTestHelper helper = new FileContextTestHelper("/tmp/TestSymlinkHdfsDisable"); Path root = helper.getTestRootPath(fc); Path target = new Path(root, "target"); Path link = new Path(root, "link"); DFSTestUtil.createFile(dfs, target, 4096, (short) 1, 0xDEADDEAD); fc.createSymlink(target, link, false); // Try to resolve links with FileSystem and FileContext try { fc.open(link); fail("Expected error when attempting to resolve link"); } catch (IOException e) { GenericTestUtils.assertExceptionContains("resolution is disabled", e); } try { dfs.open(link); fail("Expected error when attempting to resolve link"); } catch (IOException e) { GenericTestUtils.assertExceptionContains("resolution is disabled", e); } }
private static Configuration getConf() { Configuration conf = new Configuration(); String confDir = System.getProperty(HdfsDirectoryFactory.CONFIG_DIRECTORY); HdfsUtil.addHdfsResources(conf, confDir); conf.setBoolean("fs.hdfs.impl.disable.cache", true); return conf; }
@Override public void setLocation(String location, Job job) throws IOException { final Configuration configuration = job.getConfiguration(); // explicitly turning off combining splits. configuration.setBoolean("pig.noSplitCombination", true); this.initializePhoenixPigConfiguration(location, configuration); }
@Test(timeout = 60000) public void testExceptionDuringInitialization() throws Exception { Configuration conf = TEST_UTIL.getConfiguration(); conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 2); // Let's fail fast. conf.setBoolean(CoprocessorHost.ABORT_ON_ERROR_KEY, true); conf.set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY, ""); TEST_UTIL.startMiniCluster(2); try { MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); // Trigger one regionserver to fail as if it came up with a coprocessor // that fails during initialization final HRegionServer regionServer = cluster.getRegionServer(0); conf.set( CoprocessorHost.REGION_COPROCESSOR_CONF_KEY, FailedInitializationObserver.class.getName()); regionServer .getRegionServerCoprocessorHost() .loadSystemCoprocessors(conf, CoprocessorHost.REGION_COPROCESSOR_CONF_KEY); TEST_UTIL.waitFor( 10000, 1000, new Predicate<Exception>() { @Override public boolean evaluate() throws Exception { return regionServer.isAborted(); } }); } finally { TEST_UTIL.shutdownMiniCluster(); } }
/**
 * Performs an HDF to text operation as a MapReduce job and returns total number of points
 * generated.
 *
 * @param inPath path of the input HDF file or directory
 * @param outPath path of the output directory for the text records
 * @param datasetName name of the dataset to extract from the HDF file
 * @param skipFillValue if true, points carrying the fill value are skipped
 * @param params additional operation parameters used to configure the job
 * @return total number of points generated (map output records)
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
public static long HDFToTextMapReduce(
    Path inPath, Path outPath, String datasetName, boolean skipFillValue, OperationsParams params)
    throws IOException, InterruptedException, ClassNotFoundException {
  Job job = new Job(params, "HDFToText");
  Configuration conf = job.getConfiguration();
  job.setJarByClass(HDFToText.class);
  job.setJobName("HDFToText");
  // Set Map function details
  job.setMapperClass(HDFToTextMap.class);
  job.setNumReduceTasks(0);
  // Set input information
  job.setInputFormatClass(SpatialInputFormat3.class);
  SpatialInputFormat3.setInputPaths(job, inPath);
  if (conf.get("shape") == null) conf.setClass("shape", NASAPoint.class, Shape.class);
  conf.set("dataset", datasetName);
  conf.setBoolean("skipfillvalue", skipFillValue);
  // Set output information
  job.setOutputFormatClass(TextOutputFormat3.class);
  TextOutputFormat3.setOutputPath(job, outPath);
  // Run the job
  boolean verbose = conf.getBoolean("verbose", false);
  job.waitForCompletion(verbose);
  Counters counters = job.getCounters();
  Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS);
  final long resultCount = outputRecordCounter.getValue();
  return resultCount;
}
@Test public void testConnectionPing() throws Exception { Configuration conf = new Configuration(); int pingInterval = 50; conf.setBoolean(CommonConfigurationKeys.IPC_CLIENT_PING_KEY, true); conf.setInt(CommonConfigurationKeys.IPC_PING_INTERVAL_KEY, pingInterval); final Server server = new RPC.Builder(conf) .setProtocol(TestProtocol.class) .setInstance(new TestImpl()) .setBindAddress(ADDRESS) .setPort(0) .setNumHandlers(5) .setVerbose(true) .build(); server.start(); final TestProtocol proxy = RPC.getProxy(TestProtocol.class, TestProtocol.versionID, server.getListenerAddress(), conf); try { // this call will throw exception if server couldn't decode the ping proxy.sleep(pingInterval * 4); } finally { if (proxy != null) RPC.stopProxy(proxy); } }
/** Test that all open files are closed when client dies abnormally. */
public void testDFSClientDeath() throws IOException {
  Configuration conf = new Configuration();
  System.out.println("Testing abnormal client death.");
  if (simulatedStorage) {
    conf.setBoolean(SimulatedFSDataset.CONFIG_PROPERTY_SIMULATED, true);
  }
  MiniDFSCluster cluster = new MiniDFSCluster(conf, 1, true, null);
  FileSystem fs = cluster.getFileSystem();
  DistributedFileSystem dfs = (DistributedFileSystem) fs;
  DFSClient dfsclient = dfs.dfs;
  try {
    // create a new file in home directory. Do not close it.
    Path file1 = new Path("/clienttest.dat");
    FSDataOutputStream stm = createFile(fs, file1, 1);
    System.out.println("Created file clienttest.dat");
    // write to file
    writeFile(stm);
    // close the dfsclient before closing the output stream.
    // This should close all existing files.
    dfsclient.close();
    // reopen file system and verify that file exists.
    assertTrue(
        file1 + " does not exist.",
        AppendTestUtil.createHdfsWithDifferentUsername(conf).exists(file1));
  } finally {
    cluster.shutdown();
  }
}
@Override
public boolean runDistCp(Path src, Path dst, Configuration conf) throws IOException {
  DistCpOptions options = new DistCpOptions(Collections.singletonList(src), dst);
  options.setSyncFolder(true);
  options.setSkipCRC(true);
  options.preserve(FileAttribute.BLOCKSIZE);
  // Creates the command-line parameters for distcp
  String[] params = {"-update", "-skipcrccheck", src.toString(), dst.toString()};
  try {
    conf.setBoolean("mapred.mapper.new-api", true);
    DistCp distcp = new DistCp(conf, options);
    // HIVE-13704 states that we should use run() instead of execute() due to a known Hadoop
    // issue introduced by HADOOP-10459
    return distcp.run(params) == 0;
  } catch (Exception e) {
    throw new IOException("Cannot execute DistCp process: " + e, e);
  } finally {
    conf.setBoolean("mapred.mapper.new-api", false);
  }
}