@BeforeClass
 public static void init() throws Exception {
   conf = new HdfsConfiguration();
   conf.setBoolean(DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, true);
   conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_ACLS_ENABLED_KEY, true);
   initCluster(true);
 }
 private static void setupConf(Configuration conf) {
   // enable snapshot support
   conf.setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true);
   // disable the ui
   conf.setInt("hbase.regionsever.info.port", -1);
   // change the flush size to a small amount, regulating number of store files
   conf.setInt("hbase.hregion.memstore.flush.size", 25000);
    // make sure we get a compaction when doing a load, but keep around
    // some files in the store
   conf.setInt("hbase.hstore.compaction.min", 10);
   conf.setInt("hbase.hstore.compactionThreshold", 10);
   // block writes if we get to 12 store files
   conf.setInt("hbase.hstore.blockingStoreFiles", 12);
   conf.setInt("hbase.regionserver.msginterval", 100);
   conf.setBoolean("hbase.master.enabletable.roundrobin", true);
   // Avoid potentially aggressive splitting which would cause snapshot to fail
   conf.set(
       HConstants.HBASE_REGION_SPLIT_POLICY_KEY, ConstantSizeRegionSplitPolicy.class.getName());
   // Execute cleaner frequently to induce failures
   conf.setInt("hbase.master.cleaner.interval", CLEANER_INTERVAL);
   conf.setInt("hbase.master.hfilecleaner.plugins.snapshot.period", CLEANER_INTERVAL);
    // Effectively disable TimeToLiveHFileCleaner. Don't want to fully disable it because that
    // would trigger races between creating the directory containing back references and
    // the back reference itself.
   conf.setInt("hbase.master.hfilecleaner.ttl", CLEANER_INTERVAL);
 }
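A minimal sketch of how a config helper like this is typically wired into an HBase snapshot test; the HBaseTestingUtility field and NUM_RS constant here are assumptions, not part of the snippet above.
  // Hypothetical wiring (sketch): apply setupConf() to the test utility's
  // Configuration before the mini cluster starts, so the settings above take
  // effect on the master and region servers.
  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
  private static final int NUM_RS = 2; // assumed region server count

  @BeforeClass
  public static void setupCluster() throws Exception {
    setupConf(UTIL.getConfiguration());
    UTIL.startMiniCluster(NUM_RS);
  }

  @AfterClass
  public static void cleanupTest() throws Exception {
    UTIL.shutdownMiniCluster();
  }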
  @Override
  protected synchronized void startInternal() throws Exception {
    // create filesystem only now, as part of service-start. By this time, RM is
    // authenticated with kerberos so we are good to create a file-system
    // handle.
    fsConf = new Configuration(getConfig());
    fsConf.setBoolean("dfs.client.retry.policy.enabled", true);
    String retryPolicy =
        fsConf.get(
            YarnConfiguration.FS_RM_STATE_STORE_RETRY_POLICY_SPEC,
            YarnConfiguration.DEFAULT_FS_RM_STATE_STORE_RETRY_POLICY_SPEC);
    fsConf.set("dfs.client.retry.policy.spec", retryPolicy);

    String scheme = fsWorkingPath.toUri().getScheme();
    if (scheme == null) {
      scheme = FileSystem.getDefaultUri(fsConf).getScheme();
    }
    if (scheme != null) {
      String disableCacheName = String.format("fs.%s.impl.disable.cache", scheme);
      fsConf.setBoolean(disableCacheName, true);
    }

    fs = fsWorkingPath.getFileSystem(fsConf);
    mkdirsWithRetries(rmDTSecretManagerRoot);
    mkdirsWithRetries(rmAppRoot);
    mkdirsWithRetries(amrmTokenSecretManagerRoot);
    mkdirsWithRetries(reservationRoot);
  }
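The mkdirsWithRetries helper used above is not shown; the following is a minimal sketch of what such a helper might look like, assuming a simple bounded retry loop (illustrative only, not the actual FileSystemRMStateStore implementation).
  // Sketch only: retry fs.mkdirs() a few times before giving up, since the
  // NameNode may still be coming up when the RM state store starts.
  private void mkdirsWithRetries(final Path dirPath) throws Exception {
    final int maxRetries = 3; // hypothetical retry count
    for (int attempt = 1; ; attempt++) {
      try {
        fs.mkdirs(dirPath);
        return;
      } catch (IOException e) {
        if (attempt >= maxRetries) {
          throw e;
        }
        Thread.sleep(1000L); // hypothetical back-off between attempts
      }
    }
  }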
   /**
    * Merge all the partial {@link org.apache.mahout.math.RandomAccessSparseVector}s into the
    * complete document {@link org.apache.mahout.math.RandomAccessSparseVector}
    *
    * @param partialVectorPaths input directory of the vectors in {@link
    *     org.apache.hadoop.io.SequenceFile} format
    * @param output output directory where the merged vectors have to be created
    * @param baseConf job configuration
    * @param normPower The normalization value. Must be greater than or equal to 0 or equal to
    *     {@link #NO_NORMALIZING}
    * @param logNormalize whether to apply log normalization to the vectors
    * @param dimension the cardinality (number of features) of the output vectors
    * @param sequentialAccess output vectors should be optimized for sequential access
    * @param namedVector output vectors should be named, retaining key (doc id) as a label
    * @param numReducers The number of reducers to spawn
    */
  public static void mergePartialVectors(
      Iterable<Path> partialVectorPaths,
      Path output,
      Configuration baseConf,
      float normPower,
      boolean logNormalize,
      int dimension,
      boolean sequentialAccess,
      boolean namedVector,
      int numReducers)
      throws IOException, InterruptedException, ClassNotFoundException {
    Preconditions.checkArgument(
        normPower == NO_NORMALIZING || normPower >= 0,
        "If specified, normPower must be nonnegative: %s",
        normPower);
    Preconditions.checkArgument(
        normPower == NO_NORMALIZING
            || (normPower > 1 && !Double.isInfinite(normPower))
            || !logNormalize,
        "normPower must be > 1 and not infinite if log normalization is chosen: %s",
        normPower);

    Configuration conf = new Configuration(baseConf);
    // this conf parameter needs to be set to enable serialisation of conf values
    conf.set(
        "io.serializations",
        "org.apache.hadoop.io.serializer.JavaSerialization,"
            + "org.apache.hadoop.io.serializer.WritableSerialization");
    conf.setBoolean(SEQUENTIAL_ACCESS, sequentialAccess);
    conf.setBoolean(NAMED_VECTOR, namedVector);
    conf.setInt(DIMENSION, dimension);
    conf.setFloat(NORMALIZATION_POWER, normPower);
    conf.setBoolean(LOG_NORMALIZE, logNormalize);

    Job job = new Job(conf);
    job.setJobName("PartialVectorMerger::MergePartialVectors");
    job.setJarByClass(PartialVectorMerger.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(VectorWritable.class);

    FileInputFormat.setInputPaths(job, getCommaSeparatedPaths(partialVectorPaths));

    FileOutputFormat.setOutputPath(job, output);

    job.setMapperClass(Mapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setReducerClass(PartialVectorMergeReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(numReducers);

    HadoopUtil.delete(conf, output);

    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
      throw new IllegalStateException("Job failed!");
    }
  }
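For context, a hedged usage sketch of the merger above; the paths, dimension, and reducer count are placeholders, and imports are assumed to match the surrounding class.
    // Hypothetical invocation: merge two partial-vector directories into one
    // output directory with no normalization and a single reducer.
    Configuration baseConf = new Configuration();
    List<Path> partials = Arrays.asList(
        new Path("/tmp/partial-vectors-0"),  // placeholder input paths
        new Path("/tmp/partial-vectors-1"));
    PartialVectorMerger.mergePartialVectors(
        partials,
        new Path("/tmp/tf-vectors"),         // placeholder output path
        baseConf,
        PartialVectorMerger.NO_NORMALIZING,  // normPower: no normalization
        false,                               // logNormalize
        10000,                               // dimension (placeholder)
        false,                               // sequentialAccess
        true,                                // namedVector
        1);                                  // numReducers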
 @BeforeTest
 public void setupDriver() throws Exception {
   conf = new Configuration();
   conf.set(CubeQueryConfUtil.DRIVER_SUPPORTED_STORAGES, "C1");
   conf.setBoolean(CubeQueryConfUtil.DISABLE_AUTO_JOINS, false);
   conf.setBoolean(CubeQueryConfUtil.ENABLE_SELECT_TO_GROUPBY, true);
   conf.setBoolean(CubeQueryConfUtil.ENABLE_GROUP_BY_TO_SELECT, true);
   conf.setBoolean(CubeQueryConfUtil.DISABLE_AGGREGATE_RESOLVER, false);
 }
   /**
    * If ramDiskReplicaCapacity is >= 0, then RAM_DISK capacity is artificially capped to roughly
    * that many block replicas. If ramDiskReplicaCapacity < 0 then no cap is applied.
    */
  protected final void startUpCluster(
      boolean hasTransientStorage,
      final int ramDiskReplicaCapacity,
      final boolean useSCR,
      final boolean useLegacyBlockReaderLocal)
      throws IOException {

    Configuration conf = new Configuration();
    conf.setLong(DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
    conf.setInt(
        DFS_NAMENODE_LAZY_PERSIST_FILE_SCRUB_INTERVAL_SEC, LAZY_WRITE_FILE_SCRUBBER_INTERVAL_SEC);
    conf.setLong(DFS_HEARTBEAT_INTERVAL_KEY, HEARTBEAT_INTERVAL_SEC);
    conf.setInt(DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, HEARTBEAT_RECHECK_INTERVAL_MSEC);
    conf.setInt(DFS_DATANODE_LAZY_WRITER_INTERVAL_SEC, LAZY_WRITER_INTERVAL_SEC);
    conf.setInt(DFS_DATANODE_RAM_DISK_LOW_WATERMARK_BYTES, EVICTION_LOW_WATERMARK * BLOCK_SIZE);

    if (useSCR) {
      conf.setBoolean(DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true);
      // Do not share a client context across tests.
      conf.set(DFS_CLIENT_CONTEXT, UUID.randomUUID().toString());
      if (useLegacyBlockReaderLocal) {
        conf.setBoolean(DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL, true);
        conf.set(
            DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY,
            UserGroupInformation.getCurrentUser().getShortUserName());
      } else {
        sockDir = new TemporarySocketDirectory();
        conf.set(
            DFS_DOMAIN_SOCKET_PATH_KEY,
            new File(sockDir.getDir(), this.getClass().getSimpleName() + "._PORT.sock")
                .getAbsolutePath());
      }
    }

    long[] capacities = null;
    if (hasTransientStorage && ramDiskReplicaCapacity >= 0) {
      // Convert replica count to byte count, add some delta for .meta and
      // VERSION files.
      long ramDiskStorageLimit = ((long) ramDiskReplicaCapacity * BLOCK_SIZE) + (BLOCK_SIZE - 1);
      capacities = new long[] {ramDiskStorageLimit, -1};
    }

    cluster =
        new MiniDFSCluster.Builder(conf)
            .numDataNodes(REPL_FACTOR)
            .storageCapacities(capacities)
            .storageTypes(hasTransientStorage ? new StorageType[] {RAM_DISK, DEFAULT} : null)
            .build();
    fs = cluster.getFileSystem();
    client = fs.getClient();
    try {
      jmx = initJMX();
    } catch (Exception e) {
      fail("Failed initialize JMX for testing: " + e);
    }
    LOG.info("Cluster startup complete");
  }
  /**
   * Run the job
   *
   * @param input the input pathname String
   * @param output the output pathname String
   * @param catFile the file containing the Wikipedia categories
   * @param exactMatchOnly if true, then the Wikipedia category must match exactly instead of simply
   *     containing the category string
   * @param all if true select all categories
   * @throws ClassNotFoundException
   * @throws InterruptedException
   */
  public static void runJob(
      String input, String output, String catFile, boolean exactMatchOnly, boolean all)
      throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    conf.set("xmlinput.start", "<page>");
    conf.set("xmlinput.end", "</page>");
    conf.setBoolean("exact.match.only", exactMatchOnly);
    conf.setBoolean("all.files", all);
    conf.set(
        "io.serializations",
        "org.apache.hadoop.io.serializer.JavaSerialization,"
            + "org.apache.hadoop.io.serializer.WritableSerialization");

    Job job = new Job(conf);
    if (log.isInfoEnabled()) {
      log.info(
          "Input: " + input + " Out: " + output + " Categories: " + catFile + " All Files: " + all);
    }
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.setInputPaths(job, new Path(input));
    Path outPath = new Path(output);
    FileOutputFormat.setOutputPath(job, outPath);
    job.setMapperClass(WikipediaMapper.class);
    job.setInputFormatClass(XmlInputFormat.class);
    job.setReducerClass(Reducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setJarByClass(WikipediaToSequenceFile.class);

    /*
     * conf.set("mapred.compress.map.output", "true");
     * conf.set("mapred.map.output.compression.type", "BLOCK");
     * conf.set("mapred.output.compress", "true");
     * conf.set("mapred.output.compression.type", "BLOCK");
     * conf.set("mapred.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
     */
    HadoopUtil.overwriteOutput(outPath);

    Set<String> categories = new HashSet<String>();
    if (catFile.length() > 0) {
      for (String line : new FileLineIterable(new File(catFile))) {
        categories.add(line.trim().toLowerCase(Locale.ENGLISH));
      }
    }

    DefaultStringifier<Set<String>> setStringifier =
        new DefaultStringifier<Set<String>>(conf, GenericsUtil.getClass(categories));

    String categoriesStr = setStringifier.toString(categories);

    conf.set("wikipedia.categories", categoriesStr);

    job.waitForCompletion(true);
  }
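A hedged example of calling the job above from a driver; the paths and category file are placeholders.
    // Hypothetical driver call: convert a Wikipedia XML dump into SequenceFiles,
    // keeping pages whose categories contain an entry from the category file.
    WikipediaToSequenceFile.runJob(
        "/wikipedia/chunks",          // placeholder input directory
        "/wikipedia/seqfiles",        // placeholder output directory
        "/wikipedia/categories.txt",  // placeholder category file
        false,                        // exactMatchOnly: substring match is enough
        false);                       // all: keep only pages matching a category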
   /**
    * Create a partial tfidf vector using a chunk of features from the input vectors. The input
    * vectors have to be in the {@link SequenceFile} format
    *
    * @param input input directory of the vectors in {@link SequenceFile} format
    * @param baseConf job configuration
    * @param featureCount Number of unique features in the dataset
    * @param vectorCount Number of vectors in the dataset
    * @param minDf The minimum document frequency. Default 1
    * @param maxDF The max percentage of vectors for the DF. Can be used to remove really high
    *     frequency features. Expressed as an integer between 0 and 100. Default 99
    * @param dictionaryFilePath location of the chunk of features and their ids
    * @param output output directory where the partial vectors have to be created
    * @param sequentialAccess output vectors should be optimized for sequential access
    * @param namedVector output vectors should be named, retaining key (doc id) as a label
    */
  private static void makePartialVectors(
      Path input,
      Configuration baseConf,
      Long featureCount,
      Long vectorCount,
      int minDf,
      long maxDF,
      Path dictionaryFilePath,
      Path output,
      boolean sequentialAccess,
      boolean namedVector)
      throws IOException, InterruptedException, ClassNotFoundException {

    Configuration conf = new Configuration(baseConf);
    // this conf parameter needs to be set to enable serialisation of conf values
    conf.set(
        "io.serializations",
        "org.apache.hadoop.io.serializer.JavaSerialization,"
            + "org.apache.hadoop.io.serializer.WritableSerialization");
    conf.setLong(FEATURE_COUNT, featureCount);
    conf.setLong(VECTOR_COUNT, vectorCount);
    conf.setInt(MIN_DF, minDf);
    conf.setLong(MAX_DF, maxDF);
    conf.setBoolean(PartialVectorMerger.SEQUENTIAL_ACCESS, sequentialAccess);
    conf.setBoolean(PartialVectorMerger.NAMED_VECTOR, namedVector);
    DistributedCache.addCacheFile(dictionaryFilePath.toUri(), conf);

    Job job = new Job(conf);
    job.setJobName(
        ": MakePartialVectors: input-folder: "
            + input
            + ", dictionary-file: "
            + dictionaryFilePath.toString());
    job.setJarByClass(TFIDFConverter.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(VectorWritable.class);
    FileInputFormat.setInputPaths(job, input);

    FileOutputFormat.setOutputPath(job, output);

    job.setMapperClass(Mapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setReducerClass(TFIDFPartialVectorReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    HadoopUtil.delete(conf, output);

    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
      throw new IllegalStateException("Job failed!");
    }
  }
 @BeforeTest
 public void setupDriver() throws Exception {
   conf = new Configuration();
   conf.set(CubeQueryConfUtil.DRIVER_SUPPORTED_STORAGES, "C1,C2");
   conf.setBoolean(CubeQueryConfUtil.DISABLE_AUTO_JOINS, false);
   conf.setBoolean(CubeQueryConfUtil.ENABLE_SELECT_TO_GROUPBY, true);
   conf.setBoolean(CubeQueryConfUtil.ENABLE_GROUP_BY_TO_SELECT, true);
   conf.setBoolean(CubeQueryConfUtil.DISABLE_AGGREGATE_RESOLVER, false);
   conf.setClass(
       CubeQueryConfUtil.TIME_RANGE_WRITER_CLASS,
       BetweenTimeRangeWriter.class.asSubclass(TimeRangeWriter.class),
       TimeRangeWriter.class);
 }
  private void mySetup(int stripeLength) throws Exception {
    if (System.getProperty("hadoop.log.dir") == null) {
      String base = new File(".").getAbsolutePath();
      System.setProperty("hadoop.log.dir", new Path(base).toString() + "/logs");
    }

    new File(TEST_DIR).mkdirs(); // Make sure data directory exists
    conf = new Configuration();

    conf.set("raid.config.file", CONFIG_FILE);
    conf.setBoolean("raid.config.reload", true);
    conf.setLong("raid.config.reload.interval", RELOAD_INTERVAL);

    // scan all policies once every 5 second
    conf.setLong("raid.policy.rescan.interval", 5000);

    // do not use map-reduce cluster for Raiding
    conf.set("raid.classname", "org.apache.hadoop.raid.LocalRaidNode");
    conf.set("raid.server.address", "localhost:" + MiniDFSCluster.getFreePort());
    conf.set("mapred.raid.http.address", "localhost:0");

    Utils.loadTestCodecs(
        conf, stripeLength, stripeLength, 1, 3, "/destraid", "/destraidrs", false, true);

    conf.setBoolean("dfs.permissions", false);
    // Make sure initial repl is smaller than NUM_DATANODES
    conf.setInt(RaidNode.RAID_PARITY_INITIAL_REPL_KEY, 1);

    dfsCluster = new MiniDFSCluster(conf, NUM_DATANODES, true, null);
    dfsCluster.waitActive();
    fileSys = dfsCluster.getFileSystem();
    namenode = fileSys.getUri().toString();

    FileSystem.setDefaultUri(conf, namenode);
    mr = new MiniMRCluster(4, namenode, 3);
    jobTrackerName = "localhost:" + mr.getJobTrackerPort();
    hftp = "hftp://localhost.localdomain:" + dfsCluster.getNameNodePort();

    FileSystem.setDefaultUri(conf, namenode);
    conf.set("mapred.job.tracker", jobTrackerName);
    conf.set(RaidNode.RAID_CHECKSUM_STORE_CLASS_KEY, "org.apache.hadoop.raid.LocalChecksumStore");
    conf.setBoolean(RaidNode.RAID_CHECKSUM_STORE_REQUIRED_KEY, true);
    conf.set(LocalChecksumStore.LOCAL_CHECK_STORE_DIR_KEY, CHECKSUM_STORE_DIR);
    conf.set(RaidNode.RAID_STRIPE_STORE_CLASS_KEY, "org.apache.hadoop.raid.LocalStripeStore");
    conf.set(LocalStripeStore.LOCAL_STRIPE_STORE_DIR_KEY, STRIPE_STORE_DIR);
    ConfigBuilder cb = new ConfigBuilder(CONFIG_FILE);
    cb.addPolicy("RaidTest1", "/user/dhruba/raidtest", 1, 1);
    cb.addPolicy("RaidTest2", "/user/dhruba/raidtestrs", 1, 1, "rs");
    cb.persist();
  }
  @Test(timeout = 120000)
  public void testFadviseSkippedForSmallReads() throws Exception {
    // start a cluster
    LOG.info("testFadviseSkippedForSmallReads");
    tracker.clear();
    Configuration conf = new HdfsConfiguration();
    conf.setBoolean(DFSConfigKeys.DFS_DATANODE_DROP_CACHE_BEHIND_READS_KEY, true);
    conf.setBoolean(DFSConfigKeys.DFS_DATANODE_DROP_CACHE_BEHIND_WRITES_KEY, true);
    MiniDFSCluster cluster = null;
    String TEST_PATH = "/test";
    int TEST_PATH_LEN = MAX_TEST_FILE_LEN;
    FSDataInputStream fis = null;
    try {
      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
      cluster.waitActive();
      FileSystem fs = cluster.getFileSystem();

      // create new file
      createHdfsFile(fs, new Path(TEST_PATH), TEST_PATH_LEN, null);
      // Since the DataNode was configured with drop-behind, and we didn't
      // specify any policy, we should have done drop-behind.
      ExtendedBlock block =
          cluster
              .getNameNode()
              .getRpcServer()
              .getBlockLocations(TEST_PATH, 0, Long.MAX_VALUE)
              .get(0)
              .getBlock();
      String fadvisedFileName = cluster.getBlockFile(0, block).getName();
      Stats stats = tracker.getStats(fadvisedFileName);
      stats.assertDroppedInRange(0, TEST_PATH_LEN - WRITE_PACKET_SIZE);
      stats.clear();
      stats.assertNotDroppedInRange(0, TEST_PATH_LEN);

      // read file
      fis = fs.open(new Path(TEST_PATH));
      byte[] buf = new byte[17];
      fis.readFully(4096, buf, 0, buf.length);

      // we should not have dropped anything because of the small read.
      stats = tracker.getStats(fadvisedFileName);
      stats.assertNotDroppedInRange(0, TEST_PATH_LEN - WRITE_PACKET_SIZE);
    } finally {
      IOUtils.cleanup(null, fis);
      if (cluster != null) {
        cluster.shutdown();
      }
    }
  }
 static {
   try {
     defaultConf = new Configuration(false);
     defaultConf.set("fs.defaultFS", "file:///");
     defaultConf.setBoolean(TezConfiguration.TEZ_LOCAL_MODE, true);
     defaultConf.setBoolean(TezConfiguration.TEZ_AM_SPECULATION_ENABLED, true);
     defaultConf.setFloat(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MIN_SRC_FRACTION, 1);
     defaultConf.setFloat(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MAX_SRC_FRACTION, 1);
     localFs = FileSystem.getLocal(defaultConf);
     String stagingDir = "target" + Path.SEPARATOR + TestSpeculation.class.getName() + "-tmpDir";
     defaultConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDir);
   } catch (IOException e) {
     throw new RuntimeException("init failure", e);
   }
 }
 private void testRMDownForJobStatusBeforeGetAMReport(Configuration conf, int noOfRetries)
     throws IOException {
   conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_FRAMEWORK_NAME);
   conf.setBoolean(MRJobConfig.JOB_AM_ACCESS_DISABLED, !isAMReachableFromClient);
   MRClientProtocol historyServerProxy = mock(MRClientProtocol.class);
   ResourceMgrDelegate rmDelegate = mock(ResourceMgrDelegate.class);
   try {
     when(rmDelegate.getApplicationReport(jobId.getAppId()))
          .thenThrow(
              new java.lang.reflect.UndeclaredThrowableException(
                  new IOException("Connection refused1")))
          .thenThrow(
              new java.lang.reflect.UndeclaredThrowableException(
                  new IOException("Connection refused2")))
          .thenThrow(
              new java.lang.reflect.UndeclaredThrowableException(
                  new IOException("Connection refused3")));
     ClientServiceDelegate clientServiceDelegate =
         new ClientServiceDelegate(conf, rmDelegate, oldJobId, historyServerProxy);
     try {
       clientServiceDelegate.getJobStatus(oldJobId);
       Assert.fail("It should throw exception after retries");
     } catch (IOException e) {
       System.out.println("fail to get job status,and e=" + e.toString());
     }
     verify(rmDelegate, times(noOfRetries)).getApplicationReport(any(ApplicationId.class));
   } catch (YarnException e) {
     throw new IOException(e);
   }
 }
  @Test
  public void testRMDownRestoreForJobStatusBeforeGetAMReport() throws IOException {
    Configuration conf = new YarnConfiguration();
    conf.setInt(MRJobConfig.MR_CLIENT_MAX_RETRIES, 3);

    conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_FRAMEWORK_NAME);
    conf.setBoolean(MRJobConfig.JOB_AM_ACCESS_DISABLED, !isAMReachableFromClient);
    MRClientProtocol historyServerProxy = mock(MRClientProtocol.class);
    when(historyServerProxy.getJobReport(any(GetJobReportRequest.class)))
        .thenReturn(getJobReportResponse());
    ResourceMgrDelegate rmDelegate = mock(ResourceMgrDelegate.class);
    try {
      when(rmDelegate.getApplicationReport(jobId.getAppId()))
          .thenThrow(
              new java.lang.reflect.UndeclaredThrowableException(
                  new IOException("Connection refused1")))
          .thenThrow(
              new java.lang.reflect.UndeclaredThrowableException(
                  new IOException("Connection refused2")))
          .thenReturn(getFinishedApplicationReport());
      ClientServiceDelegate clientServiceDelegate =
          new ClientServiceDelegate(conf, rmDelegate, oldJobId, historyServerProxy);
      JobStatus jobStatus = clientServiceDelegate.getJobStatus(oldJobId);
      verify(rmDelegate, times(3)).getApplicationReport(any(ApplicationId.class));
      Assert.assertNotNull(jobStatus);
    } catch (YarnException e) {
      throw new IOException(e);
    }
  }
  @BeforeClass
  public static void clusterSetupAtBeginning()
      throws IOException, LoginException, URISyntaxException {
    SupportsBlocks = true;
    CONF.setBoolean(DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, true);

    cluster =
        new MiniDFSCluster.Builder(CONF)
            .nnTopology(MiniDFSNNTopology.simpleFederatedTopology(2))
            .numDataNodes(2)
            .build();
    cluster.waitClusterUp();

    fHdfs = cluster.getFileSystem(0);
    fHdfs2 = cluster.getFileSystem(1);
    fHdfs
        .getConf()
        .set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, FsConstants.VIEWFS_URI.toString());
    fHdfs2
        .getConf()
        .set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, FsConstants.VIEWFS_URI.toString());

    defaultWorkingDirectory =
        fHdfs.makeQualified(
            new Path("/user/" + UserGroupInformation.getCurrentUser().getShortUserName()));
    defaultWorkingDirectory2 =
        fHdfs2.makeQualified(
            new Path("/user/" + UserGroupInformation.getCurrentUser().getShortUserName()));

    fHdfs.mkdirs(defaultWorkingDirectory);
    fHdfs2.mkdirs(defaultWorkingDirectory2);
  }
 /** @throws java.lang.Exception */
 @BeforeClass
 public static void setUpBeforeClass() throws Exception {
   TEST_UTIL.startMiniZKCluster();
   Configuration conf = TEST_UTIL.getConfiguration();
   conf.setBoolean(HConstants.REPLICATION_ENABLE_KEY, HConstants.REPLICATION_ENABLE_DEFAULT);
   admin = new ReplicationAdmin(conf);
 }
  @Test
  public void testFsLockFairness() throws IOException, InterruptedException {
    Configuration conf = new Configuration();

    FSEditLog fsEditLog = Mockito.mock(FSEditLog.class);
    FSImage fsImage = Mockito.mock(FSImage.class);
    Mockito.when(fsImage.getEditLog()).thenReturn(fsEditLog);

    conf.setBoolean("dfs.namenode.fslock.fair", true);
    FSNamesystem fsNamesystem = new FSNamesystem(conf, fsImage);
    assertTrue(fsNamesystem.getFsLockForTests().isFair());

    conf.setBoolean("dfs.namenode.fslock.fair", false);
    fsNamesystem = new FSNamesystem(conf, fsImage);
    assertFalse(fsNamesystem.getFsLockForTests().isFair());
  }
 @Test
  // First attempt fails and the second attempt passes.
  // The job succeeds.
 public void testFailTask() throws Exception {
   MRApp app = new MockFirstFailingAttemptMRApp(1, 0);
   Configuration conf = new Configuration();
   // this test requires two task attempts, but uberization overrides max to 1
   conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
   Job job = app.submit(conf);
   app.waitForState(job, JobState.SUCCEEDED);
   Map<TaskId, Task> tasks = job.getTasks();
   Assert.assertEquals("Num tasks is not correct", 1, tasks.size());
   Task task = tasks.values().iterator().next();
   Assert.assertEquals(
       "Task state not correct", TaskState.SUCCEEDED, task.getReport().getTaskState());
   Map<TaskAttemptId, TaskAttempt> attempts = tasks.values().iterator().next().getAttempts();
   Assert.assertEquals("Num attempts is not correct", 2, attempts.size());
   // one attempt must be failed
   // and another must have succeeded
   Iterator<TaskAttempt> it = attempts.values().iterator();
   Assert.assertEquals(
       "Attempt state not correct",
       TaskAttemptState.FAILED,
       it.next().getReport().getTaskAttemptState());
   Assert.assertEquals(
       "Attempt state not correct",
       TaskAttemptState.SUCCEEDED,
       it.next().getReport().getTaskAttemptState());
 }
 @Test
 // All Task attempts are timed out, leading to Job failure
 public void testTimedOutTask() throws Exception {
   MRApp app = new TimeOutTaskMRApp(1, 0);
   Configuration conf = new Configuration();
   int maxAttempts = 2;
   conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, maxAttempts);
   // disable uberization (requires entire job to be reattempted, so max for
   // subtask attempts is overridden to 1)
   conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
   Job job = app.submit(conf);
   app.waitForState(job, JobState.FAILED);
   Map<TaskId, Task> tasks = job.getTasks();
   Assert.assertEquals("Num tasks is not correct", 1, tasks.size());
   Task task = tasks.values().iterator().next();
   Assert.assertEquals(
       "Task state not correct", TaskState.FAILED, task.getReport().getTaskState());
   Map<TaskAttemptId, TaskAttempt> attempts = tasks.values().iterator().next().getAttempts();
   Assert.assertEquals("Num attempts is not correct", maxAttempts, attempts.size());
   for (TaskAttempt attempt : attempts.values()) {
     Assert.assertEquals(
         "Attempt state not correct",
         TaskAttemptState.FAILED,
         attempt.getReport().getTaskAttemptState());
   }
 }
 private static final void setParameter(
     final Configuration config,
     final Class<?> scope,
     final Object val,
     final ParameterEnum configItem) {
   if (val != null) {
     if (val instanceof Long) {
       config.setLong(
           GeoWaveConfiguratorBase.enumToConfKey(scope, configItem.self()), ((Long) val));
     } else if (val instanceof Double) {
       config.setDouble(
           GeoWaveConfiguratorBase.enumToConfKey(scope, configItem.self()), ((Double) val));
     } else if (val instanceof Boolean) {
       config.setBoolean(
           GeoWaveConfiguratorBase.enumToConfKey(scope, configItem.self()), ((Boolean) val));
     } else if (val instanceof Integer) {
       config.setInt(
           GeoWaveConfiguratorBase.enumToConfKey(scope, configItem.self()), ((Integer) val));
     } else if (val instanceof Class) {
       config.setClass(
           GeoWaveConfiguratorBase.enumToConfKey(scope, configItem.self()),
           ((Class) val),
           ((Class) val));
     } else if (val instanceof byte[]) {
       config.set(
           GeoWaveConfiguratorBase.enumToConfKey(scope, configItem.self()),
           ByteArrayUtils.byteArrayToString((byte[]) val));
     } else {
       config.set(GeoWaveConfiguratorBase.enumToConfKey(scope, configItem.self()), val.toString());
     }
   }
 }
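A hedged sketch of calling the dispatcher above from a job-setup method; MyJobScope and the MyParameters constants are hypothetical stand-ins for a real scope class and ParameterEnum.
  // Hypothetical usage: route typed values into the Hadoop Configuration under
  // GeoWave-style keys derived from a scope class and a ParameterEnum constant.
  Configuration config = new Configuration();
  setParameter(config, MyJobScope.class, Boolean.TRUE, MyParameters.SOME_FLAG);
  setParameter(config, MyJobScope.class, Long.valueOf(42L), MyParameters.SOME_COUNT);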
  /**
   * Create a map-only Hadoop Job out of the passed in parameters.  Does not set the
   * Job name.
   *
   * @see #getCustomJobName(String, org.apache.hadoop.mapreduce.JobContext, Class, Class)
   */
  @SuppressWarnings("rawtypes")
  public static Job prepareJob(
      Path inputPath,
      Path outputPath,
      Class<? extends InputFormat> inputFormat,
      Class<? extends Mapper> mapper,
      Class<? extends Writable> mapperKey,
      Class<? extends Writable> mapperValue,
      Class<? extends OutputFormat> outputFormat,
      Configuration conf)
      throws IOException {

    Job job = Job.getInstance(conf);
    Configuration jobConf = job.getConfiguration();

    if (mapper.equals(Mapper.class)) {
      throw new IllegalStateException(
          "Can't figure out the user class jar file from mapper/reducer");
    }
    job.setJarByClass(mapper);

    job.setInputFormatClass(inputFormat);
    jobConf.set("mapred.input.dir", inputPath.toString());

    job.setMapperClass(mapper);
    job.setMapOutputKeyClass(mapperKey);
    job.setMapOutputValueClass(mapperValue);
    job.setOutputKeyClass(mapperKey);
    job.setOutputValueClass(mapperValue);
    jobConf.setBoolean("mapred.compress.map.output", true);
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(outputFormat);
    jobConf.set("mapred.output.dir", outputPath.toString());

    return job;
  }
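A hedged example of using the map-only job builder above; MyMapper and the paths are placeholders.
    // Hypothetical usage: build and run a map-only job that reads and writes
    // SequenceFiles. MyMapper stands in for a user-supplied Mapper subclass.
    Job job = prepareJob(
        new Path("/data/input"),             // placeholder input
        new Path("/data/output"),            // placeholder output
        SequenceFileInputFormat.class,
        MyMapper.class,
        Text.class,
        VectorWritable.class,
        SequenceFileOutputFormat.class,
        new Configuration());
    job.setJobName("my-map-only-job");
    if (!job.waitForCompletion(true)) {
      throw new IllegalStateException("Job failed!");
    }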
  public void testFsShutdownHook() throws Exception {
    final Set<FileSystem> closed = Collections.synchronizedSet(new HashSet<FileSystem>());
    Configuration conf = new Configuration();
    Configuration confNoAuto = new Configuration();

    conf.setClass("fs.test.impl", TestShutdownFileSystem.class, FileSystem.class);
    confNoAuto.setClass("fs.test.impl", TestShutdownFileSystem.class, FileSystem.class);
    confNoAuto.setBoolean("fs.automatic.close", false);

    TestShutdownFileSystem fsWithAuto =
        (TestShutdownFileSystem) (new Path("test://a/").getFileSystem(conf));
    TestShutdownFileSystem fsWithoutAuto =
        (TestShutdownFileSystem) (new Path("test://b/").getFileSystem(confNoAuto));

    fsWithAuto.setClosedSet(closed);
    fsWithoutAuto.setClosedSet(closed);

    // Different URIs should result in different FS instances
    assertNotSame(fsWithAuto, fsWithoutAuto);

    FileSystem.CACHE.closeAll(true);
    assertEquals(1, closed.size());
    assertTrue(closed.contains(fsWithAuto));

    closed.clear();

    FileSystem.closeAll();
    assertEquals(1, closed.size());
    assertTrue(closed.contains(fsWithoutAuto));
  }
  @Test(timeout = 60000)
  public void testSymlinkHdfsDisable() throws Exception {
    Configuration conf = new HdfsConfiguration();
    // disable symlink resolution
    conf.setBoolean(CommonConfigurationKeys.FS_CLIENT_RESOLVE_REMOTE_SYMLINKS_KEY, false);
    // spin up minicluster, get dfs and filecontext
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
    DistributedFileSystem dfs = cluster.getFileSystem();
    FileContext fc = FileContext.getFileContext(cluster.getURI(0), conf);
    // Create test files/links
    FileContextTestHelper helper = new FileContextTestHelper("/tmp/TestSymlinkHdfsDisable");
    Path root = helper.getTestRootPath(fc);
    Path target = new Path(root, "target");
    Path link = new Path(root, "link");
    DFSTestUtil.createFile(dfs, target, 4096, (short) 1, 0xDEADDEAD);
    fc.createSymlink(target, link, false);

    // Try to resolve links with FileSystem and FileContext
    try {
      fc.open(link);
      fail("Expected error when attempting to resolve link");
    } catch (IOException e) {
      GenericTestUtils.assertExceptionContains("resolution is disabled", e);
    }
    try {
      dfs.open(link);
      fail("Expected error when attempting to resolve link");
    } catch (IOException e) {
      GenericTestUtils.assertExceptionContains("resolution is disabled", e);
    }
  }
 private static Configuration getConf() {
   Configuration conf = new Configuration();
   String confDir = System.getProperty(HdfsDirectoryFactory.CONFIG_DIRECTORY);
   HdfsUtil.addHdfsResources(conf, confDir);
   conf.setBoolean("fs.hdfs.impl.disable.cache", true);
   return conf;
 }
 @Override
 public void setLocation(String location, Job job) throws IOException {
   final Configuration configuration = job.getConfiguration();
   // explicitly turning off combining splits.
   configuration.setBoolean("pig.noSplitCombination", true);
   this.initializePhoenixPigConfiguration(location, configuration);
 }
 @Test(timeout = 60000)
 public void testExceptionDuringInitialization() throws Exception {
   Configuration conf = TEST_UTIL.getConfiguration();
   conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 2); // Let's fail fast.
   conf.setBoolean(CoprocessorHost.ABORT_ON_ERROR_KEY, true);
   conf.set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY, "");
   TEST_UTIL.startMiniCluster(2);
   try {
     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
     // Trigger one regionserver to fail as if it came up with a coprocessor
     // that fails during initialization
     final HRegionServer regionServer = cluster.getRegionServer(0);
     conf.set(
         CoprocessorHost.REGION_COPROCESSOR_CONF_KEY,
         FailedInitializationObserver.class.getName());
     regionServer
         .getRegionServerCoprocessorHost()
         .loadSystemCoprocessors(conf, CoprocessorHost.REGION_COPROCESSOR_CONF_KEY);
     TEST_UTIL.waitFor(
         10000,
         1000,
         new Predicate<Exception>() {
           @Override
           public boolean evaluate() throws Exception {
             return regionServer.isAborted();
           }
         });
   } finally {
     TEST_UTIL.shutdownMiniCluster();
   }
 }
   /**
    * Performs an HDF to text operation as a MapReduce job and returns the total number of points
    * generated.
    *
    * @param inPath path of the input HDF file or directory
    * @param outPath path of the output directory for the generated text
    * @param datasetName name of the dataset to extract from the HDF file
    * @param skipFillValue if true, points matching the fill value are skipped
    * @param params additional operation parameters passed to the job
    * @return the total number of points generated (map output records)
    * @throws IOException
    * @throws ClassNotFoundException
    * @throws InterruptedException
    */
  public static long HDFToTextMapReduce(
      Path inPath, Path outPath, String datasetName, boolean skipFillValue, OperationsParams params)
      throws IOException, InterruptedException, ClassNotFoundException {
    Job job = new Job(params, "HDFToText");
    Configuration conf = job.getConfiguration();
    job.setJarByClass(HDFToText.class);
    job.setJobName("HDFToText");

    // Set Map function details
    job.setMapperClass(HDFToTextMap.class);
    job.setNumReduceTasks(0);

    // Set input information
    job.setInputFormatClass(SpatialInputFormat3.class);
    SpatialInputFormat3.setInputPaths(job, inPath);
    if (conf.get("shape") == null) conf.setClass("shape", NASAPoint.class, Shape.class);
    conf.set("dataset", datasetName);
    conf.setBoolean("skipfillvalue", skipFillValue);

    // Set output information
    job.setOutputFormatClass(TextOutputFormat3.class);
    TextOutputFormat3.setOutputPath(job, outPath);

    // Run the job
    boolean verbose = conf.getBoolean("verbose", false);
    job.waitForCompletion(verbose);
    Counters counters = job.getCounters();
    Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS);
    final long resultCount = outputRecordCounter.getValue();

    return resultCount;
  }
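A hedged driver call for the job above; the paths and dataset name are placeholders, and params is assumed to have been built elsewhere (e.g. from command-line parsing).
    // Hypothetical driver call: dump one dataset of an HDF file to text and
    // report how many points were generated.
    long points = HDFToText.HDFToTextMapReduce(
        new Path("/modis/input.hdf"),   // placeholder input
        new Path("/modis/output-txt"),  // placeholder output
        "LST_Day_1km",                  // placeholder dataset name
        true,                           // skipFillValue
        params);                        // assumed to be parsed earlier
    System.out.println("Generated " + points + " points");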
  @Test
  public void testConnectionPing() throws Exception {
    Configuration conf = new Configuration();
    int pingInterval = 50;
    conf.setBoolean(CommonConfigurationKeys.IPC_CLIENT_PING_KEY, true);
    conf.setInt(CommonConfigurationKeys.IPC_PING_INTERVAL_KEY, pingInterval);
    final Server server =
        new RPC.Builder(conf)
            .setProtocol(TestProtocol.class)
            .setInstance(new TestImpl())
            .setBindAddress(ADDRESS)
            .setPort(0)
            .setNumHandlers(5)
            .setVerbose(true)
            .build();
    server.start();

    final TestProtocol proxy =
        RPC.getProxy(TestProtocol.class, TestProtocol.versionID, server.getListenerAddress(), conf);
    try {
      // this call will throw exception if server couldn't decode the ping
      proxy.sleep(pingInterval * 4);
    } finally {
      if (proxy != null) RPC.stopProxy(proxy);
    }
  }
  /** Test that all open files are closed when client dies abnormally. */
  public void testDFSClientDeath() throws IOException {
    Configuration conf = new Configuration();
    System.out.println("Testing adbornal client death.");
    if (simulatedStorage) {
      conf.setBoolean(SimulatedFSDataset.CONFIG_PROPERTY_SIMULATED, true);
    }
    MiniDFSCluster cluster = new MiniDFSCluster(conf, 1, true, null);
    FileSystem fs = cluster.getFileSystem();
    DistributedFileSystem dfs = (DistributedFileSystem) fs;
    DFSClient dfsclient = dfs.dfs;
    try {

      // create a new file in home directory. Do not close it.
      //
      Path file1 = new Path("/clienttest.dat");
      FSDataOutputStream stm = createFile(fs, file1, 1);
      System.out.println("Created file clienttest.dat");

      // write to file
      writeFile(stm);

      // close the dfsclient before closing the output stream.
      // This should close all existing files.
      dfsclient.close();

      // reopen file system and verify that file exists.
      assertTrue(
          file1 + " does not exist.",
          AppendTestUtil.createHdfsWithDifferentUsername(conf).exists(file1));
    } finally {
      cluster.shutdown();
    }
  }
  @Override
  public boolean runDistCp(Path src, Path dst, Configuration conf) throws IOException {

    DistCpOptions options = new DistCpOptions(Collections.singletonList(src), dst);
    options.setSyncFolder(true);
    options.setSkipCRC(true);
    options.preserve(FileAttribute.BLOCKSIZE);

    // Creates the command-line parameters for distcp
    String[] params = {"-update", "-skipcrccheck", src.toString(), dst.toString()};

    try {
      conf.setBoolean("mapred.mapper.new-api", true);
      DistCp distcp = new DistCp(conf, options);

      // HIVE-13704 states that we should use run() instead of execute() due to a known Hadoop
      // issue introduced by HADOOP-10459
      return distcp.run(params) == 0;
    } catch (Exception e) {
      throw new IOException("Cannot execute DistCp process: " + e, e);
    } finally {
      conf.setBoolean("mapred.mapper.new-api", false);
    }
  }
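A hedged sketch of invoking the DistCp wrapper above; the source and destination paths are placeholders.
  // Hypothetical call site: copy a staging directory to its final location,
  // reusing the caller's Configuration, and surface a failure explicitly.
  Configuration conf = new Configuration();
  boolean copied = runDistCp(
      new Path("hdfs://src-cluster/warehouse/staging/t1"),  // placeholder source
      new Path("hdfs://dst-cluster/warehouse/final/t1"),    // placeholder destination
      conf);
  if (!copied) {
    throw new IOException("DistCp returned a non-zero exit code");
  }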