예제 #1
0
  @Test
  public void testDeleteMissing() {
    TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
    JobContext jobContext =
        new JobContextImpl(
            taskAttemptContext.getConfiguration(),
            taskAttemptContext.getTaskAttemptID().getJobID());
    Configuration conf = jobContext.getConfiguration();

    String sourceBase;
    String targetBase;
    FileSystem fs = null;
    try {
      OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
      fs = FileSystem.get(conf);
      sourceBase = TestDistCpUtils.createTestSetup(fs, FsPermission.getDefault());
      targetBase = TestDistCpUtils.createTestSetup(fs, FsPermission.getDefault());
      String targetBaseAdd = TestDistCpUtils.createTestSetup(fs, FsPermission.getDefault());
      fs.rename(new Path(targetBaseAdd), new Path(targetBase));

      DistCpOptions options =
          new DistCpOptions(Arrays.asList(new Path(sourceBase)), new Path("/out"));
      options.setSyncFolder(true);
      options.setDeleteMissing(true);
      options.appendToConf(conf);

      CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS);
      Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong()));
      listing.buildListing(listingFile, options);

      conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase);
      conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, targetBase);

      committer.commitJob(jobContext);
      if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, targetBase, sourceBase)) {
        Assert.fail("Source and target folders are not in sync");
      }
      if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, sourceBase, targetBase)) {
        Assert.fail("Source and target folders are not in sync");
      }

      // Test for idempotent commit
      committer.commitJob(jobContext);
      if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, targetBase, sourceBase)) {
        Assert.fail("Source and target folders are not in sync");
      }
      if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, sourceBase, targetBase)) {
        Assert.fail("Source and target folders are not in sync");
      }
    } catch (Throwable e) {
      LOG.error("Exception encountered while testing for delete missing", e);
      Assert.fail("Delete missing failure");
    } finally {
      TestDistCpUtils.delete(fs, "/tmp1");
      conf.set(DistCpConstants.CONF_LABEL_DELETE_MISSING, "false");
    }
  }
예제 #2
0
  @Test
  public void testExitStatus() {
    when(execution.getExitStatus()).thenReturn(new ExitStatus("exit"));

    assertEquals("exit", context.getExitStatus());

    context.setExitStatus("my exit status");

    verify(execution).setExitStatus(new ExitStatus("my exit status"));
  }
예제 #3
0
  /**
   * Generate the list of files and make them into FileSplits. This needs to be copied to insert a
   * filter on acceptable data
   */
  @Override
  public List<InputSplit> getSplits(JobContext job) throws IOException {
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);
    long desiredMappers =
        job.getConfiguration().getLong("org.systemsbiology.jxtandem.DesiredXMLInputMappers", 0);

    // generate splits
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<FileStatus> fileStatuses = listStatus(job);
    boolean forceNumberMappers = fileStatuses.size() == 1;
    for (FileStatus file : fileStatuses) {
      Path path = file.getPath();
      if (!isPathAcceptable(path)) // filter acceptable data
      continue;
      FileSystem fs = path.getFileSystem(job.getConfiguration());
      long length = file.getLen();
      BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
      if ((length != 0) && isSplitable(job, path)) {
        long blockSize = file.getBlockSize();
        // use desired mappers to force more splits
        if (forceNumberMappers && desiredMappers > 0)
          maxSize = Math.min(maxSize, (length / desiredMappers));

        long splitSize = computeSplitSize(blockSize, minSize, maxSize);

        long bytesRemaining = length;
        while (withinSlop(splitSize, bytesRemaining)) {
          int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
          splits.add(
              new FileSplit(
                  path, length - bytesRemaining, splitSize, blkLocations[blkIndex].getHosts()));
          bytesRemaining -= splitSize;
        }

        if (bytesRemaining != 0) {
          splits.add(
              new FileSplit(
                  path,
                  length - bytesRemaining,
                  bytesRemaining,
                  blkLocations[blkLocations.length - 1].getHosts()));
        }
      } else if (length != 0) {
        splits.add(new FileSplit(path, 0, length, blkLocations[0].getHosts()));
      } else {
        // Create empty hosts array for zero length files
        splits.add(new FileSplit(path, 0, length, new String[0]));
      }
    }
    System.out.println("Total # of splits: " + splits.size());
    //     LOG.debug("Total # of splits: " + splits.size());
    return splits;
  }
예제 #4
0
  @Test
  public void testPreserveStatus() {
    TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
    JobContext jobContext =
        new JobContextImpl(
            taskAttemptContext.getConfiguration(),
            taskAttemptContext.getTaskAttemptID().getJobID());
    Configuration conf = jobContext.getConfiguration();

    String sourceBase;
    String targetBase;
    FileSystem fs = null;
    try {
      OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
      fs = FileSystem.get(conf);
      FsPermission sourcePerm = new FsPermission((short) 511);
      FsPermission initialPerm = new FsPermission((short) 448);
      sourceBase = TestDistCpUtils.createTestSetup(fs, sourcePerm);
      targetBase = TestDistCpUtils.createTestSetup(fs, initialPerm);

      DistCpOptions options =
          new DistCpOptions(Arrays.asList(new Path(sourceBase)), new Path("/out"));
      options.preserve(FileAttribute.PERMISSION);
      options.appendToConf(conf);

      CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS);
      Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong()));
      listing.buildListing(listingFile, options);

      conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase);

      committer.commitJob(jobContext);
      if (!checkDirectoryPermissions(fs, targetBase, sourcePerm)) {
        Assert.fail("Permission don't match");
      }

      // Test for idempotent commit
      committer.commitJob(jobContext);
      if (!checkDirectoryPermissions(fs, targetBase, sourcePerm)) {
        Assert.fail("Permission don't match");
      }

    } catch (IOException e) {
      LOG.error("Exception encountered while testing for preserve status", e);
      Assert.fail("Preserve status failure");
    } finally {
      TestDistCpUtils.delete(fs, "/tmp1");
    }
  }
예제 #5
0
  @SuppressWarnings("unchecked")
  private <T extends InputSplit> int writeNewSplits(JobContext job, Path jobSubmitDir)
      throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = job.getConfiguration();
    InputFormat<?, ?> input = ReflectionUtils.newInstance(job.getInputFormatClass(), conf);

    List<InputSplit> splits = input.getSplits(job);
    T[] array = (T[]) splits.toArray(new InputSplit[splits.size()]);

    // sort the splits into order based on size, so that the biggest
    // go first
    Arrays.sort(array, new SplitComparator());
    JobSplitWriter.createSplitFiles(jobSubmitDir, conf, jobSubmitDir.getFileSystem(conf), array);
    //// num of split. the same as num of maps
    return array.length;
  }
예제 #6
0
  @Test
  public void testAtomicCommitExistingFinal() {
    TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
    JobContext jobContext =
        new JobContextImpl(
            taskAttemptContext.getConfiguration(),
            taskAttemptContext.getTaskAttemptID().getJobID());
    Configuration conf = jobContext.getConfiguration();

    String workPath = "/tmp1/" + String.valueOf(rand.nextLong());
    String finalPath = "/tmp1/" + String.valueOf(rand.nextLong());
    FileSystem fs = null;
    try {
      OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
      fs = FileSystem.get(conf);
      fs.mkdirs(new Path(workPath));
      fs.mkdirs(new Path(finalPath));

      conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, workPath);
      conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, finalPath);
      conf.setBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, true);

      Assert.assertTrue(fs.exists(new Path(workPath)));
      Assert.assertTrue(fs.exists(new Path(finalPath)));
      try {
        committer.commitJob(jobContext);
        Assert.fail("Should not be able to atomic-commit to pre-existing path.");
      } catch (Exception exception) {
        Assert.assertTrue(fs.exists(new Path(workPath)));
        Assert.assertTrue(fs.exists(new Path(finalPath)));
        LOG.info("Atomic-commit Test pass.");
      }

    } catch (IOException e) {
      LOG.error("Exception encountered while testing for atomic commit.", e);
      Assert.fail("Atomic commit failure");
    } finally {
      TestDistCpUtils.delete(fs, workPath);
      TestDistCpUtils.delete(fs, finalPath);
    }
  }
  /**
   * Implementation of InputFormat::getSplits(). Returns a list of InputSplits, such that the number
   * of bytes to be copied for all the splits are approximately equal.
   *
   * @param context JobContext for the job.
   * @return The list of uniformly-distributed input-splits.
   * @throws IOException
   * @throws InterruptedException
   */
  @Override
  public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
    Configuration configuration = context.getConfiguration();
    int numSplits = DistCpUtils.getInt(configuration, JobContext.NUM_MAPS);

    if (numSplits == 0) return new ArrayList<InputSplit>();

    return getSplits(
        configuration,
        numSplits,
        DistCpUtils.getLong(configuration, DistCpConstants.CONF_LABEL_TOTAL_BYTES_TO_BE_COPIED));
  }
예제 #8
0
  /**
   * Validates map phase progress after each record is processed by map task using custom task
   * reporter.
   */
  public void testMapProgress() throws Exception {
    JobConf job = new JobConf();
    fs = FileSystem.getLocal(job);
    Path rootDir = new Path(TEST_ROOT_DIR);
    createInputFile(rootDir);

    job.setNumReduceTasks(0);
    TaskAttemptID taskId = TaskAttemptID.forName("attempt_200907082313_0424_m_000000_0");
    job.setClass("mapreduce.job.outputformat.class", NullOutputFormat.class, OutputFormat.class);
    job.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR, TEST_ROOT_DIR);
    jobId = taskId.getJobID();

    JobContext jContext = new JobContextImpl(job, jobId);
    InputFormat<?, ?> input = ReflectionUtils.newInstance(jContext.getInputFormatClass(), job);

    List<InputSplit> splits = input.getSplits(jContext);
    JobSplitWriter.createSplitFiles(
        new Path(TEST_ROOT_DIR), job, new Path(TEST_ROOT_DIR).getFileSystem(job), splits);
    TaskSplitMetaInfo[] splitMetaInfo =
        SplitMetaInfoReader.readSplitMetaInfo(jobId, fs, job, new Path(TEST_ROOT_DIR));
    job.setUseNewMapper(true); // use new api
    for (int i = 0; i < splitMetaInfo.length; i++) { // rawSplits.length is 1
      map =
          new TestMapTask(
              job.get(JTConfig.JT_SYSTEM_DIR, "/tmp/hadoop/mapred/system") + jobId + "job.xml",
              taskId,
              i,
              splitMetaInfo[i].getSplitIndex(),
              1);

      JobConf localConf = new JobConf(job);
      map.localizeConfiguration(localConf);
      map.setConf(localConf);
      map.run(localConf, fakeUmbilical);
    }
    // clean up
    fs.delete(rootDir, true);
  }
예제 #9
0
  @SuppressWarnings("rawtypes")
	public static String getCustomJobName(String className, JobContext job,
                                  Class<? extends Mapper> mapper,
                                  Class<? extends Reducer> reducer) {
    StringBuilder name = new StringBuilder(100);
    String customJobName = job.getJobName();
    if (customJobName == null || customJobName.trim().isEmpty()) {
      name.append(className);
    } else {
      name.append(customJobName);
    }
    name.append('-').append(mapper.getSimpleName());
    if (reducer != null) {
    	name.append('-').append(reducer.getSimpleName());
    }
    return name.toString();
  }
  /**
   * Set all the initial parameters needed in this class for connectivity out to Accumulo.
   *
   * @param context
   */
  private void initialize(JobContext context) { // Configuration conf){

    Configuration conf = context.getConfiguration();
    try {
      // output zoom level
      log.info("Working from zoom level = " + zoomLevel);
      if (zoomLevel == -1) {
        zoomLevel = Integer.parseInt(conf.get(MrGeoAccumuloConstants.MRGEO_ACC_KEY_ZOOMLEVEL));
      }

      table = conf.get(MrGeoAccumuloConstants.MRGEO_ACC_KEY_OUTPUT_TABLE);
      username = conf.get(MrGeoAccumuloConstants.MRGEO_ACC_KEY_USER);
      instanceName = conf.get(MrGeoAccumuloConstants.MRGEO_ACC_KEY_INSTANCE);
      zooKeepers = conf.get(MrGeoAccumuloConstants.MRGEO_ACC_KEY_ZOOKEEPERS);

      String pl = conf.get(MrGeoConstants.MRGEO_PROTECTION_LEVEL);
      if (pl != null) {
        colViz = new ColumnVisibility(pl);
      } else if (colViz == null) {
        vizStr = conf.get(MrGeoAccumuloConstants.MRGEO_ACC_KEY_VIZ);

        if (vizStr == null) {
          colViz = new ColumnVisibility();
        } else {
          colViz = new ColumnVisibility(vizStr);
        }
      }

      password = conf.get(MrGeoAccumuloConstants.MRGEO_ACC_KEY_PASSWORD);
      String isEnc = conf.get(MrGeoAccumuloConstants.MRGEO_ACC_KEY_PWENCODED64, "false");
      if (isEnc.equalsIgnoreCase("true")) {
        password = Base64Utils.decodeToString(password);
      }

      if (_innerFormat != null) {
        return;
      }

      _innerFormat = AccumuloOutputFormat.class.newInstance();
      AuthenticationToken token = new PasswordToken(password.getBytes());
      //      log.info("Setting output with: u = " + username);
      //      log.info("Setting output with: p = " + password);
      //      log.info("Setting output with: i = " + instanceName);
      //      log.info("Setting output with: z = " + zooKeepers);

      boolean connSet = ConfiguratorBase.isConnectorInfoSet(AccumuloOutputFormat.class, conf);
      if (!connSet) {
        // job not always available - do it how Accumulo does it
        OutputConfigurator.setConnectorInfo(AccumuloOutputFormat.class, conf, username, token);
        ClientConfiguration cc = ClientConfiguration.loadDefault().withInstance(instanceName);
        cc.setProperty(ClientProperty.INSTANCE_ZK_HOST, zooKeepers);

        OutputConfigurator.setZooKeeperInstance(AccumuloOutputFormat.class, conf, cc);
        OutputConfigurator.setDefaultTableName(AccumuloOutputFormat.class, conf, table);
        OutputConfigurator.setCreateTables(AccumuloOutputFormat.class, conf, true);

        outputInfoSet = true;
      }
    } catch (InstantiationException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    } catch (IllegalAccessException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    } catch (AccumuloSecurityException ase) {
      ase.printStackTrace();
    } catch (ClassNotFoundException e) {
      e.printStackTrace();
    } catch (IOException e) {
      e.printStackTrace();
    }
  } // end initialize
예제 #11
0
  @Test
  public void testGetBatchStatus() {
    when(execution.getStatus()).thenReturn(BatchStatus.COMPLETED);

    assertEquals(javax.batch.runtime.BatchStatus.COMPLETED, context.getBatchStatus());
  }
예제 #12
0
 @Test
 public void testJobProperties() {
   assertEquals("jobLevelValue1", context.getProperties().get("jobLevelProperty1"));
 }
예제 #13
0
  @Test
  public void testGetExecutionId() {
    when(execution.getId()).thenReturn(5L);

    assertEquals(5L, context.getExecutionId());
  }
예제 #14
0
  @Test
  public void testGetInstanceId() {
    when(instance.getId()).thenReturn(5L);

    assertEquals(5L, context.getInstanceId());
  }
예제 #15
0
 @Test
 public void testTransientUserData() {
   context.setTransientUserData("This is my data");
   assertEquals("This is my data", context.getTransientUserData());
 }
예제 #16
0
  @Test
  public void testGetJobName() {
    when(instance.getJobName()).thenReturn("jobName");

    assertEquals("jobName", context.getJobName());
  }
예제 #17
0
 public AbstractExporter(JobContext jobContext) {
   this.jobContext = jobContext;
   this.props = jobContext.getProps();
 }
 /**
  * Check for validity of the output-specification for the job.
  *
  * @param context information about the job
  * @throws IOException when output should not be attempted
  */
 @Override
 public void checkOutputSpecs(JobContext context) {
   checkOutputSpecs(context.getConfiguration());
 }