/**
 * Builds an empty {@link WorkUnit} for the given name, records its task id, writer staging
 * directory and writer output directory as properties, and creates both directories through
 * {@code this.fs}.
 *
 * <p>Both the staging directory and the output directory are appended to
 * {@code this.stagingDirs}.
 *
 * @param workUnitName name of the {@link WorkUnit}; used as the path component between the job
 *     name and the staging/output directory name
 * @return the newly created and configured {@link WorkUnit}
 * @throws IOException if {@code this.fs} fails while creating one of the directories
 */
private WorkUnit createAndSetWorkUnit(String workUnitName) throws IOException {
  // Relative location shared by the staging and output directories of this work unit.
  String relativePrefix = JOB_NAME + Path.SEPARATOR + workUnitName + Path.SEPARATOR;
  Path stagingPath = new Path(OUTPUT_PATH, relativePrefix + STAGING_DIR_NAME);
  Path outputPath = new Path(OUTPUT_PATH, relativePrefix + OUTPUT_DIR_NAME);

  WorkUnit workUnit = WorkUnit.createEmpty();

  // The current nano time serves as the task id value.
  String taskIdKey =
      ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.TASK_ID_KEY, 1, 0);
  workUnit.setProp(taskIdKey, System.nanoTime());

  // Staging directory: record it on the work unit, create it, and track it.
  String stagingDirKey =
      ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_STAGING_DIR, 1, 0);
  workUnit.setProp(stagingDirKey, stagingPath.toString());
  this.fs.mkdirs(stagingPath);
  this.stagingDirs.add(stagingPath);

  // Output directory: same treatment; it is tracked in the same list as the staging directory.
  String outputDirKey =
      ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_OUTPUT_DIR, 1, 0);
  workUnit.setProp(outputDirKey, outputPath.toString());
  this.fs.mkdirs(outputPath);
  this.stagingDirs.add(outputPath);

  return workUnit;
}
@Override public List<WorkUnit> pack(Map<String, List<WorkUnit>> workUnitsByTopic, int numContainers) { setWorkUnitEstSizes(workUnitsByTopic); List<WorkUnit> workUnits = Lists.newArrayList(); for (List<WorkUnit> workUnitsForTopic : workUnitsByTopic.values()) { // For each topic, merge all empty workunits into a single workunit, so that a single // empty task will be created instead of many. MultiWorkUnit zeroSizeWorkUnit = MultiWorkUnit.createEmpty(); for (WorkUnit workUnit : workUnitsForTopic) { if (DoubleMath.fuzzyEquals(getWorkUnitEstSize(workUnit), 0.0, EPS)) { addWorkUnitToMultiWorkUnit(workUnit, zeroSizeWorkUnit); } else { workUnit.setWatermarkInterval(getWatermarkIntervalFromWorkUnit(workUnit)); workUnits.add(workUnit); } } if (!zeroSizeWorkUnit.getWorkUnits().isEmpty()) { workUnits.add(squeezeMultiWorkUnit(zeroSizeWorkUnit)); } } return worstFitDecreasingBinPacking(workUnits, numContainers); }
/**
 * Builds the {@link MultiWorkUnit} fixture stored in {@code this.multiWorkUnit}.
 *
 * <p>The fixture holds two work units: the first with low/high watermarks 0/1000 and property
 * {@code k1=v1}, the second with low/high watermarks 1001/2000 and property {@code k2=v2}.
 */
@BeforeClass
public void setUp() {
  MultiWorkUnit fixture = new MultiWorkUnit();
  this.multiWorkUnit = fixture;

  // First member: watermark range 0..1000.
  WorkUnit first = new WorkUnit();
  first.setHighWaterMark(1000);
  first.setLowWaterMark(0);
  first.setProp("k1", "v1");
  fixture.addWorkUnit(first);

  // Second member: watermark range 1001..2000.
  WorkUnit second = new WorkUnit();
  second.setHighWaterMark(2000);
  second.setLowWaterMark(1001);
  second.setProp("k2", "v2");
  fixture.addWorkUnit(second);
}
@BeforeClass public void setupWorkUnitFiles() throws IOException { this.conf = new Configuration(); this.fs = FileSystem.getLocal(this.conf); this.stagingDirs = Lists.newArrayList(); // Create a list of WorkUnits to serialize WorkUnit wu1 = createAndSetWorkUnit("wu1"); WorkUnit wu2 = createAndSetWorkUnit("wu2"); WorkUnit wu3 = createAndSetWorkUnit("wu3"); WorkUnit wu4 = createAndSetWorkUnit("wu4"); // Create a MultiWorkUnit to serialize MultiWorkUnit mwu1 = new MultiWorkUnit(); mwu1.setProp(ConfigurationKeys.TASK_ID_KEY, System.nanoTime()); mwu1.addWorkUnits(Arrays.asList(wu3, wu4)); Path inputDir = new Path(new Path(OUTPUT_PATH, JOB_NAME), "input"); // Writer each WorkUnit to a separate file under inputDir Closer closer = Closer.create(); try { wu1.write( closer.register( this.fs.create( new Path( inputDir, wu1.getProp(ConfigurationKeys.TASK_ID_KEY) + Path.SEPARATOR + "_") .suffix("wu")))); wu2.write( closer.register( this.fs.create( new Path( inputDir, wu2.getProp(ConfigurationKeys.TASK_ID_KEY) + Path.SEPARATOR + "_") .suffix("wu")))); mwu1.write( closer.register( this.fs.create( new Path( inputDir, mwu1.getProp(ConfigurationKeys.TASK_ID_KEY) + Path.SEPARATOR + "_") .suffix("mwu")))); } finally { closer.close(); } }