@Test public void testMergeIntermediates() { // Test Merge List<IntermediateEntry> intermediateEntries = new ArrayList<IntermediateEntry>(); int[] pageLengths = { 10 * 1024 * 1024, 10 * 1024 * 1024, 10 * 1024 * 1024, 5 * 1024 * 1024 }; // 35 MB long expectedTotalLength = 0; for (int i = 0; i < 20; i++) { List<Pair<Long, Integer>> pages = new ArrayList<Pair<Long, Integer>>(); long offset = 0; for (int j = 0; j < pageLengths.length; j++) { pages.add(new Pair(offset, pageLengths[j])); offset += pageLengths[j]; expectedTotalLength += pageLengths[j]; } IntermediateEntry interm = new IntermediateEntry(i, -1, -1, new Task.PullHost("" + i, i)); interm.setPages(pages); interm.setVolume(offset); intermediateEntries.add(interm); } long splitVolume = 128 * 1024 * 1024; List<List<FetchImpl>> fetches = Repartitioner.splitOrMergeIntermediates( null, intermediateEntries, splitVolume, 10 * 1024 * 1024); assertEquals(6, fetches.size()); int totalInterms = 0; int index = 0; int numZeroPosFetcher = 0; long totalLength = 0; for (List<FetchImpl> eachFetchList : fetches) { totalInterms += eachFetchList.size(); long eachFetchVolume = 0; for (FetchImpl eachFetch : eachFetchList) { eachFetchVolume += eachFetch.getLength(); if (eachFetch.getOffset() == 0) { numZeroPosFetcher++; } totalLength += eachFetch.getLength(); } assertTrue( eachFetchVolume + " should be smaller than splitVolume", eachFetchVolume < splitVolume); if (index < fetches.size() - 1) { assertTrue( eachFetchVolume + " should be great than 100MB", eachFetchVolume >= 100 * 1024 * 1024); } index++; } assertEquals(23, totalInterms); assertEquals(20, numZeroPosFetcher); assertEquals(expectedTotalLength, totalLength); }
/**
 * Verifies hash-shuffle fetch URI generation and the merging of intermediates by pull host.
 *
 * <p>Creates 1000 intermediate entries of volume 10, spread round-robin over 10 partitions
 * (100 entries per partition), all on the same pull host and execution block. For every
 * partition it then:
 * <ul>
 *   <li>builds a {@code HASH_SHUFFLE} {@code FetchImpl}, round-trips it through its proto and
 *       asserts that the proto is unchanged;</li>
 *   <li>asserts that exactly one URI is produced and that its query parameters carry the
 *       partition id ({@code p}), the type {@code "h"} and the execution block id
 *       ({@code sid}).</li>
 * </ul>
 * Finally it calls {@code Repartitioner.mergeIntermediateByPullHost} and asserts that every
 * partition ends up with a single entry of volume 1000 (100 entries x 10) for the execution
 * block.
 *
 * @throws Exception if any of the invoked APIs fails
 */
@Test
public void testCreateHashFetchURL() throws Exception {
  QueryId q1 = TestTajoIds.createQueryId(1315890136000L, 2);
  String hostName = "tajo1";
  int port = 1234;
  ExecutionBlockId sid = new ExecutionBlockId(q1, 2);

  int numPartition = 10;

  Map<Integer, List<IntermediateEntry>> intermediateEntries =
      new HashMap<Integer, List<IntermediateEntry>>();
  for (int i = 0; i < numPartition; i++) {
    intermediateEntries.put(i, new ArrayList<IntermediateEntry>());
  }

  for (int i = 0; i < 1000; i++) {
    int partitionId = i % numPartition;
    IntermediateEntry entry =
        new IntermediateEntry(i, 0, partitionId, new Task.PullHost(hostName, port));
    entry.setEbId(sid);
    entry.setVolume(10);
    intermediateEntries.get(partitionId).add(entry);
  }

  Map<Integer, Map<ExecutionBlockId, List<IntermediateEntry>>> hashEntries =
      new HashMap<Integer, Map<ExecutionBlockId, List<IntermediateEntry>>>();

  for (Map.Entry<Integer, List<IntermediateEntry>> eachEntry : intermediateEntries.entrySet()) {
    FetchImpl fetch = new FetchImpl(
        new Task.PullHost(hostName, port),
        ShuffleType.HASH_SHUFFLE,
        sid,
        eachEntry.getKey(),
        eachEntry.getValue());
    fetch.setName(sid.toString());

    // Round-trip through the proto representation; it must come back unchanged.
    FetchProto proto = fetch.getProto();
    fetch = new FetchImpl(proto);
    assertEquals(proto, fetch.getProto());

    Map<ExecutionBlockId, List<IntermediateEntry>> ebEntries =
        new HashMap<ExecutionBlockId, List<IntermediateEntry>>();
    ebEntries.put(sid, eachEntry.getValue());
    hashEntries.put(eachEntry.getKey(), ebEntries);

    List<URI> uris = fetch.getURIs();
    assertEquals(1, uris.size()); // In Hash Shuffle, Fetcher returns only one URI per partition.

    URI uri = uris.get(0);
    final Map<String, List<String>> params = new QueryStringDecoder(uri).parameters();

    assertEquals(eachEntry.getKey().toString(), params.get("p").get(0));
    assertEquals("h", params.get("type").get(0));
    assertEquals("" + sid.getId(), params.get("sid").get(0));
  }

  Map<Integer, Map<ExecutionBlockId, List<IntermediateEntry>>> mergedHashEntries =
      Repartitioner.mergeIntermediateByPullHost(hashEntries);

  assertEquals(numPartition, mergedHashEntries.size());
  for (int i = 0; i < numPartition; i++) {
    // Look up partition i (previously always partition 0), so every partition is verified.
    Map<ExecutionBlockId, List<IntermediateEntry>> eachEntry = mergedHashEntries.get(i);
    assertEquals(1, eachEntry.size());
    List<IntermediateEntry> interEntry = eachEntry.get(sid);
    assertEquals(1, interEntry.size());

    // 100 entries of volume 10 per partition are expected to merge into one entry of 1000.
    assertEquals(1000, interEntry.get(0).getVolume());
  }
}
/**
 * Exercises {@code Repartitioner.splitOrMergeIntermediates} with intermediates that are each
 * larger than the split volume and that all share a single pull host.
 *
 * <p>Builds 20 intermediate entries on the same {@code Task.PullHost}, each with 20 pages
 * (19 x 10MB + 1 x 5MB = 195MB), and then asserts that:
 * <ul>
 *   <li>32 fetch lists come back;</li>
 *   <li>collecting all fetches into a set loses none of them, i.e. no two fetches are equal;</li>
 *   <li>every fetch list is strictly smaller than the 128MB split volume, and every list except
 *       the last one carries at least 100MB;</li>
 *   <li>exactly 20 fetches start at offset 0, matching the number of entries;</li>
 *   <li>all fetches report the same pull host;</li>
 *   <li>the summed fetch length equals the summed page length.</li>
 * </ul>
 */
@Test
public void testSplitIntermediatesWithUniqueHost() {
  List<IntermediateEntry> intermediateEntries = new ArrayList<IntermediateEntry>();

  int[] pageLengths = new int[20]; // 195MB
  for (int i = 0; i < pageLengths.length; i++) {
    if (i < pageLengths.length - 1) {
      pageLengths[i] = 10 * 1024 * 1024;
    } else {
      pageLengths[i] = 5 * 1024 * 1024;
    }
  }

  long expectedTotalLength = 0;
  // One shared host instance: every entry is pulled from the same place.
  Task.PullHost pullHost = new Task.PullHost("host", 0);

  for (int i = 0; i < 20; i++) {
    List<Pair<Long, Integer>> pages = new ArrayList<Pair<Long, Integer>>();
    long offset = 0;
    for (int pageLength : pageLengths) {
      // Parameterized Pair (not the raw type) so the element type is checked by the compiler.
      pages.add(new Pair<Long, Integer>(offset, pageLength));
      offset += pageLength;
      expectedTotalLength += pageLength;
    }

    IntermediateEntry interm = new IntermediateEntry(i, -1, 0, pullHost);
    interm.setPages(pages);
    interm.setVolume(offset);
    intermediateEntries.add(interm);
  }

  long splitVolume = 128 * 1024 * 1024;
  // NOTE(review): the last argument (10MB) is presumably the page size - confirm against
  // Repartitioner.splitOrMergeIntermediates.
  List<List<FetchImpl>> fetches = Repartitioner.splitOrMergeIntermediates(
      null, intermediateEntries, splitVolume, 10 * 1024 * 1024);
  assertEquals(32, fetches.size());

  // Every fetch must be distinct: the set size has to match the total number of fetches.
  int expectedSize = 0;
  Set<FetchImpl> fetchSet = TUtil.newHashSet();
  for (List<FetchImpl> list : fetches) {
    expectedSize += list.size();
    fetchSet.addAll(list);
  }
  assertEquals(expectedSize, fetchSet.size());

  int index = 0;
  int numZeroPosFetcher = 0;
  long totalLength = 0;
  Set<String> uniqPullHost = new HashSet<String>();

  for (List<FetchImpl> eachFetchList : fetches) {
    long length = 0;
    for (FetchImpl eachFetch : eachFetchList) {
      if (eachFetch.getOffset() == 0) {
        numZeroPosFetcher++;
      }
      totalLength += eachFetch.getLength();
      length += eachFetch.getLength();
      uniqPullHost.add(eachFetch.getPullHost().toString());
    }

    assertTrue(length + " should be smaller than splitVolume", length < splitVolume);
    // The last list only holds the remainder, so the lower bound is not applied to it.
    if (index < fetches.size() - 1) {
      assertTrue(
          length + " should be great than 100MB" + fetches.size() + "," + index,
          length >= 100 * 1024 * 1024);
    }
    index++;
  }

  assertEquals(20, numZeroPosFetcher);
  assertEquals(1, uniqPullHost.size());
  assertEquals(expectedTotalLength, totalLength);
}