Exemple #1
0
  /**
   * Checks that {@code Repartitioner.splitOrMergeIntermediates} groups small intermediate entries
   * into fetch lists bounded by the split volume.
   *
   * <p>Fixture: 20 intermediate entries, each created with its own {@code Task.PullHost} (host
   * name and port are both the entry index) and four pages of 10MB, 10MB, 10MB and 5MB, i.e. 35MB
   * per entry. The call uses a 128MB split volume and a 10MB page size.
   *
   * <p>Expectations: 6 fetch lists; every list is strictly smaller than the split volume and every
   * list except the last holds at least 100MB; 23 fetches overall, 20 of which start at offset 0;
   * and the fetch lengths add up to the total length of all pages.
   */
  @Test
  public void testMergeIntermediates() {
    final int mb = 1024 * 1024;
    List<IntermediateEntry> intermediateEntries = new ArrayList<IntermediateEntry>();

    int[] pageLengths = {10 * mb, 10 * mb, 10 * mb, 5 * mb}; // 35 MB per entry
    long expectedTotalLength = 0;
    for (int i = 0; i < 20; i++) {
      List<Pair<Long, Integer>> pages = new ArrayList<Pair<Long, Integer>>();
      long offset = 0;
      for (int pageLength : pageLengths) {
        // Parameterized Pair (not the raw type) so the compiler checks the element types.
        pages.add(new Pair<Long, Integer>(offset, pageLength));
        offset += pageLength;
        expectedTotalLength += pageLength;
      }
      IntermediateEntry interm = new IntermediateEntry(i, -1, -1, new Task.PullHost("" + i, i));
      interm.setPages(pages);
      // After the page loop, offset equals the sum of this entry's page lengths.
      interm.setVolume(offset);
      intermediateEntries.add(interm);
    }

    long splitVolume = 128L * mb;
    List<List<FetchImpl>> fetches =
        Repartitioner.splitOrMergeIntermediates(null, intermediateEntries, splitVolume, 10 * mb);
    assertEquals(6, fetches.size());

    int totalInterms = 0;
    int index = 0;
    int numZeroPosFetcher = 0;
    long totalLength = 0;
    for (List<FetchImpl> eachFetchList : fetches) {
      totalInterms += eachFetchList.size();
      long eachFetchVolume = 0;
      for (FetchImpl eachFetch : eachFetchList) {
        eachFetchVolume += eachFetch.getLength();
        if (eachFetch.getOffset() == 0) {
          numZeroPosFetcher++;
        }
        totalLength += eachFetch.getLength();
      }
      assertTrue(
          eachFetchVolume + " should be smaller than splitVolume", eachFetchVolume < splitVolume);
      // The last list only carries the remainder, so it is exempt from the lower bound.
      if (index < fetches.size() - 1) {
        assertTrue(eachFetchVolume + " should be at least 100MB", eachFetchVolume >= 100 * mb);
      }
      index++;
    }
    assertEquals(23, totalInterms);
    assertEquals(20, numZeroPosFetcher);
    assertEquals(expectedTotalLength, totalLength);
  }
Exemple #2
0
  /**
   * Exercises hash-shuffle fetch creation and {@code Repartitioner.mergeIntermediateByPullHost}.
   *
   * <p>Fixture: 1000 intermediate entries of volume 10, all on the same pull host and execution
   * block, distributed round-robin over 10 partitions (100 entries, i.e. a volume of 1000, per
   * partition).
   *
   * <p>For every partition the test builds a {@code FetchImpl}, rebuilds it from its own proto and
   * checks that the proto is unchanged, then checks that the fetch yields exactly one URI whose
   * query parameters {@code p}, {@code type} and {@code sid} carry the partition id, {@code "h"}
   * and the execution block id. Finally it merges the entries by pull host and checks that every
   * partition ends up with a single merged entry whose volume is 1000.
   *
   * @throws Exception declared for the calls made by the test; no specific exception is expected
   */
  @Test
  public void testCreateHashFetchURL() throws Exception {
    QueryId q1 = TestTajoIds.createQueryId(1315890136000L, 2);
    String hostName = "tajo1";
    int port = 1234;
    ExecutionBlockId sid = new ExecutionBlockId(q1, 2);
    int numPartition = 10;

    Map<Integer, List<IntermediateEntry>> intermediateEntries =
        new HashMap<Integer, List<IntermediateEntry>>();
    for (int i = 0; i < numPartition; i++) {
      intermediateEntries.put(i, new ArrayList<IntermediateEntry>());
    }
    for (int i = 0; i < 1000; i++) {
      int partitionId = i % numPartition;
      IntermediateEntry entry =
          new IntermediateEntry(i, 0, partitionId, new Task.PullHost(hostName, port));
      entry.setEbId(sid);
      entry.setVolume(10);
      intermediateEntries.get(partitionId).add(entry);
    }

    Map<Integer, Map<ExecutionBlockId, List<IntermediateEntry>>> hashEntries =
        new HashMap<Integer, Map<ExecutionBlockId, List<IntermediateEntry>>>();

    for (Map.Entry<Integer, List<IntermediateEntry>> eachEntry : intermediateEntries.entrySet()) {
      FetchImpl fetch =
          new FetchImpl(
              new Task.PullHost(hostName, port),
              ShuffleType.HASH_SHUFFLE,
              sid,
              eachEntry.getKey(),
              eachEntry.getValue());

      fetch.setName(sid.toString());

      // Rebuild the fetch from its proto and make sure nothing is lost on the way.
      FetchProto proto = fetch.getProto();
      fetch = new FetchImpl(proto);
      assertEquals(proto, fetch.getProto());

      Map<ExecutionBlockId, List<IntermediateEntry>> ebEntries =
          new HashMap<ExecutionBlockId, List<IntermediateEntry>>();
      ebEntries.put(sid, eachEntry.getValue());

      hashEntries.put(eachEntry.getKey(), ebEntries);

      List<URI> uris = fetch.getURIs();
      assertEquals(1, uris.size()); // In hash shuffle, the fetcher returns one URI per partition.

      URI uri = uris.get(0);
      final Map<String, List<String>> params = new QueryStringDecoder(uri).parameters();

      assertEquals(eachEntry.getKey().toString(), params.get("p").get(0));
      assertEquals("h", params.get("type").get(0));
      assertEquals("" + sid.getId(), params.get("sid").get(0));
    }

    Map<Integer, Map<ExecutionBlockId, List<IntermediateEntry>>> mergedHashEntries =
        Repartitioner.mergeIntermediateByPullHost(hashEntries);

    assertEquals(numPartition, mergedHashEntries.size());
    for (int i = 0; i < numPartition; i++) {
      // Look up partition i (not a fixed partition 0) so that every partition is verified.
      Map<ExecutionBlockId, List<IntermediateEntry>> eachEntry = mergedHashEntries.get(i);
      assertEquals(1, eachEntry.size());
      List<IntermediateEntry> interEntry = eachEntry.get(sid);
      assertEquals(1, interEntry.size());

      // 100 entries of volume 10 were added to each partition.
      assertEquals(1000, interEntry.get(0).getVolume());
    }
  }
Exemple #3
0
  /**
   * Checks {@code Repartitioner.splitOrMergeIntermediates} when all intermediate entries share a
   * single pull host.
   *
   * <p>Fixture: 20 intermediate entries on the same {@code Task.PullHost}, each with 20 pages
   * (nineteen 10MB pages followed by one 5MB page, 195MB per entry). The call uses a 128MB split
   * volume and a 10MB page size.
   *
   * <p>Expectations: 32 fetch lists; no {@code FetchImpl} appears more than once across the lists;
   * every list is strictly smaller than the split volume and every list except the last holds at
   * least 100MB; 20 fetches start at offset 0; all fetches report the same pull host; and the
   * fetch lengths add up to the total length of all pages.
   */
  @Test
  public void testSplitIntermediatesWithUniqueHost() {
    final int mb = 1024 * 1024;
    List<IntermediateEntry> intermediateEntries = new ArrayList<IntermediateEntry>();

    int[] pageLengths = new int[20]; // 195MB
    for (int i = 0; i < pageLengths.length; i++) {
      // Every page is 10MB except the trailing one, which is 5MB.
      pageLengths[i] = (i < pageLengths.length - 1) ? 10 * mb : 5 * mb;
    }

    long expectedTotalLength = 0;
    Task.PullHost pullHost = new Task.PullHost("host", 0);

    for (int i = 0; i < 20; i++) {
      List<Pair<Long, Integer>> pages = new ArrayList<Pair<Long, Integer>>();
      long offset = 0;
      for (int pageLength : pageLengths) {
        // Parameterized Pair (not the raw type) so the compiler checks the element types.
        pages.add(new Pair<Long, Integer>(offset, pageLength));
        offset += pageLength;
        expectedTotalLength += pageLength;
      }
      IntermediateEntry interm = new IntermediateEntry(i, -1, 0, pullHost);
      interm.setPages(pages);
      // After the page loop, offset equals the sum of this entry's page lengths.
      interm.setVolume(offset);
      intermediateEntries.add(interm);
    }

    long splitVolume = 128L * mb;
    List<List<FetchImpl>> fetches =
        Repartitioner.splitOrMergeIntermediates(null, intermediateEntries, splitVolume, 10 * mb);
    assertEquals(32, fetches.size());

    // Collecting every fetch into a set must not drop any: no fetch may be emitted twice.
    int totalFetchCount = 0;
    Set<FetchImpl> fetchSet = TUtil.newHashSet();
    for (List<FetchImpl> list : fetches) {
      totalFetchCount += list.size();
      fetchSet.addAll(list);
    }
    assertEquals(totalFetchCount, fetchSet.size());

    int index = 0;
    int numZeroPosFetcher = 0;
    long totalLength = 0;
    Set<String> uniqPullHost = new HashSet<String>();

    for (List<FetchImpl> eachFetchList : fetches) {
      long length = 0;
      for (FetchImpl eachFetch : eachFetchList) {
        if (eachFetch.getOffset() == 0) {
          numZeroPosFetcher++;
        }
        totalLength += eachFetch.getLength();
        length += eachFetch.getLength();
        uniqPullHost.add(eachFetch.getPullHost().toString());
      }
      assertTrue(length + " should be smaller than splitVolume", length < splitVolume);
      // The last list only carries the remainder, so it is exempt from the lower bound.
      if (index < fetches.size() - 1) {
        assertTrue(
            length
                + " should be at least 100MB (fetch list "
                + index
                + " of "
                + fetches.size()
                + ")",
            length >= 100 * mb);
      }
      index++;
    }
    assertEquals(20, numZeroPosFetcher);
    assertEquals(1, uniqPullHost.size());
    assertEquals(expectedTotalLength, totalLength);
  }