コード例 #1
0
  public void testStringSorting() {
    File input = null;
    File sorted = null;

    try {
      // the source file
      input = generateFileWithStrings(300000, "http://some-uri.com/that/is/a/common/prefix/to/all");

      // the sorted file
      sorted = File.createTempFile("sorted_strings", "txt");

      String[] command = {
        "/bin/bash",
        "-c",
        "export LC_ALL=\"C\" && cat \""
            + input.getAbsolutePath()
            + "\" | sort > \""
            + sorted.getAbsolutePath()
            + "\""
      };

      Process p = null;
      try {
        p = Runtime.getRuntime().exec(command);
        int retCode = p.waitFor();
        if (retCode != 0) {
          throw new Exception("Command failed with return code " + retCode);
        }
        p = null;
      } finally {
        if (p != null) {
          p.destroy();
        }
      }

      // sort the data
      UnilateralSortMerger<String> sorter = null;
      BufferedReader reader = null;
      BufferedReader verifyReader = null;

      try {
        MemoryManager mm = new DefaultMemoryManager(1024 * 1024, 1);
        IOManager ioMan = new IOManager();

        TypeSerializer<String> serializer = StringSerializer.INSTANCE;
        TypeComparator<String> comparator = new StringComparator(true);

        reader = new BufferedReader(new FileReader(input));
        MutableObjectIterator<String> inputIterator = new StringReaderMutableObjectIterator(reader);

        sorter =
            new UnilateralSortMerger<String>(
                mm,
                ioMan,
                inputIterator,
                new DummyInvokable(),
                new RuntimeStatelessSerializerFactory<String>(serializer, String.class),
                comparator,
                1.0,
                4,
                0.8f);

        MutableObjectIterator<String> sortedData = sorter.getIterator();

        reader.close();

        // verify
        verifyReader = new BufferedReader(new FileReader(sorted));
        String next;

        while ((next = verifyReader.readLine()) != null) {
          String nextFromStratoSort = sortedData.next("");

          Assert.assertNotNull(nextFromStratoSort);
          Assert.assertEquals(next, nextFromStratoSort);
        }
      } finally {
        if (reader != null) {
          reader.close();
        }
        if (verifyReader != null) {
          verifyReader.close();
        }
        if (sorter != null) {
          sorter.close();
        }
      }
    } catch (Exception e) {
      System.err.println(e.getMessage());
      e.printStackTrace();
      Assert.fail(e.getMessage());
    } finally {
      if (input != null) {
        input.delete();
      }
      if (sorted != null) {
        sorted.delete();
      }
    }
  }
コード例 #2
0
  public MutableObjectIterator<T> finishWriteAndSortKeys(List<MemorySegment> memory)
      throws IOException {
    if (recordsOutFile == null || keysOutFile == null) {
      throw new IllegalStateException("The LargeRecordHandler has not spilled any records");
    }

    // close the writers and
    final int lastBlockBytesKeys;
    final int lastBlockBytesRecords;

    recordsOutFile.close();
    keysOutFile.close();
    lastBlockBytesKeys = keysOutFile.getBytesInLatestSegment();
    lastBlockBytesRecords = recordsOutFile.getBytesInLatestSegment();
    recordsOutFile = null;
    keysOutFile = null;

    final int pagesForReaders =
        Math.max(
            3 * MIN_SEGMENTS_FOR_KEY_SPILLING,
            Math.min(2 * MAX_SEGMENTS_FOR_KEY_SPILLING, memory.size() / 50));
    final int pagesForKeyReader =
        Math.min(pagesForReaders - MIN_SEGMENTS_FOR_KEY_SPILLING, MAX_SEGMENTS_FOR_KEY_SPILLING);
    final int pagesForRecordReader = pagesForReaders - pagesForKeyReader;

    // grab memory for the record reader
    ArrayList<MemorySegment> memForRecordReader = new ArrayList<MemorySegment>();
    ArrayList<MemorySegment> memForKeysReader = new ArrayList<MemorySegment>();

    for (int i = 0; i < pagesForRecordReader; i++) {
      memForRecordReader.add(memory.remove(memory.size() - 1));
    }
    for (int i = 0; i < pagesForKeyReader; i++) {
      memForKeysReader.add(memory.remove(memory.size() - 1));
    }

    keysReader =
        new FileChannelInputView(
            ioManager.createBlockChannelReader(keysChannel),
            memManager,
            memForKeysReader,
            lastBlockBytesKeys);
    InputViewIterator<Tuple> keyIterator = new InputViewIterator<Tuple>(keysReader, keySerializer);

    keySorter =
        new UnilateralSortMerger<Tuple>(
            memManager,
            memory,
            ioManager,
            keyIterator,
            memoryOwner,
            keySerializerFactory,
            keyComparator,
            1,
            maxFilehandles,
            1.0f,
            false);

    // wait for the sorter to sort the keys
    MutableObjectIterator<Tuple> result;
    try {
      result = keySorter.getIterator();
    } catch (InterruptedException e) {
      throw new IOException(e);
    }

    recordsReader =
        new SeekableFileChannelInputView(
            ioManager, recordsChannel, memManager, memForRecordReader, lastBlockBytesRecords);

    return new FetchingIterator<T>(serializer, result, recordsReader, keySerializer, numKeyFields);
  }