private static void extract(
      List<RubixFile<Tuple, Object>> rfiles, long blockId, int numBlocks, String output)
      throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException {
    Configuration conf = new JobConf();
    File outFile = new File(output);
    if (outFile.exists()) {
      outFile.delete();
    }
    outFile.createNewFile();
    BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(outFile));
    ByteArrayOutputStream keySectionStream = new ByteArrayOutputStream();
    DataOutput keySectionOut = new DataOutputStream(keySectionStream);
    SerializationFactory serializationFactory = new SerializationFactory(conf);
    RubixFile<Tuple, Object> lastrFile = null;
    JsonNode json;
    long totalLength = 0;

    final int BUF_SIZE = 32 * 1024;
    long blockIds[] = new long[numBlocks];
    int foundBlocks = 0;

    for (int i = 0; i < numBlocks; i++) blockIds[i] = blockId + i;

    for (int i = 0; i < numBlocks; i++) {
      boolean found = false;
      for (RubixFile<Tuple, Object> rfile : rfiles) {
        print.f("Checking %s", rfile.path.toString());
        List<KeyData<Tuple>> keyDataList = rfile.getKeyData();
        for (KeyData<Tuple> keyData : keyDataList) {
          if (keyData.getBlockId() == blockIds[i]) {
            long offset = keyData.getOffset();
            long length = keyData.getLength();
            Tuple key = keyData.getKey();
            print.f(
                "Extracting block %d (off=%d len=%d) from %s",
                keyData.getBlockId(), offset, length, rfile.path.toString());

            // copy the data
            if (length > 0) {
              FileSystem fs = FileSystem.get(conf);
              FSDataInputStream in = fs.open(rfile.path);
              in.seek(offset);

              byte[] data = new byte[BUF_SIZE];
              long toRead = length;
              while (toRead > 0) {
                int thisRead = toRead > BUF_SIZE ? BUF_SIZE : (int) toRead;
                in.readFully(data, 0, thisRead);
                bos.write(data, 0, thisRead);
                toRead -= thisRead;
                System.out.print(".");
              }
              System.out.println();
            }
            // copy the key section
            Serializer<Tuple> keySerializer =
                serializationFactory.getSerializer(rfile.getKeyClass());
            keySerializer.open(keySectionStream);

            keySerializer.serialize(key);
            keySectionOut.writeLong(totalLength); // position
            keySectionOut.writeLong(keyData.getBlockId());
            keySectionOut.writeLong(keyData.getNumRecords());
            foundBlocks++;
            totalLength += length;
            lastrFile = rfile;

            found = true;
            break;
          }
        }
        if (found) {
          break;
        }
      }
      if (!found) System.err.println("Cannot locate block with id " + blockIds[i]);
    }
    byte[] trailerBytes = keySectionStream.toByteArray();

    json = JsonUtils.cloneNode(lastrFile.metadataJson);
    ((ObjectNode) json).put("numberOfBlocks", foundBlocks);

    DataOutput out = new DataOutputStream(bos);
    out.writeUTF(json.toString());
    out.writeInt(trailerBytes.length);
    out.write(trailerBytes);
    out.writeLong(totalLength); // trailer start offset
    bos.close();
  }
  private static void dumpText(List<RubixFile<Tuple, Object>> rfiles, String output, int numRows)
      throws IOException, InterruptedException, ClassNotFoundException, InstantiationException,
          IllegalAccessException {
    Configuration conf = new JobConf();
    int totalBlocks = 0;

    for (RubixFile<Tuple, Object> rfile : rfiles) {
      Path path = rfile.path;
      List<KeyData<Tuple>> keyDataList = rfile.getKeyData();

      print.f("--- %s", path.toString());
      print.f("Schema: %s", rfile.getSchema().toString());
      print.f("PartitionKeys: %s", Arrays.toString(rfile.getPartitionKeys()));
      print.f("SortKeys %s", Arrays.toString(rfile.getSortKeys()));
      print.f("Block Serialization Type: %s", rfile.getBlockSerializationType());
      print.f("Number of blocks: %d", keyDataList.size());

      totalBlocks += keyDataList.size();

      int cumrows = 0;

      for (KeyData<Tuple> keyData : keyDataList) {
        print.f(
            "Block %s. BlockId: %d (Reducer: %d Index:%d)",
            keyData,
            keyData.blockId,
            (keyData.getBlockId() >> 32),
            (keyData.getBlockId() & (((long) 1 << 32) - 1)));

        if (numRows > 0) {
          RubixInputSplit<Tuple, Object> split =
              new RubixInputSplit<Tuple, Object>(
                  conf,
                  path,
                  keyData.getKey(),
                  keyData.getOffset(),
                  keyData.getLength(),
                  keyData.getBlockId(),
                  keyData.getNumRecords(),
                  rfile.getKeyClass(),
                  rfile.getValueClass(),
                  rfile.getSchema(),
                  rfile.getBlockSerializationType());

          RubixRecordReader<Tuple, Object> recordReader = new RubixRecordReader<Tuple, Object>();
          recordReader.initialize(split, conf);
          int rows = 0;

          while (recordReader.nextKeyValue()) {
            rows++;
            if (rows < numRows) {
              System.out.println("\t" + recordReader.getCurrentValue());
            } else {
              break;
            }
          }

          cumrows += keyData.getNumRecords();
          System.out.println(
              String.format("\tRows=%d Cummulative=%d", keyData.getNumRecords(), cumrows));
        }
      }
    }

    print.f("Total Blocks: %d", totalBlocks);
  }
  @SuppressWarnings("unchecked")
  public List<KeyData<K>> getKeyData() throws IOException, ClassNotFoundException {
    if (keyData != null) return keyData;

    final FileSystem fs = FileSystem.get(conf);
    keyData = new ArrayList<KeyData<K>>();

    final long filesize = fs.getFileStatus(path).getLen();
    FSDataInputStream in = fs.open(path);

    /* The last long in the file is the start position of the trailer section */
    in.seek(filesize - 8);
    long metaDataStartPos = in.readLong();

    in.seek(metaDataStartPos);

    ObjectMapper mapper = new ObjectMapper();
    metadataJson = mapper.readValue(in.readUTF(), JsonNode.class);

    int keySectionSize = in.readInt();

    // load the key section
    byte[] keySection = new byte[keySectionSize];

    in.seek(filesize - keySectionSize - 8);
    in.read(keySection, 0, keySectionSize);
    in.close();

    ByteArrayInputStream bis = new ByteArrayInputStream(keySection);
    DataInput dataInput = new DataInputStream(bis);

    int numberOfBlocks = metadataJson.get("numberOfBlocks").getIntValue();

    // load the key section
    keyClass = (Class<K>) ClassCache.forName(JsonUtils.getText(metadataJson, "keyClass"));
    valueClass = (Class<V>) ClassCache.forName(JsonUtils.getText(metadataJson, "valueClass"));

    SerializationFactory serializationFactory = new SerializationFactory(conf);
    Deserializer<K> deserializer = serializationFactory.getDeserializer(keyClass);

    deserializer.open(bis);

    while (bis.available() > 0 && numberOfBlocks > 0) {
      K key = deserializer.deserialize(null);

      long offset = dataInput.readLong();
      long blockId = dataInput.readLong();
      long numRecords = dataInput.readLong();

      keyData.add(new KeyData<K>(key, offset, 0, numRecords, blockId));
      numberOfBlocks--;
    }

    // Assign length to each keydata entry
    int numEntries = keyData.size();
    for (int i = 1; i < numEntries; i++) {
      KeyData<K> prev = keyData.get(i - 1);
      KeyData<K> current = keyData.get(i);

      prev.setLength(current.getOffset() - prev.getOffset());
    }

    if (numEntries > 0) {
      KeyData<K> last = keyData.get(numEntries - 1);
      last.setLength(metaDataStartPos - last.offset);
    }

    return keyData;
  }