    /**
     * Prints a text summary of each rubix file and, when numRows > 0, the first few rows
     * (at most numRows - 1) of every block.
     */
    private static void dumpText(List<RubixFile<Tuple, Object>> rfiles, String output, int numRows)
            throws IOException, InterruptedException, ClassNotFoundException,
            InstantiationException, IllegalAccessException {
        Configuration conf = new JobConf();
        int totalBlocks = 0;

        for (RubixFile<Tuple, Object> rfile : rfiles) {
            Path path = rfile.path;
            List<KeyData<Tuple>> keyDataList = rfile.getKeyData();

            print.f("--- %s", path.toString());
            print.f("Schema: %s", rfile.getSchema().toString());
            print.f("PartitionKeys: %s", Arrays.toString(rfile.getPartitionKeys()));
            print.f("SortKeys: %s", Arrays.toString(rfile.getSortKeys()));
            print.f("Block Serialization Type: %s", rfile.getBlockSerializationType());
            print.f("Number of blocks: %d", keyDataList.size());

            totalBlocks += keyDataList.size();
            int cumrows = 0;

            for (KeyData<Tuple> keyData : keyDataList) {
                print.f("Block %s. BlockId: %d (Reducer: %d Index: %d)",
                        keyData,
                        keyData.blockId,
                        (keyData.getBlockId() >> 32),
                        (keyData.getBlockId() & (((long) 1 << 32) - 1)));

                if (numRows > 0) {
                    RubixInputSplit<Tuple, Object> split =
                            new RubixInputSplit<Tuple, Object>(conf,
                                                               path,
                                                               keyData.getKey(),
                                                               keyData.getOffset(),
                                                               keyData.getLength(),
                                                               keyData.getBlockId(),
                                                               keyData.getNumRecords(),
                                                               rfile.getKeyClass(),
                                                               rfile.getValueClass(),
                                                               rfile.getSchema(),
                                                               rfile.getBlockSerializationType());

                    RubixRecordReader<Tuple, Object> recordReader =
                            new RubixRecordReader<Tuple, Object>();
                    recordReader.initialize(split, conf);
                    int rows = 0;

                    while (recordReader.nextKeyValue()) {
                        rows++;
                        if (rows < numRows) {
                            System.out.println("\t" + recordReader.getCurrentValue());
                        } else {
                            break;
                        }
                    }

                    cumrows += keyData.getNumRecords();
                    System.out.println(String.format("\tRows=%d Cumulative=%d",
                                                     keyData.getNumRecords(),
                                                     cumrows));
                }
            }
        }

        print.f("Total Blocks: %d", totalBlocks);
    }
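    // Block ids are printed by dumpText() above as a reducer id (upper 32 bits) and a block
    // index (lower 32 bits). A small illustrative helper, not part of the original tool,
    // that performs the same decomposition:
    private static String describeBlockId(long blockId) {
        long reducer = blockId >> 32;                    // upper 32 bits: reducer that wrote the block
        long index = blockId & (((long) 1 << 32) - 1);   // lower 32 bits: block index within that reducer
        return String.format("Reducer: %d Index: %d", reducer, index);
    }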
    public static void main(String[] args) throws IOException, ClassNotFoundException,
            InterruptedException, ParseException, InstantiationException, IllegalAccessException {
        final int VERBOSE_NUM_ROWS = 4;

        Options options = new Options();

        options.addOption("h", "help", false, "shows this message");
        options.addOption("v", "verbose", false, "print summary and first few rows of each block");
        options.addOption("m", "metadata", false, "show the metadata");
        options.addOption("d", "dump", false,
                "dump the contents of the rubix file. Use -f for specifying format, and -o for specifying "
                        + "output location");
        options.addOption("f", "format", true,
                "the data format for dumping data (AVRO or TEXT). Default: TEXT");
        options.addOption("e", "extract", true,
                "Extract one or more rubix blocks starting from the given blockId. Use -e blockId,numBlocks "
                        + "for specifying the blocks to be extracted. Use -o for specifying output location");
        options.addOption("o", true, "Store the output at the specified location");

        CommandLineParser parser = new BasicParser();

        // parse the command line arguments
        CommandLine line = parser.parse(options, args);

        // show the help message
        if (line.hasOption("h")) {
            HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp(
                    "RubixFile <rubix file or dir> [options]\nIf no options are provided, print a summary of the blocks.",
                    options);
            return;
        }

        // validate provided options
        if (line.hasOption("d") && line.hasOption("e")) {
            System.err.println("Cannot dump (-d) and extract (-e) at the same time!");
            return;
        }

        // obtain the list of rubix files
        String[] files = line.getArgs();
        if (files == null || files.length == 0) {
            System.err.println("Rubix file not specified");
            return;
        }

        Configuration conf = new JobConf();
        FileSystem fs = FileSystem.get(conf);

        Path path = new Path(files[0]);
        FileStatus[] allFiles;

        FileStatus status = fs.getFileStatus(path);
        if (status.isDir()) {
            allFiles = fs.listStatus(path, new PathFilter() {
                @Override
                public boolean accept(Path path) {
                    return path.toString().contains(RubixConstants.RUBIX_EXTENSION);
                }
            });
        } else {
            allFiles = new FileStatus[] { status };
        }

        // walk over all files and extract the trailer section
        List<RubixFile<Tuple, Object>> rfiles = new ArrayList<RubixFile<Tuple, Object>>();

        for (FileStatus s : allFiles) {
            Path p = s.getPath();
            RubixFile<Tuple, Object> rfile = new RubixFile<Tuple, Object>(conf, p);

            // if printing metadata information, exit after the first file (since all files
            // have the same metadata)
            if (line.hasOption("m")) {
                rfile.getKeyData();
                System.out.println(new ObjectMapper().writer().writeValueAsString(rfile.metadataJson));
                break;
            }

            rfiles.add(rfile);
        }

        // dump the data
        if (line.hasOption("d")) {
            String format = line.getOptionValue("f");
            if (format == null)
                format = "TEXT";

            format = format.trim().toUpperCase();

            if (format.equals("AVRO")) {
                // dumpAvro(rfiles, line.getOptionValue("o"));
                throw new UnsupportedOperationException(
                        "Dumping to avro is not currently supported. Please write a Cubert (map-only) script to store data in avro format");
            } else if (format.equals("TEXT")) {
                if (line.hasOption("o")) {
                    System.err.println("Dumping TEXT format data *into a file* is not currently supported");
                    return;
                }
                dumpText(rfiles, line.getOptionValue("o"), Integer.MAX_VALUE);
            } else {
                System.err.println("Invalid format [" + format + "] for dumping. Please use AVRO or TEXT");
                return;
            }
        }
        // extract arguments: -e blockId,numBlocks(contiguous) -o outputLocation
        else if (line.hasOption("e")) {
            String extractArguments = line.getOptionValue("e");
            String outputLocation;

            if (line.hasOption("o")) {
                outputLocation = line.getOptionValue("o");
            } else {
                System.err.println("Need to specify the location to store the output");
                return;
            }

            long blockId;
            int numBlocks = 1;

            if (extractArguments.contains(",")) {
                String[] splitExtractArgs = extractArguments.split(",");
                blockId = Long.parseLong(splitExtractArgs[0]);
                numBlocks = Integer.parseInt(splitExtractArgs[1]);
            } else {
                blockId = Long.parseLong(extractArguments);
            }

            extract(rfiles, blockId, numBlocks, outputLocation);
        } else {
            // no options given: print summary
            dumpText(rfiles, null, line.hasOption("v") ? VERBOSE_NUM_ROWS : 0);
        }
    }
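    // A minimal sketch of driving this tool programmatically, following the usage string
    // printed by -h ("RubixFile <rubix file or dir> [options]"). The paths and blockId below
    // are hypothetical; this method is illustrative and not part of the original tool.
    private static void exampleUsage() throws Exception {
        // Print a summary of the blocks plus the first few rows of each block (-v).
        main(new String[] { "-v", "/data/job-output" });

        // Extract two contiguous blocks, starting at blockId 42, into a local file (-e, -o).
        main(new String[] { "-e", "42,2", "-o", "/tmp/extracted-blocks", "/data/job-output" });
    }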
    private static void extract(List<RubixFile<Tuple, Object>> rfiles, long blockId, int numBlocks,
            String output) throws IOException, ClassNotFoundException, InstantiationException,
            IllegalAccessException {
        Configuration conf = new JobConf();
        File outFile = new File(output);
        if (outFile.exists()) {
            outFile.delete();
        }
        outFile.createNewFile();

        BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(outFile));
        ByteArrayOutputStream keySectionStream = new ByteArrayOutputStream();
        DataOutput keySectionOut = new DataOutputStream(keySectionStream);
        SerializationFactory serializationFactory = new SerializationFactory(conf);
        RubixFile<Tuple, Object> lastrFile = null;
        JsonNode json;
        long totalLength = 0;

        final int BUF_SIZE = 32 * 1024;
        long[] blockIds = new long[numBlocks];
        int foundBlocks = 0;

        for (int i = 0; i < numBlocks; i++)
            blockIds[i] = blockId + i;

        for (int i = 0; i < numBlocks; i++) {
            boolean found = false;
            for (RubixFile<Tuple, Object> rfile : rfiles) {
                print.f("Checking %s", rfile.path.toString());
                List<KeyData<Tuple>> keyDataList = rfile.getKeyData();

                for (KeyData<Tuple> keyData : keyDataList) {
                    if (keyData.getBlockId() == blockIds[i]) {
                        long offset = keyData.getOffset();
                        long length = keyData.getLength();
                        Tuple key = keyData.getKey();

                        print.f("Extracting block %d (off=%d len=%d) from %s",
                                keyData.getBlockId(), offset, length, rfile.path.toString());

                        // copy the data
                        if (length > 0) {
                            FileSystem fs = FileSystem.get(conf);
                            FSDataInputStream in = fs.open(rfile.path);
                            in.seek(offset);

                            byte[] data = new byte[BUF_SIZE];
                            long toRead = length;
                            while (toRead > 0) {
                                int thisRead = toRead > BUF_SIZE ? BUF_SIZE : (int) toRead;
                                in.readFully(data, 0, thisRead);
                                bos.write(data, 0, thisRead);
                                toRead -= thisRead;
                                System.out.print(".");
                            }
                            System.out.println();
                        }

                        // copy the key section
                        Serializer<Tuple> keySerializer =
                                serializationFactory.getSerializer(rfile.getKeyClass());
                        keySerializer.open(keySectionStream);
                        keySerializer.serialize(key);

                        keySectionOut.writeLong(totalLength); // position
                        keySectionOut.writeLong(keyData.getBlockId());
                        keySectionOut.writeLong(keyData.getNumRecords());

                        foundBlocks++;
                        totalLength += length;
                        lastrFile = rfile;

                        found = true;
                        break;
                    }
                }

                if (found) {
                    break;
                }
            }

            if (!found)
                System.err.println("Cannot locate block with id " + blockIds[i]);
        }

        byte[] trailerBytes = keySectionStream.toByteArray();

        json = JsonUtils.cloneNode(lastrFile.metadataJson);
        ((ObjectNode) json).put("numberOfBlocks", foundBlocks);

        DataOutput out = new DataOutputStream(bos);
        out.writeUTF(json.toString());
        out.writeInt(trailerBytes.length);
        out.write(trailerBytes);
        out.writeLong(totalLength); // trailer start offset

        bos.close();
    }
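    // A minimal sketch of reading back the file that extract() writes above. Per the writes
    // in extract(), the layout is: the raw block data first, then the metadata JSON
    // (writeUTF), the key-section length (writeInt), the serialized key section, and finally
    // the trailer start offset (writeLong) as the last eight bytes of the file. Illustrative
    // only; RandomAccessFile is referenced by its fully-qualified name to avoid assuming an
    // extra import.
    private static void readExtractedLayout(File extracted) throws IOException {
        java.io.RandomAccessFile raf = new java.io.RandomAccessFile(extracted, "r");
        try {
            // The last long in the file records where the trailer (metadata JSON) begins.
            raf.seek(raf.length() - 8);
            long trailerStart = raf.readLong();

            raf.seek(trailerStart);
            String metadataJson = raf.readUTF();    // metadata copied from the source rubix file
            int keySectionLength = raf.readInt();   // length of the serialized key section
            byte[] keySection = new byte[keySectionLength];
            raf.readFully(keySection);              // per block: key, position, blockId, numRecords

            System.out.println("metadata: " + metadataJson);
            System.out.println("key section: " + keySectionLength + " bytes");
        } finally {
            raf.close();
        }
    }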