Example #1
    private FSDataOutputStream create(Path f, Reporter reporter, FileStatus srcstat)
        throws IOException {
      if (destFileSys.exists(f)) {
        destFileSys.delete(f, false);
      }
      if (!preserve_status) {
        return destFileSys.create(f, true, sizeBuf, reporter);
      }

      FsPermission permission =
          preseved.contains(FileAttribute.PERMISSION) ? srcstat.getPermission() : null;
      short replication =
          preseved.contains(FileAttribute.REPLICATION)
              ? srcstat.getReplication()
              : destFileSys.getDefaultReplication();
      long blockSize =
          preseved.contains(FileAttribute.BLOCK_SIZE)
              ? srcstat.getBlockSize()
              : destFileSys.getDefaultBlockSize();
      return destFileSys.create(f, permission, true, sizeBuf, replication, blockSize, reporter);
    }
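
A minimal, hypothetical sketch of how a copy loop might drive the helper above; the copyFile method itself and its parameters are assumptions, not part of the original class, and sizeBuf is reused from the snippet as the copy buffer size:

    // Hypothetical caller: copy a single file, letting create() above choose the
    // permission, replication and block size from the source FileStatus whenever
    // the corresponding FileAttribute is preserved.
    private void copyFile(FileSystem srcFs, Path src, Path dst, Reporter reporter)
        throws IOException {
      FileStatus srcstat = srcFs.getFileStatus(src);
      try (FSDataInputStream in = srcFs.open(src);
          FSDataOutputStream out = create(dst, reporter, srcstat)) {
        IOUtils.copyBytes(in, out, sizeBuf); // reuse sizeBuf as the copy buffer size
      }
    }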
  public static RawSequenceFileWriter createWriter(
      FileSystem fs,
      Configuration conf,
      Path name,
      Class keyClass,
      Class valClass,
      CompressionType compressionType)
      throws IOException {

    return new RawSequenceFileWriter(
        fs,
        conf,
        name,
        keyClass,
        valClass,
        fs.getConf().getInt("io.file.buffer.size", 65536),
        fs.getDefaultReplication(),
        fs.getDefaultBlockSize(),
        null,
        new Metadata());
  }
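
Assuming RawSequenceFileWriter mirrors SequenceFile.Writer, a call to the factory might look like the following sketch; the output path and the Text key/value classes are placeholders:

  // Hypothetical usage of the factory above; the path and Text key/value classes
  // are placeholders, and CompressionType.NONE matches a raw (uncompressed) writer.
  static RawSequenceFileWriter openRawWriter(Configuration conf) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    return RawSequenceFileWriter.createWriter(
        fs, conf, new Path("/tmp/raw-output.seq"), Text.class, Text.class,
        CompressionType.NONE);
  }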
  /**
   * Uses the input splits to take samples of the input and generate sample keys. By default,
   * reads 100,000 keys from 10 locations in the input, sorts them, and picks N-1 keys to generate
   * N equally sized partitions.
   *
   * @param job the job to sample
   * @param partFile where to write the partition file
   * @throws IOException if sampling the input or writing the partition file fails
   */
  public static void writePartitionFile(final JobContext job, Path partFile)
      throws IOException, InterruptedException {
    long t1 = System.currentTimeMillis();
    Configuration conf = job.getConfiguration();
    final TeraEncryptedInputFormat inFormat = new TeraEncryptedInputFormat();
    final TextSampler sampler = new TextSampler();
    int partitions = job.getNumReduceTasks();
    long sampleSize = conf.getLong(SAMPLE_SIZE, 100000);
    final List<InputSplit> splits = inFormat.getSplits(job);
    long t2 = System.currentTimeMillis();
    System.out.println("Computing input splits took " + (t2 - t1) + "ms");
    int samples = Math.min(conf.getInt(NUM_PARTITIONS, 10), splits.size());
    System.out.println("Sampling " + samples + " splits of " + splits.size());
    final long recordsPerSample = sampleSize / samples;
    final int sampleStep = splits.size() / samples;
    Thread[] samplerReader = new Thread[samples];
    // take N samples from different parts of the input
    for (int i = 0; i < samples; ++i) {
      final int idx = i;
      samplerReader[i] =
          new Thread("Sampler Reader " + idx) {
            {
              // instance initializer: run the sampler as a daemon thread so it
              // does not keep the JVM alive if sampling stalls
              setDaemon(true);
            }

            public void run() {
              long records = 0;
              try {
                TaskAttemptContext context =
                    new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
                RecordReader<Text, Text> reader =
                    inFormat.createRecordReader(splits.get(sampleStep * idx), context);
                reader.initialize(splits.get(sampleStep * idx), context);
                while (reader.nextKeyValue()) {
                  sampler.addKey(new Text(reader.getCurrentKey()));
                  records += 1;
                  if (recordsPerSample <= records) {
                    break;
                  }
                }
              } catch (IOException ie) {
                System.err.println(
                    "Got an exception while reading splits " + StringUtils.stringifyException(ie));
                System.exit(-1);
              } catch (InterruptedException e) {
                // interrupted while sampling; just stop reading this split
              }
            }
          };
      samplerReader[i].start();
    }
    FileSystem outFs = partFile.getFileSystem(conf);
    DataOutputStream writer =
        outFs.create(partFile, true, 64 * 1024, (short) 10, outFs.getDefaultBlockSize());
    for (int i = 0; i < samples; i++) {
      try {
        samplerReader[i].join();
      } catch (InterruptedException e) {
        // interrupted while waiting for a sampler thread; move on to the next one
      }
    }
    for (Text split : sampler.createPartitions(partitions)) {
      split.write(writer);
    }
    writer.close();
    long t3 = System.currentTimeMillis();
    System.out.println("Computing parititions took " + (t3 - t2) + "ms");
  }
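
TextSampler is not part of this snippet. Based on the javadoc above (sort the sampled keys, pick N-1 cut points for N equally sized partitions), its createPartitions step presumably looks roughly like the following sketch; the records list and the index arithmetic are assumptions:

  // Sketch of the partition-key selection: sort the sampled keys and return
  // N-1 evenly spaced cut points, one boundary between each pair of reducers.
  Text[] createPartitions(List<Text> records, int partitions) {
    if (partitions > records.size()) {
      throw new IllegalArgumentException("Requested more partitions than sampled keys");
    }
    Collections.sort(records);
    Text[] result = new Text[partitions - 1];
    float stepSize = records.size() / (float) partitions;
    for (int i = 1; i < partitions; ++i) {
      result[i - 1] = records.get(Math.round(stepSize * i));
    }
    return result;
  }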
Example #4
 public NoneCompression(FileSystem fileSystem, JavaSparkContext sparkContext) {
   this.sparkContext = sparkContext;
   this.inputBlockSize = fileSystem.getDefaultBlockSize(new Path("/"));
 }
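
The constructor only captures the default block size of the target FileSystem. A hypothetical follow-up use, assuming the class sizes Spark work by input volume, is deriving a partition count so each partition covers roughly one block:

 // Hypothetical helper: one Spark partition per HDFS block of input
 // (totalInputBytes and the rounding-up division are illustrative).
 private int partitionsFor(long totalInputBytes) {
   return (int) Math.max(1, (totalInputBytes + inputBlockSize - 1) / inputBlockSize);
 }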
  public void testCreate() throws Exception {
    Configuration conf = new HdfsConfiguration();
    conf.setBoolean(DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, true);
    conf.set(FsPermission.UMASK_LABEL, "000");
    MiniDFSCluster cluster = null;
    FileSystem fs = null;

    try {
      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
      cluster.waitActive();
      fs = FileSystem.get(conf);
      FsPermission rootPerm = checkPermission(fs, "/", null);
      // ancestor directories created implicitly below get the owner
      // write+execute bits (0300) added to the permission they inherit
      FsPermission inheritPerm = FsPermission.createImmutable((short) (rootPerm.toShort() | 0300));

      FsPermission dirPerm = new FsPermission((short) 0777);
      fs.mkdirs(new Path("/a1/a2/a3"), dirPerm);
      checkPermission(fs, "/a1", dirPerm);
      checkPermission(fs, "/a1/a2", dirPerm);
      checkPermission(fs, "/a1/a2/a3", dirPerm);

      dirPerm = new FsPermission((short) 0123);
      FsPermission permission = FsPermission.createImmutable((short) (dirPerm.toShort() | 0300));
      fs.mkdirs(new Path("/aa/1/aa/2/aa/3"), dirPerm);
      checkPermission(fs, "/aa/1", permission);
      checkPermission(fs, "/aa/1/aa/2", permission);
      checkPermission(fs, "/aa/1/aa/2/aa/3", dirPerm);

      FsPermission filePerm = new FsPermission((short) 0444);
      FSDataOutputStream out =
          fs.create(
              new Path("/b1/b2/b3.txt"),
              filePerm,
              true,
              conf.getInt(CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_KEY, 4096),
              fs.getDefaultReplication(),
              fs.getDefaultBlockSize(),
              null);
      out.write(123);
      out.close();
      checkPermission(fs, "/b1", inheritPerm);
      checkPermission(fs, "/b1/b2", inheritPerm);
      checkPermission(fs, "/b1/b2/b3.txt", filePerm);

      conf.set(FsPermission.UMASK_LABEL, "022");
      permission = FsPermission.createImmutable((short) 0666);
      FileSystem.mkdirs(fs, new Path("/c1"), new FsPermission(permission));
      FileSystem.create(fs, new Path("/c1/c2.txt"), new FsPermission(permission));
      checkPermission(fs, "/c1", permission);
      checkPermission(fs, "/c1/c2.txt", permission);
    } finally {
      try {
        if (fs != null) fs.close();
      } catch (Exception e) {
        LOG.error(StringUtils.stringifyException(e));
      }
      try {
        if (cluster != null) cluster.shutdown();
      } catch (Exception e) {
        LOG.error(StringUtils.stringifyException(e));
      }
    }
  }
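
checkPermission is referenced throughout the test but not shown here; a plausible shape for it, assuming a JUnit test class, is:

  // Hypothetical shape of the helper used above: read the permission back via
  // getFileStatus, assert it when an expectation is given, and return it.
  static FsPermission checkPermission(FileSystem fs, String path, FsPermission expected)
      throws IOException {
    FsPermission actual = fs.getFileStatus(new Path(path)).getPermission();
    if (expected != null) {
      assertEquals(expected, actual);
    }
    return actual;
  }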
Example #6
 @Override
 public long getDefaultBlockSize(FileSystem fs, Path path) {
   return fs.getDefaultBlockSize(path);
 }
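
This trivial override just forwards to the Path-aware overload. In recent Hadoop releases the no-argument FileSystem#getDefaultBlockSize() is deprecated in favor of this form, because the effective block size can depend on which filesystem ultimately backs the path:

 // Prefer the Path-aware overload: the block size that applies may depend on the
 // target location (e.g. behind a viewfs mount point). The path here is illustrative.
 long blockSize = fs.getDefaultBlockSize(new Path("/user/data"));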
Example #7
  /**
   * Logs a job-submitted event to history. Creates a new history file for the job; if history
   * file creation fails, history is disabled for all subsequent events.
   *
   * @param jobConfPath path to the job conf XML file in HDFS
   * @param submitTime time when the job tracker received the job
   * @param jobTrackerId identifier of the job tracker that received the job
   * @throws IOException
   */
  public void logSubmitted(String jobConfPath, long submitTime, String jobTrackerId)
      throws IOException {

    if (disableHistory) {
      return;
    }

    // create output stream for logging in hadoop.job.history.location
    int defaultBufferSize = logDirFs.getConf().getInt("io.file.buffer.size", 4096);

    try {
      FSDataOutputStream out = null;
      PrintWriter writer = null;

      // In case the old JT is still running, but we can't connect to it, we
      // should ensure that it won't write to our (new JT's) job history file.
      if (logDirFs.exists(logFile)) {
        LOG.info("Remove the old history file " + logFile);
        logDirFs.delete(logFile, true);
      }

      out =
          logDirFs.create(
              logFile,
              new FsPermission(HISTORY_FILE_PERMISSION),
              true,
              defaultBufferSize,
              logDirFs.getDefaultReplication(),
              jobHistoryBlockSize,
              null);

      writer = new PrintWriter(out);

      fileManager.addWriter(jobId, writer);

      // cache it ...
      fileManager.setHistoryFile(jobId, logFile);

      writers = fileManager.getWriters(jobId);
      if (null != writers) {
        log(
            writers,
            RecordTypes.Meta,
            new Keys[] {Keys.VERSION},
            new String[] {String.valueOf(JobHistory.VERSION)});
      }

      String jobName = getJobName();
      String user = getUserName();

      // add to writer as well
      log(
          writers,
          RecordTypes.Job,
          new Keys[] {
            Keys.JOBID, Keys.JOBNAME, Keys.USER, Keys.SUBMIT_TIME, Keys.JOBCONF, Keys.JOBTRACKERID
          },
          new String[] {
            jobId.toString(), jobName, user,
            String.valueOf(submitTime), jobConfPath, jobTrackerId
          });

    } catch (IOException e) {
      // Disable history if we have errors other than in the user log.
      disableHistory = true;
    }

    /* Storing the job conf on the log dir */
    Path jobFilePath = new Path(logDir, CoronaJobHistoryFilesManager.getConfFilename(jobId));
    fileManager.setConfFile(jobId, jobFilePath);
    FSDataOutputStream jobFileOut = null;
    try {
      if (!logDirFs.exists(jobFilePath)) {
        jobFileOut =
            logDirFs.create(
                jobFilePath,
                new FsPermission(HISTORY_FILE_PERMISSION),
                true,
                defaultBufferSize,
                logDirFs.getDefaultReplication(),
                logDirFs.getDefaultBlockSize(),
                null);
        conf.writeXml(jobFileOut);
        jobFileOut.close();
      }
    } catch (IOException ioe) {
      LOG.error("Failed to store job conf in the log dir", ioe);
    } finally {
      if (jobFileOut != null) {
        try {
          jobFileOut.close();
        } catch (IOException ie) {
          LOG.info(
              "Failed to close the job configuration file " + StringUtils.stringifyException(ie));
        }
      }
    }
  }
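
Since logSubmitted persists the job configuration as XML at jobFilePath, a reader can rebuild it later. A minimal sketch of that reading side (not part of this example; note that Configuration parses resources lazily, hence the forced parse before the stream closes):

  // Hypothetical reader side: rebuild the job configuration persisted by
  // logSubmitted above from its XML file.
  Configuration readPersistedConf(FileSystem logDirFs, Path jobFilePath) throws IOException {
    Configuration jobConf = new Configuration(false); // start empty, no default resources
    try (FSDataInputStream in = logDirFs.open(jobFilePath)) {
      jobConf.addResource(in);
      jobConf.size(); // Configuration loads resources lazily; force the parse while the stream is open
    }
    return jobConf;
  }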