コード例 #1
0
ファイル: DistCp.java プロジェクト: sunilkalva/conduit
  /**
   * Setup output format appropriately
   *
   * @param job - Job handle
   * @throws IOException - Exception if any
   */
  private void configureOutputFormat(Job job) throws IOException {
    final Configuration configuration = job.getConfiguration();
    Path targetPath = inputOptions.getTargetPath();
    targetPath = targetPath.makeQualified(targetPath.getFileSystem(configuration));

    if (inputOptions.shouldAtomicCommit()) {
      Path workDir = inputOptions.getAtomicWorkPath();
      if (workDir == null) {
        workDir = targetPath.getParent();
      }
      workDir = new Path(workDir, WIP_PREFIX + targetPath.getName() + rand.nextInt());
      FileSystem workFS = workDir.getFileSystem(configuration);
      FileSystem targetFS = targetPath.getFileSystem(configuration);
      if (!DistCpUtils.compareFs(targetFS, workFS)) {
        throw new IllegalArgumentException(
            "Work path "
                + workDir
                + " and target path "
                + targetPath
                + " are in different file system");
      }
      CopyOutputFormat.setWorkingDirectory(job, workDir);
    } else {
      CopyOutputFormat.setWorkingDirectory(job, targetPath);
    }
    CopyOutputFormat.setCommitDirectory(job, targetPath);

    Path counterFilePath = inputOptions.getOutPutDirectory();
    if (counterFilePath == null) {
      LOG.error("Output directory is null for distcp");
    } else {
      LOG.info("DistCp output directory path: " + counterFilePath);
      CopyOutputFormat.setOutputPath(job, counterFilePath);
    }
  }
コード例 #2
0
  public boolean refresh(final Path path) throws IOException {
    try (FileSystem fs = path.getFileSystem(new Configuration())) {
      if (_fileStatus.isPresent()) {
        Optional<FileStatus> oldStatus = this._fileStatus;
        try {
          Optional<FileStatus> newStatus = Optional.of(fs.getFileStatus(path));
          this.exists = newStatus.isPresent();

          return (oldStatus.isPresent() != this._fileStatus.isPresent()
              || oldStatus.get().getModificationTime() != newStatus.get().getModificationTime()
              || oldStatus.get().isDirectory() != newStatus.get().isDirectory()
              || oldStatus.get().getLen() != newStatus.get().getLen());
        } catch (FileNotFoundException e) {
          _fileStatus = Optional.absent();
          this.exists = false;
          return true;
        }
      } else {
        if (path.getFileSystem(new Configuration()).exists(path)) {
          _fileStatus = Optional.of(fs.getFileStatus(path));
          return true;
        } else {
          return false;
        }
      }
    }
  }
コード例 #3
0
ファイル: DistCp.java プロジェクト: neutronsharc/hdfsbackup
  private static void finalize(
      Configuration conf, JobConf jobconf, final Path destPath, String presevedAttributes)
      throws IOException {
    if (presevedAttributes == null) {
      return;
    }
    EnumSet<FileAttribute> preseved = FileAttribute.parse(presevedAttributes);
    if (!preseved.contains(FileAttribute.USER)
        && !preseved.contains(FileAttribute.GROUP)
        && !preseved.contains(FileAttribute.PERMISSION)) {
      return;
    }

    FileSystem dstfs = destPath.getFileSystem(conf);
    Path dstdirlist = new Path(jobconf.get(DST_DIR_LIST_LABEL));
    SequenceFile.Reader in = null;
    try {
      in = new SequenceFile.Reader(dstdirlist.getFileSystem(jobconf), dstdirlist, jobconf);
      Text dsttext = new Text();
      FilePair pair = new FilePair();
      for (; in.next(dsttext, pair); ) {
        Path absdst = new Path(destPath, pair.output);
        updatePermissions(pair.input, dstfs.getFileStatus(absdst), preseved, dstfs);
      }
    } finally {
      checkAndClose(in);
    }
  }
コード例 #4
0
ファイル: Main.java プロジェクト: Rugal/hadoop-join
 public static void runJob(String... args) throws Exception {
   Path smallFilePath = new Path(args[0]);
   Configuration conf = new Configuration();
   FileSystem fs = smallFilePath.getFileSystem(conf);
   FileStatus smallFilePathStatus = fs.getFileStatus(smallFilePath);
   if (smallFilePathStatus.isDir()) {
     for (FileStatus f : fs.listStatus(smallFilePath)) {
       if (f.getPath().getName().contains(args[0])) {
         DistributedCache.addCacheFile(f.getPath().toUri(), conf);
       }
     }
   } else {
     DistributedCache.addCacheFile(smallFilePath.toUri(), conf);
   }
   Path inputPath = new Path(args[1]);
   Path outputPath = new Path(args[2]);
   Job job = new Job(conf);
   job.setJarByClass(Main.class);
   job.setMapperClass(GenericReplicatedJoin.class);
   job.setInputFormatClass(KeyValueTextInputFormat.class);
   job.setNumReduceTasks(0);
   outputPath.getFileSystem(conf).delete(outputPath, true);
   FileInputFormat.setInputPaths(job, inputPath);
   FileOutputFormat.setOutputPath(job, outputPath);
   job.waitForCompletion(true);
 }
コード例 #5
0
ファイル: MockClusterTest.java プロジェクト: yonglehou/reair
  /**
   * Setup to do for each unit test.
   *
   * @throws IOException if there's an error accessing the local file system
   */
  @Before
  public void setUp() throws IOException {
    srcMetastore = new MockHiveMetastoreClient();
    destMetastore = new MockHiveMetastoreClient();

    srcLocalTmp.create();
    destLocalTmp.create();

    final Path srcFsRoot = new Path("file://" + srcLocalTmp.getRoot().getAbsolutePath());
    final Path destFsRoot = new Path("file://" + destLocalTmp.getRoot().getAbsolutePath());

    srcWarehouseRoot = new Path(makeFileUri(srcLocalTmp), "warehouse");
    destWarehouseRoot = new Path(makeFileUri(destLocalTmp), "warehouse");

    srcWarehouseRoot.getFileSystem(conf).mkdirs(srcWarehouseRoot);
    destWarehouseRoot.getFileSystem(conf).mkdirs(destWarehouseRoot);

    LOG.info(String.format("src root: %s, dest root: %s", srcWarehouseRoot, destWarehouseRoot));

    final Path srcTmp = new Path(makeFileUri(this.srcLocalTmp), "tmp");
    final Path destTmp = new Path(makeFileUri(this.destLocalTmp), "tmp");

    srcCluster = new MockCluster("src_cluster", srcMetastore, srcFsRoot, srcTmp);
    destCluster = new MockCluster("dest_cluster", destMetastore, destFsRoot, destTmp);

    // Disable checking of modified times as the local filesystem does not
    // support this
    directoryCopier = new DirectoryCopier(conf, destCluster.getTmpDir(), false);
  }
コード例 #6
0
ファイル: DagUtils.java プロジェクト: sushrutikhar/hive
 /**
  * @param src the source file.
  * @param dest the destination file.
  * @param conf the configuration
  * @return true if the file names match else returns false.
  * @throws IOException when any file system related call fails
  */
 private boolean checkPreExisting(Path src, Path dest, Configuration conf) throws IOException {
   FileSystem destFS = dest.getFileSystem(conf);
   FileSystem sourceFS = src.getFileSystem(conf);
   if (destFS.exists(dest)) {
     return (sourceFS.getFileStatus(src).getLen() == destFS.getFileStatus(dest).getLen());
   }
   return false;
 }
コード例 #7
0
 @Test
 public void testRhget() throws Exception {
   final Path tmpPath = new Path("hdfs://localhost:9000/tmp/junit-test2");
   Assert.assertTrue(tmpPath.getFileSystem(personalServer.getConf()).mkdirs(tmpPath));
   // todo: why not use this instead of what is in rhget?
   tmpPath
       .getFileSystem(personalServer.getConf())
       .copyToLocalFile(true, tmpPath, new Path("/tmp/junit-test2"));
   //        personalServer.rhget(tmpPath.toString(),"hdfs://localhost:9000/tmp/copy");
   //        personalServer.rhdel(tmpPath.toString());
 }
コード例 #8
0
  public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    GenericOptionsParser gop = new GenericOptionsParser(conf, args);
    conf = gop.getConfiguration();

    Job job = new Job(conf, "ClientUserInstallMR");
    job.setJarByClass(ClientUserInstallMR.class);
    FileInputFormat.addInputPaths(job, conf.get("input_dir"));
    String outputDir = conf.get("output_dir");

    String tmpDir = outputDir + "_tmp";
    Path tmpOutput = new Path(tmpDir);
    FileOutputFormat.setOutputPath(job, tmpOutput);
    tmpOutput.getFileSystem(conf).delete(tmpOutput, true);

    job.setMapperClass(ClientUserInstallFirstMapper.class);
    job.setReducerClass(ClientUserInstallFirstReduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setNumReduceTasks(30);

    int code = job.waitForCompletion(true) ? 0 : 1;

    if (code == 0) {
      Job secondJob = new Job(conf, "ClientUserInstallResult");
      secondJob.setJarByClass(ClientUserInstallMR.class);
      conf.set("stat_date", conf.get("stat_date"));

      FileInputFormat.addInputPath(secondJob, new Path(tmpDir));
      Path output = new Path(outputDir);
      FileOutputFormat.setOutputPath(secondJob, output);
      output.getFileSystem(conf).delete(output, true);

      secondJob.setMapperClass(ClientUserInstallSecondMapper.class);
      secondJob.setReducerClass(ClientUserInstallSecondReduce.class);

      secondJob.setInputFormatClass(KeyValueTextInputFormat.class);
      secondJob.setOutputFormatClass(TextOutputFormat.class);
      secondJob.setOutputKeyClass(Text.class);
      secondJob.setOutputValueClass(Text.class);

      secondJob.setNumReduceTasks(1);

      code = secondJob.waitForCompletion(true) ? 0 : 1;
    }
    FileSystem.get(conf).delete(tmpOutput, true);
    System.exit(code);
    return code;
  }
コード例 #9
0
  public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    GenericOptionsParser gop = new GenericOptionsParser(conf, args);
    conf = gop.getConfiguration();

    Job job = new Job(conf, conf.get("job_name"));
    FileInputFormat.addInputPaths(job, conf.get("input_dir"));
    Path output = new Path(conf.get("output_dir"));
    FileOutputFormat.setOutputPath(job, output);
    output.getFileSystem(conf).delete(output, true);

    job.setJarByClass(BrowerLogFormatMR.class);
    job.setMapperClass(BrowerLogFormatMapper.class);
    job.setReducerClass(BrowerLogFormatReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(1);

    int code = job.waitForCompletion(true) ? 0 : 1;
    return code;
  }
コード例 #10
0
  @Override
  public void initialize(InputSplit genericSplit, TaskAttemptContext taskAttemptContext)
      throws IOException {
    context = taskAttemptContext;
    FileSplit fileSplit = (FileSplit) genericSplit;
    lzoFile = fileSplit.getPath();
    // The LzoSplitInputFormat is not splittable, so the split length is the whole file.
    totalFileSize = fileSplit.getLength();

    // Jump through some hoops to create the lzo codec.
    Configuration conf = CompatibilityUtil.getConfiguration(context);
    CompressionCodecFactory factory = new CompressionCodecFactory(conf);
    CompressionCodec codec = factory.getCodec(lzoFile);
    ((Configurable) codec).setConf(conf);

    LzopDecompressor lzopDecompressor = (LzopDecompressor) codec.createDecompressor();
    FileSystem fs = lzoFile.getFileSystem(conf);
    rawInputStream = fs.open(lzoFile);

    // Creating the LzopInputStream here just reads the lzo header for us, nothing more.
    // We do the rest of our input off of the raw stream is.
    codec.createInputStream(rawInputStream, lzopDecompressor);

    // This must be called AFTER createInputStream is called, because createInputStream
    // is what reads the header, which has the checksum information.  Otherwise getChecksumsCount
    // erroneously returns zero, and all block offsets will be wrong.
    numCompressedChecksums = lzopDecompressor.getCompressedChecksumsCount();
    numDecompressedChecksums = lzopDecompressor.getDecompressedChecksumsCount();
  }
コード例 #11
0
ファイル: WriteUsingMR.java プロジェクト: ajay0221/parquet-mr
  public Path write(Message... messages) throws Exception {

    synchronized (WriteUsingMR.class) {
      outputPath = TestUtils.someTemporaryFilePath();

      Path inputPath = TestUtils.someTemporaryFilePath();
      FileSystem fileSystem = inputPath.getFileSystem(conf);
      fileSystem.create(inputPath);

      inputMessages = Collections.unmodifiableList(Arrays.asList(messages));

      final Job job = new Job(conf, "write");

      // input not really used
      TextInputFormat.addInputPath(job, inputPath);
      job.setInputFormatClass(TextInputFormat.class);

      job.setMapperClass(WritingMapper.class);
      job.setNumReduceTasks(0);

      job.setOutputFormatClass(ProtoParquetOutputFormat.class);
      ProtoParquetOutputFormat.setOutputPath(job, outputPath);
      ProtoParquetOutputFormat.setProtobufClass(job, TestUtils.inferRecordsClass(messages));

      waitForJob(job);

      inputMessages = null;
      return outputPath;
    }
  }
コード例 #12
0
  @Before
  public void setUp() throws Exception {
    // create local Pig server
    pigServer = UnitTestUtil.makePigServer();

    // create temp SequenceFile
    File tempFile = File.createTempFile("test", ".txt");
    tempFilename = tempFile.getAbsolutePath();
    Path path = new Path("file:///" + tempFilename);
    Configuration conf = new Configuration();
    FileSystem fs = path.getFileSystem(conf);
    IntWritable key = new IntWritable();
    Text value = new Text();
    SequenceFile.Writer writer = null;
    try {
      writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), value.getClass());
      for (int i = 0; i < DATA.length; ++i) {
        key.set(i);
        value.set(DATA[i]);
        writer.append(key, value);
      }
    } finally {
      IOUtils.closeStream(writer);
    }
  }
コード例 #13
0
  protected static List<String> getFilesInHivePartition(Partition part, JobConf jobConf) {
    List<String> result = newArrayList();

    String ignoreFileRegex = jobConf.get(HCatTap.IGNORE_FILE_IN_PARTITION_REGEX, "");
    Pattern ignoreFilePattern = Pattern.compile(ignoreFileRegex);

    try {
      Path partitionDirPath = new Path(part.getSd().getLocation());
      FileStatus[] partitionContent =
          partitionDirPath.getFileSystem(jobConf).listStatus(partitionDirPath);
      for (FileStatus currStatus : partitionContent) {
        if (!currStatus.isDir()) {
          if (!ignoreFilePattern.matcher(currStatus.getPath().getName()).matches()) {
            result.add(currStatus.getPath().toUri().getPath());
          } else {
            LOG.debug(
                "Ignoring path {} since matches ignore regex {}",
                currStatus.getPath().toUri().getPath(),
                ignoreFileRegex);
          }
        }
      }

    } catch (IOException e) {
      logError("Unable to read the content of partition '" + part.getSd().getLocation() + "'", e);
    }

    return result;
  }
コード例 #14
0
  /** debugging TODO remove */
  private void readOutputFiles(String jobName, Path outDir) throws IOException {

    FileSystem fs = outDir.getFileSystem(getConf());
    if (!fs.getFileStatus(outDir).isDir()) {
      throw new IOException(outDir.toString() + " is not a directory");
    }

    FileStatus[] files = fs.listStatus(outDir);

    for (FileStatus f : files) {
      Path fPath = f.getPath();
      if ((!f.isDir()) && (fPath.getName().startsWith(PART_PREFIX))) {
        LOG.info("opening " + fPath.toString());
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, fPath, getConf());
        Text key = new Text();
        Text value = new Text();
        while (reader.next(key, value)) {
          LOG.info("read " + f.getPath().toString());
          LOG.info("read: k=" + key.toString() + " v=" + value.toString());
        }
        LOG.info("done reading " + fPath.toString());

        reader.close();
      }
    }
  }
コード例 #15
0
  @Override
  protected synchronized void startInternal() throws Exception {
    // create filesystem only now, as part of service-start. By this time, RM is
    // authenticated with kerberos so we are good to create a file-system
    // handle.
    fsConf = new Configuration(getConfig());
    fsConf.setBoolean("dfs.client.retry.policy.enabled", true);
    String retryPolicy =
        fsConf.get(
            YarnConfiguration.FS_RM_STATE_STORE_RETRY_POLICY_SPEC,
            YarnConfiguration.DEFAULT_FS_RM_STATE_STORE_RETRY_POLICY_SPEC);
    fsConf.set("dfs.client.retry.policy.spec", retryPolicy);

    String scheme = fsWorkingPath.toUri().getScheme();
    if (scheme == null) {
      scheme = FileSystem.getDefaultUri(fsConf).getScheme();
    }
    if (scheme != null) {
      String disableCacheName = String.format("fs.%s.impl.disable.cache", scheme);
      fsConf.setBoolean(disableCacheName, true);
    }

    fs = fsWorkingPath.getFileSystem(fsConf);
    mkdirsWithRetries(rmDTSecretManagerRoot);
    mkdirsWithRetries(rmAppRoot);
    mkdirsWithRetries(amrmTokenSecretManagerRoot);
    mkdirsWithRetries(reservationRoot);
  }
コード例 #16
0
  @Override
  public RecordWriter<IEtlKey, CamusWrapper> getDataRecordWriter(
      TaskAttemptContext context, String fileName, CamusWrapper data, FileOutputCommitter committer)
      throws IOException, InterruptedException {

    // If recordDelimiter hasn't been initialized, do so now
    if (recordDelimiter == null) {
      recordDelimiter =
          context.getConfiguration().get(ETL_OUTPUT_RECORD_DELIMITER, DEFAULT_RECORD_DELIMITER);
    }

    // Get the filename for this RecordWriter.
    Path path =
        new Path(
            committer.getWorkPath(),
            EtlMultiOutputFormat.getUniqueFile(context, fileName, getFilenameExtension()));

    //		final FSDataOutputStream writer =
    // path.getFileSystem(context.getConfiguration()).create(path);

    FileSystem fs = path.getFileSystem(context.getConfiguration());
    DataOutputStream writer; // = fs.create(path, false);
    if (isCompressed) {
      return new ByteRecordWriter(
          new DataOutputStream(codec.createOutputStream(fs.create(path, false))), recordDelimiter);
    } else {
      return new ByteRecordWriter(fs.create(path, false), recordDelimiter);
    }
  }
コード例 #17
0
ファイル: BloomMapFile.java プロジェクト: ukulililixl/core
 public Writer(Configuration conf, Path dir, SequenceFile.Writer.Option... options)
     throws IOException {
   super(conf, dir, options);
   this.fs = dir.getFileSystem(conf);
   this.dir = dir;
   initBloomFilter(conf);
 }
コード例 #18
0
  @Override
  public int run(String[] arg0) throws Exception {
    Job job = Job.getInstance(getConf(), "PopulationJob");
    Configuration conf = job.getConfiguration();
    job.setJarByClass(Population.class);

    Path out = new Path("totalorder");
    FileInputFormat.setInputPaths(job, "populations");
    FileOutputFormat.setOutputPath(job, out);
    out.getFileSystem(conf).delete(out, true);

    job.setMapperClass(PopulationMapper.class);
    job.setReducerClass(PopulationReducer.class);
    job.setInputFormatClass(KeyValueTextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setNumReduceTasks(5);

    // Configure the TotalOrderPartitioner here...
    job.setPartitionerClass(TotalOrderPartitioner.class);
    InputSampler.Sampler<Text, Text> sampler =
        new InputSampler.RandomSampler<Text, Text>(0.1, 200, 3);
    InputSampler.writePartitionFile(job, sampler);
    String partitionFile = TotalOrderPartitioner.getPartitionFile(conf);
    URI partitionURI = new URI(partitionFile + "#" + TotalOrderPartitioner.DEFAULT_PATH);
    job.addCacheFile(partitionURI);

    return job.waitForCompletion(true) ? 0 : 1;
  }
コード例 #19
0
  /**
   * Configures the partitioner for generating HFiles.
   *
   * <p>Each generated HFile should fit within a region of of the target table. Additionally, it's
   * optimal to have only one HFile to load into each region, since a read from that region will
   * require reading from each HFile under management (until compaction happens and merges them all
   * back into one HFile).
   *
   * <p>To achieve this, we configure a TotalOrderPartitioner that will partition the records output
   * from the Mapper based on their rank in a total ordering of the keys. The <code>startKeys</code>
   * argument should contain a list of the first key in each of those partitions.
   *
   * @param job The job to configure.
   * @param startKeys A list of keys that will mark the boundaries between the partitions for the
   *     sorted map output records.
   * @throws IOException If there is an error.
   */
  private static void configurePartitioner(Job job, List<HFileKeyValue> startKeys)
      throws IOException {
    job.setPartitionerClass(TotalOrderPartitioner.class);

    LOG.info("Configuring " + startKeys.size() + " reduce partitions.");
    job.setNumReduceTasks(startKeys.size());

    // Write the file that the TotalOrderPartitioner reads to determine where to partition records.
    Path partitionFilePath =
        new Path(job.getWorkingDirectory(), "partitions_" + System.currentTimeMillis());
    LOG.info("Writing partition information to " + partitionFilePath);

    final FileSystem fs = partitionFilePath.getFileSystem(job.getConfiguration());
    partitionFilePath = partitionFilePath.makeQualified(fs);
    writePartitionFile(job.getConfiguration(), partitionFilePath, startKeys);

    // Add it to the distributed cache.
    try {
      final URI cacheUri =
          new URI(partitionFilePath.toString() + "#" + TotalOrderPartitioner.DEFAULT_PATH);
      DistributedCache.addCacheFile(cacheUri, job.getConfiguration());
    } catch (URISyntaxException e) {
      throw new IOException(e);
    }
    DistributedCache.createSymlink(job.getConfiguration());
  }
コード例 #20
0
ファイル: JobInputFile.java プロジェクト: genouest/hadoopizer
  /**
   * Return a list of all urls matching this input. If autocomplete is false, the list contains only
   * 1 element (same as getUrl()). Otherwise, it will try to return all the files beginning with
   * what is returned by getUrl().
   *
   * @param jobConf A Configuration object
   * @return the list of input url
   */
  public HashSet<URI> getAllUrls(Configuration jobConf) {

    HashSet<URI> urls = new HashSet<URI>();

    if (!isAutoComplete()) {
      urls.add(url);
    } else {
      Path basePath = new Path(url);
      String filePrefix = basePath.getName();

      try {
        FileSystem fs = basePath.getFileSystem(jobConf);

        if (!fs.exists(basePath.getParent())) {
          throw new IOException("Input directory not found: " + url);
        }

        FileStatus[] stats = fs.listStatus(basePath.getParent());

        for (int i = 0; i < stats.length; i++) {
          Path path = stats[i].getPath();
          if (fs.isFile(path) && path.getName().startsWith(filePrefix)) urls.add(path.toUri());
        }
      } catch (IOException e) {
        System.err.println("Unable to autocomplete input file");
        e.printStackTrace();
        System.exit(1);
      }
    }

    return urls;
  }
コード例 #21
0
 /**
  * Write a partition file for the given job, using the Sampler provided. Queries the sampler for a
  * sample keyset, sorts by the output key comparator, selects the keys for each rank, and writes
  * to the destination returned from {@link TotalOrderPartitioner#getPartitionFile}.
  */
 @SuppressWarnings("unchecked") // getInputFormat, getOutputKeyComparator
 public static <K, V> void writePartitionFile(Job job, Sampler<K, V> sampler)
     throws IOException, ClassNotFoundException, InterruptedException {
   Configuration conf = job.getConfiguration();
   final InputFormat inf = ReflectionUtils.newInstance(job.getInputFormatClass(), conf);
   int numPartitions = job.getNumReduceTasks();
   K[] samples = sampler.getSample(inf, job);
   RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator();
   Arrays.sort(samples, comparator);
   Path dst = new Path(TotalOrderPartitioner.getPartitionFile(conf));
   FileSystem fs = dst.getFileSystem(conf);
   if (fs.exists(dst)) {
     fs.delete(dst, false);
   }
   SequenceFile.Writer writer =
       SequenceFile.createWriter(fs, conf, dst, job.getMapOutputKeyClass(), NullWritable.class);
   NullWritable nullValue = NullWritable.get();
   float stepSize = samples.length / (float) numPartitions;
   int last = -1;
   for (int i = 1; i < numPartitions; ++i) {
     int k = Math.round(stepSize * i);
     while (last >= k && comparator.compare(samples[last], samples[k]) == 0) {
       ++k;
     }
     writer.append(samples[k], nullValue);
     last = k;
   }
   writer.close();
 }
コード例 #22
0
  public static void run(Configuration conf, Path input, String outputFile)
      throws IOException, InstantiationException, IllegalAccessException {
    Writer writer;
    if (outputFile == null) {
      writer = new OutputStreamWriter(System.out);
    } else {
      writer =
          new OutputStreamWriter(
              new FileOutputStream(new File(outputFile)), Charset.forName("UTF-8"));
    }

    try {
      FileSystem fs = input.getFileSystem(conf);
      for (FileStatus fst : fs.listStatus(input, new DataPathFilter())) {
        Path dataPath = fst.getPath();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, dataPath, conf);
        try {
          Text key = reader.getKeyClass().asSubclass(Text.class).newInstance();
          DocumentMapping value = new DocumentMapping();
          while (reader.next(key, value)) {
            String docId = value.getDocId();
            writer.write(docId + "\t" + key + "\n");
          }
        } finally {
          reader.close();
        }
      }
    } finally {
      writer.close();
    }
  }
コード例 #23
0
    public QseqRecordReader(Configuration conf, FileSplit split) throws IOException {
      setConf(conf);
      file = split.getPath();
      start = split.getStart();
      end = start + split.getLength();

      FileSystem fs = file.getFileSystem(conf);
      FSDataInputStream fileIn = fs.open(file);

      CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
      CompressionCodec codec = codecFactory.getCodec(file);

      if (codec == null) // no codec.  Uncompressed file.
      {
        positionAtFirstRecord(fileIn);
        inputStream = fileIn;
      } else { // compressed file
        if (start != 0)
          throw new RuntimeException(
              "Start position for compressed file is not 0! (found " + start + ")");

        inputStream = codec.createInputStream(fileIn);
        end = Long.MAX_VALUE; // read until the end of the file
      }

      lineReader = new LineReader(inputStream);
    }
コード例 #24
0
 private static FileSystem fsForPath(Path dataPath, Configuration conf) {
   try {
     return dataPath.getFileSystem(conf);
   } catch (IOException ex) {
     throw new DatasetRepositoryException("Cannot get FileSystem for descriptor", ex);
   }
 }
コード例 #25
0
ファイル: ExplainSQRewriteTask.java プロジェクト: Leolh/hive
  @Override
  public int execute(DriverContext driverContext) {

    PrintStream out = null;
    try {
      Path resFile = new Path(work.getResFile());
      OutputStream outS = resFile.getFileSystem(conf).create(resFile);
      out = new PrintStream(outS);

      QB qb = work.getQb();
      TokenRewriteStream stream = work.getCtx().getTokenRewriteStream();
      String program = "sq rewrite";
      ASTNode ast = work.getAst();

      try {
        addRewrites(stream, qb, program, out);
        out.println(
            "\nRewritten Query:\n"
                + stream.toString(program, ast.getTokenStartIndex(), ast.getTokenStopIndex()));
      } finally {
        stream.deleteProgram(program);
      }

      out.close();
      out = null;
      return (0);
    } catch (Exception e) {
      console.printError(
          "Failed with exception " + e.getMessage(), "\n" + StringUtils.stringifyException(e));
      return (1);
    } finally {
      IOUtils.closeStream(out);
    }
  }
コード例 #26
0
ファイル: DistRaid.java プロジェクト: fire9/hadoop-20
    /** Run a FileOperation */
    public void map(
        Text key,
        PolicyInfo policy,
        OutputCollector<WritableComparable, Text> out,
        Reporter reporter)
        throws IOException {
      this.reporter = reporter;
      try {
        LOG.info("Raiding file=" + key.toString() + " policy=" + policy);
        Path p = new Path(key.toString());
        FileStatus fs = p.getFileSystem(jobconf).getFileStatus(p);
        st.clear();
        RaidNode.doRaid(jobconf, policy, fs, st, reporter);

        ++succeedcount;

        reporter.incrCounter(Counter.PROCESSED_BLOCKS, st.numProcessedBlocks);
        reporter.incrCounter(Counter.PROCESSED_SIZE, st.processedSize);
        reporter.incrCounter(Counter.META_BLOCKS, st.numMetaBlocks);
        reporter.incrCounter(Counter.META_SIZE, st.metaSize);

        reporter.incrCounter(Counter.FILES_SUCCEEDED, 1);
      } catch (IOException e) {
        ++failcount;
        reporter.incrCounter(Counter.FILES_FAILED, 1);

        String s = "FAIL: " + policy + ", " + key + " " + StringUtils.stringifyException(e);
        out.collect(null, new Text(s));
        LOG.info(s);
      } finally {
        reporter.setStatus(getCountString());
      }
    }
コード例 #27
0
 /**
  * Add a {@link Path} to the list of inputs for the map-reduce job.
  *
  * @param job The {@link Job} to modify
  * @param path {@link Path} to be added to the list of inputs for the map-reduce job.
  */
 public static void addInputPath(Job job, Path path) throws IOException {
   Configuration conf = job.getConfiguration();
   path = path.getFileSystem(conf).makeQualified(path);
   String dirStr = StringUtils.escapeString(path.toString());
   String dirs = conf.get(INPUT_DIR);
   conf.set(INPUT_DIR, dirs == null ? dirStr : dirs + "," + dirStr);
 }
コード例 #28
0
ファイル: DistRaid.java プロジェクト: fire9/hadoop-20
  /**
   * set up input file which has the list of input files.
   *
   * @return boolean
   * @throws IOException
   */
  private boolean setup() throws IOException {
    estimateSavings();

    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobconf);
    Path jobdir = new Path(jClient.getSystemDir(), NAME + "_" + randomId);

    LOG.info(JOB_DIR_LABEL + "=" + jobdir);
    jobconf.set(JOB_DIR_LABEL, jobdir.toString());
    Path log = new Path(jobdir, "_logs");

    // The control file should have small size blocks. This helps
    // in spreading out the load from mappers that will be spawned.
    jobconf.setInt("dfs.blocks.size", OP_LIST_BLOCK_SIZE);

    FileOutputFormat.setOutputPath(jobconf, log);
    LOG.info("log=" + log);

    // create operation list
    FileSystem fs = jobdir.getFileSystem(jobconf);
    Path opList = new Path(jobdir, "_" + OP_LIST_LABEL);
    jobconf.set(OP_LIST_LABEL, opList.toString());
    int opCount = 0, synCount = 0;
    SequenceFile.Writer opWriter = null;

    try {
      opWriter =
          SequenceFile.createWriter(
              fs, jobconf, opList, Text.class, PolicyInfo.class, SequenceFile.CompressionType.NONE);
      for (RaidPolicyPathPair p : raidPolicyPathPairList) {
        // If a large set of files are Raided for the first time, files
        // in the same directory that tend to have the same size will end up
        // with the same map. This shuffle mixes things up, allowing a better
        // mix of files.
        java.util.Collections.shuffle(p.srcPaths);
        for (FileStatus st : p.srcPaths) {
          opWriter.append(new Text(st.getPath().toString()), p.policy);
          opCount++;
          if (++synCount > SYNC_FILE_MAX) {
            opWriter.sync();
            synCount = 0;
          }
        }
      }

    } finally {
      if (opWriter != null) {
        opWriter.close();
      }
      fs.setReplication(opList, OP_LIST_REPLICATION); // increase replication for control file
    }
    raidPolicyPathPairList.clear();

    jobconf.setInt(OP_COUNT_LABEL, opCount);
    LOG.info("Number of files=" + opCount);
    jobconf.setNumMapTasks(
        getMapCount(opCount, new JobClient(jobconf).getClusterStatus().getTaskTrackers()));
    LOG.info("jobName= " + jobName + " numMapTasks=" + jobconf.getNumMapTasks());
    return opCount != 0;
  }
コード例 #29
0
ファイル: JobHelper.java プロジェクト: okrische/druid
  public static boolean runJobs(List<Jobby> jobs, HadoopDruidIndexerConfig config) {
    String failedMessage = null;
    for (Jobby job : jobs) {
      if (failedMessage == null) {
        if (!job.run()) {
          failedMessage = String.format("Job[%s] failed!", job.getClass());
        }
      }
    }

    if (!config.getSchema().getTuningConfig().isLeaveIntermediate()) {
      if (failedMessage == null || config.getSchema().getTuningConfig().isCleanupOnFailure()) {
        Path workingPath = config.makeIntermediatePath();
        log.info("Deleting path[%s]", workingPath);
        try {
          workingPath
              .getFileSystem(injectSystemProperties(new Configuration()))
              .delete(workingPath, true);
        } catch (IOException e) {
          log.error(e, "Failed to cleanup path[%s]", workingPath);
        }
      }
    }

    if (failedMessage != null) {
      throw new ISE(failedMessage);
    }

    return true;
  }
コード例 #30
0
 public void store(Path output) throws IOException {
   FileSystem fs = output.getFileSystem(HadoopUtils.createConfiguration());
   fs.delete(output, true);
   OutputStream os = fs.create(output, true);
   store(os);
   os.close();
 }