Code Example #1
  public void testComplexNameWithRegex() throws Exception {
    OutputStream os = getFileSystem().create(new Path(getInputDir(), "text.txt"));
    Writer wr = new OutputStreamWriter(os);
    wr.write("b a\n");
    wr.close();

    JobConf conf = createJobConf();
    conf.setJobName("name \\Evalue]");

    conf.setInputFormat(TextInputFormat.class);

    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(IdentityMapper.class);

    FileInputFormat.setInputPaths(conf, getInputDir());

    FileOutputFormat.setOutputPath(conf, getOutputDir());

    JobClient.runJob(conf);

    Path[] outputFiles =
        FileUtil.stat2Paths(getFileSystem().listStatus(getOutputDir(), new OutputLogFilter()));
    assertEquals(1, outputFiles.length);
    InputStream is = getFileSystem().open(outputFiles[0]);
    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
    assertEquals("0\tb a", reader.readLine());
    assertNull(reader.readLine());
    reader.close();
  }
Code Example #2
File: HdfsUtils.java (Project: YzPaul3/ankus)
 /**
  * Moves the path given as the source to the specified destination path.
  *
  * @param source the path to move
  * @param target the destination to move to
  * @param fs Hadoop FileSystem
  */
 public static void move(String source, String target, FileSystem fs) throws Exception {
   Path srcPath = new Path(source);
   Path[] srcs = FileUtil.stat2Paths(fs.globStatus(srcPath), srcPath);
   Path dst = new Path(target);
   if (srcs.length > 1 && !fs.getFileStatus(dst).isDir()) {
     throw new FileSystemException(
         "When moving multiple files, destination should be a directory.");
   }
   for (int i = 0; i < srcs.length; i++) {
     if (!fs.rename(srcs[i], dst)) {
       FileStatus srcFstatus = null;
       FileStatus dstFstatus = null;
       try {
         srcFstatus = fs.getFileStatus(srcs[i]);
       } catch (FileNotFoundException e) {
         throw new FileNotFoundException(srcs[i] + ": No such file or directory");
       }
       try {
         dstFstatus = fs.getFileStatus(dst);
       } catch (IOException e) {
         // Nothing
       }
       if ((srcFstatus != null) && (dstFstatus != null)) {
         if (srcFstatus.isDir() && !dstFstatus.isDir()) {
           throw new FileSystemException(
               "cannot overwrite non directory " + dst + " with directory " + srcs[i]);
         }
       }
       throw new FileSystemException("Failed to rename " + srcs[i] + " to " + dst);
     }
   }
 }
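
A usage sketch for move() (not part of the original project; the glob and paths below are hypothetical): move every .txt file matched by the glob into an existing archive directory.

 public static void moveExample(FileSystem fs) throws Exception {
   // Hypothetical paths: the glob may match several files, so the target must be an existing directory.
   HdfsUtils.move("/tmp/incoming/*.txt", "/tmp/archive", fs);
 }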
Code Example #3
File: HDFSTools.java (Project: imclab/faunus)
 public static boolean globDelete(final FileSystem fs, final String path, final boolean recursive)
     throws IOException {
   boolean deleted = false;
   for (final Path p : FileUtil.stat2Paths(fs.globStatus(new Path(path)))) {
     fs.delete(p, recursive);
     deleted = true;
   }
   return deleted;
 }
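
A usage sketch for globDelete() (not from the original project; the glob and directory are hypothetical): remove all part files under a job output directory, non-recursively.

 public static void globDeleteExample() throws IOException {
   Configuration conf = new Configuration();
   FileSystem fs = FileSystem.get(conf);
   // Hypothetical glob: matches the part files of a finished job.
   boolean anyDeleted = HDFSTools.globDelete(fs, "/tmp/job-output/part-*", false);
   System.out.println("any files deleted: " + anyDeleted);
 }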
Code Example #4
File: TestFileUtil.java (Project: JoeChien23/hadoop)
  @Test(timeout = 30000)
  public void testStat2Paths1() {
    assertNull(FileUtil.stat2Paths(null));

    FileStatus[] fileStatuses = new FileStatus[0];
    Path[] paths = FileUtil.stat2Paths(fileStatuses);
    assertEquals(0, paths.length);

    Path path1 = new Path("file://foo");
    Path path2 = new Path("file://moo");
    fileStatuses =
        new FileStatus[] {
          new FileStatus(3, false, 0, 0, 0, path1), new FileStatus(3, false, 0, 0, 0, path2)
        };
    paths = FileUtil.stat2Paths(fileStatuses);
    assertEquals(2, paths.length);
    assertEquals(paths[0], path1);
    assertEquals(paths[1], path2);
  }
Code Example #5
File: HDFSTools.java (Project: imclab/faunus)
 public static Path getOutputsFinalJob(final FileSystem fs, final String output)
     throws IOException {
   int largest = -1;
   for (final Path path : FileUtil.stat2Paths(fs.listStatus(new Path(output)))) {
     final String[] name = path.getName().split(DASH);
     if (name.length == 2 && name[0].equals(Tokens.JOB)) {
       if (Integer.valueOf(name[1]) > largest) largest = Integer.valueOf(name[1]);
     }
   }
   if (largest == -1) return new Path(output);
   else return new Path(output + "/" + Tokens.JOB + "-" + largest);
 }
Code Example #6
 public static void fileTreeRecursion(URI uri, Configuration conf, FileSystem fs)
     throws IOException {
   Path current = new Path(uri);
   if (fs.isFile(current)) {
     visit(current, fs);
   } else {
     FileStatus[] status = fs.listStatus(current);
     Path[] paths = FileUtil.stat2Paths(status);
     for (Path p : paths) {
       fileTreeRecursion(p.toUri(), conf, fs);
     }
   }
 }
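
The recursion above assumes a visit(Path, FileSystem) helper that is not shown; a minimal, purely illustrative version could simply print each file and its length.

 // Hypothetical helper assumed by the example above (not part of the original source).
 private static void visit(Path file, FileSystem fs) throws IOException {
   System.out.println(file + " (" + fs.getFileStatus(file).getLen() + " bytes)");
 }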
Code Example #7
File: MapFileOutputFormat.java (Project: giserh/mrgeo)
  /** Open the output generated by this format. */
  public static MapFile.Reader[] getReaders(Path dir, Configuration conf) throws IOException {
    FileSystem fs = dir.getFileSystem(conf);
    Path[] names = FileUtil.stat2Paths(fs.listStatus(dir));

    // sort names, so that hash partitioning works
    Arrays.sort(names);

    MapFile.Reader[] parts = new MapFile.Reader[names.length];
    for (int i = 0; i < names.length; i++) {
      parts[i] = new MapFile.Reader(fs, names[i].toString(), conf);
    }
    return parts;
  }
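
A usage sketch for getReaders() (the output directory below is hypothetical): open every map-file part of a job and close the readers when done.

  public static void readersExample(Configuration conf) throws IOException {
    MapFile.Reader[] readers = MapFileOutputFormat.getReaders(new Path("/tmp/mapfile-output"), conf);
    try {
      // ... look up keys across the sorted parts here ...
    } finally {
      for (MapFile.Reader reader : readers) {
        reader.close();
      }
    }
  }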
Code Example #8
File: TestFileUtil.java (Project: JoeChien23/hadoop)
  @Test(timeout = 30000)
  public void testStat2Paths2() {
    Path defaultPath = new Path("file://default");
    Path[] paths = FileUtil.stat2Paths(null, defaultPath);
    assertEquals(1, paths.length);
    assertEquals(defaultPath, paths[0]);

    paths = FileUtil.stat2Paths(null, null);
    assertTrue(paths != null);
    assertEquals(1, paths.length);
    assertEquals(null, paths[0]);

    Path path1 = new Path("file://foo");
    Path path2 = new Path("file://moo");
    FileStatus[] fileStatuses =
        new FileStatus[] {
          new FileStatus(3, false, 0, 0, 0, path1), new FileStatus(3, false, 0, 0, 0, path2)
        };
    paths = FileUtil.stat2Paths(fileStatuses, defaultPath);
    assertEquals(2, paths.length);
    assertEquals(paths[0], path1);
    assertEquals(paths[1], path2);
  }
Code Example #9
  public static void main(String[] args) throws Exception {
    String uri = args[0];
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(URI.create(uri), conf);

    Path[] paths = new Path[args.length];
    for (int i = 0; i < paths.length; i++) {
      paths[i] = new Path(args[i]);
    }

    FileStatus[] status = fs.listStatus(paths);
    Path[] listedPaths = FileUtil.stat2Paths(status);
    for (Path p : listedPaths) {
      System.out.println(p);
    }
  }
Code Example #10
File: HDFSTools.java (Project: imclab/faunus)
  public static void decompressPath(
      final FileSystem fs,
      final String in,
      final String out,
      final String compressedFileSuffix,
      final boolean deletePrevious)
      throws IOException {
    final Path inPath = new Path(in);

    if (fs.isFile(inPath)) HDFSTools.decompressFile(fs, in, out, deletePrevious);
    else {
      final Path outPath = new Path(out);
      if (!fs.exists(outPath)) fs.mkdirs(outPath);
      for (final Path path : FileUtil.stat2Paths(fs.globStatus(new Path(in + FOWARD_ASTERISK)))) {
        if (path.getName().endsWith(compressedFileSuffix))
          HDFSTools.decompressFile(
              fs,
              path.toString(),
              outPath.toString() + FOWARD_SLASH + path.getName().split("\\.")[0],
              deletePrevious);
      }
    }
  }
Code Example #11
  @Test
  public void mrRun() throws Exception {
    FileSystem fs = dfsCluster.getFileSystem();
    Path inDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/input"));
    fs.delete(inDir, true);
    String DATADIR = "/user/testing/testMapperReducer/data";
    Path dataDir = fs.makeQualified(new Path(DATADIR));
    fs.delete(dataDir, true);
    Path outDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/output"));
    fs.delete(outDir, true);

    assertTrue(fs.mkdirs(inDir));
    Path INPATH = new Path(inDir, "input.txt");
    OutputStream os = fs.create(INPATH);
    Writer wr = new OutputStreamWriter(os, StandardCharsets.UTF_8);
    wr.write(DATADIR + "/" + inputAvroFile);
    wr.close();

    assertTrue(fs.mkdirs(dataDir));
    fs.copyFromLocalFile(new Path(DOCUMENTS_DIR, inputAvroFile), dataDir);

    JobConf jobConf = getJobConf();
    jobConf.set("jobclient.output.filter", "ALL");
    if (ENABLE_LOCAL_JOB_RUNNER) { // enable Hadoop LocalJobRunner; this makes it possible to run
      // in a debugger and set breakpoints
      jobConf.set("mapred.job.tracker", "local");
    }
    jobConf.setMaxMapAttempts(1);
    jobConf.setMaxReduceAttempts(1);
    jobConf.setJar(SEARCH_ARCHIVES_JAR);

    int shards = 2;
    int maxReducers = Integer.MAX_VALUE;
    if (ENABLE_LOCAL_JOB_RUNNER) {
      // local job runner has a couple of limitations: only one reducer is supported and the
      // DistributedCache doesn't work.
      // see http://blog.cloudera.com/blog/2009/07/advice-on-qa-testing-your-mapreduce-jobs/
      maxReducers = 1;
      shards = 1;
    }

    String[] args =
        new String[] {
          "--morphline-file=" + tempDir + "/test-morphlines/solrCellDocumentTypes.conf",
          "--morphline-id=morphline1",
          "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(),
          "--output-dir=" + outDir.toString(),
          "--shards=" + shards,
          "--verbose",
          numRuns % 2 == 0 ? "--input-list=" + INPATH.toString() : dataDir.toString(),
          numRuns % 3 == 0
              ? "--reducers=" + shards
              : (numRuns % 3 == 1 ? "--reducers=-1" : "--reducers=" + Math.min(8, maxReducers))
        };
    if (numRuns % 3 == 2) {
      args = concat(args, new String[] {"--fanout=2"});
    }
    if (numRuns == 0) {
      // force (slow) MapReduce based randomization to get coverage for that as well
      args =
          concat(
              new String[] {"-D", MapReduceIndexerTool.MAIN_MEMORY_RANDOMIZATION_THRESHOLD + "=-1"},
              args);
    }
    MapReduceIndexerTool tool = createTool();
    int res = ToolRunner.run(jobConf, tool, args);
    assertEquals(0, res);
    Job job = tool.job;
    assertTrue(job.isComplete());
    assertTrue(job.isSuccessful());

    if (numRuns % 3 != 2) {
      // Only run this check if mtree merge is disabled.
      // With mtree merge enabled the BatchWriter counters aren't available anymore because
      // variable "job" now refers to the merge job rather than the indexing job
      assertEquals(
          "Invalid counter "
              + SolrRecordWriter.class.getName()
              + "."
              + SolrCounters.DOCUMENTS_WRITTEN,
          count,
          job.getCounters()
              .findCounter(SolrCounters.class.getName(), SolrCounters.DOCUMENTS_WRITTEN.toString())
              .getValue());
    }

    // Check the output is as expected
    outDir = new Path(outDir, MapReduceIndexerTool.RESULTS_DIR);
    Path[] outputFiles = FileUtil.stat2Paths(fs.listStatus(outDir));

    System.out.println("outputfiles:" + Arrays.toString(outputFiles));

    UtilsForTests.validateSolrServerDocumentCount(MINIMR_CONF_DIR, fs, outDir, count, shards);

    // run again with --dryrun mode:
    tool = createTool();
    args = concat(args, new String[] {"--dry-run"});
    res = ToolRunner.run(jobConf, tool, args);
    assertEquals(0, res);

    numRuns++;
  }
Code Example #12
  @Override
  public List<InputSplit> getSplits(JobContext job) throws IOException {

    long minSizeNode = 0;
    long minSizeRack = 0;
    long maxSize = 0;
    Configuration conf = job.getConfiguration();

    // the values specified by setXxxSplitSize() take precedence over the
    // values that might have been specified in the config
    if (minSplitSizeNode != 0) {
      minSizeNode = minSplitSizeNode;
    } else {
      minSizeNode = conf.getLong("mapred.min.split.size.per.node", 0);
    }
    if (minSplitSizeRack != 0) {
      minSizeRack = minSplitSizeRack;
    } else {
      minSizeRack = conf.getLong("mapred.min.split.size.per.rack", 0);
    }
    if (maxSplitSize != 0) {
      maxSize = maxSplitSize;
    } else {
      maxSize = conf.getLong("mapred.max.split.size", 0);
    }
    if (minSizeNode != 0 && maxSize != 0 && minSizeNode > maxSize) {
      throw new IOException(
          "Minimum split size pernode "
              + minSizeNode
              + " cannot be larger than maximum split size "
              + maxSize);
    }
    if (minSizeRack != 0 && maxSize != 0 && minSizeRack > maxSize) {
      throw new IOException(
          "Minimum split size per rack"
              + minSizeRack
              + " cannot be larger than maximum split size "
              + maxSize);
    }
    if (minSizeRack != 0 && minSizeNode > minSizeRack) {
      throw new IOException(
          "Minimum split size per node"
              + minSizeNode
              + " cannot be smaller than minimum split "
              + "size per rack "
              + minSizeRack);
    }

    // all the files in input set
    Path[] paths = FileUtil.stat2Paths(listStatus(job).toArray(new FileStatus[0]));
    List<InputSplit> splits = new ArrayList<InputSplit>();
    if (paths.length == 0) {
      return splits;
    }

    // In one single iteration, process all the paths in a single pool.
    // Processing one pool at a time ensures that a split contains paths
    // from a single pool only.
    for (MultiPathFilter onepool : pools) {
      ArrayList<Path> myPaths = new ArrayList<Path>();

      // pick one input path. If it matches all the filters in a pool,
      // add it to the output set
      for (int i = 0; i < paths.length; i++) {
        if (paths[i] == null) { // already processed
          continue;
        }
        Path p = new Path(paths[i].toUri().getPath());
        if (onepool.accept(p)) {
          myPaths.add(paths[i]); // add it to my output set
          paths[i] = null; // already processed
        }
      }
      // create splits for all files in this pool.
      getMoreSplits(
          conf,
          myPaths.toArray(new Path[myPaths.size()]),
          maxSize,
          minSizeNode,
          minSizeRack,
          splits);
    }

    // Finally, process all paths that do not belong to any pool.
    ArrayList<Path> myPaths = new ArrayList<Path>();
    for (int i = 0; i < paths.length; i++) {
      if (paths[i] == null) { // already processed
        continue;
      }
      myPaths.add(paths[i]);
    }
    // create splits for all files that are not in any pool.
    getMoreSplits(
        conf, myPaths.toArray(new Path[myPaths.size()]), maxSize, minSizeNode, minSizeRack, splits);

    // free up rackToNodes map
    rackToNodes.clear();
    return splits;
  }
Code Example #13
File: TestSymLink.java (Project: hmilxin/hadoop)
  public void testSymLink() {
    try {
      boolean mayExit = false;
      MiniMRCluster mr = null;
      MiniDFSCluster dfs = null;
      try {
        Configuration conf = new Configuration();
        dfs = new MiniDFSCluster(conf, 1, true, null);
        FileSystem fileSys = dfs.getFileSystem();
        String namenode = fileSys.getUri().toString();
        mr = new MiniMRCluster(1, namenode, 3);
        // During tests, the default Configuration will use a local mapred,
        // so don't specify -config or -cluster
        String strJobtracker = "mapred.job.tracker=" + "localhost:" + mr.getJobTrackerPort();
        String strNamenode = "fs.default.name=" + namenode;
        String argv[] =
            new String[] {
              "-input", INPUT_FILE,
              "-output", OUTPUT_DIR,
              "-mapper", map,
              "-reducer", reduce,
              // "-verbose",
              // "-jobconf", "stream.debug=set"
              "-jobconf", strNamenode,
              "-jobconf", strJobtracker,
              "-jobconf", "stream.tmpdir=" + System.getProperty("test.build.data", "/tmp"),
              "-jobconf",
                  "mapred.child.java.opts=-Dcontrib.name="
                      + System.getProperty("contrib.name")
                      + " "
                      + "-Dbuild.test="
                      + System.getProperty("build.test")
                      + " "
                      + conf.get("mapred.child.java.opts", ""),
              "-cacheFile", fileSys.getUri() + CACHE_FILE + "#testlink"
            };

        fileSys.delete(new Path(OUTPUT_DIR), true);

        DataOutputStream file = fileSys.create(new Path(INPUT_FILE));
        file.writeBytes(mapString);
        file.close();
        file = fileSys.create(new Path(CACHE_FILE));
        file.writeBytes(cacheString);
        file.close();

        job = new StreamJob(argv, mayExit);
        job.go();

        fileSys = dfs.getFileSystem();
        String line = null;
        Path[] fileList =
            FileUtil.stat2Paths(fileSys.listStatus(new Path(OUTPUT_DIR), new OutputLogFilter()));
        for (int i = 0; i < fileList.length; i++) {
          System.out.println(fileList[i].toString());
          BufferedReader bread =
              new BufferedReader(new InputStreamReader(fileSys.open(fileList[i])));
          line = bread.readLine();
          System.out.println(line);
        }
        assertEquals(cacheString + "\t", line);
      } finally {
        if (dfs != null) {
          dfs.shutdown();
        }
        if (mr != null) {
          mr.shutdown();
        }
      }

    } catch (Exception e) {
      failTrace(e);
    }
  }
Code Example #14
File: TestHdfsSink.java (Project: jorson/metis)
  public void doTestTextBatchAppend() throws Exception {
    LOG.debug("Starting...");

    final long rollCount = 10;
    final long batchSize = 2;
    final String fileName = "PageView";

    String newPath = testPath + "/singleTextBucket";
    int totalEvents = 0;
    int i = 1, j = 1;

    // clear the test directory
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path dirPath = new Path(newPath);
    fs.delete(dirPath, true);
    fs.mkdirs(dirPath);

    Context context = new Context();
    context.put("hdfs.path", newPath);
    context.put("hdfs.rollCount", String.valueOf(rollCount));
    context.put("hdfs.batchSize", String.valueOf(batchSize));
    context.put("hdfs.filePrefix", "pageview");

    Channel channel = new MemoryChannel();
    Configurables.configure(channel, context);
    sink.setChannel(channel);
    sink.start();

    Calendar eventDate = Calendar.getInstance();
    Date currentDate = new Date();
    Map<String, String> header = new HashMap<String, String>();
    header.put("topic", "PageView");

    List<String> bodies = Lists.newArrayList();

    // push the test events into the channel
    for (i = 1; i <= (rollCount * 10) / batchSize; i++) {
      Transaction txn = channel.getTransaction();
      txn.begin();
      for (j = 1; j <= batchSize; j++) {
        header.put("timestamp", String.valueOf(currentDate.getTime()));
        Event event = new SimpleEvent();
        eventDate.clear();
        eventDate.set(2014, i, i, i, 0);
        String body = "Test." + i + "." + j;

        event.setHeaders(header);
        event.setBody(body.getBytes());
        bodies.add(body);
        channel.put(event);
        totalEvents++;
      }
      txn.commit();
      txn.close();

      // execute sink to process the events
      sink.process();
    }
    sink.stop();

    FileStatus[] dirStat = fs.listStatus(dirPath);
    Path[] fList = FileUtil.stat2Paths(dirStat);

    long expectedFiles = totalEvents / rollCount;
    if (totalEvents % rollCount > 0) {
      expectedFiles++;
    }

    Assert.assertEquals(
        "num files wrong, found: " + Lists.newArrayList(fList), expectedFiles, fList.length);
    // verify the contents of all files that were written
    verifyOutputTextFiles(fs, conf, dirPath.toUri().getPath(), fileName, bodies);
  }