private void verifyJobOutput() throws IOException {

    final String _SUCCESS = "_SUCCESS";
    final String REDUCER_OUTPUT = "part-r-";
    boolean wasSuccessful = false;
    boolean reducerOutputExists = false;
    FileSystem fs = FileSystem.getLocal(new Configuration());
    RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(new Path(OUTPUT_PATH), false);
    LocatedFileStatus fileStatus = null;
    String fileName = null;

    while (iterator.hasNext()) {
      fileStatus = iterator.next();
      fileName = fileStatus.getPath().getName();

      if (fileName.contains(_SUCCESS)) {
        wasSuccessful = true;
      }
      if (fileName.contains(REDUCER_OUTPUT)) {
        reducerOutputExists = true;
      }
    }

    // verify presence of _SUCCESS file
    Assert.assertEquals(wasSuccessful, true);

    // verify presence of Reducer output
    Assert.assertEquals(reducerOutputExists, true);
  }
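
Since the _SUCCESS marker is written by Hadoop's FileOutputCommitter at a fixed name directly under the output directory, the same verification can be done without iterating. A minimal sketch, assuming the same OUTPUT_PATH; the part-r-00000 file name is an assumption that only holds for a single reducer:

  private void verifyJobOutputDirect() throws IOException {
    FileSystem fs = FileSystem.getLocal(new Configuration());
    // _SUCCESS is created on successful job completion (enabled by default).
    Assert.assertTrue(fs.exists(new Path(OUTPUT_PATH, "_SUCCESS")));
    // part-r-00000 assumes exactly one reducer; with N reducers there are N part files.
    Assert.assertTrue(fs.exists(new Path(OUTPUT_PATH, "part-r-00000")));
  }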
Example #2
  private InputStream OpenMultiplePartsWithOffset(FileSystem fs, Path pt, long offset)
      throws IOException {
    RemoteIterator<LocatedFileStatus> rit = fs.listFiles(pt, false);
    Vector<FSDataInputStream> fileHandleList = new Vector<FSDataInputStream>();
    while (rit.hasNext()) {
      Path path = rit.next().getPath();
      // Path#getName() returns the final path component, which is simpler and safer
      // than substring arithmetic against the parent path's string length.
      String filename = path.getName();

      if (filename.startsWith("part-")) {
        long filesize = fs.getFileStatus(path).getLen();
        if (offset < filesize) {
          FSDataInputStream handle = fs.open(path);
          if (offset > 0) {
            handle.seek(offset);
          }
          fileHandleList.add(handle);
        }
        offset -= filesize;
      }
    }
    if (fileHandleList.size() == 1) return fileHandleList.get(0);
    else if (fileHandleList.size() > 1) {
      Enumeration<FSDataInputStream> enu = fileHandleList.elements();
      return new SequenceInputStream(enu);
    } else {
      System.err.println("Error: no source file loaded. Run genSeedDataset.sh first!");
      return null;
    }
  }
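
A hypothetical caller for the method above; the path and offset are illustrative. SequenceInputStream makes reads continue transparently across part-file boundaries:

  private void readAcrossParts() throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());
    // Offset 1024 may land inside any of the part files; earlier ones are skipped.
    InputStream in = OpenMultiplePartsWithOffset(fs, new Path("/user/data/seed"), 1024L);
    if (in != null) {
      byte[] buf = new byte[4096];
      int n = in.read(buf); // subsequent reads roll over into the next part file
      System.out.println("read " + n + " bytes");
      in.close();
    }
  }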
Example #3
 protected Iterable<Path> listFiles(FileSystem fs, Path basePath) throws IOException {
   List<Path> ret = new ArrayList<>();
   RemoteIterator<LocatedFileStatus> filesIt = fs.listFiles(basePath, true);
   while (filesIt.hasNext()) {
     ret.add(filesIt.next().getPath());
   }
   return ret;
 }
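
RemoteIterator is not a java.util.Iterator, so it cannot drive a for-each loop directly; materializing it into a List, as above, is the usual workaround. An illustrative call, with a hypothetical input path:

  private void printAllFiles(FileSystem fs) throws IOException {
    // The materialized Iterable<Path> works with the enhanced for loop.
    for (Path p : listFiles(fs, new Path("/data/input"))) {
      System.out.println(p);
    }
  }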
Example #4
    /**
     * Processes the input file/folder argument. If the input is a file, then it is directly
     * considered for further processing by TraceBuilder. If the input is a folder, then all the
     * history logs in the input folder are considered for further processing.
     *
     * <p>If isRecursive is true, then the input path is recursively scanned for job history logs
     * for further processing by TraceBuilder.
     *
     * <p>NOTE: If the input represents a globbed path, then it is first flattened and then the
     * individual paths represented by the globbed input path are considered for further processing.
     *
     * @param input input path, possibly globbed
     * @param conf configuration
     * @param isRecursive whether to recursively traverse the input paths to find history logs
     * @return the input history log files' paths
     * @throws FileNotFoundException
     * @throws IOException
     */
    static List<Path> processInputArgument(String input, Configuration conf, boolean isRecursive)
        throws FileNotFoundException, IOException {
      Path inPath = new Path(input);
      FileSystem fs = inPath.getFileSystem(conf);
      FileStatus[] inStatuses = fs.globStatus(inPath);

      List<Path> inputPaths = new LinkedList<Path>();
      if (inStatuses == null || inStatuses.length == 0) {
        return inputPaths;
      }

      for (FileStatus inStatus : inStatuses) {
        Path thisPath = inStatus.getPath();
        if (inStatus.isDirectory()) {

          // Find the list of files in this path (recursively, if the -recursive
          // option is specified).
          List<FileStatus> historyLogs = new ArrayList<FileStatus>();

          RemoteIterator<LocatedFileStatus> iter = fs.listFiles(thisPath, isRecursive);
          while (iter.hasNext()) {
            LocatedFileStatus child = iter.next();
            String fileName = child.getPath().getName();

            if (!(fileName.endsWith(".crc") || fileName.startsWith("."))) {
              historyLogs.add(child);
            }
          }

          if (historyLogs.size() > 0) {
            // Add the sorted history log file names in this path to the
            // inputPaths list
            FileStatus[] sortableNames = historyLogs.toArray(new FileStatus[historyLogs.size()]);
            Arrays.sort(sortableNames, new HistoryLogsComparator());

            for (FileStatus historyLog : sortableNames) {
              inputPaths.add(historyLog.getPath());
            }
          }
        } else {
          inputPaths.add(thisPath);
        }
      }

      return inputPaths;
    }
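
An illustrative invocation of the method above: globStatus expands the pattern first, then each matched directory is scanned one level deep (or recursively when isRecursive is true). The path pattern here is hypothetical:

  private static void printHistoryLogs() throws IOException {
    Configuration conf = new Configuration();
    // Matched files are returned as-is; matched directories are scanned for logs.
    for (Path log : processInputArgument("/history/done/2016-*", conf, false)) {
      System.out.println(log);
    }
  }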
Example #5
 private List<byte[]> readResults(Path outputPath, Configuration config, FileSystem fs)
     throws IOException {
   List<byte[]> ret = new ArrayList<>();
   for (RemoteIterator<LocatedFileStatus> it = fs.listFiles(outputPath, false); it.hasNext(); ) {
     Path p = it.next().getPath();
     if (p.getName().equals("_SUCCESS")) {
       fs.delete(p, false);
       continue;
     }
     SequenceFile.Reader reader = new SequenceFile.Reader(config, SequenceFile.Reader.file(p));
     LongWritable key = new LongWritable();
     BytesWritable value = new BytesWritable();
     while (reader.next(key, value)) {
       ret.add(value.copyBytes());
     }
     reader.close();
     fs.delete(p, false);
   }
   fs.delete(outputPath, false);
   if (LOG.isDebugEnabled()) {
     LOG.debug(outputPath + ": Returning " + ret.size());
   }
   return ret;
 }
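
A hedged usage sketch for readResults; the output path is illustrative. Note that the method deletes each part file after reading it and removes the output directory at the end, so it can only be called once per job output:

  private void consumeJobOutput() throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    // Single-shot read: the output directory is consumed and deleted.
    List<byte[]> rows = readResults(new Path("/tmp/job-output"), conf, fs);
    LOG.info("Read " + rows.size() + " records");
  }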
Example #6
  /**
   * This uses {@link org.apache.flink.streaming.connectors.fs.DateTimeBucketer} to produce rolling
   * files. The clock of DateTimeBucketer is set to {@link ModifyableClock} to keep the time in
   * lockstep with the processing of elements using latches.
   */
  @Test
  public void testDateTimeRollingStringWriter() throws Exception {
    final int NUM_ELEMENTS = 20;
    final int PARALLELISM = 2;
    final String outPath = hdfsURI + "/rolling-out";
    DateTimeBucketer.setClock(new ModifyableClock());
    ModifyableClock.setCurrentTime(0);

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(PARALLELISM);

    DataStream<Tuple2<Integer, String>> source =
        env.addSource(new WaitingTestSourceFunction(NUM_ELEMENTS)).broadcast();

    // the parallel flatMap is chained to the sink, so when it has seen 5 elements it can
    // fire the latch
    DataStream<String> mapped =
        source.flatMap(
            new RichFlatMapFunction<Tuple2<Integer, String>, String>() {
              private static final long serialVersionUID = 1L;

              int count = 0;

              @Override
              public void flatMap(Tuple2<Integer, String> value, Collector<String> out)
                  throws Exception {
                out.collect(value.f1);
                count++;
                if (count >= 5) {
                  if (getRuntimeContext().getIndexOfThisSubtask() == 0) {
                    latch1.trigger();
                  } else {
                    latch2.trigger();
                  }
                  count = 0;
                }
              }
            });

    RollingSink<String> sink =
        new RollingSink<String>(outPath)
            .setBucketer(new DateTimeBucketer("ss"))
            .setPartPrefix("part")
            .setPendingPrefix("")
            .setPendingSuffix("");

    mapped.addSink(sink);

    env.execute("RollingSink String Write Test");

    RemoteIterator<LocatedFileStatus> files = dfs.listFiles(new Path(outPath), true);

    // we should have 8 rolling files: 4 time intervals times a parallelism of 2
    int numFiles = 0;
    while (files.hasNext()) {
      LocatedFileStatus file = files.next();
      numFiles++;
      if (file.getPath().toString().contains("rolling-out/00")) {
        FSDataInputStream inStream = dfs.open(file.getPath());

        BufferedReader br = new BufferedReader(new InputStreamReader(inStream));

        for (int i = 0; i < 5; i++) {
          String line = br.readLine();
          Assert.assertEquals("message #" + i, line);
        }

        inStream.close();
      } else if (file.getPath().toString().contains("rolling-out/05")) {
        FSDataInputStream inStream = dfs.open(file.getPath());

        BufferedReader br = new BufferedReader(new InputStreamReader(inStream));

        for (int i = 5; i < 10; i++) {
          String line = br.readLine();
          Assert.assertEquals("message #" + i, line);
        }

        inStream.close();
      } else if (file.getPath().toString().contains("rolling-out/10")) {
        FSDataInputStream inStream = dfs.open(file.getPath());

        BufferedReader br = new BufferedReader(new InputStreamReader(inStream));

        for (int i = 10; i < 15; i++) {
          String line = br.readLine();
          Assert.assertEquals("message #" + i, line);
        }

        inStream.close();
      } else if (file.getPath().toString().contains("rolling-out/15")) {
        FSDataInputStream inStream = dfs.open(file.getPath());

        BufferedReader br = new BufferedReader(new InputStreamReader(inStream));

        for (int i = 15; i < 20; i++) {
          String line = br.readLine();
          Assert.assertEquals("message #" + i, line);
        }

        inStream.close();
      } else {
        Assert.fail("File " + file + " does not match any expected roll pattern.");
      }
    }

    Assert.assertEquals(8, numFiles);
  }
Example #7
  public List<DataSegment> run() throws IOException {
    final JobConf jobConf = new JobConf();
    jobConf.setKeepFailedTaskFiles(false);
    for (Map.Entry<String, String> entry : converterConfig.getHadoopProperties().entrySet()) {
      jobConf.set(entry.getKey(), entry.getValue(), "converterConfig.getHadoopProperties()");
    }
    final List<DataSegment> segments = converterConfig.getSegments();
    if (segments.isEmpty()) {
      throw new IAE("No segments found for datasource [%s]", converterConfig.getDataSource());
    }
    converterConfigIntoConfiguration(converterConfig, segments, jobConf);

    jobConf.setNumReduceTasks(0); // Map only. Number of map tasks determined by input format
    jobConf.setWorkingDirectory(new Path(converterConfig.getDistributedSuccessCache()));

    setJobName(jobConf, segments);

    if (converterConfig.getJobPriority() != null) {
      jobConf.setJobPriority(JobPriority.valueOf(converterConfig.getJobPriority()));
    }

    final Job job = Job.getInstance(jobConf);

    job.setInputFormatClass(ConfigInputFormat.class);
    job.setMapperClass(ConvertingMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setMapSpeculativeExecution(false);
    job.setOutputFormatClass(ConvertingOutputFormat.class);

    JobHelper.setupClasspath(
        JobHelper.distributedClassPath(jobConf.getWorkingDirectory()),
        JobHelper.distributedClassPath(
            getJobClassPathDir(job.getJobName(), jobConf.getWorkingDirectory())),
        job);

    Throwable throwable = null;
    try {
      job.submit();
      log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());
      final boolean success = job.waitForCompletion(true);
      if (!success) {
        final TaskReport[] reports = job.getTaskReports(TaskType.MAP);
        if (reports != null) {
          for (final TaskReport report : reports) {
            log.error(
                "Error in task [%s] : %s",
                report.getTaskId(), Arrays.toString(report.getDiagnostics()));
          }
        }
        return null;
      }
      try {
        loadedBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_LOADED).getValue();
        writtenBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_WRITTEN).getValue();
      } catch (IOException ex) {
        log.error(ex, "Could not fetch counters");
      }
      final JobID jobID = job.getJobID();

      final Path jobDir = getJobPath(jobID, job.getWorkingDirectory());
      final FileSystem fs = jobDir.getFileSystem(job.getConfiguration());
      final RemoteIterator<LocatedFileStatus> it = fs.listFiles(jobDir, true);
      final List<Path> goodPaths = new ArrayList<>();
      while (it.hasNext()) {
        final LocatedFileStatus locatedFileStatus = it.next();
        if (locatedFileStatus.isFile()) {
          final Path myPath = locatedFileStatus.getPath();
          if (ConvertingOutputFormat.DATA_SUCCESS_KEY.equals(myPath.getName())) {
            goodPaths.add(new Path(myPath.getParent(), ConvertingOutputFormat.DATA_FILE_KEY));
          }
        }
      }
      if (goodPaths.isEmpty()) {
        log.warn("No good data found at [%s]", jobDir);
        return null;
      }
      final List<DataSegment> returnList =
          ImmutableList.copyOf(
              Lists.transform(
                  goodPaths,
                  new Function<Path, DataSegment>() {
                    @Nullable
                    @Override
                    public DataSegment apply(final Path input) {
                      try {
                        if (!fs.exists(input)) {
                          throw new ISE(
                              "Somehow [%s] was found but [%s] is missing at [%s]",
                              ConvertingOutputFormat.DATA_SUCCESS_KEY,
                              ConvertingOutputFormat.DATA_FILE_KEY,
                              jobDir);
                        }
                      } catch (final IOException e) {
                        throw Throwables.propagate(e);
                      }
                      try (final InputStream stream = fs.open(input)) {
                        return HadoopDruidConverterConfig.jsonMapper.readValue(
                            stream, DataSegment.class);
                      } catch (final IOException e) {
                        throw Throwables.propagate(e);
                      }
                    }
                  }));
      if (returnList.size() == segments.size()) {
        return returnList;
      } else {
        throw new ISE(
            "Tasks reported success but result length did not match! Expected %d found %d at path [%s]",
            segments.size(), returnList.size(), jobDir);
      }
    } catch (InterruptedException | ClassNotFoundException e) {
      RuntimeException exception = Throwables.propagate(e);
      throwable = exception;
      throw exception;
    } catch (Throwable t) {
      throwable = t;
      throw t;
    } finally {
      try {
        cleanup(job);
      } catch (IOException e) {
        if (throwable != null) {
          throwable.addSuppressed(e);
        } else {
          log.error(e, "Could not clean up job [%s]", job.getJobID());
        }
      }
    }
  }