  /**
   * Recursively walks the HDFS filesystem to find all files eligible for backup.
   *
   * @param fs FileSystem object for HDFS
   * @param minDate oldest modification time, in seconds, for files to be backed up
   * @param maxDate newest modification time, in seconds, for files to be backed up
   * @param p path in HDFS under which to look for files
   * @param pathList will be filled with all files found under p
   * @param hmTimestamps filled with the files' timestamps for later sorting
   */
  public void checkDir(
      FileSystem fs,
      long minDate,
      long maxDate,
      Path p,
      ArrayList<Path> pathList,
      HashMap<Path, Long> hmTimestamps) {
    long tmpDate;
    FileStatus[] fStat;

    try {
      String sPath = p.toUri().getPath();

      // If this is a directory
      if (fs.getFileStatus(p).isDirectory()) {
        // ignore certain directories
        if ("dfstmp".equals(p.getName())
            || "tmp".equals(p.getName())
            || "jobtracker".equals(p.getName())
            || sPath.startsWith("/mapred")
            || "ops".equals(p.getName())
            || p.getName().startsWith("_distcp_logs")) {
          return;
        }

        // emit the mkdir and chown/chmod commands for this
        // directory (the mkdir is skipped for the root directory)
        {
          FileStatus stat = fs.getFileStatus(p);

          if (!sPath.equals("/")) {
            m_wrMkdirs.println("hadoop fs -mkdir " + sPath);
          }

          m_wrChmods.println(
              "hadoop fs -chown " + stat.getOwner() + ":" + stat.getGroup() + " " + sPath);

          // emit the permission bits in octal, as expected by chmod
          m_wrChmods.println(
              "hadoop fs -chmod "
                  + Integer.toOctalString(stat.getPermission().toShort())
                  + " "
                  + sPath);
        }
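        // For example (illustrative values only), a directory owned by hdfs:hadoop
        // with mode 0755 would produce lines like:
        //   hadoop fs -mkdir /data/logs
        //   hadoop fs -chown hdfs:hadoop /data/logs
        //   hadoop fs -chmod 755 /data/logs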

        fStat = fs.listStatus(p);

        // Do a recursive call for all entries of this directory
        for (FileStatus child : fStat) {
          checkDir(fs, minDate, maxDate, child.getPath(), pathList, hmTimestamps);
        }
      } else {
        // If not a directory then we've found a file

        // ignore .crc checksum files
        if (p.getName().endsWith(".crc")) {
          return;
        }

        // ignore Oozie ETL workflow files
        if (sPath.startsWith("/user/oozie/etl/workflows/")) {
          return;
        }

        // Try to get the table name from the path. There are
        // various types of tables, from those replicated from
        // another database to regular Hive tables to
        // partitioned Hive tables. Table names are used both to
        // exclude some tables from the backup and, for the rest,
        // to dump out the schema and partition name.
        if (m_ignoreTables != null && m_ignoreTables.doIgnoreFile(sPath)) {
          m_nIgnoredTables++;

          if (m_nIgnoredTables < 5) {
            System.out.println("Skipping ignore-table file: " + sPath);
          } else if (m_nIgnoredTables == 5) {
            System.out.println("(...not showing other skipped tables...)");
          }
          return;
        }

        FileStatus stat = fs.getFileStatus(p);

        tmpDate = stat.getModificationTime() / 1000; // modification time in seconds

        // store the chmods/chowns for all files
        m_wrChmods.println(
            "hadoop fs -chown " + stat.getOwner() + ":" + stat.getGroup() + " " + sPath);

        m_wrChmods.println("hadoop fs -chmod " + stat.getPermission().toShort() + " " + sPath);

        // check dates.  is the file too old?
        if (tmpDate < minDate) {
          return;
        }

        // is the file too recent?
        if (tmpDate > maxDate) {
          // System.out.println("file too recent: " + sPath);
          return;
        }

        // file timestamp is ok
        pathList.add(p);

        hmTimestamps.put(p, Long.valueOf(tmpDate));

        // track the total number of bytes that need to be backed up
        m_nTotalBytes += fs.getContentSummary(p).getLength();
      }
    } catch (IOException e) {
      System.err.println("ERROR: could not open " + p + ": " + e);

      // System.exit(1) ;
    }
  }
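
  /**
   * Illustrative usage sketch, not part of the original tool: collect every file whose
   * modification time falls within the last seven days, starting from the filesystem root.
   * The method name and the seven-day window are assumptions for the example; it relies on
   * the surrounding class having already opened the m_wrMkdirs/m_wrChmods writers and
   * configured m_ignoreTables.
   */
  public void exampleWalkLastWeek(FileSystem fs) {
    long nowSecs = System.currentTimeMillis() / 1000; // seconds, matching checkDir()
    long minDate = nowSecs - 7L * 24 * 60 * 60;       // oldest modification time to accept
    long maxDate = nowSecs;                           // newest modification time to accept

    ArrayList<Path> pathList = new ArrayList<Path>();
    HashMap<Path, Long> hmTimestamps = new HashMap<Path, Long>();

    checkDir(fs, minDate, maxDate, new Path("/"), pathList, hmTimestamps);

    System.out.println(
        "Found " + pathList.size() + " candidate files, " + m_nTotalBytes + " bytes total");
  }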