Code example #1
  public static void main(String[] args) throws IOException {
    Path baseDir = null;
    String localPath = null;
    String preservePath = null;
    String sIgnoreTablesFilename = null;
    String sNoPreserveFilename = null;
    String sDateString = null;
    long size = 0;

    // UNIX timestamp (seconds) for the current time
    long now = new java.util.Date().getTime() / 1000;
    long maxDate = now;

    for (int i = 0; i < args.length; i++) {
      if (args[i].equals("--hdfs-path")) {
        baseDir = new Path(args[++i]);
        continue;
      }
      if (args[i].equals("--local-path")) {
        localPath = args[++i];
        continue;
      }
      if (args[i].equals("--preserve-path")) {
        preservePath = args[++i];
        continue;
      }
      if (args[i].equals("--no-preserve")) {
        sNoPreserveFilename = args[++i];
        continue;
      }
      if (args[i].equals("--ignore-tables")) {
        sIgnoreTablesFilename = args[++i];
        continue;
      }
      if (args[i].equals("--sleep")) {
        try {
          m_nSleepSeconds = Integer.parseInt(args[++i]);
        } catch (Exception e) {
          System.err.println("ERROR: " + e.toString() + "\n");
          usage();
        }
        continue;
      }
      if (args[i].equals("--dry-run")) {
        m_bDryRun = true;
        continue;
      }
      if (args[i].equals("--date")) {
        sDateString = args[++i];
        continue;
      }
      if (args[i].equals("--max-date")) {
        maxDate = Long.parseLong(args[++i]);
        continue;
      }
      if (args[i].equals("--max-bytes")) {
        size = Long.parseLong(args[++i]);
        continue;
      }

      System.err.println("ERROR: unknown arg " + args[i]);
      usage();
    }

    if (baseDir == null || localPath == null || preservePath == null || sDateString == null) {
      usage();
    }

    long minDate;

    if ("yesterday".equals(sDateString)) {
      // figure out yesterday's date range
      Calendar cal = Calendar.getInstance();
      // use add() rather than roll() so month/year boundaries carry over correctly
      cal.add(Calendar.DAY_OF_YEAR, -1);

      // yesterday midnight
      cal.set(Calendar.HOUR_OF_DAY, 0);
      cal.set(Calendar.MINUTE, 0);
      cal.set(Calendar.SECOND, 0);
      cal.set(Calendar.MILLISECOND, 0);

      minDate = cal.getTimeInMillis() / 1000;

      // yesterday end of day
      cal.set(Calendar.HOUR_OF_DAY, 23);
      cal.set(Calendar.MINUTE, 59);
      cal.set(Calendar.SECOND, 59);
      cal.set(Calendar.MILLISECOND, 999);

      maxDate = cal.getTimeInMillis() / 1000;
    } else if ("last-week".equals(sDateString)) {
      minDate = maxDate - (7 * 24 * 60 * 60);
    } else if ("last-day".equals(sDateString)) {
      minDate = maxDate - (24 * 60 * 60);
    } else {
      // otherwise treat the argument as a UNIX timestamp (seconds since epoch) of the last backup
      minDate = Long.parseLong(sDateString);
    }

    long tmpDate = 0;
    BackupHdfs bak = new BackupHdfs();

    // initialize the list of tables to ignore
    if (sIgnoreTablesFilename != null) {
      bak.initializeTablesToIgnore(sIgnoreTablesFilename);
    }

    // initialize list of files to not preserve
    if (sNoPreserveFilename != null) {
      bak.initializeNoPreserve(sNoPreserveFilename);
    }

    ArrayList<Path> pathList = new ArrayList<Path>(2000);
    HashMap<Path, Long> hmTimestamps = new HashMap<Path, Long>();

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    // Proceed only if the HDFS path is a directory
    if (fs.getFileStatus(baseDir).isDir()) {
      Calendar cal = Calendar.getInstance();

      System.err.println("");
      cal.setTimeInMillis(minDate * 1000);
      System.err.println("min date = " + cal.getTime().toString());

      cal.setTimeInMillis(maxDate * 1000);
      System.err.println("max date = " + cal.getTime().toString());

      System.err.println("");
      System.err.println("Searching filesystem: " + baseDir.toUri().getPath());

      bak.checkDir(fs, minDate, maxDate, baseDir, pathList, hmTimestamps);

      System.err.println("");
      System.err.println("Skipped " + m_nIgnoredTables + " files due to ignored tables");

      System.err.println("");
      System.err.println("Number of files to backup = " + pathList.size());

      System.err.println("Total bytes to backup = " + prettyPrintBytes(m_nTotalBytes));

      System.err.println("");
      System.err.println("sorting list of files...");
      Collections.sort(pathList, new DateComparator(hmTimestamps));
      System.err.println("done");

      System.err.println("");
      System.err.println("starting backup...");
      tmpDate = bak.backupFiles(localPath, preservePath, fs, pathList, size);

      bak.closeFiles();

      System.err.println("");
      System.err.println("backup completed...");
    }

    if (tmpDate == 0) {
      // If the size limit was not reached, print the current max date
      System.out.println(maxDate);
    } else {
      // Print out date for last file backed up
      System.err.println("Size limit reached.");
      System.out.println(tmpDate);
    }

    System.exit(0);
  }
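
The Collections.sort() call above relies on a DateComparator that is not shown in this example. A minimal sketch, assuming it simply orders paths by the modification timestamps collected in hmTimestamps (oldest first, so the oldest files are copied before any size limit is hit); the class body is an illustration, not the original implementation:

import java.util.Comparator;
import java.util.HashMap;
import org.apache.hadoop.fs.Path;

// Hypothetical sketch only: orders HDFS paths by their recorded modification
// timestamps (seconds since epoch), oldest first.
class DateComparator implements Comparator<Path> {
  private final HashMap<Path, Long> m_hmTimestamps;

  public DateComparator(HashMap<Path, Long> hmTimestamps) {
    m_hmTimestamps = hmTimestamps;
  }

  public int compare(Path a, Path b) {
    return m_hmTimestamps.get(a).compareTo(m_hmTimestamps.get(b));
  }
}
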
Code example #2
  /**
   * Recursively walks the HDFS filesystem (depth-first) to find all files to back up.
   *
   * @param fs FileSystem object from HDFS
   * @param minDate oldest modification date (UNIX seconds) for files to be backed up
   * @param maxDate newest modification date (UNIX seconds) for files to be backed up
   * @param p path in HDFS to look for files under
   * @param pathList will be filled with all matching files under p
   * @param hmTimestamps map of file timestamps for later sorting
   */
  public void checkDir(
      FileSystem fs,
      long minDate,
      long maxDate,
      Path p,
      ArrayList<Path> pathList,
      HashMap<Path, Long> hmTimestamps) {
    long tmpDate;
    FileStatus[] fStat;

    try {
      String sPath = p.toUri().getPath();

      // If this is a directory
      if (fs.getFileStatus(p).isDir()) {
        // ignore certain directories
        if ("dfstmp".equals(p.getName())
            || "tmp".equals(p.getName())
            || "jobtracker".equals(p.getName())
            || sPath.startsWith("/mapred")
            || "ops".equals(p.getName())
            || p.getName().startsWith("_distcp_logs")) {
          return;
        }

        // dump the mkdir and chmod commands for this
        // directory -- skip root directory only
        {
          FileStatus stat = fs.getFileStatus(p);

          if (!sPath.equals("/")) {
            m_wrMkdirs.println("hadoop fs -mkdir " + sPath);
          }

          m_wrChmods.println(
              "hadoop fs -chown " + stat.getOwner() + ":" + stat.getGroup() + " " + sPath);

          m_wrChmods.println(
              "hadoop fs -chmod " + Integer.toOctalString(stat.getPermission().toShort()) + " " + sPath);
        }

        fStat = fs.listStatus(p);

        // Do a recursive call to all elements
        for (int i = 0; i < fStat.length; i++) {
          checkDir(fs, minDate, maxDate, fStat[i].getPath(), pathList, hmTimestamps);
        }
      } else {
        // If not a directory then we've found a file

        // ignore crc files
        if (p.getName().endsWith(".crc")) {
          return;
        }

        // ignore other files
        if (sPath.startsWith("/user/oozie/etl/workflows/")) {
          return;
        }

        // try to get the table name from the path. There are
        // various types of tables, from those replicated from
        // another database to regular hive tables to
        // partitioned hive tables.  We use table names to
        // both exclude some from the backup, and for the rest
        // to dump out the schema and partition name.
        if (m_ignoreTables != null && m_ignoreTables.doIgnoreFile(sPath)) {
          m_nIgnoredTables++;

          if (m_nIgnoredTables < 5) {
            System.out.println("Skipping ignore-table file: " + sPath);
          } else if (m_nIgnoredTables == 5) {
            System.out.println("(...not showing other skipped tables...)");
          }
          return;
        }

        FileStatus stat = fs.getFileStatus(p);

        tmpDate = stat.getModificationTime() / 1000;

        // store the chmods/chowns for all files
        m_wrChmods.println(
            "hadoop fs -chown " + stat.getOwner() + ":" + stat.getGroup() + " " + sPath);

        m_wrChmods.println("hadoop fs -chmod " + stat.getPermission().toShort() + " " + sPath);

        // check dates: was the file last modified before the backup window?
        if (tmpDate < minDate) {
          return;
        }

        // is the file too recent?
        if (tmpDate > maxDate) {
          // System.out.println("file too recent: " + sPath);
          return;
        }

        // file timestamp is ok
        pathList.add(p);

        hmTimestamps.put(p, Long.valueOf(tmpDate));

        // store info about total bytes needed for the backup
        m_nTotalBytes += fs.getContentSummary(p).getLength();
      }
    } catch (IOException e) {
      System.err.println("ERROR: could not open " + p + ": " + e);

      // System.exit(1) ;
    }
  }
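
The m_ignoreTables.doIgnoreFile() check above is backed by a helper that this example does not include. A minimal sketch, assuming initializeTablesToIgnore() loads one table-name substring per line and doIgnoreFile() matches paths against those substrings; the class and field names are illustrative, not the originals:

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;

// Hypothetical sketch only: loads table-name substrings from a file and flags
// any HDFS path that contains one of them.
class IgnoreTables {
  private final ArrayList<String> m_patterns = new ArrayList<String>();

  public IgnoreTables(String sFilename) throws IOException {
    BufferedReader rd = new BufferedReader(new FileReader(sFilename));
    String sLine;
    while ((sLine = rd.readLine()) != null) {
      sLine = sLine.trim();
      if (sLine.length() > 0) {
        m_patterns.add(sLine);
      }
    }
    rd.close();
  }

  // Return true if the given HDFS path belongs to a table we should skip.
  public boolean doIgnoreFile(String sPath) {
    for (String sPattern : m_patterns) {
      if (sPath.contains(sPattern)) {
        return true;
      }
    }
    return false;
  }
}
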
Code example #3
  /**
   * Copies files from HDFS to the local filesystem, preserving existing local copies when requested.
   *
   * @param localPath path on the machine's local filesystem
   * @param preservePath path under which an existing local copy is moved before it is overwritten
   * @param fs FileSystem object from HDFS
   * @param pathList list of paths for files that might need to be backed up
   * @param size max size in bytes to be backed up (0 means no limit)
   * @return modification date of the last file backed up if the size limit was reached, else zero
   */
  public long backupFiles(
      String localPath, String preservePath, FileSystem fs, ArrayList<Path> pathList, long size) {
    Path fsPath;
    long tmpSize = 0;
    long tmpDate = 0;

    // Start iterating over all paths
    for (Path hdfsPath : pathList) {
      try {
        long nFileSize = fs.getContentSummary(hdfsPath).getLength();
        tmpSize = tmpSize + nFileSize;

        if ((tmpSize <= size) || (size == 0)) {
          FileStatus stat = fs.getFileStatus(hdfsPath);

          System.err.println(
              "File "
                  + hdfsPath.toUri().getPath()
                  + " "
                  + nFileSize
                  + " bytes, "
                  + "perms: "
                  + stat.getOwner()
                  + "/"
                  + stat.getGroup()
                  + ", "
                  + stat.getPermission().toString());

          tmpDate = stat.getModificationTime() / 1000;

          String sFsPath = localPath + hdfsPath.toUri().getPath();
          fsPath = new Path(sFsPath);

          File f = new File(sFsPath);

          // COMMENTED OUT: until a few backup cycles run
          // and the mtime gets in fact set on all copied
          // files.
          //
          // ignore it if the file exists and has the same mtime
          // if (f.exists() && f.isFile() && f.lastModified() == stat.getModificationTime())
          // {
          // System.out.println("no need to backup " + f.toString() + ", mtime matches hdfs");
          // continue;
          // }

          if (!m_bDryRun) {
            // check if we need to back up the local file
            // (not directory), if it already exists.
            if (f.exists() && f.isFile()) {
              // ignore files with substrings in the
              // no-preserve file
              if (doPreserveFile(sFsPath)) {
                // move it to the backup path
                String sNewPath = preservePath + hdfsPath.toUri().getPath();
                File newFile = new File(sNewPath);

                // create the directory structure for the new file if needed
                if (!newFile.getParentFile().exists()) {
                  if (!newFile.getParentFile().mkdirs()) {
                    System.err.println("Failed to mkdirs " + newFile.getParentFile().toString());
                    System.exit(1);
                  }
                }

                // rename existing file to new location
                if (!f.renameTo(newFile)) {
                  System.err.println(
                      "Failed to renameTo " + f.toString() + " to " + newFile.toString());
                  System.exit(1);
                }

                System.out.println("preserved " + f.toString() + " into " + newFile.toString());
              } else {
                System.out.println("skipped preservation of " + f.toString());
              }
            }

            // copy from hdfs to local filesystem
            fs.copyToLocalFile(hdfsPath, fsPath);

            // set the mtime to match hdfs file
            f.setLastModified(stat.getModificationTime());

            // compare checksums on both files
            compareChecksums(fs, hdfsPath, sFsPath);
          }

          // don't print the progress after every file -- go
          // by at least 1% increments
          long nPercentDone = (long) (100 * tmpSize / m_nTotalBytes);
          if (nPercentDone > m_nLastPercentBytesDone) {
            System.out.println(
                "progress: copied "
                    + prettyPrintBytes(tmpSize)
                    + ", "
                    + nPercentDone
                    + "% done"
                    + ", tstamp="
                    + tmpDate);

            m_nLastPercentBytesDone = nPercentDone;
          }

          if (m_nSleepSeconds > 0) {
            try {
              Thread.sleep(1000 * m_nSleepSeconds);
            } catch (Exception e2) {
              // ignore
            }
          }
        } else {
          return tmpDate;
        }
      } catch (IOException e) {
        System.err.println("FATAL ERROR: Something wrong with the file");
        System.err.println(e);
        System.out.println(tmpDate);
        System.exit(1);

        return 0;
      }
    }

    return 0;
  }
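
The compareChecksums() call above is also not part of this example. HDFS's built-in file checksum (an MD5-of-MD5-of-CRC32) is not directly comparable to a plain MD5 of the local file, so one hedged sketch is to stream both copies and compare MD5 digests of their contents; the method shapes below are assumptions, not the original code, and would sit inside BackupHdfs alongside backupFiles():

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Arrays;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

  // Hypothetical sketch only: compute an MD5 digest over a stream's contents.
  private static byte[] md5(InputStream in) throws IOException {
    try {
      MessageDigest md = MessageDigest.getInstance("MD5");
      byte[] buf = new byte[64 * 1024];
      int nRead;
      while ((nRead = in.read(buf)) != -1) {
        md.update(buf, 0, nRead);
      }
      return md.digest();
    } catch (NoSuchAlgorithmException e) {
      throw new IOException(e.toString());
    } finally {
      in.close();
    }
  }

  // Hypothetical sketch only: verify the local copy matches the HDFS original
  // by comparing MD5 digests of both streams.
  public void compareChecksums(FileSystem fs, Path hdfsPath, String sFsPath) throws IOException {
    byte[] remoteDigest = md5(fs.open(hdfsPath));
    byte[] localDigest = md5(new FileInputStream(sFsPath));

    if (!Arrays.equals(remoteDigest, localDigest)) {
      System.err.println("ERROR: checksum mismatch between " + hdfsPath + " and " + sFsPath);
      System.exit(1);
    }
  }
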