public static void main(String[] args) throws IOException {
    Path baseDir = null;
    String localPath = null;
    String preservePath = null;
    String sIgnoreTablesFilename = null;
    String sNoPreserveFilename = null;
    String sDateString = null;
    long size = 0;

    // UNIX dates for right now
    long now = new java.util.Date().getTime() / 1000;
    long maxDate = now;

    for (int i = 0; i < args.length; i++) {
      if (args[i].equals("--hdfs-path")) {
        baseDir = new Path(args[++i]);
        continue;
      }
      if (args[i].equals("--local-path")) {
        localPath = args[++i];
        continue;
      }
      if (args[i].equals("--preserve-path")) {
        preservePath = args[++i];
        continue;
      }
      if (args[i].equals("--no-preserve")) {
        sNoPreserveFilename = args[++i];
        continue;
      }
      if (args[i].equals("--ignore-tables")) {
        sIgnoreTablesFilename = args[++i];
        continue;
      }
      if (args[i].equals("--sleep")) {
        try {
          m_nSleepSeconds = Integer.parseInt(args[++i]);
        } catch (Exception e) {
          System.err.println("ERROR: " + e.toString() + "\n");
          usage();
        }
        continue;
      }
      if (args[i].equals("--dry-run")) {
        m_bDryRun = true;
        continue;
      }
      if (args[i].equals("--date")) {
        sDateString = args[++i];
        continue;
      }
      if (args[i].equals("--max-date")) {
        maxDate = Long.parseLong(args[++i]);
        continue;
      }
      if (args[i].equals("--max-bytes")) {
        size = Long.parseLong(args[++i]);
        continue;
      }

      System.err.println("ERROR: unknown arg " + args[i]);
      usage();
    }

    if (baseDir == null || localPath == null || preservePath == null || sDateString == null) {
      usage();
    }

    long minDate;

    if ("yesterday".equals(sDateString)) {
      // figure out yesterday's dates
      Calendar cal = Calendar.getInstance();
      cal.roll(Calendar.DAY_OF_YEAR, -1);

      // yesterday midnight
      cal.set(Calendar.HOUR_OF_DAY, 0);
      cal.set(Calendar.MINUTE, 0);
      cal.set(Calendar.SECOND, 0);
      cal.set(Calendar.MILLISECOND, 0);

      minDate = cal.getTimeInMillis() / 1000;

      // yesterday end of day
      cal.set(Calendar.HOUR_OF_DAY, 23);
      cal.set(Calendar.MINUTE, 59);
      cal.set(Calendar.SECOND, 59);
      cal.set(Calendar.MILLISECOND, 999);

      maxDate = cal.getTimeInMillis() / 1000;
    } else if ("last-week".equals(sDateString)) {
      minDate = maxDate - (7 * 24 * 60 * 60);
    } else if ("last-day".equals(sDateString)) {
      minDate = maxDate - (24 * 60 * 60);
    } else {
      // UNIX date since epoch of last backup
      minDate = Long.parseLong(sDateString);
    }

    long tmpDate = 0;
    BackupHdfs bak = new BackupHdfs();

    // initialize the list of tables to ignore
    if (sIgnoreTablesFilename != null) {
      bak.initializeTablesToIgnore(sIgnoreTablesFilename);
    }

    // initialize list of files to not preserve
    if (sNoPreserveFilename != null) {
      bak.initializeNoPreserve(sNoPreserveFilename);
    }

    ArrayList<Path> pathList = new ArrayList<Path>(2000);
    HashMap<Path, Long> hmTimestamps = new HashMap<Path, Long>();

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    // If the HDFS path is a dir continue
    if (fs.getFileStatus(baseDir).isDir()) {
      Calendar cal = Calendar.getInstance();

      System.err.println("");
      cal.setTimeInMillis(minDate * 1000);
      System.err.println("min date = " + cal.getTime().toString());

      cal.setTimeInMillis(maxDate * 1000);
      System.err.println("max date = " + cal.getTime().toString());

      System.err.println("");
      System.err.println("Searching filesystem: " + baseDir.toUri().getPath());

      bak.checkDir(fs, minDate, maxDate, baseDir, pathList, hmTimestamps);

      System.err.println("");
      System.err.println("Skipped " + m_nIgnoredTables + " files due to ignored tables");

      System.err.println("");
      System.err.println("Number of files to backup = " + pathList.size());

      System.err.println("Total bytes to backup = " + prettyPrintBytes(m_nTotalBytes));

      System.err.println("");
      System.err.println("sorting list of files...");
      Collections.sort(pathList, new DateComparator(hmTimestamps));
      System.err.println("done");

      System.err.println("");
      System.err.println("starting backup...");
      tmpDate = bak.backupFiles(localPath, preservePath, fs, pathList, size);

      bak.closeFiles();

      System.err.println("");
      System.err.println("backup completed...");
    }

    if (tmpDate == 0) {
      // If not size limit reached print out date for right now
      System.out.println(maxDate);
    } else {
      // Print out date for last file backed up
      System.err.println("Size limit reached.");
      System.out.println(tmpDate);
    }

    System.exit(0);
  }
Exemplo n.º 2
0
  public void doTestTextBatchAppend() throws Exception {
    LOG.debug("Starting...");

    final long rollCount = 10;
    final long batchSize = 2;
    final String fileName = "PageView";

    String newPath = testPath + "/singleTextBucket";
    int totalEvents = 0;
    int i = 1, j = 1;

    // clear the test directory
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path dirPath = new Path(newPath);
    fs.delete(dirPath, true);
    fs.mkdirs(dirPath);

    Context context = new Context();
    context.put("hdfs.path", newPath);
    context.put("hdfs.rollCount", String.valueOf(rollCount));
    context.put("hdfs.batchSize", String.valueOf(batchSize));
    context.put("hdfs.filePrefix", "pageview");

    Channel channel = new MemoryChannel();
    Configurables.configure(channel, context);
    sink.setChannel(channel);
    sink.start();

    Calendar eventDate = Calendar.getInstance();
    Date currentDate = new Date();
    Map<String, String> header = new HashMap<String, String>();
    header.put("topic", "PageView");

    List<String> bodies = Lists.newArrayList();

    // 将测试的事件推入到通道中
    for (i = 1; i <= (rollCount * 10) / batchSize; i++) {
      Transaction txn = channel.getTransaction();
      txn.begin();
      for (j = 1; j <= batchSize; j++) {
        header.put("timestamp", String.valueOf(currentDate.getTime()));
        Event event = new SimpleEvent();
        eventDate.clear();
        eventDate.set(2014, i, i, i, 0);
        String body = "Test." + i + "." + j;

        event.setHeaders(header);
        event.setBody(body.getBytes());
        bodies.add(body);
        channel.put(event);
        totalEvents++;
      }
      txn.commit();
      txn.close();

      // execute sink to process the events
      sink.process();
    }
    sink.stop();

    FileStatus[] dirStat = fs.listStatus(dirPath);
    Path fList[] = FileUtil.stat2Paths(dirStat);

    long expectedFiles = totalEvents / rollCount;
    if (totalEvents % rollCount > 0) {
      expectedFiles++;
    }

    Assert.assertEquals(
        "num files wrong, found: " + Lists.newArrayList(fList), expectedFiles, fList.length);
    // 检查所有写入文件的内容
    verifyOutputTextFiles(fs, conf, dirPath.toUri().getPath(), fileName, bodies);
  }