/** * Looks for the dirs to clean. The folder structure is YYYY/MM/DD/Serial so we can use that to * more efficiently find the directories to clean by comparing the cutoff timestamp with the * timestamp from the folder structure. * * @param fc done dir FileContext * @param root folder for completed jobs * @param cutoff The cutoff for the max history age * @return The list of directories for cleaning * @throws IOException */ public static List<FileStatus> getHistoryDirsForCleaning(FileContext fc, Path root, long cutoff) throws IOException { List<FileStatus> fsList = new ArrayList<FileStatus>(); Calendar cCal = Calendar.getInstance(); cCal.setTimeInMillis(cutoff); int cYear = cCal.get(Calendar.YEAR); int cMonth = cCal.get(Calendar.MONTH) + 1; int cDate = cCal.get(Calendar.DATE); RemoteIterator<FileStatus> yearDirIt = fc.listStatus(root); while (yearDirIt.hasNext()) { FileStatus yearDir = yearDirIt.next(); try { int year = Integer.parseInt(yearDir.getPath().getName()); if (year <= cYear) { RemoteIterator<FileStatus> monthDirIt = fc.listStatus(yearDir.getPath()); while (monthDirIt.hasNext()) { FileStatus monthDir = monthDirIt.next(); try { int month = Integer.parseInt(monthDir.getPath().getName()); // If we only checked the month here, then something like 07/2013 // would incorrectly not pass when the cutoff is 06/2014 if (year < cYear || month <= cMonth) { RemoteIterator<FileStatus> dateDirIt = fc.listStatus(monthDir.getPath()); while (dateDirIt.hasNext()) { FileStatus dateDir = dateDirIt.next(); try { int date = Integer.parseInt(dateDir.getPath().getName()); // If we only checked the date here, then something like // 07/21/2013 would incorrectly not pass when the cutoff is // 08/20/2013 or 07/20/2012 if (year < cYear || month < cMonth || date <= cDate) { fsList.addAll(remoteIterToList(fc.listStatus(dateDir.getPath()))); } } catch (NumberFormatException nfe) { // the directory didn't fit the format we're looking for so // skip the dir } } } } catch 
(NumberFormatException nfe) { // the directory didn't fit the format we're looking for so skip // the dir } } } } catch (NumberFormatException nfe) { // the directory didn't fit the format we're looking for so skip the dir } } return fsList; }
// hasMismatches is just used to return a second value if you want // one. I would have used MutableBoxedBoolean if such had been provided. public static List<FileStatus> localGlobber( FileContext fc, Path root, String tail, PathFilter filter, AtomicBoolean hasFlatFiles) throws IOException { if (tail.equals("")) { return (listFilteredStatus(fc, root, filter)); } if (tail.startsWith("/*")) { Path[] subdirs = filteredStat2Paths(remoteIterToList(fc.listStatus(root)), true, hasFlatFiles); List<List<FileStatus>> subsubdirs = new LinkedList<List<FileStatus>>(); int subsubdirCount = 0; if (subdirs.length == 0) { return new LinkedList<FileStatus>(); } String newTail = tail.substring(2); for (int i = 0; i < subdirs.length; ++i) { subsubdirs.add(localGlobber(fc, subdirs[i], newTail, filter, null)); // subsubdirs.set(i, localGlobber(fc, subdirs[i], newTail, filter, // null)); subsubdirCount += subsubdirs.get(i).size(); } List<FileStatus> result = new LinkedList<FileStatus>(); for (int i = 0; i < subsubdirs.size(); ++i) { result.addAll(subsubdirs.get(i)); } return result; } if (tail.startsWith("/")) { int split = tail.indexOf('/', 1); if (split < 0) { return listFilteredStatus(fc, new Path(root, tail.substring(1)), filter); } else { String thisSegment = tail.substring(1, split); String newTail = tail.substring(split); return localGlobber(fc, new Path(root, thisSegment), newTail, filter, hasFlatFiles); } } IOException e = new IOException("localGlobber: bad tail"); throw e; }
/**
 * Lists the entries directly under {@code path} that are files and are
 * accepted by {@code pathFilter}. The path is qualified against {@code fc}
 * before listing; sub-directories are not descended into.
 *
 * @param path directory to scan
 * @param fc file context used to qualify and list the directory
 * @param pathFilter filter applied to each file's path
 * @return statuses of the accepted files, in listing order
 * @throws IOException if the listing fails
 */
private static List<FileStatus> scanDirectory(Path path, FileContext fc,
    PathFilter pathFilter) throws IOException {
  Path qualifiedDir = fc.makeQualified(path);
  List<FileStatus> accepted = new ArrayList<FileStatus>();
  for (RemoteIterator<FileStatus> entries = fc.listStatus(qualifiedDir);
      entries.hasNext();) {
    FileStatus candidate = entries.next();
    if (!candidate.isFile()) {
      continue;
    }
    if (pathFilter.accept(candidate.getPath())) {
      accepted.add(candidate);
    }
  }
  return accepted;
}
/**
 * Lists the entries of {@code root}, optionally narrowed by a path filter.
 *
 * @param fc file context used for the listing
 * @param root directory to list
 * @param filter filter applied to each entry's path; when {@code null} the
 *     complete listing is returned as is
 * @return every listed status, or only those whose path the filter accepts
 * @throws IOException if the listing fails
 */
private static List<FileStatus> listFilteredStatus(FileContext fc, Path root,
    PathFilter filter) throws IOException {
  List<FileStatus> listing = remoteIterToList(fc.listStatus(root));
  if (filter == null) {
    return listing;
  }

  List<FileStatus> accepted = new LinkedList<FileStatus>();
  for (FileStatus entry : listing) {
    if (!filter.accept(entry.getPath())) {
      continue;
    }
    accepted.add(entry);
  }
  return accepted;
}