/** * @param args * @throws InterruptedException */ public static void main(String[] args) throws InterruptedException { final LinkCheckerOptions options = parseOptions(args); LOG.info("Starting the linkchecker with the given options {}", options); // read files from root-directory Collection<File> files = FileUtils.listFiles(new File(options.getRootFolderPath()), new String[] {"json"}, true); int filesSize = files.size(); if (CollectionUtils.isEmpty(files)) { LOG.error( "The specified rootFolderPath is empty or does not contains any json files. Exiting...."); return; } LOG.info("Found {} files to process.", filesSize); // initialize service instances final RedirectionRulesService redirectionRulesService = new RedirectionRulesService(options); final FileMetadataService fileMetaService = new FileMetadataService(options); final LinkCheckingService linkCheckingService = new LinkCheckingService(redirectionRulesService); final Semaphore semaphore = new Semaphore(filesSize); ExecutorService threadPool = Executors.newFixedThreadPool(NUMBER_OF_THREADS); DefaultDeferredManager deferredManager = new DefaultDeferredManager(threadPool); for (File f : files) { try { final FileMetadata fileMeta = fileMetaService.readFile(f.getPath()); if (fileMeta == null) { throw new LinkCheckingException( "Could not read file " + f.getPath() + " into FileMetadata"); } processFile( fileMeta, semaphore, deferredManager, linkCheckingService, fileMetaService, options); MetricsRegistryHolder.getCounter("FILES[ALL]").inc(); } catch (Exception ex) { MetricsRegistryHolder.getCounter("FILES[MALFORMED]").inc(); LOG.error("Error reading file {}. Skipping...", f.getPath(), ex); } } // block until all finish semaphore.acquire(filesSize); // shutdown the deferredManager and the threadpool. deferredManager.shutdown(); MetricsRegistryHolder.report(); LOG.info("All files processed successfully."); }
public static void processFile( final FileMetadata fileMeta, final Semaphore semaphore, final DefaultDeferredManager deferredManager, final LinkCheckingService linkCheckingService, final FileMetadataService fileMetaService, final LinkCheckerOptions options) throws InterruptedException { // 1. call the checkFileLocations to update the url status in // fileMeta semaphore.acquire(); deferredManager .when( new Callable<FileMetadata>() { public FileMetadata call() { LOG.debug("Start link checking for file: {}", fileMeta.getFilePath()); return linkCheckingService.checkFileLocations(fileMeta); } // 2. when link checking completes try to move/copy the file // to the correct location }) .then( new DoneFilter<FileMetadata, FileMetadata>() { public FileMetadata filterDone(FileMetadata fileMeta) { try { LOG.trace("Successfully checked links for file: {}", fileMeta.getFilePath()); MetricsRegistryHolder.getCounter( fileMeta.isFailed() ? "FILES[ERROR]" : "FILES[SUCCESS]") .inc(); fileMetaService.updateOrCopyFile(fileMeta); } catch (IOException ex) { throw new LinkCheckingException( "Error handling file " + fileMeta.getFilePath() + " on result.", ex); } catch (ParserException ex) { throw new LinkCheckingException( "Error handling file " + fileMeta.getFilePath() + " on result.", ex); } return fileMeta; } // just in case link checking failed, log error and continue }, new FailFilter() { public Throwable filterFail(Throwable ex) { LOG.debug("Failed to check links for file: {}", fileMeta.getFilePath()); return ex; } // 3. when moving/copying completes too just log happiness }) .done( new DoneCallback<FileMetadata>() { public void onDone(FileMetadata fileMeta) { LOG.debug("Successfully processed file: {}", fileMeta.getFilePath()); } // just in case moving / copying failed, log error and // continue }) .fail( new FailCallback() { public void onFail(Throwable ex) { LOG.error("Failed to move/copy result file: {}", fileMeta.getFilePath(), ex); } // and release a permit on the semaphore please :) this is // crusial! }) .always( new AlwaysCallback<FileMetadata>() { public void onAlways(Promise.State state, FileMetadata d, Throwable r) { semaphore.release(); } }); }