Esempio n. 1
0
  /**
   * Command-line entry point of the link checker.
   *
   * <p>Parses the options, collects every {@code json} file below the configured root folder
   * (recursively), reads each one into a {@code FileMetadata} and hands it to {@code processFile}
   * for asynchronous link checking. The method then blocks until every submitted file has been
   * handled, shuts the deferred manager down and reports the collected metrics.
   *
   * <p>A semaphore with one permit per file is used as the completion barrier: {@code processFile}
   * takes one permit for each file it submits and the task's final callback returns it, so
   * re-acquiring all permits only succeeds once no task is outstanding.
   *
   * @param args command-line arguments, handed to {@code parseOptions}
   * @throws InterruptedException if the calling thread is interrupted while submitting a file or
   *     while waiting for the outstanding tasks to finish
   */
  public static void main(String[] args) throws InterruptedException {
    final LinkCheckerOptions options = parseOptions(args);
    LOG.info("Starting the linkchecker with the given options {}", options);

    // read files from root-directory
    Collection<File> files =
        FileUtils.listFiles(new File(options.getRootFolderPath()), new String[] {"json"}, true);

    // Guard first, so the collection is only dereferenced once it is known to be usable.
    if (CollectionUtils.isEmpty(files)) {
      LOG.error(
          "The specified rootFolderPath is empty or does not contains any json files. Exiting....");
      return;
    }
    final int filesSize = files.size();
    LOG.info("Found {} files to process.", filesSize);

    // initialize service instances
    final RedirectionRulesService redirectionRulesService = new RedirectionRulesService(options);
    final FileMetadataService fileMetaService = new FileMetadataService(options);
    final LinkCheckingService linkCheckingService =
        new LinkCheckingService(redirectionRulesService);

    final Semaphore semaphore = new Semaphore(filesSize);
    ExecutorService threadPool = Executors.newFixedThreadPool(NUMBER_OF_THREADS);
    DefaultDeferredManager deferredManager = new DefaultDeferredManager(threadPool);

    try {
      for (File f : files) {
        try {
          final FileMetadata fileMeta = fileMetaService.readFile(f.getPath());
          if (fileMeta == null) {
            throw new LinkCheckingException(
                "Could not read file " + f.getPath() + " into FileMetadata");
          }

          processFile(
              fileMeta, semaphore, deferredManager, linkCheckingService, fileMetaService, options);

          MetricsRegistryHolder.getCounter("FILES[ALL]").inc();

        } catch (InterruptedException ex) {
          // An interrupt is a request to stop, not a broken input file: propagate it instead of
          // counting it as malformed and carrying on with the remaining files.
          throw ex;
        } catch (Exception ex) {
          MetricsRegistryHolder.getCounter("FILES[MALFORMED]").inc();
          LOG.error("Error reading file {}. Skipping...", f.getPath(), ex);
        }
      }

      // block until all finish
      semaphore.acquire(filesSize);
    } finally {
      // shutdown the deferredManager and the threadpool. Done in a finally block because the
      // fixed pool's worker threads are non-daemon and would otherwise keep the JVM alive when
      // this method exits through an exception.
      deferredManager.shutdown();
    }

    MetricsRegistryHolder.report();
    LOG.info("All files processed successfully.");
  }
Esempio n. 2
0
  /**
   * Submits one file for asynchronous link checking and result handling.
   *
   * <p>One permit is taken from {@code semaphore} before the work is submitted (blocking while
   * none is available) and is returned by the final {@code always} callback, whatever the outcome
   * of the chain. If the chain cannot be set up at all, the permit is returned immediately and
   * the exception is rethrown.
   *
   * @param fileMeta metadata of the file whose links are to be checked
   * @param semaphore permit pool used by the caller to wait for outstanding tasks
   * @param deferredManager manager that runs the link check and drives the callback chain
   * @param linkCheckingService service that performs the actual link check
   * @param fileMetaService service used to update or copy the file once it has been checked
   * @param options link checker options (not used by this method)
   * @throws InterruptedException if the calling thread is interrupted while waiting for a permit
   */
  public static void processFile(
      final FileMetadata fileMeta,
      final Semaphore semaphore,
      final DefaultDeferredManager deferredManager,
      final LinkCheckingService linkCheckingService,
      final FileMetadataService fileMetaService,
      final LinkCheckerOptions options)
      throws InterruptedException {
    semaphore.acquire();
    try {
      deferredManager
          // 1. call the checkFileLocations to update the url status in fileMeta
          .when(
              new Callable<FileMetadata>() {
                public FileMetadata call() {
                  LOG.debug("Start link checking for file: {}", fileMeta.getFilePath());
                  return linkCheckingService.checkFileLocations(fileMeta);
                }
              })
          // 2. when link checking completes try to move/copy the file to the correct location;
          //    just in case link checking failed, log error and continue
          .then(
              new DoneFilter<FileMetadata, FileMetadata>() {
                // NOTE(review): depending on the promise library version, an exception thrown
                // from this filter may be swallowed instead of rejecting the chained promise, in
                // which case the always callback below would never run and the permit would
                // leak - confirm against the library in use.
                public FileMetadata filterDone(FileMetadata checked) {
                  try {
                    LOG.trace("Successfully checked links for file: {}", checked.getFilePath());
                    MetricsRegistryHolder.getCounter(
                            checked.isFailed() ? "FILES[ERROR]" : "FILES[SUCCESS]")
                        .inc();
                    fileMetaService.updateOrCopyFile(checked);
                  } catch (IOException ex) {
                    throw new LinkCheckingException(
                        "Error handling file " + checked.getFilePath() + " on result.", ex);
                  } catch (ParserException ex) {
                    throw new LinkCheckingException(
                        "Error handling file " + checked.getFilePath() + " on result.", ex);
                  }

                  return checked;
                }
              },
              new FailFilter() {
                public Throwable filterFail(Throwable ex) {
                  LOG.debug("Failed to check links for file: {}", fileMeta.getFilePath());
                  return ex;
                }
              })
          // 3. when moving/copying completes too just log happiness
          .done(
              new DoneCallback<FileMetadata>() {
                public void onDone(FileMetadata processed) {
                  LOG.debug("Successfully processed file: {}", processed.getFilePath());
                }
              })
          // just in case moving / copying failed, log error and continue
          .fail(
              new FailCallback() {
                public void onFail(Throwable ex) {
                  LOG.error("Failed to move/copy result file: {}", fileMeta.getFilePath(), ex);
                }
              })
          // and release a permit on the semaphore - this is crucial, the caller waits on it
          .always(
              new AlwaysCallback<FileMetadata>() {
                public void onAlways(Promise.State state, FileMetadata d, Throwable r) {
                  semaphore.release();
                }
              });
    } catch (RuntimeException ex) {
      // Setting up the chain failed (e.g. the task was rejected), so the always callback, which
      // is registered last, is not in place to return the permit. Return it here; otherwise a
      // caller waiting for all permits would block forever.
      semaphore.release();
      throw ex;
    }
  }