예제 #1
0
  /**
   * Generate a catalog for the given local directory.
   *
   * <p>The directory's absolute path is used to create a CrawlableDataset, which must be a
   * collection; the remaining arguments are passed through unchanged to the
   * CrawlableDataset-based overload of getDirCatalog.
   *
   * @param directory the directory to catalog.
   * @param filterPattern passed through to the CrawlableDataset-based overload.
   * @param sortInIncreasingOrder passed through to the CrawlableDataset-based overload.
   * @param addIdBase passed through to the CrawlableDataset-based overload.
   * @param addDatasetSize passed through to the CrawlableDataset-based overload.
   * @param dsNameMatchPattern passed through to the CrawlableDataset-based overload.
   * @param startTimeSubstitutionPattern passed through to the CrawlableDataset-based overload.
   * @param duration passed through to the CrawlableDataset-based overload.
   * @return the catalog produced by the CrawlableDataset-based overload.
   * @throws IllegalArgumentException if a CrawlableDataset cannot be created for the directory
   *     (the underlying exception is attached as the cause) or if the resulting dataset is not
   *     a collection.
   */
  public InvCatalog getDirCatalog(
      File directory,
      String filterPattern,
      boolean sortInIncreasingOrder,
      String addIdBase,
      boolean addDatasetSize,
      String dsNameMatchPattern,
      String startTimeSubstitutionPattern,
      String duration) {
    CrawlableDataset catalogCrDs;
    try {
      catalogCrDs =
          CrawlableDatasetFactory.createCrawlableDataset(directory.getAbsolutePath(), null, null);
    } catch (IOException e) {
      // @todo Should throw an IOException!
      throw new IllegalArgumentException(
          "IOException while creating dataset: " + e.getMessage(), e);
    } catch (ClassNotFoundException e) {
      throw new IllegalArgumentException("Did not find class: " + e.getMessage(), e);
    } catch (NoSuchMethodException e) {
      throw new IllegalArgumentException(
          "Required constructor not found in class: " + e.getMessage(), e);
    } catch (IllegalAccessException e) {
      throw new IllegalArgumentException(
          "Did not have necessary access to class: " + e.getMessage(), e);
    } catch (InvocationTargetException e) {
      throw new IllegalArgumentException(
          "Could not invoke required method in class: " + e.getMessage(), e);
    } catch (InstantiationException e) {
      throw new IllegalArgumentException("Could not instatiate class: " + e.getMessage(), e);
    }

    // Report the directory that was actually checked (the argument), not the scanner's
    // base URL directory.
    if (!catalogCrDs.isCollection()) {
      throw new IllegalArgumentException(
          "catalog directory is not a directory <" + directory.getAbsolutePath() + ">.");
    }

    return getDirCatalog(
        catalogCrDs,
        filterPattern,
        sortInIncreasingOrder,
        addIdBase,
        addDatasetSize,
        dsNameMatchPattern,
        startTimeSubstitutionPattern,
        duration);
  }
  /**
   * Return the descendant of this dataset located at the given relative path.
   *
   * <p>The first path segment is looked up in this dataset's children map. If no child is
   * registered under that name, a new MockCrawlableDataset marked as non-existent is used in
   * its place. For a multi-segment path the lookup continues on that child with the remaining
   * segments.
   *
   * @param relativePath the path of the requested descendant, relative to this dataset.
   * @return the descendant dataset; a non-existent mock dataset if a segment has no matching
   *     child.
   * @throws IllegalStateException if this dataset is not a collection.
   * @throws IllegalArgumentException if the relative path is not valid according to
   *     CrawlableDatasetUtils.isValidRelativePath.
   */
  public CrawlableDataset getDescendant(String relativePath) {
    if (!this.isCollection) {
      throw new IllegalStateException("Dataset not a collection.");
    }
    String[] pathSegments = CrawlableDatasetUtils.getPathSegments(relativePath);
    if (!CrawlableDatasetUtils.isValidRelativePath(pathSegments)) {
      // Report the offending argument rather than this dataset's own path.
      throw new IllegalArgumentException(
          String.format("Ill-formed relative path [%s]", relativePath));
    }

    boolean singleLevelPath = pathSegments.length == 1;

    CrawlableDataset child = this.childrenMap.get(pathSegments[0]);
    if (child == null) {
      // No such child: stand in a placeholder that reports itself as non-existent.
      // NOTE(review): the second constructor argument is singleLevelPath; if it means
      // "isCollection", the recursive call below would fail for multi-segment paths — confirm.
      MockCrawlableDataset missing =
          new MockCrawlableDataset(this.getPath() + "/" + pathSegments[0], singleLevelPath);
      missing.setExists(false);
      child = missing;
    }

    if (singleLevelPath) {
      return child;
    }
    return child.getDescendant(CrawlableDatasetUtils.stepDownRelativePath(pathSegments));
  }
예제 #3
0
  /**
   * Construct a DirectoryScanner given information about the data server to be cataloged.
   *
   * <p>A CrawlableDataset is created from the absolute path of the base URL directory and must
   * be a collection.
   *
   * @param service - the service for the datasets found in the directory scanned.
   * @param serviceTitle - a title for the service (used as the title of the top-level dataset).
   * @param serviceBaseUrlDir - the local directory to which the service's base URL references.
   * @param prefixPath - path name to append to the urlPath of resulting datasets, can be null.
   * @param createCatalogRefs - if true, generate a catalogRef for each directory, otherwise,
   *     recurse into directories.
   * @throws IllegalArgumentException if a CrawlableDataset cannot be created for the base URL
   *     directory (the underlying exception is attached as the cause) or if the base URL
   *     directory is not a directory.
   */
  public DirectoryScanner(
      InvService service,
      String serviceTitle,
      File serviceBaseUrlDir,
      String prefixPath,
      boolean createCatalogRefs) {
    this.service = service;
    this.serviceTitle = serviceTitle;
    this.serviceBaseUrlDir = serviceBaseUrlDir;
    try {
      collectionCrDs =
          CrawlableDatasetFactory.createCrawlableDataset(
              serviceBaseUrlDir.getAbsolutePath(), null, null);
    } catch (IOException e) {
      // @todo Should throw an IOException!
      throw new IllegalArgumentException(
          "IOException while creating dataset: " + e.getMessage(), e);
    } catch (ClassNotFoundException e) {
      throw new IllegalArgumentException("Did not find class: " + e.getMessage(), e);
    } catch (NoSuchMethodException e) {
      throw new IllegalArgumentException(
          "Required constructor not found in class: " + e.getMessage(), e);
    } catch (IllegalAccessException e) {
      throw new IllegalArgumentException(
          "Did not have necessary access to class: " + e.getMessage(), e);
    } catch (InvocationTargetException e) {
      throw new IllegalArgumentException(
          "Could not invoke required method in class: " + e.getMessage(), e);
    } catch (InstantiationException e) {
      throw new IllegalArgumentException("Could not instatiate class: " + e.getMessage(), e);
    }
    if (!collectionCrDs.isCollection()) {
      throw new IllegalArgumentException(
          "Base URL directory is not a directory <" + serviceBaseUrlDir.getAbsolutePath() + ">.");
    }

    this.prefixPath = prefixPath;
    this.createCatalogRefs = createCatalogRefs;
  }
예제 #4
0
  /**
   * Check that a composed "old GRIB1 OR new netCDF" filter does not accept any dataset listed
   * under tmpDir other than "old.grib1" and "new.nc".
   *
   * <p>The files created by createFiles() are always removed by deleteFiles(), even when an
   * assertion fails or an unexpected exception is thrown.
   */
  public void testOne() {
    createFiles();
    try {
      // ******** DO TEST STUFF **********
      /*
       The filter built below corresponds to a configuration of this shape (the limit used in
       this test is 240000, i.e. 4 minutes going by the variable name):

       <filter logicalComp="OR">
         <!-- Only grib1 files that are older than the limit -->
         <filter logicalComp="AND">
           <filter>
             <include wildcard="*.grib1"/>
           </filter>
           <filter lastModifiedLimit="240000"  />
         </filter>
         <!-- Only nc files that are newer than the limit -->
         <filter logicalComp="AND">
           <filter>
             <include wildcard="*.nc"/>
           </filter>
           <filter logicalComp="NOT">
             <filter lastModifiedLimit="240000"/>
           </filter>
         </filter>
       </filter>
      */
      CrawlableDatasetFilter includeGribFilter =
          new MultiSelectorFilter(
              new MultiSelectorFilter.Selector(
                  new WildcardMatchOnNameFilter("*.grib1"), true, true, false));
      CrawlableDatasetFilter lastModAtLeast4MinPastFilter = new LastModifiedLimitFilter(240000);
      CrawlableDatasetFilter oldGribFilter =
          LogicalFilterComposer.getAndFilter(includeGribFilter, lastModAtLeast4MinPastFilter);

      CrawlableDatasetFilter includeNcFilter =
          new MultiSelectorFilter(
              new MultiSelectorFilter.Selector(
                  new WildcardMatchOnNameFilter("*.nc"), true, true, false));
      CrawlableDatasetFilter newNcFilter =
          LogicalFilterComposer.getAndFilter(
              includeNcFilter, LogicalFilterComposer.getNotFilter(lastModAtLeast4MinPastFilter));

      CrawlableDatasetFilter oldGribOrNewNcFilter =
          LogicalFilterComposer.getOrFilter(oldGribFilter, newNcFilter);

      CrawlableDataset tmpDirCrDs = new CrawlableDatasetFile(tmpDir);
      List crDsList = null;
      try {
        crDsList = tmpDirCrDs.listDatasets();
      } catch (IOException e) {
        assertTrue("I/O problem getting contained dataset list.", false);
        return;
      }

      // NOTE(review): this only checks that nothing unexpected is accepted; it does not verify
      // that "old.grib1" and "new.nc" themselves are accepted — consider asserting that too.
      for (Iterator it = crDsList.iterator(); it.hasNext(); ) {
        CrawlableDataset curCrDs = (CrawlableDataset) it.next();
        if (oldGribOrNewNcFilter.accept(curCrDs)) {
          if (!curCrDs.getName().equals("old.grib1") && !curCrDs.getName().equals("new.nc")) {
            assertTrue("Matched wrong file <" + curCrDs.getPath() + ">.", false);
            return;
          }
        }
      }
      // ******** DO TEST STUFF - END **********
    } finally {
      // Clean up on every exit path; a failing assertion throws before any statement that
      // follows it, so cleanup placed after the assertion would be skipped.
      deleteFiles();
    }
  }
예제 #5
0
  /**
   * Generate a catalog for the given CrawlableDataset using a StandardCatalogBuilder configured
   * from this scanner's state and the given options.
   *
   * <p>If the given dataset has the same path as this scanner's collection dataset and a
   * service title is set, the catalog's top-level dataset is renamed to the service title.
   *
   * @param catalogCrDs the dataset for which the catalog is generated.
   * @param filterPattern regular expression matched against dataset names; if null, a filter
   *     matching every name (".*") is used.
   * @param sortInIncreasingOrder passed to the LexigraphicByNameSorter used by the builder.
   * @param addIdBase passed to the catalog builder.
   * @param addDatasetSize passed to the catalog builder.
   * @param dsNameMatchPattern dataset name match pattern for the time coverage enhancer.
   * @param startTimeSubstitutionPattern start time substitution pattern for the time coverage
   *     enhancer.
   * @param duration duration for the time coverage enhancer; the enhancer is only added when
   *     dsNameMatchPattern, startTimeSubstitutionPattern and duration are all non-null.
   * @return the generated catalog.
   * @throws IllegalArgumentException if catalog generation fails with an IOException (attached
   *     as the cause).
   */
  public InvCatalog getDirCatalog(
      CrawlableDataset catalogCrDs,
      String filterPattern,
      boolean sortInIncreasingOrder,
      String addIdBase,
      boolean addDatasetSize,
      String dsNameMatchPattern,
      String startTimeSubstitutionPattern,
      String duration) {

    // Setup the filter
    CrawlableDatasetFilter filter;
    if (filterPattern != null) {
      // Include atomic datasets that match the given filter string.
      MultiSelectorFilter.Selector selector =
          new MultiSelectorFilter.Selector(
              new RegExpMatchOnNameFilter(filterPattern), true, true, false);
      filter = new MultiSelectorFilter(selector);
    } else {
      filter = new RegExpMatchOnNameFilter(".*");
    }

    // The time coverage enhancer needs all three of its inputs; otherwise no enhancers are used.
    List enhancerList = null;
    if (dsNameMatchPattern != null && startTimeSubstitutionPattern != null && duration != null) {
      enhancerList = new ArrayList();
      enhancerList.add(
          RegExpAndDurationTimeCoverageEnhancer.getInstanceToMatchOnDatasetName(
              dsNameMatchPattern, startTimeSubstitutionPattern, duration));
    }

    CatalogBuilder catBuilder =
        new StandardCatalogBuilder(
            prefixPath,
            null,
            collectionCrDs,
            filter,
            service,
            addIdBase,
            null,
            null,
            addDatasetSize,
            new LexigraphicByNameSorter(sortInIncreasingOrder),
            null,
            enhancerList,
            null,
            new BooleanCatalogRefExpander(!this.createCatalogRefs));

    InvCatalog catalog;
    try {
      catalog = catBuilder.generateCatalog(catalogCrDs);
    } catch (IOException e) {
      throw new IllegalArgumentException("Could not generate catalog: " + e.getMessage(), e);
    }

    InvDataset topDs = catalog.getDataset();
    if (collectionCrDs.getPath().equals(catalogCrDs.getPath()) && this.serviceTitle != null) {
      // NOTE(review): the message text talks about a null name, but the condition checked is
      // that the catalog dataset is this scanner's collection dataset — consider rewording.
      logger.warn(
          "getDirCatalog(): top dataset name is null, setting to serviceTitle <"
              + this.serviceTitle
              + ">");
      ((InvDatasetImpl) topDs).setName(this.serviceTitle);
    }

    return catalog;
  }