public InvCatalog getDirCatalog( File directory, String filterPattern, boolean sortInIncreasingOrder, String addIdBase, boolean addDatasetSize, String dsNameMatchPattern, String startTimeSubstitutionPattern, String duration) { CrawlableDataset catalogCrDs; try { catalogCrDs = CrawlableDatasetFactory.createCrawlableDataset(directory.getAbsolutePath(), null, null); } catch (IOException e) { // @todo Should throw an IOException! throw new IllegalArgumentException("IOException while creating dataset: " + e.getMessage()); } catch (ClassNotFoundException e) { throw new IllegalArgumentException("Did not find class: " + e.getMessage()); } catch (NoSuchMethodException e) { throw new IllegalArgumentException( "Required constructor not found in class: " + e.getMessage()); } catch (IllegalAccessException e) { throw new IllegalArgumentException( "Did not have necessary access to class: " + e.getMessage()); } catch (InvocationTargetException e) { throw new IllegalArgumentException( "Could not invoke required method in class: " + e.getMessage()); } catch (InstantiationException e) { throw new IllegalArgumentException("Could not instatiate class: " + e.getMessage()); } if (!catalogCrDs.isCollection()) throw new IllegalArgumentException( "catalog directory is not a directory <" + serviceBaseUrlDir.getAbsolutePath() + ">."); return getDirCatalog( catalogCrDs, filterPattern, sortInIncreasingOrder, addIdBase, addDatasetSize, dsNameMatchPattern, startTimeSubstitutionPattern, duration); }
/**
 * Return the dataset found at the given path relative to this collection dataset.
 *
 * <p>The first path segment is looked up in this dataset's children map. If no child is
 * registered under that name, a new MockCrawlableDataset is created for it and setExists(false)
 * is called on it. For a single-segment path that dataset is returned; otherwise the remaining
 * segments are resolved by calling getDescendant() on it.
 *
 * @param relativePath the path, relative to this dataset, of the requested descendant.
 * @return the registered child/descendant, or a placeholder dataset on which setExists(false)
 *     has been called.
 * @throws IllegalStateException if this dataset is not a collection.
 * @throws IllegalArgumentException if CrawlableDatasetUtils.isValidRelativePath() rejects the
 *     segments of the given path.
 */
public CrawlableDataset getDescendant(String relativePath) {
  if (!this.isCollection) throw new IllegalStateException("Dataset not a collection.");

  String[] pathSegments = CrawlableDatasetUtils.getPathSegments(relativePath);
  if (!CrawlableDatasetUtils.isValidRelativePath(pathSegments))
    // Report the offending argument rather than this dataset's own path.
    throw new IllegalArgumentException(
        String.format("Ill-formed relative path [%s]", relativePath));

  boolean singleLevelPath = pathSegments.length == 1;

  CrawlableDataset child = this.childrenMap.get(pathSegments[0]);
  if (child == null) {
    // NOTE(review): singleLevelPath is passed as the second constructor argument; if that
    // argument is the "is collection" flag, intermediate placeholders of a multi-segment path
    // are created as non-collections and the recursive call below would throw
    // IllegalStateException - confirm against the constructor before changing.
    MockCrawlableDataset placeholder =
        new MockCrawlableDataset(this.getPath() + "/" + pathSegments[0], singleLevelPath);
    placeholder.setExists(false);
    child = placeholder;
  }

  if (singleLevelPath) return child;
  return child.getDescendant(CrawlableDatasetUtils.stepDownRelativePath(pathSegments));
}
/**
 * Construct a DirectoryScanner given information about the data server to be cataloged.
 *
 * <p>A CrawlableDataset for the base URL directory is created through
 * CrawlableDatasetFactory.createCrawlableDataset() (using the directory's absolute path) and
 * must report itself as a collection.
 *
 * @param service - the service for the datasets found in the directory scanned.
 * @param serviceTitle - a title for the service (used as the title of the top-level dataset).
 * @param serviceBaseUrlDir - the local directory to which the service's base URL references.
 * @param prefixPath - path name to append to the urlPath of resulting datasets, can be null.
 * @param createCatalogRefs - if true, generate a catalogRef for each directory, otherwise,
 *     recurse into directories.
 * @throws IllegalArgumentException if the CrawlableDataset for the base URL directory cannot be
 *     created or if that dataset is not a collection.
 */
public DirectoryScanner(
    InvService service,
    String serviceTitle,
    File serviceBaseUrlDir,
    String prefixPath,
    boolean createCatalogRefs) {
  this.service = service;
  this.serviceTitle = serviceTitle;
  this.serviceBaseUrlDir = serviceBaseUrlDir;

  // Each failure is rethrown as IllegalArgumentException with the original exception kept as
  // the cause so that the full stack trace remains available to callers.
  try {
    collectionCrDs =
        CrawlableDatasetFactory.createCrawlableDataset(
            serviceBaseUrlDir.getAbsolutePath(), null, null);
  } catch (IOException e) {
    // @todo Should throw an IOException!
    throw new IllegalArgumentException(
        "IOException while creating dataset: " + e.getMessage(), e);
  } catch (ClassNotFoundException e) {
    throw new IllegalArgumentException("Did not find class: " + e.getMessage(), e);
  } catch (NoSuchMethodException e) {
    throw new IllegalArgumentException(
        "Required constructor not found in class: " + e.getMessage(), e);
  } catch (IllegalAccessException e) {
    throw new IllegalArgumentException(
        "Did not have necessary access to class: " + e.getMessage(), e);
  } catch (InvocationTargetException e) {
    throw new IllegalArgumentException(
        "Could not invoke required method in class: " + e.getMessage(), e);
  } catch (InstantiationException e) {
    throw new IllegalArgumentException("Could not instatiate class: " + e.getMessage(), e);
  }

  if (!collectionCrDs.isCollection()) {
    throw new IllegalArgumentException(
        "Base URL directory is not a directory <" + serviceBaseUrlDir.getAbsolutePath() + ">.");
  }

  this.prefixPath = prefixPath;
  this.createCatalogRefs = createCatalogRefs;
}
/** Test ... */ public void testOne() { createFiles(); // ******** DO TEST STUFF ********** /* <filter logicalComp="OR"> <!-- Only grib1 files that are older than 60 seconds --> <filter logicalComp="AND"> <filter> <include wildcard="*.grib1"/> </filter> <filter lastModifiedLimit="60000" /> </filter> <!-- Only nc files that are less then 60 seconds old --> <filter logicalComp="AND"> <filter> <include wildcard="*.nc"/> </filter> <filter logicalComp="NOT"> <filter lastModifiedLimit="60000"/> </filter> </filter> </filter> */ CrawlableDatasetFilter includeGribFilter = new MultiSelectorFilter( new MultiSelectorFilter.Selector( new WildcardMatchOnNameFilter("*.grib1"), true, true, false)); CrawlableDatasetFilter lastModAtLeast4MinPastFilter = new LastModifiedLimitFilter(240000); CrawlableDatasetFilter oldGribFilter = LogicalFilterComposer.getAndFilter(includeGribFilter, lastModAtLeast4MinPastFilter); CrawlableDatasetFilter includeNcFilter = new MultiSelectorFilter( new MultiSelectorFilter.Selector( new WildcardMatchOnNameFilter("*.nc"), true, true, false)); CrawlableDatasetFilter newNcFilter = LogicalFilterComposer.getAndFilter( includeNcFilter, LogicalFilterComposer.getNotFilter(lastModAtLeast4MinPastFilter)); CrawlableDatasetFilter oldGribOrNewNcFilter = LogicalFilterComposer.getOrFilter(oldGribFilter, newNcFilter); CrawlableDataset tmpDirCrDs = new CrawlableDatasetFile(tmpDir); List crDsList = null; try { crDsList = tmpDirCrDs.listDatasets(); } catch (IOException e) { assertTrue("I/O problem getting contained dataset list.", false); deleteFiles(); return; } for (Iterator it = crDsList.iterator(); it.hasNext(); ) { CrawlableDataset curCrDs = (CrawlableDataset) it.next(); if (oldGribOrNewNcFilter.accept(curCrDs)) { if (!curCrDs.getName().equals("old.grib1") && !curCrDs.getName().equals("new.nc")) { assertTrue("Matched wrong file <" + curCrDs.getPath() + ">.", false); deleteFiles(); return; } } } // ******** DO TEST STUFF - END ********** deleteFiles(); }
public InvCatalog getDirCatalog( CrawlableDataset catalogCrDs, String filterPattern, boolean sortInIncreasingOrder, String addIdBase, boolean addDatasetSize, String dsNameMatchPattern, String startTimeSubstitutionPattern, String duration) { // Setup the filter CrawlableDatasetFilter filter = null; if (filterPattern != null) { // Include atomic datasets that match the given filter string. MultiSelectorFilter.Selector selector = new MultiSelectorFilter.Selector( new RegExpMatchOnNameFilter(filterPattern), true, true, false); filter = new MultiSelectorFilter(selector); } else { filter = new RegExpMatchOnNameFilter(".*"); } List enhancerList = null; if (dsNameMatchPattern != null && startTimeSubstitutionPattern != null && duration != null) { enhancerList = new ArrayList(); enhancerList.add( RegExpAndDurationTimeCoverageEnhancer.getInstanceToMatchOnDatasetName( dsNameMatchPattern, startTimeSubstitutionPattern, duration)); } CatalogBuilder catBuilder = new StandardCatalogBuilder( prefixPath, null, collectionCrDs, filter, service, addIdBase, null, null, addDatasetSize, new LexigraphicByNameSorter(sortInIncreasingOrder), null, enhancerList, null, new BooleanCatalogRefExpander(!this.createCatalogRefs)); InvCatalog catalog; try { catalog = catBuilder.generateCatalog(catalogCrDs); } catch (IOException e) { throw new IllegalArgumentException("Could not generate catalog: " + e.getMessage()); } // log.debug( "getDirCatalog(): directory=" + directory + "; filterPattern=" + filterPattern // + "; sortInIncreasingOrder=" + sortInIncreasingOrder + "; addIdBase="+addIdBase+"; // dsNameMatchPattern=" + dsNameMatchPattern + "; startTimeSubstitutionPattern=" + // startTimeSubstitutionPattern + "; duration=" + duration ); // if ( !directory.isDirectory() ) // { // String tmpMsg = "Given directory is not a directory <" + directory.getAbsolutePath() + // ">."; // log.warn( tmpMsg); // throw new IllegalArgumentException( tmpMsg ); // } // DatasetSource dsSource = 
DatasetSource.newDatasetSource( directory.getName(), // DatasetSourceType.LOCAL, // DatasetSourceStructure.DIRECTORY_TREE, // directory.getAbsolutePath(), // this.service ); // dsSource.setPrefixUrlPath( this.prefixPath ); // dsSource.setCreateCatalogRefs( this.createCatalogRefs ); // dsSource.setAddDatasetSize( addDatasetSize ); // if ( filterPattern != null ) // { // DatasetFilter datasetFilter = new DatasetFilter( dsSource, "Filter files on \"" + // filterPattern + "\"", // DatasetFilter.Type.REGULAR_EXPRESSION, // filterPattern ); // datasetFilter.setMatchPatternTarget( "name"); // dsSource.addDatasetFilter( datasetFilter ); // datasetFilter = new DatasetFilter( dsSource, "Allow all dirs", // DatasetFilter.Type.REGULAR_EXPRESSION, // "", true, false, false ); // datasetFilter.setMatchPatternTarget( "name" ); // dsSource.addDatasetFilter( datasetFilter ); // } // dsSource.setDatasetSorter( new DatasetSorter( sortInIncreasingOrder)); // if ( dsNameMatchPattern != null // && startTimeSubstitutionPattern != null // && duration != null) // { // dsSource.addDatasetEnhancer( DatasetEnhancer1.createAddTimeCoverageEnhancer( // dsNameMatchPattern, startTimeSubstitutionPattern, duration)); // } // // if ( addIdBase != null) // { // dsSource.addDatasetEnhancer( DatasetEnhancer1.createAddIdEnhancer( addIdBase)); // } // // InvCatalog cat = null; // try // { // cat = dsSource.fullExpand(); // } // catch ( IOException e ) // { // throw new IllegalArgumentException( "Given directory is not a collection dataset <" + // directory.getAbsolutePath() + ">: " + e.getMessage() ); // } InvDataset topDs = catalog.getDataset(); if (collectionCrDs.getPath().equals(catalogCrDs.getPath()) && this.serviceTitle != null) // if ( topDs.getName().equals( "" ) && this.serviceTitle != null ) { logger.warn( "getDirCatalog(): top dataset name is null, setting to serviceTitle <" + this.serviceTitle + ">"); ((InvDatasetImpl) topDs).setName(this.serviceTitle); } return (catalog); }