private boolean nameDatasetRegExp(InvDatasetImpl dataset) { boolean isMatch; Matcher matcher; // Test for a match on the urlPath if (dataset.getUrlPath() != null) { logger.debug("nameDatasetRegExp(): try naming on urlPath <{}>", dataset.getUrlPath()); matcher = this.regExpPattern.matcher(dataset.getUrlPath()); isMatch = matcher.find(); } else { matcher = this.regExpPattern.matcher(dataset.getName()); isMatch = matcher.find(); } if (isMatch) { // Test for substitution. StringBuffer resultingName = new StringBuffer(); matcher.appendReplacement(resultingName, this.substitutePattern); resultingName.delete(0, matcher.start()); if (resultingName.length() != 0) { logger.debug("nameDatasetRegExp(): Setting name to \"" + resultingName + "\"."); dataset.setName(resultingName.toString()); return true; } else { logger.debug("nameDatasetRegExp(): No name for regEx substitution."); return false; } } if (logger.isDebugEnabled()) logger.debug( "nameDatasetRegExp(): Neither URL <" + dataset.getUrlPath() + "> or name <" + dataset.getName() + "> matched pattern <" + this.matchPattern + "> ."); return false; }
public InvCatalog getDirCatalog( CrawlableDataset catalogCrDs, String filterPattern, boolean sortInIncreasingOrder, String addIdBase, boolean addDatasetSize, String dsNameMatchPattern, String startTimeSubstitutionPattern, String duration) { // Setup the filter CrawlableDatasetFilter filter = null; if (filterPattern != null) { // Include atomic datasets that match the given filter string. MultiSelectorFilter.Selector selector = new MultiSelectorFilter.Selector( new RegExpMatchOnNameFilter(filterPattern), true, true, false); filter = new MultiSelectorFilter(selector); } else { filter = new RegExpMatchOnNameFilter(".*"); } List enhancerList = null; if (dsNameMatchPattern != null && startTimeSubstitutionPattern != null && duration != null) { enhancerList = new ArrayList(); enhancerList.add( RegExpAndDurationTimeCoverageEnhancer.getInstanceToMatchOnDatasetName( dsNameMatchPattern, startTimeSubstitutionPattern, duration)); } CatalogBuilder catBuilder = new StandardCatalogBuilder( prefixPath, null, collectionCrDs, filter, service, addIdBase, null, null, addDatasetSize, new LexigraphicByNameSorter(sortInIncreasingOrder), null, enhancerList, null, new BooleanCatalogRefExpander(!this.createCatalogRefs)); InvCatalog catalog; try { catalog = catBuilder.generateCatalog(catalogCrDs); } catch (IOException e) { throw new IllegalArgumentException("Could not generate catalog: " + e.getMessage()); } // log.debug( "getDirCatalog(): directory=" + directory + "; filterPattern=" + filterPattern // + "; sortInIncreasingOrder=" + sortInIncreasingOrder + "; addIdBase="+addIdBase+"; // dsNameMatchPattern=" + dsNameMatchPattern + "; startTimeSubstitutionPattern=" + // startTimeSubstitutionPattern + "; duration=" + duration ); // if ( !directory.isDirectory() ) // { // String tmpMsg = "Given directory is not a directory <" + directory.getAbsolutePath() + // ">."; // log.warn( tmpMsg); // throw new IllegalArgumentException( tmpMsg ); // } // DatasetSource dsSource = DatasetSource.newDatasetSource( directory.getName(), // DatasetSourceType.LOCAL, // DatasetSourceStructure.DIRECTORY_TREE, // directory.getAbsolutePath(), // this.service ); // dsSource.setPrefixUrlPath( this.prefixPath ); // dsSource.setCreateCatalogRefs( this.createCatalogRefs ); // dsSource.setAddDatasetSize( addDatasetSize ); // if ( filterPattern != null ) // { // DatasetFilter datasetFilter = new DatasetFilter( dsSource, "Filter files on \"" + // filterPattern + "\"", // DatasetFilter.Type.REGULAR_EXPRESSION, // filterPattern ); // datasetFilter.setMatchPatternTarget( "name"); // dsSource.addDatasetFilter( datasetFilter ); // datasetFilter = new DatasetFilter( dsSource, "Allow all dirs", // DatasetFilter.Type.REGULAR_EXPRESSION, // "", true, false, false ); // datasetFilter.setMatchPatternTarget( "name" ); // dsSource.addDatasetFilter( datasetFilter ); // } // dsSource.setDatasetSorter( new DatasetSorter( sortInIncreasingOrder)); // if ( dsNameMatchPattern != null // && startTimeSubstitutionPattern != null // && duration != null) // { // dsSource.addDatasetEnhancer( DatasetEnhancer1.createAddTimeCoverageEnhancer( // dsNameMatchPattern, startTimeSubstitutionPattern, duration)); // } // // if ( addIdBase != null) // { // dsSource.addDatasetEnhancer( DatasetEnhancer1.createAddIdEnhancer( addIdBase)); // } // // InvCatalog cat = null; // try // { // cat = dsSource.fullExpand(); // } // catch ( IOException e ) // { // throw new IllegalArgumentException( "Given directory is not a collection dataset <" + // directory.getAbsolutePath() + ">: " + e.getMessage() ); // } InvDataset topDs = catalog.getDataset(); if (collectionCrDs.getPath().equals(catalogCrDs.getPath()) && this.serviceTitle != null) // if ( topDs.getName().equals( "" ) && this.serviceTitle != null ) { logger.warn( "getDirCatalog(): top dataset name is null, setting to serviceTitle <" + this.serviceTitle + ">"); ((InvDatasetImpl) topDs).setName(this.serviceTitle); } return (catalog); }