예제 #1
0
  /**
   * Tries to name the given dataset using this namer's regular expression and substitution
   * pattern.
   *
   * <p>The regular expression is searched for in the dataset's URL path when one is set; only when
   * the URL path is {@code null} is the dataset's current name used instead. The name is not
   * consulted as a fallback when a non-null URL path fails to match. On a match, the substitution
   * pattern is expanded against the first match and, if the expansion is non-empty, it becomes the
   * dataset's new name.
   *
   * @param dataset the dataset to name; its name is replaced when naming succeeds
   * @return {@code true} if a new name was set on the dataset; {@code false} if there was nothing
   *     to match against, the pattern did not match, or the substitution produced an empty string
   */
  private boolean nameDatasetRegExp(InvDatasetImpl dataset) {
    String urlPath = dataset.getUrlPath();
    String target;
    if (urlPath != null) {
      logger.debug("nameDatasetRegExp(): try naming on urlPath <{}>", urlPath);
      target = urlPath;
    } else {
      target = dataset.getName();
    }

    // Pattern.matcher(null) throws NullPointerException, so a dataset with neither a URL path
    // nor a name simply cannot be named by this method.
    if (target == null) {
      logger.debug("nameDatasetRegExp(): Dataset has neither urlPath nor name to match against.");
      return false;
    }

    Matcher matcher = this.regExpPattern.matcher(target);
    if (!matcher.find()) {
      logger.debug(
          "nameDatasetRegExp(): Neither URL <{}> or name <{}> matched pattern <{}> .",
          urlPath,
          dataset.getName(),
          this.matchPattern);
      return false;
    }

    // appendReplacement() copies the input text that precedes the match and then the expanded
    // substitution; dropping the first matcher.start() characters leaves only the substitution.
    StringBuffer resultingName = new StringBuffer();
    matcher.appendReplacement(resultingName, this.substitutePattern);
    resultingName.delete(0, matcher.start());

    if (resultingName.length() == 0) {
      logger.debug("nameDatasetRegExp(): No name for regEx substitution.");
      return false;
    }

    logger.debug("nameDatasetRegExp(): Setting name to \"{}\".", resultingName);
    dataset.setName(resultingName.toString());
    return true;
  }
예제 #2
0
  /**
   * Generates a catalog for the given dataset using a {@code StandardCatalogBuilder} configured
   * from this scanner's prefix path, collection dataset and service.
   *
   * @param catalogCrDs the dataset for which the catalog is generated
   * @param filterPattern regular expression matched against dataset names to select datasets; if
   *     {@code null}, a filter matching every name ({@code ".*"}) is used
   * @param sortInIncreasingOrder passed to the {@code LexigraphicByNameSorter} used to order
   *     datasets
   * @param addIdBase passed through to the catalog builder
   * @param addDatasetSize passed through to the catalog builder
   * @param dsNameMatchPattern dataset-name match pattern for the time coverage enhancer
   * @param startTimeSubstitutionPattern start-time substitution pattern for the time coverage
   *     enhancer
   * @param duration duration for the time coverage enhancer; the enhancer is only added when this,
   *     {@code dsNameMatchPattern} and {@code startTimeSubstitutionPattern} are all non-null
   * @return the generated catalog
   * @throws IllegalArgumentException if the catalog builder fails with an {@link IOException};
   *     the original exception is attached as the cause
   */
  public InvCatalog getDirCatalog(
      CrawlableDataset catalogCrDs,
      String filterPattern,
      boolean sortInIncreasingOrder,
      String addIdBase,
      boolean addDatasetSize,
      String dsNameMatchPattern,
      String startTimeSubstitutionPattern,
      String duration) {

    // Setup the filter
    CrawlableDatasetFilter filter;
    if (filterPattern == null) {
      filter = new RegExpMatchOnNameFilter(".*");
    } else {
      // Include atomic datasets that match the given filter string.
      MultiSelectorFilter.Selector selector =
          new MultiSelectorFilter.Selector(
              new RegExpMatchOnNameFilter(filterPattern), true, true, false);
      filter = new MultiSelectorFilter(selector);
    }

    // The time coverage enhancer needs all three of its settings; otherwise no enhancers are used.
    List enhancerList = null;
    if (dsNameMatchPattern != null && startTimeSubstitutionPattern != null && duration != null) {
      enhancerList = new ArrayList();
      enhancerList.add(
          RegExpAndDurationTimeCoverageEnhancer.getInstanceToMatchOnDatasetName(
              dsNameMatchPattern, startTimeSubstitutionPattern, duration));
    }

    CatalogBuilder catBuilder =
        new StandardCatalogBuilder(
            prefixPath,
            null,
            collectionCrDs,
            filter,
            service,
            addIdBase,
            null,
            null,
            addDatasetSize,
            new LexigraphicByNameSorter(sortInIncreasingOrder),
            null,
            enhancerList,
            null,
            new BooleanCatalogRefExpander(!this.createCatalogRefs));

    InvCatalog catalog;
    try {
      catalog = catBuilder.generateCatalog(catalogCrDs);
    } catch (IOException e) {
      // Keep the IOException as the cause so its stack trace is not lost.
      throw new IllegalArgumentException("Could not generate catalog: " + e.getMessage(), e);
    }

    // When the catalog is generated for the collection dataset itself, name the top dataset after
    // the service title (if one is configured).
    // NOTE(review): the log message says the name "is null", but the condition compares paths and
    // never inspects the name - confirm whether the wording or the warn level should change.
    InvDataset topDs = catalog.getDataset();
    if (collectionCrDs.getPath().equals(catalogCrDs.getPath()) && this.serviceTitle != null) {
      logger.warn(
          "getDirCatalog(): top dataset name is null, setting to serviceTitle <"
              + this.serviceTitle
              + ">");
      ((InvDatasetImpl) topDs).setName(this.serviceTitle);
    }

    return catalog;
  }