/*
   * When testing no-pdf-check basic XML parsing, you will get partial MD records
   * depending on whether the info comes from dataset.xml or from main.xml
   */
  private void validateDatasetMetadataRecord(ArticleMetadata am) {
    log.debug3("valideDatasetMetadatRecord");
    String doi_val = am.get(MetadataField.FIELD_DOI);
    assertEquals(common_issn, am.get(MetadataField.FIELD_ISSN));

    log.debug3("doi val is: " + doi_val);
    // The dataset doesn't set this value, it'll fail over the main.xml value
    if (doi_val.equals("10.1016/S0140-1111(14)61865-1")) {
      assertEquals(null, am.get(MetadataField.FIELD_DATE));
    } else {
      assertEquals(dateMap.get(doi_val), am.get(MetadataField.FIELD_DATE));
    }
    assertEquals(pubTitleMap.get(doi_val), am.get(MetadataField.FIELD_PUBLICATION_TITLE));
  }
  /*
   * You will have to tell it the DOI and the schema because those normally come from dataset
   */
  private void validateSingleMainMetadataRecord(ArticleMetadata am, String doi_val, String schema) {
    log.debug3("valideSingleMainMetadatRecord");
    if ("simple-article".equals(schema)) {
      assertEquals(common_simple_article_title, am.get(MetadataField.FIELD_ARTICLE_TITLE));
    } else {
      assertEquals(common_article_title, am.get(MetadataField.FIELD_ARTICLE_TITLE));
    }

    log.debug3("doi val is: " + doi_val);
    assertEquals(authorMap.get(doi_val), am.getList(MetadataField.FIELD_AUTHOR));
    assertEquals(volMap.get(doi_val), am.get(MetadataField.FIELD_VOLUME));
    assertEquals(issueMap.get(doi_val), am.get(MetadataField.FIELD_ISSUE));
    assertEquals("Comment", am.getRaw(ElsevierMainDTD5XmlSchemaHelper.common_dochead));
    assertEquals(doi_val, am.getRaw(ElsevierMainDTD5XmlSchemaHelper.common_doi));
    assertEquals("2014", am.getRaw(ElsevierMainDTD5XmlSchemaHelper.common_copyright));
  }
  /*
   * When testing a complete extraction out of the tarset, the MD record will be completely filled in
   * and pdf-existence will get established
   */
  private void validateCompleteMetadataRecord(ArticleMetadata am) {
    log.debug3("valideCompleteMetadatRecord");
    String doi_val = am.get(MetadataField.FIELD_DOI);
    /* make sure we can pick up both types of xml article data */
    log.debug3("doi val is: " + doi_val);

    if ("JA 5.2.0 SIMPLE-ARTICLE"
        .equals(am.getRaw(ElsevierDatasetXmlSchemaHelper.dataset_dtd_metadata))) {
      log.debug3("simple-article");
      assertEquals(common_simple_article_title, am.get(MetadataField.FIELD_ARTICLE_TITLE));
    } else {
      assertEquals(common_article_title, am.get(MetadataField.FIELD_ARTICLE_TITLE));
    }
    assertEquals(common_issn, am.get(MetadataField.FIELD_ISSN));
    assertEquals(authorMap.get(doi_val), am.getList(MetadataField.FIELD_AUTHOR));
    assertEquals(dateMap.get(doi_val), am.get(MetadataField.FIELD_DATE));
    assertEquals(accessUrlMap.get(doi_val), am.get(MetadataField.FIELD_ACCESS_URL));
    assertEquals(volMap.get(doi_val), am.get(MetadataField.FIELD_VOLUME));
    assertEquals(issueMap.get(doi_val), am.get(MetadataField.FIELD_ISSUE));
    assertEquals(pubTitleMap.get(doi_val), am.get(MetadataField.FIELD_PUBLICATION_TITLE));
    assertEquals("Elsevier", am.get(MetadataField.FIELD_PROVIDER));
    assertEquals("Elsevier", am.get(MetadataField.FIELD_PUBLISHER));
    log.debug3(am.ppString(2));
  }
Пример #4
0
 protected void initMimeMap() throws PluginException.InvalidDefinition {
   for (Iterator iter = definitionMap.entrySet().iterator(); iter.hasNext(); ) {
     Map.Entry ent = (Map.Entry) iter.next();
     String key = (String) ent.getKey();
     Object val = ent.getValue();
     if (key.endsWith(DefinableArchivalUnit.SUFFIX_LINK_EXTRACTOR_FACTORY)) {
       String mime = stripSuffix(key, DefinableArchivalUnit.SUFFIX_LINK_EXTRACTOR_FACTORY);
       if (val instanceof String) {
         String factName = (String) val;
         log.debug(mime + " link extractor: " + factName);
         MimeTypeInfo.Mutable mti = mimeMap.modifyMimeTypeInfo(mime);
         LinkExtractorFactory fact =
             (LinkExtractorFactory) newAuxClass(factName, LinkExtractorFactory.class);
         mti.setLinkExtractorFactory(fact);
       }
     } else if (key.endsWith(DefinableArchivalUnit.SUFFIX_CRAWL_FILTER_FACTORY)) {
       // XXX This clause must precede the one for SUFFIX_HASH_FILTER_FACTORY
       // XXX unless/until that key is changed to not be a terminal substring
       // XXX of this one
       String mime = stripSuffix(key, DefinableArchivalUnit.SUFFIX_CRAWL_FILTER_FACTORY);
       if (val instanceof String) {
         String factName = (String) val;
         log.debug(mime + " crawl filter: " + factName);
         MimeTypeInfo.Mutable mti = mimeMap.modifyMimeTypeInfo(mime);
         FilterFactory fact = (FilterFactory) newAuxClass(factName, FilterFactory.class);
         mti.setCrawlFilterFactory(fact);
       }
     } else if (key.endsWith(DefinableArchivalUnit.SUFFIX_HASH_FILTER_FACTORY)) {
       String mime = stripSuffix(key, DefinableArchivalUnit.SUFFIX_HASH_FILTER_FACTORY);
       if (val instanceof String) {
         String factName = (String) val;
         log.debug(mime + " filter: " + factName);
         MimeTypeInfo.Mutable mti = mimeMap.modifyMimeTypeInfo(mime);
         FilterFactory fact = (FilterFactory) newAuxClass(factName, FilterFactory.class);
         mti.setHashFilterFactory(fact);
       }
     } else if (key.endsWith(DefinableArchivalUnit.SUFFIX_FETCH_RATE_LIMIT)) {
       String mime = stripSuffix(key, DefinableArchivalUnit.SUFFIX_FETCH_RATE_LIMIT);
       if (val instanceof String) {
         String rate = (String) val;
         log.debug(mime + " fetch rate: " + rate);
         MimeTypeInfo.Mutable mti = mimeMap.modifyMimeTypeInfo(mime);
         RateLimiter limit = mti.getFetchRateLimiter();
         if (limit != null) {
           limit.setRate(rate);
         } else {
           mti.setFetchRateLimiter(new RateLimiter(rate));
         }
       }
     } else if (key.endsWith(DefinableArchivalUnit.SUFFIX_LINK_REWRITER_FACTORY)) {
       String mime = stripSuffix(key, DefinableArchivalUnit.SUFFIX_LINK_REWRITER_FACTORY);
       String factName = (String) val;
       log.debug(mime + " link rewriter: " + factName);
       MimeTypeInfo.Mutable mti = mimeMap.modifyMimeTypeInfo(mime);
       LinkRewriterFactory fact =
           (LinkRewriterFactory) newAuxClass(factName, LinkRewriterFactory.class);
       mti.setLinkRewriterFactory(fact);
     } else if (key.endsWith(DefinableArchivalUnit.SUFFIX_METADATA_EXTRACTOR_FACTORY_MAP)) {
       String mime = stripSuffix(key, DefinableArchivalUnit.SUFFIX_METADATA_EXTRACTOR_FACTORY_MAP);
       Map factNameMap = (Map) val;
       Map factClassMap = new HashMap();
       MimeTypeInfo.Mutable mti = mimeMap.modifyMimeTypeInfo(mime);
       for (Iterator it = factNameMap.keySet().iterator(); it.hasNext(); ) {
         String mdTypes = (String) it.next();
         String factName = (String) factNameMap.get(mdTypes);
         log.debug(mime + " (" + mdTypes + ") metadata extractor: " + factName);
         for (String mdType : (List<String>) StringUtil.breakAt(mdTypes, ";")) {
           setMdTypeFact(factClassMap, mdType, factName);
         }
       }
       mti.setFileMetadataExtractorFactoryMap(factClassMap);
     }
   }
 }
Пример #5
0
 public String getFeatureVersion(Plugin.Feature feat) {
   if (featureVersion == null) {
     return null;
   }
   return featureVersion.get(feat);
 }