Пример #1
0
 protected void initFeatureVersions() throws PluginException.InvalidDefinition {
   if (definitionMap.containsKey(KEY_PLUGIN_FEATURE_VERSION_MAP)) {
     Map<Plugin.Feature, String> map = new HashMap<Plugin.Feature, String>();
     Map<String, String> spec =
         (Map<String, String>) definitionMap.getMap(KEY_PLUGIN_FEATURE_VERSION_MAP);
     log.debug2("features: " + spec);
     for (Map.Entry<String, String> ent : spec.entrySet()) {
       try {
         // Prefix version string with feature name to create separate
         // namespace for each feature
         String key = ent.getKey();
         map.put(Plugin.Feature.valueOf(key), key + "_" + ent.getValue());
       } catch (RuntimeException e) {
         log.warning(
             getPluginName()
                 + " set unknown feature: "
                 + ent.getKey()
                 + " to version "
                 + ent.getValue(),
             e);
         throw new PluginException.InvalidDefinition("Unknown feature: " + ent.getKey(), e);
       }
     }
     featureVersion = map;
   } else {
     featureVersion = null;
   }
 }
Пример #2
0
 protected void initAuFeatureMap() {
   if (definitionMap.containsKey(DefinableArchivalUnit.KEY_AU_FEATURE_URL_MAP)) {
     Map<String, ?> featMap = definitionMap.getMap(DefinableArchivalUnit.KEY_AU_FEATURE_URL_MAP);
     for (Map.Entry ent : featMap.entrySet()) {
       Object val = ent.getValue();
       if (val instanceof Map) {
         ent.setValue(MapUtil.expandAlternativeKeyLists((Map) val));
       }
     }
   }
 }
  /*
   * When testing no-pdf-check basic XML parsing, you will get partial MD records
   * depending on whether the info comes from dataset.xml or from main.xml
   */
  private void validateDatasetMetadataRecord(ArticleMetadata am) {
    log.debug3("valideDatasetMetadatRecord");
    String doi_val = am.get(MetadataField.FIELD_DOI);
    assertEquals(common_issn, am.get(MetadataField.FIELD_ISSN));

    log.debug3("doi val is: " + doi_val);
    // The dataset doesn't set this value, it'll fail over the main.xml value
    if (doi_val.equals("10.1016/S0140-1111(14)61865-1")) {
      assertEquals(null, am.get(MetadataField.FIELD_DATE));
    } else {
      assertEquals(dateMap.get(doi_val), am.get(MetadataField.FIELD_DATE));
    }
    assertEquals(pubTitleMap.get(doi_val), am.get(MetadataField.FIELD_PUBLICATION_TITLE));
  }
  /*
   * You will have to tell it the DOI and the schema because those normally come from dataset
   */
  private void validateSingleMainMetadataRecord(ArticleMetadata am, String doi_val, String schema) {
    log.debug3("valideSingleMainMetadatRecord");
    if ("simple-article".equals(schema)) {
      assertEquals(common_simple_article_title, am.get(MetadataField.FIELD_ARTICLE_TITLE));
    } else {
      assertEquals(common_article_title, am.get(MetadataField.FIELD_ARTICLE_TITLE));
    }

    log.debug3("doi val is: " + doi_val);
    assertEquals(authorMap.get(doi_val), am.getList(MetadataField.FIELD_AUTHOR));
    assertEquals(volMap.get(doi_val), am.get(MetadataField.FIELD_VOLUME));
    assertEquals(issueMap.get(doi_val), am.get(MetadataField.FIELD_ISSUE));
    assertEquals("Comment", am.getRaw(ElsevierMainDTD5XmlSchemaHelper.common_dochead));
    assertEquals(doi_val, am.getRaw(ElsevierMainDTD5XmlSchemaHelper.common_doi));
    assertEquals("2014", am.getRaw(ElsevierMainDTD5XmlSchemaHelper.common_copyright));
  }
Пример #5
0
 /** If in testing mode FOO, copy values from FOO_override map, if any, to main map */
 void processOverrides(TypedEntryMap map) {
   String testMode = getTestingMode();
   if (StringUtil.isNullString(testMode)) {
     return;
   }
   Object o = map.getMapElement(testMode + DefinableArchivalUnit.SUFFIX_OVERRIDE);
   if (o == null) {
     return;
   }
   if (o instanceof Map) {
     Map overrideMap = (Map) o;
     for (Map.Entry entry : (Set<Map.Entry>) overrideMap.entrySet()) {
       String key = (String) entry.getKey();
       Object val = entry.getValue();
       log.debug(getDefaultPluginName() + ": Overriding " + key + " with " + val);
       map.setMapElement(key, val);
     }
   }
 }
  /*
   * When testing a complete extraction out of the tarset, the MD record will be completely filled in
   * and pdf-existence will get established
   */
  private void validateCompleteMetadataRecord(ArticleMetadata am) {
    log.debug3("valideCompleteMetadatRecord");
    String doi_val = am.get(MetadataField.FIELD_DOI);
    /* make sure we can pick up both types of xml article data */
    log.debug3("doi val is: " + doi_val);

    if ("JA 5.2.0 SIMPLE-ARTICLE"
        .equals(am.getRaw(ElsevierDatasetXmlSchemaHelper.dataset_dtd_metadata))) {
      log.debug3("simple-article");
      assertEquals(common_simple_article_title, am.get(MetadataField.FIELD_ARTICLE_TITLE));
    } else {
      assertEquals(common_article_title, am.get(MetadataField.FIELD_ARTICLE_TITLE));
    }
    assertEquals(common_issn, am.get(MetadataField.FIELD_ISSN));
    assertEquals(authorMap.get(doi_val), am.getList(MetadataField.FIELD_AUTHOR));
    assertEquals(dateMap.get(doi_val), am.get(MetadataField.FIELD_DATE));
    assertEquals(accessUrlMap.get(doi_val), am.get(MetadataField.FIELD_ACCESS_URL));
    assertEquals(volMap.get(doi_val), am.get(MetadataField.FIELD_VOLUME));
    assertEquals(issueMap.get(doi_val), am.get(MetadataField.FIELD_ISSUE));
    assertEquals(pubTitleMap.get(doi_val), am.get(MetadataField.FIELD_PUBLICATION_TITLE));
    assertEquals("Elsevier", am.get(MetadataField.FIELD_PROVIDER));
    assertEquals("Elsevier", am.get(MetadataField.FIELD_PUBLISHER));
    log.debug3(am.ppString(2));
  }
Пример #7
0
 private void setMdTypeFact(Map factClassMap, String mdType, String factName) {
   log.debug3("Metadata type: " + mdType + " factory " + factName);
   FileMetadataExtractorFactory fact =
       (FileMetadataExtractorFactory) newAuxClass(factName, FileMetadataExtractorFactory.class);
   factClassMap.put(mdType, fact);
 }
Пример #8
0
 protected void initMimeMap() throws PluginException.InvalidDefinition {
   for (Iterator iter = definitionMap.entrySet().iterator(); iter.hasNext(); ) {
     Map.Entry ent = (Map.Entry) iter.next();
     String key = (String) ent.getKey();
     Object val = ent.getValue();
     if (key.endsWith(DefinableArchivalUnit.SUFFIX_LINK_EXTRACTOR_FACTORY)) {
       String mime = stripSuffix(key, DefinableArchivalUnit.SUFFIX_LINK_EXTRACTOR_FACTORY);
       if (val instanceof String) {
         String factName = (String) val;
         log.debug(mime + " link extractor: " + factName);
         MimeTypeInfo.Mutable mti = mimeMap.modifyMimeTypeInfo(mime);
         LinkExtractorFactory fact =
             (LinkExtractorFactory) newAuxClass(factName, LinkExtractorFactory.class);
         mti.setLinkExtractorFactory(fact);
       }
     } else if (key.endsWith(DefinableArchivalUnit.SUFFIX_CRAWL_FILTER_FACTORY)) {
       // XXX This clause must precede the one for SUFFIX_HASH_FILTER_FACTORY
       // XXX unless/until that key is changed to not be a terminal substring
       // XXX of this one
       String mime = stripSuffix(key, DefinableArchivalUnit.SUFFIX_CRAWL_FILTER_FACTORY);
       if (val instanceof String) {
         String factName = (String) val;
         log.debug(mime + " crawl filter: " + factName);
         MimeTypeInfo.Mutable mti = mimeMap.modifyMimeTypeInfo(mime);
         FilterFactory fact = (FilterFactory) newAuxClass(factName, FilterFactory.class);
         mti.setCrawlFilterFactory(fact);
       }
     } else if (key.endsWith(DefinableArchivalUnit.SUFFIX_HASH_FILTER_FACTORY)) {
       String mime = stripSuffix(key, DefinableArchivalUnit.SUFFIX_HASH_FILTER_FACTORY);
       if (val instanceof String) {
         String factName = (String) val;
         log.debug(mime + " filter: " + factName);
         MimeTypeInfo.Mutable mti = mimeMap.modifyMimeTypeInfo(mime);
         FilterFactory fact = (FilterFactory) newAuxClass(factName, FilterFactory.class);
         mti.setHashFilterFactory(fact);
       }
     } else if (key.endsWith(DefinableArchivalUnit.SUFFIX_FETCH_RATE_LIMIT)) {
       String mime = stripSuffix(key, DefinableArchivalUnit.SUFFIX_FETCH_RATE_LIMIT);
       if (val instanceof String) {
         String rate = (String) val;
         log.debug(mime + " fetch rate: " + rate);
         MimeTypeInfo.Mutable mti = mimeMap.modifyMimeTypeInfo(mime);
         RateLimiter limit = mti.getFetchRateLimiter();
         if (limit != null) {
           limit.setRate(rate);
         } else {
           mti.setFetchRateLimiter(new RateLimiter(rate));
         }
       }
     } else if (key.endsWith(DefinableArchivalUnit.SUFFIX_LINK_REWRITER_FACTORY)) {
       String mime = stripSuffix(key, DefinableArchivalUnit.SUFFIX_LINK_REWRITER_FACTORY);
       String factName = (String) val;
       log.debug(mime + " link rewriter: " + factName);
       MimeTypeInfo.Mutable mti = mimeMap.modifyMimeTypeInfo(mime);
       LinkRewriterFactory fact =
           (LinkRewriterFactory) newAuxClass(factName, LinkRewriterFactory.class);
       mti.setLinkRewriterFactory(fact);
     } else if (key.endsWith(DefinableArchivalUnit.SUFFIX_METADATA_EXTRACTOR_FACTORY_MAP)) {
       String mime = stripSuffix(key, DefinableArchivalUnit.SUFFIX_METADATA_EXTRACTOR_FACTORY_MAP);
       Map factNameMap = (Map) val;
       Map factClassMap = new HashMap();
       MimeTypeInfo.Mutable mti = mimeMap.modifyMimeTypeInfo(mime);
       for (Iterator it = factNameMap.keySet().iterator(); it.hasNext(); ) {
         String mdTypes = (String) it.next();
         String factName = (String) factNameMap.get(mdTypes);
         log.debug(mime + " (" + mdTypes + ") metadata extractor: " + factName);
         for (String mdType : (List<String>) StringUtil.breakAt(mdTypes, ";")) {
           setMdTypeFact(factClassMap, mdType, factName);
         }
       }
       mti.setFileMetadataExtractorFactoryMap(factClassMap);
     }
   }
 }
Пример #9
0
 public String getFeatureVersion(Plugin.Feature feat) {
   if (featureVersion == null) {
     return null;
   }
   return featureVersion.get(feat);
 }
  /*
   *  The supporting methods
   */
  private void setUpExpectedTarContent() {
    /* maps the DOIs in the metadata to the expected values */
    log.debug3("setUpExpectedTarContent");
    pubTitleMap = new HashMap<String, String>();
    {
      pubTitleMap.put("10.1016/j.jidx.2014.07.028", "International Journal of XXX");
      pubTitleMap.put("10.1016/j.jidx2.2014.05.013", "Revista");
      pubTitleMap.put("10.1016/S1473-1111(14)70840-0", "The Journal");
      pubTitleMap.put("10.1016/S0140-1111(14)61865-1", "The Other Journal");
      pubTitleMap.put("10.1016/j.foo.2014.08.001", "Foo");
      pubTitleMap.put("10.1016/j.foo.2014.08.123", "Foo");
    }
    ;

    dateMap = new HashMap<String, String>();
    {
      dateMap.put("10.1016/j.jidx.2014.07.028", "2014-07-30");
      dateMap.put("10.1016/j.jidx2.2014.05.013", "2014-07-09");
      dateMap.put("10.1016/S1473-1111(14)70840-0", "2014-09-01");
      dateMap.put("10.1016/S0140-1111(14)61865-1", "2014"); // will get from main.xml as backup
      dateMap.put("10.1016/j.foo.2014.08.001", "2014-08-20");
      dateMap.put("10.1016/j.foo.2014.08.123", "2014-08-20");
    }
    ;

    accessUrlMap = new HashMap<String, String>();
    {
      accessUrlMap.put(
          "10.1016/j.jidx.2014.07.028",
          TAR_A_BASE + SUBDIR + "01420615/v64sC/S0142061514004608/main.pdf");
      accessUrlMap.put(
          "10.1016/j.jidx2.2014.05.013",
          TAR_A_BASE + SUBDIR + "00349356/v61i9/S0034935614001819/main.pdf");
      accessUrlMap.put(
          "10.1016/S1473-1111(14)70840-0",
          TAR_A_BASE + SUBDIR + "14733099/v14i10/S1473309914708400/main.pdf");
      accessUrlMap.put(
          "10.1016/S0140-1111(14)61865-1",
          TAR_B_BASE + SUBDIR + "01406736/v384sS1/S0140673614618651/main.pdf");
      accessUrlMap.put(
          "10.1016/j.foo.2014.08.001",
          TAR_B_BASE + SUBDIR + "00191035/v242sC/S0019103514004151/main.pdf");
      accessUrlMap.put(
          "10.1016/j.foo.2014.08.123",
          TAR_B_BASE + SUBDIR + "00191035/v242sC/S0019103514003856/main.pdf");
    }
    ;

    ArrayList<String> goodAuthors = new ArrayList<String>();
    {
      goodAuthors.add("Writer, Bob");
      goodAuthors.add("Q. Text, Samantha");
    }
    ArrayList<String> simpleAuthors = new ArrayList<String>();
    {
      simpleAuthors.add("Simple, Josh");
    }
    ArrayList<String> extendedAuthors = new ArrayList<String>();
    {
      extendedAuthors.add("Writer, Bob");
      extendedAuthors.add("Q. Text, Samantha");
      extendedAuthors.add("The COLLABORATIVE Investigators");
    }

    authorMap = new HashMap<String, List<String>>();
    {
      authorMap.put("10.1016/j.jidx.2014.07.028", goodAuthors);
      authorMap.put("10.1016/j.jidx2.2014.05.013", goodAuthors);
      authorMap.put("10.1016/S1473-1111(14)70840-0", extendedAuthors);
      authorMap.put("10.1016/S0140-1111(14)61865-1", simpleAuthors);
      authorMap.put("10.1016/j.foo.2014.08.001", goodAuthors);
      authorMap.put("10.1016/j.foo.2014.08.123", goodAuthors);
    }
    ;

    volMap = new HashMap<String, String>();
    {
      volMap.put("10.1016/j.jidx.2014.07.028", "64");
      volMap.put("10.1016/j.jidx2.2014.05.013", "61");
      volMap.put("10.1016/S1473-1111(14)70840-0", "14");
      volMap.put("10.1016/S0140-1111(14)61865-1", "384");
      volMap.put("10.1016/j.foo.2014.08.001", "242");
      volMap.put("10.1016/j.foo.2014.08.123", "242");
    }
    ;

    issueMap = new HashMap<String, String>();
    {
      issueMap.put("10.1016/j.jidx.2014.07.028", "C");
      issueMap.put("10.1016/j.jidx2.2014.05.013", "9");
      issueMap.put("10.1016/S1473-1111(14)70840-0", "10");
      issueMap.put("10.1016/S0140-1111(14)61865-1", "S1");
      issueMap.put("10.1016/j.foo.2014.08.001", "C");
      issueMap.put("10.1016/j.foo.2014.08.123", "C");
    }
    ;
  }