protected void initFeatureVersions() throws PluginException.InvalidDefinition { if (definitionMap.containsKey(KEY_PLUGIN_FEATURE_VERSION_MAP)) { Map<Plugin.Feature, String> map = new HashMap<Plugin.Feature, String>(); Map<String, String> spec = (Map<String, String>) definitionMap.getMap(KEY_PLUGIN_FEATURE_VERSION_MAP); log.debug2("features: " + spec); for (Map.Entry<String, String> ent : spec.entrySet()) { try { // Prefix version string with feature name to create separate // namespace for each feature String key = ent.getKey(); map.put(Plugin.Feature.valueOf(key), key + "_" + ent.getValue()); } catch (RuntimeException e) { log.warning( getPluginName() + " set unknown feature: " + ent.getKey() + " to version " + ent.getValue(), e); throw new PluginException.InvalidDefinition("Unknown feature: " + ent.getKey(), e); } } featureVersion = map; } else { featureVersion = null; } }
protected void initAuFeatureMap() { if (definitionMap.containsKey(DefinableArchivalUnit.KEY_AU_FEATURE_URL_MAP)) { Map<String, ?> featMap = definitionMap.getMap(DefinableArchivalUnit.KEY_AU_FEATURE_URL_MAP); for (Map.Entry ent : featMap.entrySet()) { Object val = ent.getValue(); if (val instanceof Map) { ent.setValue(MapUtil.expandAlternativeKeyLists((Map) val)); } } } }
/* * When testing no-pdf-check basic XML parsing, you will get partial MD records * depending on whether the info comes from dataset.xml or from main.xml */ private void validateDatasetMetadataRecord(ArticleMetadata am) { log.debug3("valideDatasetMetadatRecord"); String doi_val = am.get(MetadataField.FIELD_DOI); assertEquals(common_issn, am.get(MetadataField.FIELD_ISSN)); log.debug3("doi val is: " + doi_val); // The dataset doesn't set this value, it'll fail over the main.xml value if (doi_val.equals("10.1016/S0140-1111(14)61865-1")) { assertEquals(null, am.get(MetadataField.FIELD_DATE)); } else { assertEquals(dateMap.get(doi_val), am.get(MetadataField.FIELD_DATE)); } assertEquals(pubTitleMap.get(doi_val), am.get(MetadataField.FIELD_PUBLICATION_TITLE)); }
/* * You will have to tell it the DOI and the schema because those normally come from dataset */ private void validateSingleMainMetadataRecord(ArticleMetadata am, String doi_val, String schema) { log.debug3("valideSingleMainMetadatRecord"); if ("simple-article".equals(schema)) { assertEquals(common_simple_article_title, am.get(MetadataField.FIELD_ARTICLE_TITLE)); } else { assertEquals(common_article_title, am.get(MetadataField.FIELD_ARTICLE_TITLE)); } log.debug3("doi val is: " + doi_val); assertEquals(authorMap.get(doi_val), am.getList(MetadataField.FIELD_AUTHOR)); assertEquals(volMap.get(doi_val), am.get(MetadataField.FIELD_VOLUME)); assertEquals(issueMap.get(doi_val), am.get(MetadataField.FIELD_ISSUE)); assertEquals("Comment", am.getRaw(ElsevierMainDTD5XmlSchemaHelper.common_dochead)); assertEquals(doi_val, am.getRaw(ElsevierMainDTD5XmlSchemaHelper.common_doi)); assertEquals("2014", am.getRaw(ElsevierMainDTD5XmlSchemaHelper.common_copyright)); }
/** If in testing mode FOO, copy values from FOO_override map, if any, to main map */ void processOverrides(TypedEntryMap map) { String testMode = getTestingMode(); if (StringUtil.isNullString(testMode)) { return; } Object o = map.getMapElement(testMode + DefinableArchivalUnit.SUFFIX_OVERRIDE); if (o == null) { return; } if (o instanceof Map) { Map overrideMap = (Map) o; for (Map.Entry entry : (Set<Map.Entry>) overrideMap.entrySet()) { String key = (String) entry.getKey(); Object val = entry.getValue(); log.debug(getDefaultPluginName() + ": Overriding " + key + " with " + val); map.setMapElement(key, val); } } }
/* * When testing a complete extraction out of the tarset, the MD record will be completely filled in * and pdf-existence will get established */ private void validateCompleteMetadataRecord(ArticleMetadata am) { log.debug3("valideCompleteMetadatRecord"); String doi_val = am.get(MetadataField.FIELD_DOI); /* make sure we can pick up both types of xml article data */ log.debug3("doi val is: " + doi_val); if ("JA 5.2.0 SIMPLE-ARTICLE" .equals(am.getRaw(ElsevierDatasetXmlSchemaHelper.dataset_dtd_metadata))) { log.debug3("simple-article"); assertEquals(common_simple_article_title, am.get(MetadataField.FIELD_ARTICLE_TITLE)); } else { assertEquals(common_article_title, am.get(MetadataField.FIELD_ARTICLE_TITLE)); } assertEquals(common_issn, am.get(MetadataField.FIELD_ISSN)); assertEquals(authorMap.get(doi_val), am.getList(MetadataField.FIELD_AUTHOR)); assertEquals(dateMap.get(doi_val), am.get(MetadataField.FIELD_DATE)); assertEquals(accessUrlMap.get(doi_val), am.get(MetadataField.FIELD_ACCESS_URL)); assertEquals(volMap.get(doi_val), am.get(MetadataField.FIELD_VOLUME)); assertEquals(issueMap.get(doi_val), am.get(MetadataField.FIELD_ISSUE)); assertEquals(pubTitleMap.get(doi_val), am.get(MetadataField.FIELD_PUBLICATION_TITLE)); assertEquals("Elsevier", am.get(MetadataField.FIELD_PROVIDER)); assertEquals("Elsevier", am.get(MetadataField.FIELD_PUBLISHER)); log.debug3(am.ppString(2)); }
private void setMdTypeFact(Map factClassMap, String mdType, String factName) { log.debug3("Metadata type: " + mdType + " factory " + factName); FileMetadataExtractorFactory fact = (FileMetadataExtractorFactory) newAuxClass(factName, FileMetadataExtractorFactory.class); factClassMap.put(mdType, fact); }
protected void initMimeMap() throws PluginException.InvalidDefinition { for (Iterator iter = definitionMap.entrySet().iterator(); iter.hasNext(); ) { Map.Entry ent = (Map.Entry) iter.next(); String key = (String) ent.getKey(); Object val = ent.getValue(); if (key.endsWith(DefinableArchivalUnit.SUFFIX_LINK_EXTRACTOR_FACTORY)) { String mime = stripSuffix(key, DefinableArchivalUnit.SUFFIX_LINK_EXTRACTOR_FACTORY); if (val instanceof String) { String factName = (String) val; log.debug(mime + " link extractor: " + factName); MimeTypeInfo.Mutable mti = mimeMap.modifyMimeTypeInfo(mime); LinkExtractorFactory fact = (LinkExtractorFactory) newAuxClass(factName, LinkExtractorFactory.class); mti.setLinkExtractorFactory(fact); } } else if (key.endsWith(DefinableArchivalUnit.SUFFIX_CRAWL_FILTER_FACTORY)) { // XXX This clause must precede the one for SUFFIX_HASH_FILTER_FACTORY // XXX unless/until that key is changed to not be a terminal substring // XXX of this one String mime = stripSuffix(key, DefinableArchivalUnit.SUFFIX_CRAWL_FILTER_FACTORY); if (val instanceof String) { String factName = (String) val; log.debug(mime + " crawl filter: " + factName); MimeTypeInfo.Mutable mti = mimeMap.modifyMimeTypeInfo(mime); FilterFactory fact = (FilterFactory) newAuxClass(factName, FilterFactory.class); mti.setCrawlFilterFactory(fact); } } else if (key.endsWith(DefinableArchivalUnit.SUFFIX_HASH_FILTER_FACTORY)) { String mime = stripSuffix(key, DefinableArchivalUnit.SUFFIX_HASH_FILTER_FACTORY); if (val instanceof String) { String factName = (String) val; log.debug(mime + " filter: " + factName); MimeTypeInfo.Mutable mti = mimeMap.modifyMimeTypeInfo(mime); FilterFactory fact = (FilterFactory) newAuxClass(factName, FilterFactory.class); mti.setHashFilterFactory(fact); } } else if (key.endsWith(DefinableArchivalUnit.SUFFIX_FETCH_RATE_LIMIT)) { String mime = stripSuffix(key, DefinableArchivalUnit.SUFFIX_FETCH_RATE_LIMIT); if (val instanceof String) { String rate = (String) val; log.debug(mime + " fetch rate: " + rate); MimeTypeInfo.Mutable mti = mimeMap.modifyMimeTypeInfo(mime); RateLimiter limit = mti.getFetchRateLimiter(); if (limit != null) { limit.setRate(rate); } else { mti.setFetchRateLimiter(new RateLimiter(rate)); } } } else if (key.endsWith(DefinableArchivalUnit.SUFFIX_LINK_REWRITER_FACTORY)) { String mime = stripSuffix(key, DefinableArchivalUnit.SUFFIX_LINK_REWRITER_FACTORY); String factName = (String) val; log.debug(mime + " link rewriter: " + factName); MimeTypeInfo.Mutable mti = mimeMap.modifyMimeTypeInfo(mime); LinkRewriterFactory fact = (LinkRewriterFactory) newAuxClass(factName, LinkRewriterFactory.class); mti.setLinkRewriterFactory(fact); } else if (key.endsWith(DefinableArchivalUnit.SUFFIX_METADATA_EXTRACTOR_FACTORY_MAP)) { String mime = stripSuffix(key, DefinableArchivalUnit.SUFFIX_METADATA_EXTRACTOR_FACTORY_MAP); Map factNameMap = (Map) val; Map factClassMap = new HashMap(); MimeTypeInfo.Mutable mti = mimeMap.modifyMimeTypeInfo(mime); for (Iterator it = factNameMap.keySet().iterator(); it.hasNext(); ) { String mdTypes = (String) it.next(); String factName = (String) factNameMap.get(mdTypes); log.debug(mime + " (" + mdTypes + ") metadata extractor: " + factName); for (String mdType : (List<String>) StringUtil.breakAt(mdTypes, ";")) { setMdTypeFact(factClassMap, mdType, factName); } } mti.setFileMetadataExtractorFactoryMap(factClassMap); } } }
public String getFeatureVersion(Plugin.Feature feat) { if (featureVersion == null) { return null; } return featureVersion.get(feat); }
/* * The supporting methods */ private void setUpExpectedTarContent() { /* maps the DOIs in the metadata to the expected values */ log.debug3("setUpExpectedTarContent"); pubTitleMap = new HashMap<String, String>(); { pubTitleMap.put("10.1016/j.jidx.2014.07.028", "International Journal of XXX"); pubTitleMap.put("10.1016/j.jidx2.2014.05.013", "Revista"); pubTitleMap.put("10.1016/S1473-1111(14)70840-0", "The Journal"); pubTitleMap.put("10.1016/S0140-1111(14)61865-1", "The Other Journal"); pubTitleMap.put("10.1016/j.foo.2014.08.001", "Foo"); pubTitleMap.put("10.1016/j.foo.2014.08.123", "Foo"); } ; dateMap = new HashMap<String, String>(); { dateMap.put("10.1016/j.jidx.2014.07.028", "2014-07-30"); dateMap.put("10.1016/j.jidx2.2014.05.013", "2014-07-09"); dateMap.put("10.1016/S1473-1111(14)70840-0", "2014-09-01"); dateMap.put("10.1016/S0140-1111(14)61865-1", "2014"); // will get from main.xml as backup dateMap.put("10.1016/j.foo.2014.08.001", "2014-08-20"); dateMap.put("10.1016/j.foo.2014.08.123", "2014-08-20"); } ; accessUrlMap = new HashMap<String, String>(); { accessUrlMap.put( "10.1016/j.jidx.2014.07.028", TAR_A_BASE + SUBDIR + "01420615/v64sC/S0142061514004608/main.pdf"); accessUrlMap.put( "10.1016/j.jidx2.2014.05.013", TAR_A_BASE + SUBDIR + "00349356/v61i9/S0034935614001819/main.pdf"); accessUrlMap.put( "10.1016/S1473-1111(14)70840-0", TAR_A_BASE + SUBDIR + "14733099/v14i10/S1473309914708400/main.pdf"); accessUrlMap.put( "10.1016/S0140-1111(14)61865-1", TAR_B_BASE + SUBDIR + "01406736/v384sS1/S0140673614618651/main.pdf"); accessUrlMap.put( "10.1016/j.foo.2014.08.001", TAR_B_BASE + SUBDIR + "00191035/v242sC/S0019103514004151/main.pdf"); accessUrlMap.put( "10.1016/j.foo.2014.08.123", TAR_B_BASE + SUBDIR + "00191035/v242sC/S0019103514003856/main.pdf"); } ; ArrayList<String> goodAuthors = new ArrayList<String>(); { goodAuthors.add("Writer, Bob"); goodAuthors.add("Q. Text, Samantha"); } ArrayList<String> simpleAuthors = new ArrayList<String>(); { simpleAuthors.add("Simple, Josh"); } ArrayList<String> extendedAuthors = new ArrayList<String>(); { extendedAuthors.add("Writer, Bob"); extendedAuthors.add("Q. Text, Samantha"); extendedAuthors.add("The COLLABORATIVE Investigators"); } authorMap = new HashMap<String, List<String>>(); { authorMap.put("10.1016/j.jidx.2014.07.028", goodAuthors); authorMap.put("10.1016/j.jidx2.2014.05.013", goodAuthors); authorMap.put("10.1016/S1473-1111(14)70840-0", extendedAuthors); authorMap.put("10.1016/S0140-1111(14)61865-1", simpleAuthors); authorMap.put("10.1016/j.foo.2014.08.001", goodAuthors); authorMap.put("10.1016/j.foo.2014.08.123", goodAuthors); } ; volMap = new HashMap<String, String>(); { volMap.put("10.1016/j.jidx.2014.07.028", "64"); volMap.put("10.1016/j.jidx2.2014.05.013", "61"); volMap.put("10.1016/S1473-1111(14)70840-0", "14"); volMap.put("10.1016/S0140-1111(14)61865-1", "384"); volMap.put("10.1016/j.foo.2014.08.001", "242"); volMap.put("10.1016/j.foo.2014.08.123", "242"); } ; issueMap = new HashMap<String, String>(); { issueMap.put("10.1016/j.jidx.2014.07.028", "C"); issueMap.put("10.1016/j.jidx2.2014.05.013", "9"); issueMap.put("10.1016/S1473-1111(14)70840-0", "10"); issueMap.put("10.1016/S0140-1111(14)61865-1", "S1"); issueMap.put("10.1016/j.foo.2014.08.001", "C"); issueMap.put("10.1016/j.foo.2014.08.123", "C"); } ; }