/* * Test the functionality of the MetadataUtilities * */ public void testNormalizeTitleValue() throws Exception { assertEquals( BaseAtyponMetadataUtil.normalizeTitle("The title goes here"), BaseAtyponMetadataUtil.normalizeTitle("Title Goes Here")); assertEquals( BaseAtyponMetadataUtil.normalizeTitle("Title with random spaces"), BaseAtyponMetadataUtil.normalizeTitle("Title with random spaces")); assertEquals( BaseAtyponMetadataUtil.normalizeTitle("Normalize -- hyphen"), BaseAtyponMetadataUtil.normalizeTitle("normalize \u2013\u2013 hyphen")); assertEquals( BaseAtyponMetadataUtil.normalizeTitle("Title and title"), BaseAtyponMetadataUtil.normalizeTitle("Title & title")); assertEquals( BaseAtyponMetadataUtil.normalizeTitle(" leading spaces"), BaseAtyponMetadataUtil.normalizeTitle("leading spaces")); // now checking the fall-back last ditch attempt assertEquals( BaseAtyponMetadataUtil.generateRawTitle("leading spaces:colon?"), BaseAtyponMetadataUtil.generateRawTitle("leadingspacescolon")); assertEquals( BaseAtyponMetadataUtil.generateRawTitle("relapsing-remitting"), BaseAtyponMetadataUtil.generateRawTitle("relapsing?remitting")); assertEquals( BaseAtyponMetadataUtil.generateRawTitle("foo\"blah"), BaseAtyponMetadataUtil.generateRawTitle("foo-blah")); }
// override this to do some additional attempts to get valid data before emitting @Override public void extract(MetadataTarget target, CachedUrl cu, FileMetadataExtractor.Emitter emitter) throws IOException, PluginException { ArticleMetadata am = extract(target, cu); /* * if, due to overcrawl, we got to a page that didn't have anything * valid, eg "this page not found" html page * don't emit empty metadata (because defaults would get put in * Must do this after cooking, because it checks size of cooked info */ if (am.isEmpty()) { return; } /* * RIS data can be variable. We don't have any way to add priority to * the cooking of data, so fallback to alternate values manually * * There are differences between books and journals, so fork for titles * and metadata check */ if (am.get(MetadataField.FIELD_DATE) == null) { if (am.getRaw("Y1") != null) { // if DA wasn't there, use Y1 am.put(MetadataField.FIELD_DATE, am.getRaw("Y1")); } } /* * Determine if this is a book item or a journal item. * set the appropriate article type once the daemon passes along the TY */ String ris_type = am.getRaw("TY"); if (ris_type == null) { // pre 1.69, do an alternate check because TY wasn't passed through ris_type = "JOUR"; // set a default if (am.get(MetadataField.FIELD_ISBN) != null) { // it is a bad value, but it was recognized as an isbn because of TY type ris_type = "BOOK"; // it could be a chapter but until TY is passed through... } } // Modify or try alternate RIS tag values based after cooking postCookProcess(cu, am, ris_type); // Only emit if this item is likely to be from this AU // protect against counting overcrawled articles by checking against // values from the TDB file - differentiate between book items and journal itesm ArchivalUnit au = cu.getArchivalUnit(); if (ris_type.contains("BOOK") || ris_type.contains("CHAP")) { if (!BaseAtyponMetadataUtil.metadataMatchesBookTdb(au, am)) { return; } } else { // JOURNAL default is to assume it's a journal for backwards compatibility if (!BaseAtyponMetadataUtil.metadataMatchesTdb(au, am)) { return; } } /* * Fill in DOI, publisher, other information available from * the URL or TDB * CORRECT the access.url if it is not in the AU */ BaseAtyponMetadataUtil.completeMetadata(cu, am); emitter.emitMetadata(cu, am); }