// override this to do some additional attempts to get valid data before emitting @Override public void extract(MetadataTarget target, CachedUrl cu, FileMetadataExtractor.Emitter emitter) throws IOException, PluginException { ArticleMetadata am = extract(target, cu); /* * if, due to overcrawl, we got to a page that didn't have anything * valid, eg "this page not found" html page * don't emit empty metadata (because defaults would get put in * Must do this after cooking, because it checks size of cooked info */ if (am.isEmpty()) { return; } /* * RIS data can be variable. We don't have any way to add priority to * the cooking of data, so fallback to alternate values manually * * There are differences between books and journals, so fork for titles * and metadata check */ if (am.get(MetadataField.FIELD_DATE) == null) { if (am.getRaw("Y1") != null) { // if DA wasn't there, use Y1 am.put(MetadataField.FIELD_DATE, am.getRaw("Y1")); } } /* * Determine if this is a book item or a journal item. * set the appropriate article type once the daemon passes along the TY */ String ris_type = am.getRaw("TY"); if (ris_type == null) { // pre 1.69, do an alternate check because TY wasn't passed through ris_type = "JOUR"; // set a default if (am.get(MetadataField.FIELD_ISBN) != null) { // it is a bad value, but it was recognized as an isbn because of TY type ris_type = "BOOK"; // it could be a chapter but until TY is passed through... } } // Modify or try alternate RIS tag values based after cooking postCookProcess(cu, am, ris_type); // Only emit if this item is likely to be from this AU // protect against counting overcrawled articles by checking against // values from the TDB file - differentiate between book items and journal itesm ArchivalUnit au = cu.getArchivalUnit(); if (ris_type.contains("BOOK") || ris_type.contains("CHAP")) { if (!BaseAtyponMetadataUtil.metadataMatchesBookTdb(au, am)) { return; } } else { // JOURNAL default is to assume it's a journal for backwards compatibility if (!BaseAtyponMetadataUtil.metadataMatchesTdb(au, am)) { return; } } /* * Fill in DOI, publisher, other information available from * the URL or TDB * CORRECT the access.url if it is not in the AU */ BaseAtyponMetadataUtil.completeMetadata(cu, am); emitter.emitMetadata(cu, am); }