@Override public void extract(MetadataTarget target, CachedUrl cu, Emitter emitter) throws IOException { ArticleMetadata am = new SimpleHtmlMetaTagMetadataExtractor().extract(target, cu); am.cook(tagMap); String url = am.get(MetadataField.FIELD_ACCESS_URL); ArchivalUnit au = cu.getArchivalUnit(); if (url == null || url.isEmpty() || !au.makeCachedUrl(url).hasContent()) { url = cu.getUrl(); } am.replace( MetadataField.FIELD_ACCESS_URL, HttpToHttpsUtil.AuUtil.normalizeHttpHttpsFromBaseUrl(au, url)); emitter.emitMetadata(cu, am); }
/** * Generates a scanner for the specified input file. * * @param inputFile a file containing a lexical specification to generate a scanner for. */ public static void generate(File inputFile) { Out.resetCounters(); Timer totalTime = new Timer(); Timer time = new Timer(); LexScan scanner = null; LexParse parser = null; FileReader inputReader = null; totalTime.start(); try { Out.println(ErrorMessages.READING, inputFile.toString()); inputReader = new FileReader(inputFile); scanner = new LexScan(inputReader); scanner.setFile(inputFile); parser = new LexParse(scanner); } catch (FileNotFoundException e) { Out.error(ErrorMessages.CANNOT_OPEN, inputFile.toString()); throw new GeneratorException(); } try { NFA nfa = (NFA) parser.parse().value; Out.checkErrors(); if (Options.dump) Out.dump(ErrorMessages.get(ErrorMessages.NFA_IS) + Out.NL + nfa + Out.NL); if (Options.dot) nfa.writeDot(Emitter.normalize("nfa.dot", null)); // $NON-NLS-1$ Out.println(ErrorMessages.NFA_STATES, nfa.numStates); time.start(); DFA dfa = nfa.getDFA(); time.stop(); Out.time(ErrorMessages.DFA_TOOK, time); dfa.checkActions(scanner, parser); nfa = null; if (Options.dump) Out.dump(ErrorMessages.get(ErrorMessages.DFA_IS) + Out.NL + dfa + Out.NL); if (Options.dot) dfa.writeDot(Emitter.normalize("dfa-big.dot", null)); // $NON-NLS-1$ Out.checkErrors(); time.start(); dfa.minimize(); time.stop(); Out.time(ErrorMessages.MIN_TOOK, time); if (Options.dump) Out.dump(ErrorMessages.get(ErrorMessages.MIN_DFA_IS) + Out.NL + dfa); if (Options.dot) dfa.writeDot(Emitter.normalize("dfa-min.dot", null)); // $NON-NLS-1$ time.start(); Emitter e = new Emitter(inputFile, parser, dfa); e.emit(); time.stop(); Out.time(ErrorMessages.WRITE_TOOK, time); totalTime.stop(); Out.time(ErrorMessages.TOTAL_TIME, totalTime); } catch (ScannerException e) { Out.error(e.file, e.message, e.line, e.column); throw new GeneratorException(); } catch (MacroException e) { Out.error(e.getMessage()); throw new GeneratorException(); } catch (IOException e) { Out.error(ErrorMessages.IO_ERROR, e.toString()); throw new GeneratorException(); } catch (OutOfMemoryError e) { Out.error(ErrorMessages.OUT_OF_MEMORY); throw new GeneratorException(); } catch (GeneratorException e) { throw new GeneratorException(); } catch (Exception e) { e.printStackTrace(); throw new GeneratorException(); } }
@Override public void extract(MetadataTarget target, CachedUrl cu, Emitter emitter) { emitter.emitMetadata(cu, metadata); }