@Override
 public void extract(MetadataTarget target, CachedUrl cu, Emitter emitter) throws IOException {
   ArticleMetadata am = new SimpleHtmlMetaTagMetadataExtractor().extract(target, cu);
   am.cook(tagMap);
   String url = am.get(MetadataField.FIELD_ACCESS_URL);
   ArchivalUnit au = cu.getArchivalUnit();
   if (url == null || url.isEmpty() || !au.makeCachedUrl(url).hasContent()) {
     url = cu.getUrl();
   }
   am.replace(
       MetadataField.FIELD_ACCESS_URL,
       HttpToHttpsUtil.AuUtil.normalizeHttpHttpsFromBaseUrl(au, url));
   emitter.emitMetadata(cu, am);
 }
Exemple #2
0
  /**
   * Generates a scanner for the specified input file.
   *
   * @param inputFile a file containing a lexical specification to generate a scanner for.
   */
  public static void generate(File inputFile) {

    Out.resetCounters();

    Timer totalTime = new Timer();
    Timer time = new Timer();

    LexScan scanner = null;
    LexParse parser = null;
    FileReader inputReader = null;

    totalTime.start();

    try {
      Out.println(ErrorMessages.READING, inputFile.toString());
      inputReader = new FileReader(inputFile);
      scanner = new LexScan(inputReader);
      scanner.setFile(inputFile);
      parser = new LexParse(scanner);
    } catch (FileNotFoundException e) {
      Out.error(ErrorMessages.CANNOT_OPEN, inputFile.toString());
      throw new GeneratorException();
    }

    try {
      NFA nfa = (NFA) parser.parse().value;

      Out.checkErrors();

      if (Options.dump) Out.dump(ErrorMessages.get(ErrorMessages.NFA_IS) + Out.NL + nfa + Out.NL);

      if (Options.dot) nfa.writeDot(Emitter.normalize("nfa.dot", null)); // $NON-NLS-1$

      Out.println(ErrorMessages.NFA_STATES, nfa.numStates);

      time.start();
      DFA dfa = nfa.getDFA();
      time.stop();
      Out.time(ErrorMessages.DFA_TOOK, time);

      dfa.checkActions(scanner, parser);

      nfa = null;

      if (Options.dump) Out.dump(ErrorMessages.get(ErrorMessages.DFA_IS) + Out.NL + dfa + Out.NL);

      if (Options.dot) dfa.writeDot(Emitter.normalize("dfa-big.dot", null)); // $NON-NLS-1$

      Out.checkErrors();

      time.start();
      dfa.minimize();
      time.stop();

      Out.time(ErrorMessages.MIN_TOOK, time);

      if (Options.dump) Out.dump(ErrorMessages.get(ErrorMessages.MIN_DFA_IS) + Out.NL + dfa);

      if (Options.dot) dfa.writeDot(Emitter.normalize("dfa-min.dot", null)); // $NON-NLS-1$

      time.start();

      Emitter e = new Emitter(inputFile, parser, dfa);
      e.emit();

      time.stop();

      Out.time(ErrorMessages.WRITE_TOOK, time);

      totalTime.stop();

      Out.time(ErrorMessages.TOTAL_TIME, totalTime);
    } catch (ScannerException e) {
      Out.error(e.file, e.message, e.line, e.column);
      throw new GeneratorException();
    } catch (MacroException e) {
      Out.error(e.getMessage());
      throw new GeneratorException();
    } catch (IOException e) {
      Out.error(ErrorMessages.IO_ERROR, e.toString());
      throw new GeneratorException();
    } catch (OutOfMemoryError e) {
      Out.error(ErrorMessages.OUT_OF_MEMORY);
      throw new GeneratorException();
    } catch (GeneratorException e) {
      throw new GeneratorException();
    } catch (Exception e) {
      e.printStackTrace();
      throw new GeneratorException();
    }
  }
 @Override
 public void extract(MetadataTarget target, CachedUrl cu, Emitter emitter) {
   emitter.emitMetadata(cu, metadata);
 }