예제 #1
0
  /**
   * Creates a default Tika configuration. First checks whether an XML config file is specified,
   * either in
   *
   * <ol>
   *   <li>System property "tika.config", or
   *   <li>Environment variable TIKA_CONFIG
   * </ol>
   *
   * <p>A value that is empty or consists only of whitespace is treated as not set: a blank system
   * property falls through to the environment variable, and a blank environment variable falls
   * through to the built-in defaults.
   *
   * <p>If one of these has a value, try to resolve it relative to file system or classpath.
   *
   * <p>If XML config is not specified, initialize from the built-in media type rules and all the
   * {@link Parser} implementations available through the {@link ServiceRegistry service provider
   * mechanism} in the context class loader of the current thread.
   *
   * @throws IOException if the configuration can not be read
   * @throws TikaException if problem with MimeTypes or parsing XML config
   */
  public TikaConfig() throws TikaException, IOException {
    this.serviceLoader = new ServiceLoader();

    // The system property takes precedence; the environment variable is only consulted when the
    // property is missing or blank.
    String config = System.getProperty("tika.config");
    if (config == null || config.trim().isEmpty()) {
      config = System.getenv("TIKA_CONFIG");
    }

    if (config == null || config.trim().isEmpty()) {
      // No usable config location: fall back to the built-in defaults.
      this.mimeTypes = getDefaultMimeTypes(ServiceLoader.getContextClassLoader());
      this.parser = getDefaultParser(mimeTypes, serviceLoader);
      this.detector = getDefaultDetector(mimeTypes, serviceLoader);
      this.translator = getDefaultTranslator(serviceLoader);
      this.executorService = getDefaultExecutorService();
    } else {
      // The value is passed on untouched (not trimmed) so non-blank settings resolve as before.
      try (InputStream stream = getConfigInputStream(config, serviceLoader)) {
        Element element = getBuilder().parse(stream).getDocumentElement();
        ParserXmlLoader parserLoader = new ParserXmlLoader();
        DetectorXmlLoader detectorLoader = new DetectorXmlLoader();
        TranslatorXmlLoader translatorLoader = new TranslatorXmlLoader();
        ExecutorServiceXmlLoader executorLoader = new ExecutorServiceXmlLoader();

        this.mimeTypes = typesFromDomElement(element);
        this.parser = parserLoader.loadOverall(element, mimeTypes, serviceLoader);
        this.detector = detectorLoader.loadOverall(element, mimeTypes, serviceLoader);
        this.translator = translatorLoader.loadOverall(element, mimeTypes, serviceLoader);
        this.executorService = executorLoader.loadOverall(element, mimeTypes, serviceLoader);
      } catch (SAXException e) {
        // Surface XML syntax problems as a TikaException that names the offending config.
        throw new TikaException("Specified Tika configuration has syntax errors: " + config, e);
      }
    }
  }
예제 #2
0
  /**
   * Builds a configuration from an already parsed XML config element.
   *
   * <p>The media types are read from the element first; the detector, parser, translator and
   * executor service are then loaded from the same element, in that order, each being handed the
   * media types and the supplied service loader. The service loader itself is stored last.
   *
   * @param element root DOM element of the configuration document
   * @param loader service loader passed to every component loader and kept by this instance
   * @throws TikaException declared by this constructor; presumably raised by the media type or
   *     component loading helpers (their bodies are not visible here)
   * @throws IOException declared by this constructor; presumably raised by the same helpers
   */
  private TikaConfig(Element element, ServiceLoader loader) throws TikaException, IOException {
    // Loaders are created up front, in the same order as before, ahead of any loading work.
    final ParserXmlLoader parsers = new ParserXmlLoader();
    final DetectorXmlLoader detectors = new DetectorXmlLoader();
    final TranslatorXmlLoader translators = new TranslatorXmlLoader();
    final ExecutorServiceXmlLoader executors = new ExecutorServiceXmlLoader();

    // Media types come first because every component loader below receives them.
    this.mimeTypes = typesFromDomElement(element);

    // Note: the detector is loaded before the parser in this constructor.
    this.detector = detectors.loadOverall(element, mimeTypes, loader);
    this.parser = parsers.loadOverall(element, mimeTypes, loader);
    this.translator = translators.loadOverall(element, mimeTypes, loader);
    this.executorService = executors.loadOverall(element, mimeTypes, loader);

    this.serviceLoader = loader;
  }