/**
   * Resolves the MIME type of an asset. A "content-type" value already stored in the entity's
   * file metadata wins; otherwise the content is sniffed with the Tika detector and the detected
   * value is stored back into the file metadata map.
   *
   * @param entity the entity whose file metadata is consulted and updated
   * @param type the asset content; only {@code byte[]} and {@link File} are sniffed, any other
   *     type yields the generic binary type without touching the metadata
   * @return A string representation of the content type suitable for use in an HTTP header. Eg.
   *     "image/jpeg" for a jpeg image.
   */
  public <T> String getMimeType(Entity entity, T type) {
    Map<String, Object> fileMetadata = AssetUtils.getFileMetadata(entity);
    Object storedType = fileMetadata.get(AssetUtils.CONTENT_TYPE);
    if (storedType != null) {
      return (String) storedType;
    }

    boolean isByteArray = type instanceof byte[];
    if (!isByteArray && !(type instanceof File)) {
      // nothing we know how to sniff: report the generic type and do not cache it
      return MediaType.OCTET_STREAM.toString();
    }

    MediaType detected = MediaType.OCTET_STREAM;
    Metadata tikaMetadata = new Metadata();
    try {
      if (isByteArray) {
        detected = detector.detect(new ByteArrayInputStream((byte[]) type), tikaMetadata);
      } else {
        InputStream fileStream = new BufferedInputStream(new FileInputStream((File) type));
        try {
          detected = detector.detect(fileStream, tikaMetadata);
        } finally {
          fileStream.close();
        }
      }

      // remember the result so the next lookup short-circuits
      fileMetadata.put(AssetUtils.CONTENT_TYPE, detected.toString());
    } catch (IOException e) {
      LOG.error("error detecting mime type", e);
    }

    return detected.toString();
  }
Beispiel #2
0
  /**
   * The .xlsb format (an OOXML container with binary blobs) is not currently supported, but
   * detection must still work and parsing must not fail on such files (TIKA-826).
   */
  @Test
  public void testExcelXLSB() throws Exception {
    Metadata metadata = new Metadata();
    metadata.add(Metadata.RESOURCE_NAME_KEY, "excel.xlsb");

    // Detection must identify the binary workbook type
    MediaType detected;
    try (InputStream in =
        ExcelParserTest.class.getResourceAsStream("/test-documents/testEXCEL.xlsb")) {
      detected = new DefaultDetector().detect(in, metadata);
      assertEquals("application/vnd.ms-excel.sheet.binary.macroenabled.12", detected.toString());
    }

    // Neither the OLE2 parser nor the OOXML parser claims the type
    boolean officeSupports =
        (new OfficeParser()).getSupportedTypes(new ParseContext()).contains(detected);
    assertEquals(false, officeSupports);
    boolean ooxmlSupports =
        (new OOXMLParser()).getSupportedTypes(new ParseContext()).contains(detected);
    assertEquals(false, ooxmlSupports);

    // The auto-detecting parser copes with the file and yields no text
    try (InputStream in =
        ExcelParserTest.class.getResourceAsStream("/test-documents/testEXCEL.xlsb")) {
      ContentHandler body = new BodyContentHandler(-1);
      ParseContext parseContext = new ParseContext();
      parseContext.set(Locale.class, Locale.US);
      new AutoDetectParser().parse(in, body, metadata, parseContext);

      assertEquals("", body.toString());
    }
  }
    /**
     * Hands the stream to {@code handleImage} when the metadata identifies a supported image,
     * either by an exact content-type match or by a file name ending in ".&lt;subtype&gt;" of a
     * supported type. Anything else is silently ignored.
     */
    @Override
    public void parse(
        InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
      String filename = metadata.get(Metadata.RESOURCE_NAME_KEY);
      String type = metadata.get(Metadata.CONTENT_TYPE);

      boolean supported = false;
      if (type != null || filename != null) {
        for (MediaType candidate : types) {
          boolean typeMatches = type != null && candidate.toString().equals(type);
          boolean extensionMatches =
              filename != null && filename.endsWith("." + candidate.getSubtype());
          if (typeMatches || extensionMatches) {
            supported = true;
            break;
          }
        }
      }

      if (supported) {
        handleImage(stream, filename, type);
      }
    }
  /**
   * Guesses a content type from a file name alone.
   *
   * @param fileName the name to inspect; a null/blank name yields the generic binary type
   * @return the detected content type, or {@code ContentTypes.APPLICATION_OCTET_STREAM} when the
   *     name is blank, detection fails, or the detected value contains "tika"
   */
  public String getContentType(String fileName) {
    if (!Validator.isNull(fileName)) {
      try {
        Metadata metadata = new Metadata();

        metadata.set(Metadata.RESOURCE_NAME_KEY, fileName);

        // no stream is supplied, so only the resource name can drive detection
        String detected = _detector.detect(null, metadata).toString();

        if (detected.contains("tika")) {
          if (_log.isDebugEnabled()) {
            _log.debug("Retrieved invalid content type " + detected);
          }
        } else {
          return detected;
        }
      } catch (Exception e) {
        _log.error(e, e);
      }
    }

    return ContentTypes.APPLICATION_OCTET_STREAM;
  }
 // Populates SUPPORTED_MIMETYPES with the string form of every media type that
 // PackageParser reports as supported.
 static {
   SUPPORTED_MIMETYPES = new ArrayList<String>();
   for (MediaType supported : new PackageParser().getSupportedTypes(null)) {
     SUPPORTED_MIMETYPES.add(supported.toString());
   }
 }
  /**
   * Parses an HTML document: detects the character encoding, records the resulting content type
   * and encoding in the metadata, then runs TagSoup over the decoded text and forwards the SAX
   * events to the given handler.
   *
   * @param stream the document stream; it is wrapped in a close shield before being read
   * @param handler receiver of the (downgraded) XHTML SAX events
   * @param metadata read for an existing content type, updated with content type and encoding
   * @param context may supply a {@code ServiceLoader}, {@code HtmlMapper} and {@code Schema}
   */
  public void parse(
      InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
      throws IOException, SAXException, TikaException {
    // Automatically detect the character encoding
    AutoDetectReader reader =
        new AutoDetectReader(
            new CloseShieldInputStream(stream), metadata, context.get(ServiceLoader.class, LOADER));
    try {
      Charset charset = reader.getCharset();

      // Pick the base type from whatever was declared before, defaulting to HTML
      String declared = metadata.get(Metadata.CONTENT_TYPE);
      MediaType baseType = null;
      if (declared == null || declared.startsWith("text/html")) {
        baseType = MediaType.TEXT_HTML;
      } else if (declared.startsWith("application/xhtml+xml")) {
        baseType = XHTML;
      } else if (declared.startsWith("application/vnd.wap.xhtml+xml")) {
        baseType = WAP_XHTML;
      } else if (declared.startsWith("application/x-asp")) {
        baseType = X_ASP;
      }
      if (baseType != null) {
        // re-publish the type with the detected charset attached
        metadata.set(Metadata.CONTENT_TYPE, new MediaType(baseType, charset).toString());
      }
      // deprecated, see TIKA-431
      metadata.set(Metadata.CONTENT_ENCODING, charset.name());

      // Mapper and schema come from the parse context, with defaults
      HtmlMapper htmlMapper = context.get(HtmlMapper.class, new HtmlParserMapper());
      Schema htmlSchema = context.get(Schema.class, HTML_SCHEMA);

      org.ccil.cowan.tagsoup.Parser tagSoup = new org.ccil.cowan.tagsoup.Parser();
      // TIKA-528: Reuse share schema to avoid heavy instantiation
      tagSoup.setProperty(org.ccil.cowan.tagsoup.Parser.schemaProperty, htmlSchema);
      // TIKA-599: Shared schema is thread-safe only if bogons are ignored
      tagSoup.setFeature(org.ccil.cowan.tagsoup.Parser.ignoreBogonsFeature, true);

      HtmlHandler htmlHandler = new HtmlHandler(htmlMapper, handler, metadata);
      tagSoup.setContentHandler(new XHTMLDowngradeHandler(htmlHandler));

      tagSoup.parse(reader.asInputSource());
    } finally {
      reader.close();
    }
  }
 /**
  * Runs the configured detector over the given stream and metadata and returns the detected
  * media type as a string.
  *
  * @param in stream handed to the detector
  * @param metadata metadata handed to the detector
  * @param excludeParameters when true, everything from the first ';' onward is stripped from
  *     the result
  * @return the detected media type, optionally without parameters
  * @throws MorphlineRuntimeException if the detector fails with an {@link IOException}
  */
 private String getMediaType(InputStream in, Metadata metadata, boolean excludeParameters) {
   final MediaType detected;
   try {
     detected = getDetector().detect(in, metadata);
   } catch (IOException e) {
     throw new MorphlineRuntimeException(e);
   }

   final String full = detected.toString();
   if (!excludeParameters) {
     return full;
   }
   final int semicolon = full.indexOf(';');
   return semicolon < 0 ? full : full.substring(0, semicolon);
 }
  /**
   * Parses a sitemap whose format is chosen from its content type. The declared type is tried
   * first, then each of its supertypes in turn, until one of the XML, text or gzip handlers
   * applies.
   *
   * @param contentType the declared content type of the sitemap
   * @param content the raw sitemap bytes
   * @param url the URL the sitemap was fetched from
   * @return SiteMap/SiteMapIndex given a content type, byte content and the URL of a sitemap
   * @throws UnknownFormatException if neither the type nor any of its supertypes is supported;
   *     the message names the content type originally passed in
   * @throws IOException if reading the content fails
   */
  public AbstractSiteMap parseSiteMap(String contentType, byte[] content, URL url)
      throws UnknownFormatException, IOException {
    MediaType mediaType = MediaType.parse(contentType);

    // Octet-stream is the father of all binary types, so stop once we reach it
    while (mediaType != null && !mediaType.equals(MediaType.OCTET_STREAM)) {
      if (XML_MEDIA_TYPES.contains(mediaType)) {
        return processXml(url, content);
      }
      if (TEXT_MEDIA_TYPES.contains(mediaType)) {
        return (AbstractSiteMap) processText(url.toString(), content);
      }
      if (GZ_MEDIA_TYPES.contains(mediaType)) {
        return processGzip(url, content);
      }

      // Not handled directly: climb to the parent type and try again. Iterating (rather than
      // recursing with the parent's name) keeps the original contentType for the error below
      // and lets the loop condition absorb a null supertype.
      mediaType = MEDIA_TYPE_REGISTRY.getSupertype(mediaType);
    }

    throw new UnknownFormatException(
        "Can't parse a sitemap with the MediaType of: " + contentType + " (at: " + url + ")");
  }
  /**
   * Detects a content type from the stream content, using the file name as a hint.
   *
   * <p>When no stream is available the decision is made from the file name alone. When
   * detection yields a value containing "tika", name-based detection is tried, and the generic
   * binary type is the last resort.
   *
   * @param inputStream the content to sniff; may be null
   * @param fileName the resource name used as a detection hint; may be null
   * @return the detected content type, or {@code ContentTypes.APPLICATION_OCTET_STREAM} when
   *     nothing better can be determined or detection fails
   */
  public String getContentType(InputStream inputStream, String fileName) {
    if ((inputStream == null) && Validator.isNull(fileName)) {
      return ContentTypes.APPLICATION_OCTET_STREAM;
    }

    if (inputStream == null) {
      // Wrapping a null stream in TikaInputStream fails, which previously ended in a logged
      // error and octet-stream even though the file name was usable.
      return getContentType(fileName);
    }

    String contentType = null;

    try {
      Metadata metadata = new Metadata();

      metadata.set(Metadata.RESOURCE_NAME_KEY, fileName);

      MediaType mediaType = _detector.detect(TikaInputStream.get(inputStream), metadata);

      contentType = mediaType.toString();

      if (contentType.contains("tika")) {
        if (_log.isDebugEnabled()) {
          _log.debug("Retrieved invalid content type " + contentType);
        }

        // content sniffing was inconclusive, fall back to the name
        contentType = getContentType(fileName);
      }

      if (contentType.contains("tika")) {
        if (_log.isDebugEnabled()) {
          _log.debug("Retrieved invalid content type " + contentType);
        }

        contentType = ContentTypes.APPLICATION_OCTET_STREAM;
      }
    } catch (Exception e) {
      _log.error(e, e);

      contentType = ContentTypes.APPLICATION_OCTET_STREAM;
    }

    return contentType;
  }
Beispiel #10
0
  /**
   * Collects Tika metadata for a file.
   *
   * <p>The result contains the detected file type ("tika.filetype"), the file size
   * ("file.size"), everything the auto-detecting parser extracted (multi-valued entries are
   * flattened into one ", "-separated value) and the media type reported by the default
   * detector ("media"). Failures are not thrown; they are recorded under "error_tika_parse".
   *
   * @param audioFile the file to inspect
   * @return the collected metadata, never null
   */
  private static Metadata tika_parse(File audioFile) {
    Metadata metadata = new Metadata();
    try {
      String filetype = new Tika().detect(audioFile);
      metadata.set("tika.filetype", filetype);
      metadata.set("file.size", Long.toString(audioFile.length()));

      // try-with-resources so the stream is released even when parsing fails
      try (BufferedInputStream inputStream =
          new BufferedInputStream(new FileInputStream(audioFile))) {
        new AutoDetectParser().parse(inputStream, new BodyContentHandler(), metadata);
      }

      for (String key : metadata.names()) {
        StringBuilder dataBuilder = new StringBuilder();
        if (metadata.isMultiValued(key)) {
          String[] values = metadata.getValues(key);
          for (int i = 0; i < values.length; i++) {
            // separate by position, not by builder length, so that a one-character (or empty)
            // first value still gets its separator
            if (i > 0) {
              dataBuilder.append(", ");
            }
            dataBuilder.append(values[i]);
          }
        } else {
          dataBuilder.append(metadata.get(key));
        }
        metadata.set(key, dataBuilder.toString().trim());
      }

      // second pass over the file for detector-based media type; this stream used to leak
      try (BufferedInputStream inputStream =
          new BufferedInputStream(new FileInputStream(audioFile))) {
        MediaType media = new DefaultDetector().detect(inputStream, new Metadata());
        metadata.set("media", media.toString());
      }
    } catch (SAXException | IOException | TikaException e) {
      metadata.set(
          "error_tika_parse",
          "tika_parse error processing file (" + audioFile.getName() + "): " + e.getMessage());
    }
    return metadata;
  }
Beispiel #11
0
    /**
     * Saves an embedded document to a file below {@code extractDir}.
     *
     * <p>The file name is taken from the resource name in the metadata (or generated from a
     * counter), gets an extension derived from the detected content type when it has none, and
     * is prefixed with the embedded relationship id when one is present. Failures while writing
     * the file are logged and ignored.
     *
     * @param inputStream the embedded document's content
     * @param contentHandler not used by this implementation
     * @param metadata metadata of the embedded document
     * @param outputHtml not used by this implementation
     * @throws IOException if the target directory cannot be created or the output file cannot
     *     be closed
     */
    public void parseEmbedded(
        InputStream inputStream,
        ContentHandler contentHandler,
        Metadata metadata,
        boolean outputHtml)
        throws SAXException, IOException {
      String name = metadata.get(Metadata.RESOURCE_NAME_KEY);

      if (name == null) {
        name = "file" + count++;
      }

      MediaType contentType = detector.detect(inputStream, metadata);

      if (name.indexOf('.') == -1 && contentType != null) {
        try {
          name += config.getMimeRepository().forName(contentType.toString()).getExtension();
        } catch (MimeTypeException e) {
          // the extension is optional; report through the logger instead of stderr
          logger.warn("Unable to determine file extension for content type " + contentType, e);
        }
      }

      String relID = metadata.get(Metadata.EMBEDDED_RELATIONSHIP_ID);
      if (relID != null && !name.startsWith(relID)) {
        name = relID + "_" + name;
      }

      // NOTE(review): name originates from the embedded document's metadata and is used
      // unvalidated to build the output path; a name containing ".." segments could resolve
      // outside extractDir. Confirm whether callers sanitize it and add a containment check.
      File outputFile = new File(extractDir, name);
      File parent = outputFile.getParentFile();
      if (!parent.exists()) {
        if (!parent.mkdirs()) {
          throw new IOException("unable to create directory \"" + parent + "\"");
        }
      }
      System.out.println("Extracting '" + name + "' (" + contentType + ") to " + outputFile);

      FileOutputStream os = null;

      try {
        os = new FileOutputStream(outputFile);

        if (inputStream instanceof TikaInputStream) {
          TikaInputStream tin = (TikaInputStream) inputStream;

          if (tin.getOpenContainer() != null && tin.getOpenContainer() instanceof DirectoryEntry) {
            // an already-open OLE2 directory is copied entry by entry into a fresh file system
            POIFSFileSystem fs = new POIFSFileSystem();
            copy((DirectoryEntry) tin.getOpenContainer(), fs.getRoot());
            fs.writeFilesystem(os);
          } else {
            IOUtils.copy(inputStream, os);
          }
        } else {
          IOUtils.copy(inputStream, os);
        }
      } catch (Exception e) {
        logger.warn("Ignoring unexpected exception trying to save embedded file " + name, e);
      } finally {
        if (os != null) {
          os.close();
        }
      }
    }
Beispiel #12
0
  /**
   * Excel 5 and Excel 95 are legacy formats with only basic support: both must be detected as
   * plain Excel, be claimed by the OLE2 parser only, and yield their sheet names, text and
   * document metadata when parsed.
   */
  @Test
  public void testExcel95() throws Exception {
    Detector typeDetector = new DefaultDetector();
    AutoDetectParser autoParser = new AutoDetectParser();
    MediaType detected;

    // Detection: Excel 5
    Metadata excel5Hints = new Metadata();
    excel5Hints.add(Metadata.RESOURCE_NAME_KEY, "excel_5.xls");
    try (InputStream in =
        ExcelParserTest.class.getResourceAsStream("/test-documents/testEXCEL_5.xls")) {
      detected = typeDetector.detect(in, excel5Hints);
      assertEquals("application/vnd.ms-excel", detected.toString());
    }

    // Detection: Excel 95
    Metadata excel95Hints = new Metadata();
    excel95Hints.add(Metadata.RESOURCE_NAME_KEY, "excel_95.xls");
    try (InputStream in =
        ExcelParserTest.class.getResourceAsStream("/test-documents/testEXCEL_95.xls")) {
      detected = typeDetector.detect(in, excel95Hints);
      assertEquals("application/vnd.ms-excel", detected.toString());
    }

    // The OLE2 parser claims the type, the OOXML parser does not
    boolean officeSupports =
        (new OfficeParser()).getSupportedTypes(new ParseContext()).contains(detected);
    assertEquals(true, officeSupports);
    boolean ooxmlSupports =
        (new OOXMLParser()).getSupportedTypes(new ParseContext()).contains(detected);
    assertEquals(false, ooxmlSupports);

    // Parsing: Excel 5
    Metadata excel5Meta = new Metadata();
    try (InputStream in =
        ExcelParserTest.class.getResourceAsStream("/test-documents/testEXCEL_5.xls")) {
      ContentHandler body = new BodyContentHandler(-1);
      ParseContext parseContext = new ParseContext();
      parseContext.set(Locale.class, Locale.US);
      autoParser.parse(in, body, excel5Meta, parseContext);

      String text = body.toString();

      // Sheet names
      assertContains("Feuil1", text);
      assertContains("Feuil3", text);

      // Cell text
      assertContains("Sample Excel", text);
      assertContains("Number", text);

      // Cell numbers
      assertContains("15", text);
      assertContains("225", text);

      // Document metadata
      assertEquals("Simple Excel document", excel5Meta.get(TikaCoreProperties.TITLE));
      assertEquals("Keith Bennett", excel5Meta.get(TikaCoreProperties.CREATOR));
    }

    // Parsing: Excel 95
    Metadata excel95Meta = new Metadata();
    try (InputStream in =
        ExcelParserTest.class.getResourceAsStream("/test-documents/testEXCEL_95.xls")) {
      ContentHandler body = new BodyContentHandler(-1);
      ParseContext parseContext = new ParseContext();
      parseContext.set(Locale.class, Locale.US);
      autoParser.parse(in, body, excel95Meta, parseContext);

      String text = body.toString();

      // Only a sheet name to check: the file holds no actual text or numbers
      assertContains("Foglio1", text);

      // Document metadata
      assertEquals(null, excel95Meta.get(TikaCoreProperties.TITLE));
      assertEquals("Marco Quaranta", excel95Meta.get(Office.LAST_AUTHOR));
    }
  }
    /**
     * Builds the command from its morphline configuration.
     *
     * <p>In order, the constructor: resolves the Solr schema through a {@code SolrLocator} built
     * from the "solrLocator" section; collects the optional extraction parameters into a
     * multimap; reads the date formats, the content handler factory class and the locale;
     * instantiates every configured Tika parser and maps media types to it; converts the
     * collected parameters into {@code solrParams}; and finally calls
     * {@code validateArguments()}.
     *
     * @throws MorphlineCompilationException if the handler factory class cannot be found, or a
     *     configured parser cannot be instantiated or is not a {@code Parser}
     */
    public SolrCell(
        CommandBuilder builder,
        Config config,
        Command parent,
        Command child,
        MorphlineContext context) {
      super(builder, config, parent, child, context);

      // Resolve the index schema; it must be available
      Config solrLocatorConfig = getConfigs().getConfig(config, "solrLocator");
      SolrLocator locator = new SolrLocator(solrLocatorConfig, context);
      LOG.debug("solrLocator: {}", locator);
      this.schema = locator.getIndexSchema();
      Preconditions.checkNotNull(schema);
      LOG.trace(
          "Solr schema: \n{}", Joiner.on("\n").join(new TreeMap(schema.getFields()).values()));

      // Collect the optional extraction parameters; each is added only when configured
      ListMultimap<String, String> cellParams = ArrayListMultimap.create();
      String uprefix = getConfigs().getString(config, ExtractingParams.UNKNOWN_FIELD_PREFIX, null);
      if (uprefix != null) {
        cellParams.put(ExtractingParams.UNKNOWN_FIELD_PREFIX, uprefix);
      }
      for (String capture :
          getConfigs()
              .getStringList(
                  config, ExtractingParams.CAPTURE_ELEMENTS, Collections.<String>emptyList())) {
        cellParams.put(ExtractingParams.CAPTURE_ELEMENTS, capture);
      }
      // Every entry of the optional "fmap" section becomes a MAP_PREFIX-prefixed parameter
      Config fmapConfig = getConfigs().getConfig(config, "fmap", null);
      if (fmapConfig != null) {
        for (Map.Entry<String, Object> entry : new Configs().getEntrySet(fmapConfig)) {
          cellParams.put(ExtractingParams.MAP_PREFIX + entry.getKey(), entry.getValue().toString());
        }
      }
      String captureAttributes =
          getConfigs().getString(config, ExtractingParams.CAPTURE_ATTRIBUTES, null);
      if (captureAttributes != null) {
        cellParams.put(ExtractingParams.CAPTURE_ATTRIBUTES, captureAttributes);
      }
      String lowerNames = getConfigs().getString(config, ExtractingParams.LOWERNAMES, null);
      if (lowerNames != null) {
        cellParams.put(ExtractingParams.LOWERNAMES, lowerNames);
      }
      String defaultField = getConfigs().getString(config, ExtractingParams.DEFAULT_FIELD, null);
      if (defaultField != null) {
        cellParams.put(ExtractingParams.DEFAULT_FIELD, defaultField);
      }
      // The XPath expression is also kept in a field, not only in the parameters
      xpathExpr = getConfigs().getString(config, ExtractingParams.XPATH_EXPRESSION, null);
      if (xpathExpr != null) {
        cellParams.put(ExtractingParams.XPATH_EXPRESSION, xpathExpr);
      }

      // Date formats default to a copy of DateUtil.DEFAULT_DATE_FORMATS
      this.dateFormats =
          getConfigs()
              .getStringList(config, "dateFormats", new ArrayList<>(DateUtil.DEFAULT_DATE_FORMATS));

      // Load the content handler factory class by name, defaulting to the trimming factory
      String handlerStr =
          getConfigs()
              .getString(
                  config,
                  "solrContentHandlerFactory",
                  TrimSolrContentHandlerFactory.class.getName());
      Class<? extends SolrContentHandlerFactory> factoryClass;
      try {
        factoryClass = (Class<? extends SolrContentHandlerFactory>) Class.forName(handlerStr);
      } catch (ClassNotFoundException cnfe) {
        throw new MorphlineCompilationException(
            "Could not find class " + handlerStr + " to use for " + "solrContentHandlerFactory",
            config,
            cnfe);
      }
      this.solrContentHandlerFactory =
          getSolrContentHandlerFactory(factoryClass, dateFormats, config);

      this.locale = getLocale(getConfigs().getString(config, "locale", ""));

      this.mediaTypeToParserMap = new HashMap<>();
      // MimeTypes mimeTypes = MimeTypes.getDefaultMimeTypes(); // FIXME
      // getMediaTypeRegistry.normalize()

      // Instantiate each configured parser and register the media types it should handle
      List<? extends Config> parserConfigs = getConfigs().getConfigList(config, "parsers");
      for (Config parserConfig : parserConfigs) {
        String parserClassName = getConfigs().getString(parserConfig, "parser");

        // Reflective instantiation through the no-arg constructor; any failure aborts compilation
        Object obj;
        try {
          obj = Class.forName(parserClassName).newInstance();
        } catch (Throwable e) {
          throw new MorphlineCompilationException(
              "Cannot instantiate Tika parser: " + parserClassName, config, e);
        }
        if (!(obj instanceof Parser)) {
          throw new MorphlineCompilationException(
              "Tika parser "
                  + obj.getClass().getName()
                  + " must be an instance of class "
                  + Parser.class.getName(),
              config);
        }
        Parser parser = (Parser) obj;
        this.parsers.add(parser);

        // Explicitly configured media types (empty when the setting is absent)
        List<String> mediaTypes =
            getConfigs()
                .getStringList(parserConfig, SUPPORTED_MIME_TYPES, Collections.<String>emptyList());
        for (String mediaTypeStr : mediaTypes) {
          MediaType mediaType = parseMediaType(mediaTypeStr);
          addSupportedMimeType(mediaTypeStr);
          this.mediaTypeToParserMap.put(mediaType, parser);
        }

        // Without an explicit list, use the parser's own supported base types plus any
        // configured additional types
        if (!parserConfig.hasPath(SUPPORTED_MIME_TYPES)) {
          for (MediaType mediaType : parser.getSupportedTypes(new ParseContext())) {
            mediaType = mediaType.getBaseType();
            addSupportedMimeType(mediaType.toString());
            this.mediaTypeToParserMap.put(mediaType, parser);
          }
          List<String> extras =
              getConfigs()
                  .getStringList(
                      parserConfig,
                      ADDITIONAL_SUPPORTED_MIME_TYPES,
                      Collections.<String>emptyList());
          for (String mediaTypeStr : extras) {
            MediaType mediaType = parseMediaType(mediaTypeStr);
            addSupportedMimeType(mediaTypeStr);
            this.mediaTypeToParserMap.put(mediaType, parser);
          }
        }
      }
      // LOG.info("mediaTypeToParserMap="+mediaTypeToParserMap);

      // Flatten the multimap into the String[]-valued map that MultiMapSolrParams is built from
      Map<String, String[]> tmp = new HashMap();
      for (Map.Entry<String, Collection<String>> entry : cellParams.asMap().entrySet()) {
        tmp.put(entry.getKey(), entry.getValue().toArray(new String[entry.getValue().size()]));
      }
      this.solrParams = new MultiMapSolrParams(tmp);
      validateArguments();
    }
  /**
   * Gets the content and defers to registered viewers to generate the markup.
   *
   * <p>The response is assembled from a header JSP, a body and a footer JSP. The body is either
   * a hex view of the first {@code BUFFER_LENGTH} bytes, the markup produced by the viewer
   * registered (as a servlet context init parameter) for the content's MIME type, or a
   * "no viewer" page. When the content type is unknown or generic it is detected from the
   * content and file name. Any failure forwards to the "/message" page of the "/nifi" context.
   *
   * @param request servlet request
   * @param response servlet response
   * @throws ServletException if a servlet-specific error occurs
   * @throws IOException if an I/O error occurs
   */
  @Override
  protected void doGet(final HttpServletRequest request, final HttpServletResponse response)
      throws ServletException, IOException {
    // specify the charset in a response header
    response.addHeader("Content-Type", "text/html; charset=UTF-8");

    // get the content access
    final ServletContext servletContext = request.getServletContext();
    final ContentAccess contentAccess =
        (ContentAccess) servletContext.getAttribute("nifi-content-access");

    final ContentRequestContext contentRequest = getContentRequest(request);
    if (contentRequest.getDataUri() == null) {
      forwardToMessagePage(request, response, "Error", "The data reference must be specified.");
      return;
    }

    // get the content
    final DownloadableContent downloadableContent;
    try {
      downloadableContent = contentAccess.getContent(contentRequest);
    } catch (final ResourceNotFoundException rnfe) {
      forwardToMessagePage(request, response, "Error", "Unable to find the specified content");
      return;
    } catch (final AccessDeniedException ade) {
      forwardToMessagePage(
          request,
          response,
          "Access Denied",
          "Unable to approve access to the specified content: " + ade.getMessage());
      return;
    } catch (final Exception e) {
      forwardToMessagePage(
          request, response, "Error", "An unexpected error has occurred: " + e.getMessage());
      return;
    }

    // determine how we want to view the data
    String mode = request.getParameter("mode");

    // if the name isn't set, use original
    if (mode == null) {
      mode = DisplayMode.Original.name();
    }

    // determine the display mode
    final DisplayMode displayMode;
    try {
      displayMode = DisplayMode.valueOf(mode);
    } catch (final IllegalArgumentException iae) {
      forwardToMessagePage(request, response, "Error", "Invalid display mode: " + mode);
      return;
    }

    // buffer the content to support resetting in case we need to detect the content type or char
    // encoding
    try (final BufferedInputStream bis =
        new BufferedInputStream(downloadableContent.getContent())) {
      final String mimeType;

      // when standalone and we don't know the type is null as we were able to directly access the
      // content bypassing the rest endpoint,
      // when clustered and we don't know the type set to octet stream since the content was
      // retrieved from the node's rest endpoint
      if (downloadableContent.getType() == null
          || downloadableContent.getType().equals(MediaType.OCTET_STREAM.toString())) {
        // attempt to detect the content stream if we don't know what it is
        final DefaultDetector detector = new DefaultDetector();

        // create the stream for tika to process, buffered to support resetting
        final TikaInputStream tikaStream = TikaInputStream.get(bis);

        // provide a hint based on the filename
        final Metadata metadata = new Metadata();
        metadata.set(Metadata.RESOURCE_NAME_KEY, downloadableContent.getFilename());

        // Get mime type
        final MediaType mediatype = detector.detect(tikaStream, metadata);
        mimeType = mediatype.toString();
      } else {
        mimeType = downloadableContent.getType();
      }

      // add attributes needed for the header
      request.setAttribute("filename", downloadableContent.getFilename());
      request.setAttribute("contentType", mimeType);

      // generate the header
      request.getRequestDispatcher("/WEB-INF/jsp/header.jsp").include(request, response);

      // remove the attributes needed for the header
      request.removeAttribute("filename");
      request.removeAttribute("contentType");

      // generate the markup for the content based on the display mode
      if (DisplayMode.Hex.equals(displayMode)) {
        final byte[] buffer = new byte[BUFFER_LENGTH];
        final int read = StreamUtils.fillBuffer(bis, buffer, false);

        // trim the byte array if necessary
        byte[] bytes = buffer;
        if (read != buffer.length) {
          bytes = new byte[read];
          System.arraycopy(buffer, 0, bytes, 0, read);
        }

        // convert bytes into the base 64 bytes
        final String base64 = Base64.encodeBase64String(bytes);

        // defer to the jsp
        request.setAttribute("content", base64);
        request.getRequestDispatcher("/WEB-INF/jsp/hexview.jsp").include(request, response);
      } else {
        // lookup a viewer for the content
        final String contentViewerUri = servletContext.getInitParameter(mimeType);

        // handle no viewer for content type
        if (contentViewerUri == null) {
          request.getRequestDispatcher("/WEB-INF/jsp/no-viewer.jsp").include(request, response);
        } else {
          // create a request attribute for accessing the content
          request.setAttribute(
              ViewableContent.CONTENT_REQUEST_ATTRIBUTE,
              new ViewableContent() {
                @Override
                public InputStream getContentStream() {
                  return bis;
                }

                @Override
                public String getContent() throws IOException {
                  // detect the charset
                  final CharsetDetector detector = new CharsetDetector();
                  detector.setText(bis);
                  detector.enableInputFilter(true);
                  final CharsetMatch match = detector.detect();

                  // ensure we were able to detect the charset
                  if (match == null) {
                    throw new IOException("Unable to detect character encoding.");
                  }

                  // convert the stream using the detected charset
                  return IOUtils.toString(bis, match.getName());
                }

                @Override
                public ViewableContent.DisplayMode getDisplayMode() {
                  return displayMode;
                }

                @Override
                public String getFileName() {
                  return downloadableContent.getFilename();
                }

                @Override
                public String getContentType() {
                  return mimeType;
                }
              });

          try {
            // generate the content
            final ServletContext viewerContext = servletContext.getContext(contentViewerUri);
            viewerContext.getRequestDispatcher("/view-content").include(request, response);
          } catch (final Exception e) {
            String message = e.getMessage() != null ? e.getMessage() : e.toString();
            message = "Unable to generate view of data: " + message;

            // log the error
            logger.error(message);
            if (logger.isDebugEnabled()) {
              logger.error(StringUtils.EMPTY, e);
            }

            forwardToMessagePage(request, response, "Error", message);
            return;
          }

          // remove the request attribute
          request.removeAttribute(ViewableContent.CONTENT_REQUEST_ATTRIBUTE);
        }
      }

      // generate footer
      request.getRequestDispatcher("/WEB-INF/jsp/footer.jsp").include(request, response);
    }
  }

  /**
   * Populates the "title" and "messages" request attributes and forwards the request to the
   * "/message" page of the "/nifi" context.
   */
  private void forwardToMessagePage(
      final HttpServletRequest request,
      final HttpServletResponse response,
      final String title,
      final String message)
      throws ServletException, IOException {
    request.setAttribute("title", title);
    request.setAttribute("messages", message);

    final ServletContext viewerContext = request.getServletContext().getContext("/nifi");
    viewerContext.getRequestDispatcher("/message").forward(request, response);
  }