/**
   * Builds a Tika-compatible SAX content handler, which will be used to generate+capture the XHTML.
   *
   * <p>The returned handler is a chain: an image-link rewriting handler feeding a SAX
   * {@link TransformerHandler} that serialises the events as indented XML into {@code output}.
   * When {@code PARAM_BODY_CONTENTS_ONLY} is set, the chain is additionally wrapped in a
   * {@code BodyContentHandler}.
   *
   * @param output the writer that receives the serialised XML
   * @param context the rendering context, used to read the image-location and body-only parameters
   * @return the content handler to pass to the parser
   * @throws RenditionServiceException if no SAX transformer handler can be created
   */
  private ContentHandler buildContentHandler(Writer output, RenderingContext context) {
    // Create the main transformer
    SAXTransformerFactory factory = (SAXTransformerFactory) SAXTransformerFactory.newInstance();
    TransformerHandler handler;

    try {
      handler = factory.newTransformerHandler();
    } catch (TransformerConfigurationException e) {
      // Keep the original exception as the cause so its stack trace is not lost
      throw new RenditionServiceException("SAX Processing isn't available - " + e, e);
    }

    // Serialise as indented XML into the supplied writer
    handler.getTransformer().setOutputProperty(OutputKeys.INDENT, "yes");
    handler.setResult(new StreamResult(output));
    handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "xml");

    // Change the image links as they go past. Exactly one of dirName / imgPrefix is
    // populated, depending on whether the images share a folder with the HTML.
    String dirName = null, imgPrefix = null;
    if (context.getParamWithDefault(PARAM_IMAGES_SAME_FOLDER, false)) {
      imgPrefix = getImagesPrefixName(context);
    } else {
      dirName = getImagesDirectoryName(context);
    }
    ContentHandler contentHandler =
        new TikaImageRewritingContentHandler(handler, dirName, imgPrefix);

    // If required, wrap it to only return the body
    boolean bodyOnly = context.getParamWithDefault(PARAM_BODY_CONTENTS_ONLY, false);
    if (bodyOnly) {
      contentHandler = new BodyContentHandler(contentHandler);
    }

    // All done
    return contentHandler;
  }
    /**
     * Creates the image-extracting parser for one rendering run.
     *
     * <p>Registers the image media types this parser handles and, when images are to be stored
     * alongside the HTML, resolves the folder they should be written to.
     *
     * @param renderingContext the context of the rendition being generated
     */
    private TikaImageExtractingParser(RenderingContext renderingContext) {
      this.renderingContext = renderingContext;

      // The image subtypes we expect to be handed
      types = new HashSet<MediaType>();
      for (String subtype : new String[] {"bmp", "gif", "jpg", "jpeg", "png", "tiff"}) {
        types.add(MediaType.image(subtype));
      }

      // Only resolve a folder when images share a location with the HTML
      boolean sameFolder = renderingContext.getParamWithDefault(PARAM_IMAGES_SAME_FOLDER, false);
      if (!sameFolder) {
        return;
      }

      // The images go into the parent folder of the resolved rendition location
      imgFolder =
          resolveRenditionLocation(
                  renderingContext.getSourceNode(),
                  renderingContext.getDefinition(),
                  renderingContext.getDestinationNode())
              .getParentRef();
      if (logger.isDebugEnabled()) {
        logger.debug("Using imgFolder: " + imgFolder);
      }
    }
 /**
  * Works out the prefix to apply to the names of extracted images.
  *
  * @param context the rendering context holding the image-location parameter
  * @return the HTML base name followed by an underscore when images share a folder with the
  *     HTML, otherwise the empty string
  */
 private String getImagesPrefixName(RenderingContext context) {
   boolean sameFolder = context.getParamWithDefault(PARAM_IMAGES_SAME_FOLDER, false);

   // Shared folder: prefix with the HTML base name. Own folder: no prefix is needed.
   return sameFolder ? getHtmlBaseName(context) + "_" : "";
 }
  /**
   * Asks Tika to translate the contents into HTML, and writes the result out as the rendition
   * content.
   *
   * <p>The source content is parsed into an in-memory string through the handler built by
   * {@code buildContentHandler}; embedded images are handed to a
   * {@code TikaImageExtractingParser}. When {@code PARAM_BODY_CONTENTS_ONLY} is set, the XML
   * declaration, the XHTML namespace declarations on common elements and {@code &#13;}
   * entities are stripped from the result before it is written with mimetype
   * {@code text/html}.
   *
   * @param p the Tika parser to run over the source content
   * @param context the rendering context supplying the source content, parameters and writer
   * @throws RenditionServiceException if opening or parsing the source content fails
   */
  private void generateHTML(Parser p, RenderingContext context) {
    ContentReader contentReader = context.makeContentReader();

    // Setup things to parse with
    StringWriter sw = new StringWriter();
    ContentHandler handler = buildContentHandler(sw, context);

    // Tell Tika what we're dealing with
    Metadata metadata = new Metadata();
    metadata.set(Metadata.CONTENT_TYPE, contentReader.getMimetype());
    metadata.set(
        Metadata.RESOURCE_NAME_KEY,
        nodeService.getProperty(context.getSourceNode(), ContentModel.PROP_NAME).toString());

    // Our parse context needs to extract images
    ParseContext parseContext = new ParseContext();
    parseContext.set(Parser.class, new TikaImageExtractingParser(context));

    // Parse. Tika consumes the stream but leaves closing it to the caller, so we
    // close it ourselves whether or not the parse succeeds.
    java.io.InputStream sourceStream = null;
    try {
      sourceStream = contentReader.getContentInputStream();
      p.parse(sourceStream, handler, metadata, parseContext);
    } catch (Exception e) {
      throw new RenditionServiceException("Tika HTML Conversion Failed", e);
    } finally {
      if (sourceStream != null) {
        try {
          sourceStream.close();
        } catch (java.io.IOException ignored) {
          // Best-effort close: the parse outcome has already been decided, and a
          // failure to release the stream must not mask it.
        }
      }
    }

    // As a string
    String html = sw.toString();

    // If we're doing body-only, remove all the html namespaces
    //  that will otherwise clutter up the document
    boolean bodyOnly = context.getParamWithDefault(PARAM_BODY_CONTENTS_ONLY, false);
    if (bodyOnly) {
      html = html.replaceAll("<\\?xml.*?\\?>", "");
      html = html.replaceAll("<p xmlns=\"http://www.w3.org/1999/xhtml\"", "<p");
      // Java replacement strings reference groups with $1; "\\1" would emit a literal
      // "1" and turn every heading into <h1>.
      html = html.replaceAll("<h(\\d) xmlns=\"http://www.w3.org/1999/xhtml\"", "<h$1");
      html = html.replaceAll("<div xmlns=\"http://www.w3.org/1999/xhtml\"", "<div");
      html = html.replaceAll("<table xmlns=\"http://www.w3.org/1999/xhtml\"", "<table");
      html = html.replaceAll("&#13;", "");
    }

    // Save it
    ContentWriter contentWriter = context.makeContentWriter();
    contentWriter.setMimetype("text/html");
    contentWriter.putContent(html);
  }