@Override
  public void execute(Tuple tuple) {

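    // extract the fetched content, its URL and the associated metadata from the tuple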
    byte[] content = tuple.getBinaryByField("content");
    String url = tuple.getStringByField("url");
    Metadata metadata = (Metadata) tuple.getValueByField("metadata");

    // check that the content type, as reported in the HTTP headers,
    // indicates HTML
    boolean CT_OK = false;
    String httpCT = metadata.getFirstValue(HttpHeaders.CONTENT_TYPE);
    if (StringUtils.isNotBlank(httpCT)) {
      if (httpCT.toLowerCase().contains("html")) {
        CT_OK = true;
      }
    } else {
      // simply ignore cases where the content type has not been set
      // TODO sniff content with Tika?
      CT_OK = true;
    }

    if (!CT_OK) {
      String errorMessage = "Exception content-type " + httpCT + " for " + url;
      RuntimeException e = new RuntimeException(errorMessage);
      handleException(url, e, metadata, tuple, "content-type checking", errorMessage);
      return;
    }

    LOG.info("Parsing : starting {}", url);

    long start = System.currentTimeMillis();

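    // determine the charset to use for decoding the content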
    String charset = getContentCharset(content, metadata);

    // get the robots tags from the fetch metadata
    RobotsTags robotsTags = new RobotsTags(metadata);

    Map<String, List<String>> slinks;
    String text;
    DocumentFragment fragment;
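    // parse the HTML with JSoup; the DOM fragment is kept for the parse filters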
    try (ByteArrayInputStream bais = new ByteArrayInputStream(content)) {
      org.jsoup.nodes.Document jsoupDoc = Jsoup.parse(bais, charset, url);

      fragment = JSoupDOMBuilder.jsoup2HTML(jsoupDoc);

      // extracts the robots directives from the meta tags
      robotsTags.extractMetaTags(fragment);

      // store a normalised representation in metadata
      // so that the indexer is aware of it
      robotsTags.normaliseToMetadata(metadata);

      // do not extract the links if nofollow has been set
      // and we are in strict mode
      if (robotsTags.isNoFollow() && robots_noFollow_strict) {
        slinks = new HashMap<>(0);
      } else {
        Elements links = jsoupDoc.select("a[href]");
        slinks = new HashMap<>(links.size());
        for (Element link : links) {
          // abs:href tells JSoup to return fully qualified URLs for
          // relative links, e.g. /foo resolves to http://shopstyle.com/foo
          String targetURL = link.attr("abs:href");

          // nofollow
          boolean noFollow = "nofollow".equalsIgnoreCase(link.attr("rel"));
          // in strict mode, drop the link altogether
          if (noFollow && robots_noFollow_strict) {
            continue;
          }

          // the link itself is not marked as nofollow
          // but the whole page is
          if (!noFollow && robotsTags.isNoFollow()) {
            noFollow = true;
          }

          String anchor = link.text();
          if (StringUtils.isNotBlank(targetURL)) {
            // reuse any existing anchor list for the same target URL
            List<String> anchors = slinks.computeIfAbsent(targetURL, k -> new LinkedList<>());
            // track the anchor text only if the link can be followed
            if (!noFollow && StringUtils.isNotBlank(anchor)) {
              anchors.add(anchor);
            }
          }
        }
      }

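      // extract the text of the document body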
      text = jsoupDoc.body().text();

    } catch (Throwable e) {
      String errorMessage = "Exception while parsing " + url + ": " + e;
      handleException(url, e, metadata, tuple, "content parsing", errorMessage);
      return;
    }

    // store the identified charset in the metadata
    metadata.setValue("parse.Content-Encoding", charset);

    long duration = System.currentTimeMillis() - start;

    LOG.info("Parsed {} in {} msec", url, duration);

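    // convert the collected links and their anchors into Outlink instances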
    List<Outlink> outlinks = toOutlinks(url, metadata, slinks);

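    // the ParseResult holds the parse data for the parent URL and any subdocuments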
    ParseResult parse = new ParseResult();
    parse.setOutlinks(outlinks);

    // parse data of the parent URL
    ParseData parseData = parse.get(url);
    parseData.setMetadata(metadata);
    parseData.setText(text);
    parseData.setContent(content);

    // apply the parse filters if any
    try {
      parseFilters.filter(url, content, fragment, parse);
    } catch (RuntimeException e) {

      String errorMessage = "Exception while running parse filters on " + url + ": " + e;
      handleException(url, e, metadata, tuple, "content filtering", errorMessage);
      return;
    }

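    // emit the outlinks on the status stream as newly discovered URLs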
    if (emitOutlinks) {
      for (Outlink outlink : parse.getOutlinks()) {
        collector.emit(
            StatusStreamName,
            tuple,
            new Values(outlink.getTargetURL(), outlink.getMetadata(), Status.DISCOVERED));
      }
    }

    // emit each document/subdocument in the ParseResult object
    // there should be at least one ParseData item for the "parent" URL

    for (Map.Entry<String, ParseData> doc : parse) {
      ParseData parseDoc = doc.getValue();

      collector.emit(
          tuple,
          new Values(
              doc.getKey(), parseDoc.getContent(), parseDoc.getMetadata(), parseDoc.getText()));
    }

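    // acknowledge the tuple and update the success counter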
    collector.ack(tuple);
    eventCounter.scope("tuple_success").incr();
  }