/**
 * Emits a segment generator item for the URL currently held in the per-record state,
 * then resets that state.
 *
 * <p>The URL is dropped (with a counter bump) when its byte length is 4097 or more,
 * when {@code skipRecord} rejects it, when no fingerprint can be derived from it, or
 * when the emitted-URL filter already contains its fingerprint. Otherwise an item is
 * queued in {@code items}, and the queue is spilled once it reaches
 * {@code SPILL_THRESHOLD}.
 *
 * @param reporter receives every counter increment
 * @throws IOException declared for the calls made here (presumably the spill) — TODO confirm
 */
void emitLastRecord(Reporter reporter) throws IOException {

    // diagnostics are only tallied for records that have at least one flag set
    if (_flags != 0) {
      reporter.incrCounter(
          _domainStats == null
              ? Counters.EMITTING_URL_RECORD_WITH_NULL_DOMAINSTATS
              : Counters.EMITTING_URL_RECORD_WITH_DOMINSTATS,
          1);

      reporter.incrCounter(
          _crawlStatus == null
              ? Counters.EMITTED_RECORD_HAD_NULL_CRAWLSTATUS
              : Counters.EMITTED_RECORD_HAD_CRAWLSTATUS,
          1);
    }

    if (_contextURLBytes.getLength() < 4097) {
      GoogleURL parsedURL = new GoogleURL(_contextURLBytes.toString());
      boolean rejected = skipRecord(parsedURL, reporter);

      if (!rejected) {

        // query urls are allowed through, but counted
        if (parsedURL.has_query()) {
          reporter.incrCounter(Counters.LET_THROUGH_QUERY_URL, 1);
        }

        URLFPV2 fingerprint = URLUtils.getURLFPV2FromURLObject(parsedURL);

        if (fingerprint == null) {
          reporter.incrCounter(Counters.NULL_FP_FOR_URL, 1);
        } else if (_emittedURLSFilter.isPresent(fingerprint)) {
          reporter.incrCounter(Counters.SKIPPING_ALREADY_EMITTED_URL, 1);
        } else {
          // remember the fingerprint so the same url is not emitted again
          _emittedURLSFilter.add(fingerprint);
          _emittedURLSInFilter++;

          SegmentGeneratorItem generatorItem = new SegmentGeneratorItem();

          generatorItem.setDomainFP(fingerprint.getDomainHash());
          generatorItem.setRootDomainFP(fingerprint.getRootDomainHash());
          generatorItem.setUrlFP(fingerprint.getUrlHash());
          generatorItem.setUrl(parsedURL.getCanonicalURL());
          generatorItem.setPageRank(0);
          generatorItem.setModifiedStatus((byte) 0);

          items.add(generatorItem);

          if (items.size() >= SPILL_THRESHOLD) {
            spillItems(reporter);
          }
        }
      }
    } else {
      reporter.incrCounter(Counters.SKIPPING_INVALID_LENGTH_URL, 1);
    }

    // return the per-record state to its initial values
    _flags = 0;
    _crawlStatus = null;
    _contextURLBytes.clear();
    _blogURLSkipFlag.set(true);
  }
  /**
   * Round-trips link source data: tracks each test url twice, checks that the buffer
   * holds exactly one well-formed line per url, then re-imports that text into a fresh
   * buffer/filter pair and checks that the regenerated text equals the first version.
   *
   * @throws IOException if one of the helpers under test fails
   */
  @Test
  public void testSourceInputOutputWriters() throws IOException {
    _sourceInputsBuffer = new DataOutputBuffer(16348 * 4);
    _sourceInputsTrackingFilter = new URLFPBloomFilter(100000, NUM_HASH_FUNCTIONS, NUM_BITS);

    String sourceDomainURL = "http://sourcedomain.com/foo";
    URLFPV2 sourceFP = URLUtils.getURLFPV2FromCanonicalURL(sourceDomainURL);

    String[] urls = {"http://somedomain.com/foo", "http://someother.com/bar"};

    for (String url : urls) {
      URLFPV2 fp = URLUtils.getURLFPV2FromCanonicalURL(url);
      // double insert and validate actual single insertion
      trackPotentialLinkSource(fp, url, sourceFP);
      trackPotentialLinkSource(fp, url, sourceFP);
    }

    //  validate data ...
    TextBytes firstVersion = new TextBytes();
    firstVersion.set(_sourceInputsBuffer.getData(), 0, _sourceInputsBuffer.getLength());

    StringTokenizer tokenizer = new StringTokenizer(firstVersion.toString(), "\n");
    int itemIndex = 0;
    while (tokenizer.hasMoreTokens()) {
      String nextLine = tokenizer.nextToken();
      // fail with a readable message, not an ArrayIndexOutOfBoundsException,
      // when a duplicate insertion produced more lines than urls
      Assert.assertTrue("unexpected extra line: " + nextLine, itemIndex < urls.length);
      String[] splits = nextLine.split("\t");
      // validate fp
      URLFPV2 fp = URLUtils.getURLFPV2FromCanonicalURL(urls[itemIndex]);
      Assert.assertEquals(fp.getDomainHash(), Long.parseLong(splits[0]));
      // validate actual url ... (expected value first)
      Assert.assertEquals(urls[itemIndex], splits[1]);
      itemIndex++;
    }
    // without this the loop above passes vacuously when nothing was written
    Assert.assertEquals("each url must be written exactly once", urls.length, itemIndex);

    // reset output buffer ...
    _sourceInputsBuffer = new DataOutputBuffer(16348 * 4);
    // and source bloom filter ...
    _sourceInputsTrackingFilter = new URLFPBloomFilter(10000000, NUM_HASH_FUNCTIONS, NUM_BITS);
    importLinkSourceData(sourceFP, firstVersion);
    // second text should match first ..
    TextBytes secondVersion = new TextBytes();
    secondVersion.set(_sourceInputsBuffer.getData(), 0, _sourceInputsBuffer.getLength());
    Assert.assertEquals(firstVersion, secondVersion);
  }
    /**
     * Update the model from a raw (generated) tuple.
     *
     * <p>The url fingerprint and the record type are decoded from the tuple key. Only
     * crawl status and html link records are processed; any other type is ignored. The
     * {@link URLStateModel} for the fingerprint is created on first use and registered
     * in {@code fpToModelMap}. When a crawl status update yields a redirect record, a
     * synthetic crawl status tuple is built for the redirect's {@code source_url} and
     * fed back through this method.
     *
     * @param tuple key ({@code e0}) and payload ({@code e1}) of the record
     * @throws Exception propagated from key decoding or from the model update calls
     */
    void updateModelFromInputTuple(Pair<TextBytes, TextBytes> tuple) throws Exception {
      URLFPV2 fp = new URLFPV2();
      // get key ...
      fp.setRootDomainHash(
          CrawlDBKey.getLongComponentFromKey(
              tuple.e0, CrawlDBKey.ComponentId.ROOT_DOMAIN_HASH_COMPONENT_ID));
      fp.setDomainHash(
          CrawlDBKey.getLongComponentFromKey(
              tuple.e0, CrawlDBKey.ComponentId.DOMAIN_HASH_COMPONENT_ID));
      fp.setUrlHash(
          CrawlDBKey.getLongComponentFromKey(
              tuple.e0, CrawlDBKey.ComponentId.URL_HASH_COMPONENT_ID));

      long recordType =
          CrawlDBKey.getLongComponentFromKey(tuple.e0, CrawlDBKey.ComponentId.TYPE_COMPONENT_ID);

      boolean isCrawlStatus = recordType == CrawlDBKey.Type.KEY_TYPE_CRAWL_STATUS.ordinal();
      boolean isHtmlLink = recordType == CrawlDBKey.Type.KEY_TYPE_HTML_LINK.ordinal();

      if (!isCrawlStatus && !isHtmlLink) {
        // record types other than crawl status / html link do not affect the model
        return;
      }

      // update model given key ...
      URLStateModel urlModel = fpToModelMap.get(fp);
      if (urlModel == null) {
        urlModel = new URLStateModel();
        urlModel.fp = fp;
        fpToModelMap.put(fp, urlModel);
      }

      if (isCrawlStatus) {
        JsonObject redirectJSON = urlModel.updateModelGivenCrawlStatus(tuple.e1);

        if (redirectJSON != null) {
          URLFPV2 redirectFP =
              URLUtils.getURLFPV2FromURL(redirectJSON.get("source_url").getAsString());
          // Other code in this file null-checks the result of getURLFPV2FromURL;
          // without a fingerprint no key can be generated, so skip the redirect.
          if (redirectFP != null) {
            TextBytes key =
                CrawlDBKey.generateKey(
                    redirectFP,
                    CrawlDBKey.Type.KEY_TYPE_CRAWL_STATUS,
                    redirectJSON.get("attempt_time").getAsLong());
            Pair<TextBytes, TextBytes> redirectTuple =
                new Pair<TextBytes, TextBytes>(key, new TextBytes(redirectJSON.toString()));
            updateModelFromInputTuple(redirectTuple);
          }
        }
      } else {
        // only html link records remain at this point
        urlModel.updateModelGivenLinkRecord(tuple.e1);
      }
    }
    /**
     * Folds one link record into this model.
     *
     * <p>If {@code source_url} has not been set yet, it is taken from the record's
     * {@code "href"} member. The record's {@code "source_url"} member is then
     * fingerprinted; when it belongs to a root domain other than this model's own and
     * that root domain has no entry in {@code incoming} yet, the url is stored there
     * under its root domain hash.
     *
     * @param linkJSON the link record as JSON text
     */
    public void updateModelGivenLinkRecord(TextBytes linkJSON) {

      JsonObject linkRecord = new JsonParser().parse(linkJSON.toString()).getAsJsonObject();

      if (source_url == null) {
        source_url = linkRecord.get("href").getAsString();
      }

      String linkingURL = linkRecord.get("source_url").getAsString();
      URLFPV2 linkingFP = URLUtils.getURLFPV2FromURL(linkingURL);

      if (linkingFP == null) {
        // no fingerprint could be derived, nothing to record
        return;
      }

      // keep only the first url seen per foreign root domain
      if (linkingFP.getRootDomainHash() != fp.getRootDomainHash()
          && !incoming.containsKey(linkingFP.getRootDomainHash())) {
        incoming.put(linkingFP.getRootDomainHash(), linkingURL);
      }
    }