/**
 * Reads the RSS feed at {@code url} and appends one tab-separated line per
 * accepted item to {@code documentTsvBw}.
 *
 * <p>Each line has six columns, in this order: registration time (the time the
 * item was processed), publication date, link, title, description HTML, and
 * description text with HTML stripped. Tabs, newlines, and carriage returns in
 * the title and both description columns are replaced by spaces so they cannot
 * break the row layout.
 *
 * <p>An item is skipped when its link is null/blank, its link is already in
 * {@code dupCheckLinkSet}, its title is null/blank, or its publication date is
 * null/empty. A null description is written as an empty string.
 *
 * @param url the feed location to read
 * @throws IOException if the feed cannot be parsed (the underlying
 *     {@link SAXException} is attached as the cause) or if writing fails
 */
private void tsvWrite(URL url) throws IOException {
    final String urlText = url.toString();
    logger.info("Process feed URL: " + urlText);

    RssFeed feed;
    try {
        feed = RssReader.read(url);
    } catch (SAXException e) {
        // Keep the parser exception as the cause so its details are not lost.
        throw new IOException("Fail in Sax Parsing " + urlText, e);
    }

    // Characters that would corrupt a TSV row if left inside a field.
    final String fieldBreakers = "[\\t\\n\\r]";

    List<RssItem> items = feed.getRssItems();
    // Method-local accumulator, never shared between threads.
    StringBuilder sb = new StringBuilder();
    for (RssItem item : items) {
        String regDate = (new Date()).toString();

        String link = item.getLink();
        if (link == null || link.trim().length() == 0) {
            logger.info("Link is null or length=0\tURL: " + urlText);
            continue;
        }
        // NOTE(review): this method only reads dupCheckLinkSet; presumably it is
        // populated elsewhere - confirm.
        if (dupCheckLinkSet.contains(link)) {
            continue;
        }

        String descriptionHtml = item.getDescription();
        if (descriptionHtml == null) {
            logger.info("Description is null\turl: " + urlText);
            descriptionHtml = "";
        }
        descriptionHtml = descriptionHtml.replaceAll(fieldBreakers, " ");
        // Stripping markup is followed by a second clean-up pass on the result.
        String descriptionText = HtmlUtil.stripHtml(descriptionHtml);
        descriptionText = descriptionText.replaceAll(fieldBreakers, " ");

        String title = item.getTitle();
        if (title == null || title.trim().length() == 0) {
            logger.info("Title is null or length=0\turl: " + urlText);
            continue;
        }
        title = title.replaceAll(fieldBreakers, " ");

        Date pubDateTemp = item.getPubDate();
        if (pubDateTemp == null) {
            logger.info("PubDate is null\turl: " + urlText);
            continue;
        }
        String pubDate = pubDateTemp.toString();
        if (pubDate.trim().length() == 0) {
            logger.info("PubDate length=0\turl: " + urlText);
            continue;
        }

        String line = String.format(
                "%s\t%s\t%s\t%s\t%s\t%s\n",
                regDate, pubDate, link, title, descriptionHtml, descriptionText);
        sb.append(line);
    }

    // Write the whole feed's rows in one critical section on the writer.
    synchronized (documentTsvBw) {
        documentTsvBw.write(sb.toString());
        documentTsvBw.flush();
    }
}