/**
 * Creates a publication from the submitted form and then renders the current feed.
 *
 * <p>Nothing is created unless the bound form carries both a location id and an interest id. The
 * submitted body is run through Jsoup before it is handed to {@code Publication.create}: the
 * {@code basicWithImages} whitelist is extended with {@code h1}/{@code h2}, and every anchor is
 * forced to carry {@code target="_blank"}.
 *
 * @return the feed for the locally selected interest and location
 */
@Dynamic("editor")
public static Result publish() {
  final Form<utils.Forms.PublicationBinder> boundForm =
      form(utils.Forms.PublicationBinder.class).bindFromRequest();
  final Long locationId = boundForm.get().location;
  final Long interestId = boundForm.get().interest;

  // Without both ids there is nothing to publish; just show the feed again.
  if (interestId == null || locationId == null) {
    return selectFeed(getLocalInterest(), getLocalLocation());
  }

  final User currentUser = Mupi.getLocalUser(session());
  final models.Profile author = currentUser.profile;

  final String rawBody = boundForm.get().body;
  final Whitelist allowedMarkup =
      Whitelist.basicWithImages()
          .addEnforcedAttribute("a", "target", "_blank")
          .addTags("h1", "h2");
  final String safeBody = Jsoup.clean(rawBody, allowedMarkup);

  Publication.create(
      author,
      models.Location.find.byId(locationId),
      models.Interest.find.byId(interestId),
      PubType.get(boundForm.get().pub_typ),
      safeBody);

  return selectFeed(getLocalInterest(), getLocalLocation());
}
/**
 * With the simpleText whitelist the div, p and a wrappers are stripped, while the b tag survives
 * without its id attribute.
 */
@Test
public void simpleBehaviourTest() {
  final String dirty =
      "<div><p class=foo><a href='http://evil.com'>Hello <b id=bar>there</b>!</a></div>";
  final String expected = "Hello <b>there</b>!";

  final String cleaned = Jsoup.clean(dirty, Whitelist.simpleText());

  assertEquals(expected, TextUtil.stripNewlines(cleaned));
}
/**
 * Gets the content of the article, and creates the final section for the generated report (html).
 *
 * <p>The page behind {@code aLink} is fetched with Jsoup, its {@code <title>} element and the
 * article element located by {@code parser} are extracted, and the article markup is cleaned with
 * {@code Whitelist.basic()} before being passed to {@code HtmlContentWriterUtil}.
 *
 * @param aLink URL of the article; {@code null} or empty yields an empty result
 * @param parser strategy that locates the main article element inside the fetched document
 * @return the generated HTML section, or an empty string when the link is missing, the page
 *     cannot be fetched, or the title / main article cannot be found
 */
public static String getArticleContent(String aLink, ArticleBodyParser parser) {
  if (Objects.isNull(aLink) || aLink.isEmpty()) {
    return "";
  }

  Document doc = null;
  try {
    doc =
        Jsoup.connect(aLink)
            .header("Accept-Encoding", "gzip, deflate")
            .userAgent(userAgent)
            .timeout(6000)
            .followRedirects(true)
            .maxBodySize(0)
            .get();
  } catch (IOException e) {
    // Pass the exception along so the cause (timeout, HTTP status, DNS, ...) is not lost.
    LOGGER.log(
        Level.WARNING, "Error connecting, while fetching the article with link " + aLink, e);
  }
  if (doc == null) {
    return "";
  }

  Element title = doc.getElementsByTag("title").first();
  Element mainArticle = parser.parseArticleFromDoc(doc);
  if (Objects.isNull(title) || Objects.isNull(mainArticle)) {
    LOGGER.warning("We could not fetch the title and main body for link: " + aLink);
    return "";
  }

  return HtmlContentWriterUtil.generateArticleHtml(
      title.html(), aLink, Jsoup.clean(mainArticle.html(), Whitelist.basic()));
}
/** Input that only uses a tag allowed by the simpleText whitelist comes back unchanged. */
@Test
public void simpleBehaviourTest2() {
  final String alreadyClean = "Hello <b>there</b>!";

  final String cleaned = Jsoup.clean(alreadyClean, Whitelist.simpleText());

  assertEquals("Hello <b>there</b>!", TextUtil.stripNewlines(cleaned));
}
public static String improveHtmlContent(String content, String baseUri) { content = ADS_PATTERN.matcher(content).replaceAll(""); if (content != null) { // remove some ads content = ADS_PATTERN.matcher(content).replaceAll(""); // remove lazy loading images stuff content = LAZY_LOADING_PATTERN.matcher(content).replaceAll(" src=$1"); // clean by JSoup content = Jsoup.clean(content, baseUri, JSOUP_WHITELIST); // remove empty or bad images content = EMPTY_IMAGE_PATTERN.matcher(content).replaceAll(""); content = BAD_IMAGE_PATTERN.matcher(content).replaceAll(""); // remove empty links content = EMPTY_LINK_PATTERN.matcher(content).replaceAll(""); // fix non http image paths content = NON_HTTP_IMAGE_PATTERN.matcher(content).replaceAll(" $1=$2http://"); // remove trailing BR & too much BR content = START_BR_PATTERN.matcher(content).replaceAll(""); // TODO: quick (and dirty) fix for #11. I HAVE TO FIND ANOTHER SOLUTION !! // content = END_BR_PATTERN.matcher(content).replaceAll(""); // TODO: end of fix for #11 content = MULTIPLE_BR_PATTERN.matcher(content).replaceAll("<br><br>"); } return content; }
/**
 * Cleaning a plain title that contains a non-ASCII character and then unescaping HTML entities
 * must give back the original title.
 */
@Test
public void testClensing() {
  final String cleaned = Jsoup.clean("Jeppistä jee", Whitelist.simpleText());
  final String pageTitle = StringEscapeUtils.unescapeHtml4(cleaned);

  assertTrue("Jeppistä jee".equals(pageTitle));
}
/**
 * With the relaxed whitelist the heading is kept and the bare td cells come out wrapped in
 * table/tbody/tr.
 */
@Test
public void testRelaxed() {
  final String dirty = "<h1>Head</h1><td>One<td>Two</td>";
  final String expected =
      "<h1>Head</h1><table><tbody><tr><td>One</td><td>Two</td></tr></tbody></table>";

  final String cleaned = Jsoup.clean(dirty, Whitelist.relaxed());

  assertEquals(expected, TextUtil.stripNewlines(cleaned));
}
/**
 * With basicWithImages the http image keeps its src and alt, while the ftp image loses its src
 * attribute and is left as a bare img tag.
 */
@Test
public void basicWithImagesTest() {
  final String dirty =
      "<div><p><img src='http://example.com/' alt=Image></p><p><img src='ftp://ftp.example.com'></p></div>";
  final String expected =
      "<p><img src=\"http://example.com/\" alt=\"Image\" /></p><p><img /></p>";

  final String cleaned = Jsoup.clean(dirty, Whitelist.basicWithImages());

  assertEquals(expected, TextUtil.stripNewlines(cleaned));
}
/**
 * Gets the latest comments with the specified fetch size.
 *
 * <p>The returned comments content is plain text: emotions are cleared and all HTML is stripped.
 * Each comment is additionally decorated with its article title and permalink and with the
 * commenter (including an avatar URL).
 *
 * @param fetchSize the specified fetch size
 * @return the latest comments, returns an empty list if not found
 * @throws ServiceException service exception
 */
public List<JSONObject> getLatestComments(final int fetchSize) throws ServiceException {
  final Query latestFirst =
      new Query()
          .addSort(Comment.COMMENT_CREATE_TIME, SortDirection.DESCENDING)
          .setCurrentPageNum(1)
          .setPageSize(fetchSize)
          .setPageCount(1);

  try {
    final JSONObject page = commentRepository.get(latestFirst);
    final List<JSONObject> comments =
        CollectionUtils.<JSONObject>jsonArrayToList(page.optJSONArray(Keys.RESULTS));

    for (final JSONObject comment : comments) {
      comment.put(Comment.COMMENT_CREATE_TIME, comment.optLong(Comment.COMMENT_CREATE_TIME));

      // Attach title and permalink of the article the comment belongs to.
      final JSONObject article =
          articleRepository.get(comment.optString(Comment.COMMENT_ON_ARTICLE_ID));
      final String articleTitle = Emotions.clear(article.optString(Article.ARTICLE_TITLE));
      comment.put(Comment.COMMENT_T_ARTICLE_TITLE, articleTitle);
      comment.put(
          Comment.COMMENT_T_ARTICLE_PERMALINK, article.optString(Article.ARTICLE_PERMALINK));

      final JSONObject commenter =
          userRepository.get(comment.optString(Comment.COMMENT_AUTHOR_ID));

      // Content of invalid users / invalid comments is replaced by the block label.
      final boolean commenterInvalid =
          UserExt.USER_STATUS_C_INVALID == commenter.optInt(UserExt.USER_STATUS);
      final boolean commentInvalid =
          Comment.COMMENT_STATUS_C_INVALID == comment.optInt(Comment.COMMENT_STATUS);
      if (commenterInvalid || commentInvalid) {
        comment.put(Comment.COMMENT_CONTENT, langPropsService.get("commentContentBlockLabel"));
      }

      // Comments on discussion articles are never exposed here.
      if (Article.ARTICLE_TYPE_C_DISCUSSION == article.optInt(Article.ARTICLE_TYPE)) {
        comment.put(Comment.COMMENT_CONTENT, "....");
      }

      // Reduce whatever content is left to plain text; fall back to a placeholder when blank.
      final String withoutEmotions = Emotions.clear(comment.optString(Comment.COMMENT_CONTENT));
      final String plainText = Jsoup.clean(withoutEmotions, Whitelist.none());
      comment.put(Comment.COMMENT_CONTENT, StringUtils.isBlank(plainText) ? "...." : plainText);

      final String commenterEmail = comment.optString(Comment.COMMENT_AUTHOR_EMAIL);
      commenter.put(UserExt.USER_AVATAR_URL, avatarQueryService.getAvatarURL(commenterEmail));
      comment.put(Comment.COMMENT_T_COMMENTER, commenter);
    }

    return comments;
  } catch (final RepositoryException e) {
    LOGGER.log(Level.ERROR, "Gets user comments failed", e);
    throw new ServiceException(e);
  }
}
/** Turns a HTML document back into a set of text, elements, annotations. */ public static UnrenderedBlip unrender(final String content) { final StringBuilder sb = new StringBuilder(); final Map<Integer, com.google.wave.api.Element> elements = Maps.newHashMap(); final Annotations annotations = new Annotations(); // Sanitized final String safe = Jsoup.clean(content, Whitelist.basic()); final Document doc = Jsoup.parse(safe); unrender(doc.body(), sb, elements, annotations); return new UnrenderedBlip(sb.toString(), elements, annotations); }
/**
 * Cleans the given markup with a custom whitelist that allows only the txt, p, table, tbody, tr,
 * td and img tags, plus the src attribute on img.
 *
 * @param rs the markup to clean
 * @return the cleaned markup
 */
protected String cleaner(String rs) {
  final Whitelist allowed =
      new Whitelist()
          .addTags("txt", "p")
          .addTags("table", "tbody", "tr", "td")
          .addTags("img")
          .addAttributes("img", "src");
  return Jsoup.clean(rs, allowed);
}
/**
 * With the basic whitelist the javascript: href is dropped, the upper-case anchor is normalised
 * and kept, both anchors receive rel="nofollow", and the wrapping div disappears.
 */
@Test
public void basicBehaviourTest() {
  final String dirty =
      "<div><p><a href='javascript:sendAllMoney()'>Dodgy</a> <A HREF='HTTP://nice.com'>Nice</p><blockquote>Hello</blockquote>";
  final String expected =
      "<p><a rel=\"nofollow\">Dodgy</a> <a href=\"http://nice.com\" rel=\"nofollow\">Nice</a></p><blockquote>Hello</blockquote>";

  final String cleaned = Jsoup.clean(dirty, Whitelist.basic());

  assertEquals(expected, TextUtil.stripNewlines(cleaned));
}
public static Pair<String, String> lyrics(String link) { try { // Get the lyrics final Document doc = Jsoup.connect(link).get(); // Get the title final String safeTitle = Jsoup.clean(doc.title().replaceAll(" Lyrics \\| MetroLyrics", ""), Whitelist.basic()); // Get the Lyrics final Elements rawLyrics = doc.select("#lyrics-body-text"); final String safeLyrics = Jsoup.clean(rawLyrics.toString(), Whitelist.basic()); return Pair.of(safeTitle, safeLyrics); } catch (IOException e) { // There's been some error, so return null return null; } }
private String transformResponse(final String xslt, final String response) { if (xslt == null || "".equals(xslt)) { // if not found, return as is return response; } ByteArrayInputStream respInStream = null; ByteArrayInputStream xsltInStream = null; Writer outWriter = null; try { final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); factory.setValidating(false); factory.setNamespaceAware(true); final DocumentBuilder builder = factory.newDocumentBuilder(); respInStream = new ByteArrayInputStream(response.getBytes("UTF-8")); final Document document = builder.parse(respInStream); xsltInStream = new ByteArrayInputStream(xslt.getBytes()); final StreamSource stylesource = new StreamSource(xsltInStream); final String transformedResponse = getFormatedJSONString(document, stylesource); if (transformedResponse == null || transformedResponse.isEmpty()) { log.info("got empty result from transform with:", xslt, " - Response:", response); return response; } return transformedResponse; } catch (Exception e) { log.error(e, "Error transforming GFI response: ", response, "- with XSLT:", xslt); } finally { if (respInStream != null) { try { respInStream.close(); } catch (Exception ignored) { } } if (xsltInStream != null) { try { xsltInStream.close(); } catch (Exception ignored) { } } if (outWriter != null) { try { outWriter.close(); } catch (Exception ignored) { } } } // Sanitize response return Jsoup.clean(response, Whitelist.relaxed()); }
@RequestMapping(value = "getrecordslowmediumhigh", method = RequestMethod.GET) public ResponseEntity<?> getRecordslowMediumHigh( HttpServletRequest request, HttpServletResponse response) // SessionStatus status) { String simpleScenario = request.getParameter("simpleScenario"); String safesimpleScenario = Jsoup.clean(simpleScenario, Whitelist.basic()); String varR = request.getParameter("varR"); String safevarR = Jsoup.clean(varR, Whitelist.basic()); String season = request.getParameter("season"); String safeseason = Jsoup.clean(season, Whitelist.basic()); String year = request.getParameter("yearSimple"); String safeyear = Jsoup.clean(year, Whitelist.basic()); // PointModel model = ClimateRecordHandler.FindClimateRecordsHighMediumLow(safesimpleScenario, // safeyear, safevarR, safeseason); List<ClimateRecord> res = climateRecordService.findPointModellowMediumHigh(simpleScenario, safeyear, varR, season); ClimateRecordHandler ch = new ClimateRecordHandler(); PointModel model = null; if (res.size() > 0) { model = ch.FindClimateRecordsHighMediumLow(simpleScenario, safeyear, varR, season, res); } if (model != null) { // status.setComplete(); return new ResponseEntity<PointModel>(model, HttpStatus.OK); } return new ResponseEntity<String>( "Nothing was found for selected values", HttpStatus.BAD_REQUEST); }
/** * get the content in the html without tags * * @param url * @return * @throws IOException */ public String getHTMLContent(String url) throws IOException { try { Document doc = Jsoup.connect(url).get(); // Remove html white spaces doc.select(":containsOwn(\u00a0)").remove(); // Remove remaining HTML code String content = doc.text(); content = Jsoup.clean(content, Whitelist.relaxed()); return content; } catch (SocketTimeoutException e) { System.out.println("Time out exception, url: " + url); return ""; } }
/** * Strips any potential XSS threats out of the value * * @param value * @return */ public static String stripXSS(String value) { LOG.debug("Value before stripping: " + value); if (value != null) { // Use the ESAPI library to avoid encoded attacks. value = ESAPI.encoder().canonicalize(value); // Avoid null characters value = value.replaceAll("\0", ""); // Clean out HTML value = Jsoup.clean(value, Whitelist.none()); } LOG.debug("Value after stripping: " + value); return value; }
/**
 * Adds a comment to a publication and then renders the current feed.
 *
 * <p>Line breaks in the body are turned into {@code <br/>}, the text is passed through
 * {@code textWithLinks}, and the result is cleaned so that only {@code br} and {@code a} (with
 * {@code href} / {@code target}) remain. The e-mail addresses returned for the publication are
 * printed to standard output, with the commenter's own address prefixed by "Commenter: ".
 *
 * @param body raw comment text
 * @param id id of the publication being commented on
 * @return the feed for the locally selected interest and location
 */
@Restrict(Mupi.USER_ROLE)
public static Result comment(String body, Long id) {
  final User currentUser = Mupi.getLocalUser(session());
  final models.Profile profile = currentUser.profile;
  final models.Publication publication = models.Publication.find.byId(id);

  final String linkedBody = textWithLinks(body.replaceAll("(\r\n|\n)", " <br/> "));
  final Whitelist allowedMarkup =
      Whitelist.none().addTags("br", "a").addAttributes("a", "href", "target");
  final String safeBody = Jsoup.clean(linkedBody, allowedMarkup);

  if (publication != null) {
    PubComment.create(publication, profile, safeBody);
  }

  // NOTE(review): publication may still be null here; confirm emailsFromPublication handles it.
  final List<UserEmail> recipients = models.Profile.emailsFromPublication(publication);
  for (UserEmail recipient : recipients) {
    if (currentUser.getEmail().equalsIgnoreCase(recipient.getEmail())) {
      System.out.println("Commenter: " + recipient.getEmail());
    } else {
      System.out.println(recipient.getEmail());
    }
  }

  return selectFeed(getLocalInterest(), getLocalLocation());
}
/** When a base URI is supplied, a relative href is resolved against it and kept. */
@Test
public void resolvesRelativeLinks() {
  final String relativeLink = "<a href='/foo'>Link</a>";
  final String baseUri = "http://example.com/";

  final String cleaned = Jsoup.clean(relativeLink, baseUri, Whitelist.basic());

  assertEquals("<a href=\"http://example.com/foo\" rel=\"nofollow\">Link</a>", cleaned);
}
/** A fragment-only href survives the relaxed whitelist; tag and attribute come out lower-case. */
@Test
public void testAllowsValidAnchors() {
  final String anchor = "<A HREF=\"#myanchor\">anchored</A>";

  final String cleaned = Jsoup.clean(anchor, Whitelist.relaxed());

  assertEquals("<a href=\"#myanchor\">anchored</a>", cleaned);
}
/** A javascript: href is removed by the relaxed whitelist; the anchor and its text remain. */
@Test
public void testCleanJavascriptHref() {
  final String scriptedLink =
      "<A HREF=\"javascript:document.location='http://www.google.com/'\">XSS</A>";

  final String cleaned = Jsoup.clean(scriptedLink, Whitelist.relaxed());

  assertEquals("<a>XSS</a>", cleaned);
}
/** A javascript: image source is removed by the relaxed whitelist, leaving a bare img tag. */
@Test
public void testDropImageScript() {
  final String scriptedImage = "<IMG SRC=\"javascript:alert('XSS')\">";

  final String cleaned = Jsoup.clean(scriptedImage, Whitelist.relaxed());

  assertEquals("<img />", cleaned);
}
/** Script elements are removed entirely by the relaxed whitelist, including their content. */
@Test
public void testDropScript() {
  final String scripts = "<SCRIPT SRC=//ha.ckers.org/.j><SCRIPT>alert(/XSS/.source)</SCRIPT>";

  final String cleaned = Jsoup.clean(scripts, Whitelist.relaxed());

  assertEquals("", cleaned);
}
/** A leading XML processing instruction is dropped; the paragraph after it is kept. */
@Test
public void testDropXmlProc() {
  final String withProcessingInstruction = "<?import namespace=\"xss\"><p>Hello</p>";

  final String cleaned = Jsoup.clean(withProcessingInstruction, Whitelist.relaxed());

  assertEquals("<p>Hello</p>", cleaned);
}
/** HTML comments are removed by the relaxed whitelist; the surrounding paragraph is kept. */
@Test
public void testDropComments() {
  final String withComment = "<p>Hello<!-- no --></p>";

  final String cleaned = Jsoup.clean(withComment, Whitelist.relaxed());

  assertEquals("<p>Hello</p>", cleaned);
}
/** With absolute URLs switched off on the whitelist, a relative href is kept as written. */
@Test
public void allowsRelativeLinksIfConfiguredThusly() {
  final String relativeLink = "<a href='/foo'>Link</a>";
  final Whitelist relativeFriendly = Whitelist.basic().setUseAbsoluteURLs(false);

  final String cleaned = Jsoup.clean(relativeLink, relativeFriendly);

  assertEquals("<a href=\"/foo\" rel=\"nofollow\">Link</a>", cleaned);
}
/** Without a base URI a relative href cannot be resolved and is removed from the anchor. */
@Test
public void dropsUnresolvableRelativeLinks() {
  final String relativeLink = "<a href='/foo'>Link</a>";

  final String cleaned = Jsoup.clean(relativeLink, Whitelist.basic());

  assertEquals("<a rel=\"nofollow\">Link</a>", cleaned);
}
/**
 * Gets article preview content.
 *
 * <p>Renders the response with a json object, for example,
 *
 * <pre>
 * {
 *     "html": ""
 * }
 * </pre>
 *
 * <p>The preview is the article content converted through {@code Emotions} and {@code Markdowns},
 * stripped of all HTML and cut to 150 characters (with {@code " ...."} appended when cut). A
 * label is rendered instead when the author or the article is marked invalid, or when the
 * article is a discussion and the current user is neither its author nor one of the user names
 * found in its content.
 *
 * @param request the specified http servlet request
 * @param response the specified http servlet response
 * @param context the specified http request context
 * @param articleId the specified article id
 * @throws Exception exception
 */
@RequestProcessing(value = "/article/{articleId}/preview", method = HTTPRequestMethod.GET)
@Before(adviceClass = StopwatchStartAdvice.class)
@After(adviceClass = StopwatchEndAdvice.class)
public void getArticlePreviewContent(
    final HttpServletRequest request,
    final HttpServletResponse response,
    final HTTPRequestContext context,
    final String articleId)
    throws Exception {
  final JSONRenderer renderer = new JSONRenderer();
  context.setRenderer(renderer);

  final JSONObject result = Results.trueResult();
  renderer.setJSONObject(result);
  result.put("html", "");

  final JSONObject article = articleQueryService.getArticle(articleId);
  if (null == article) {
    result.put(Keys.STATUS_CODE, false);
    return;
  }

  final int length = 150;
  String content = article.optString(Article.ARTICLE_CONTENT);

  final String authorId = article.optString(Article.ARTICLE_AUTHOR_ID);
  final JSONObject author = userQueryService.getUser(authorId);

  if ((null != author && UserExt.USER_STATUS_C_INVALID == author.optInt(UserExt.USER_STATUS))
      || Article.ARTICLE_STATUS_C_INVALID == article.optInt(Article.ARTICLE_STATUS)) {
    result.put("html", langPropsService.get("articleContentBlockLabel"));
    return;
  }

  final Set<String> userNames = userQueryService.getUserNames(content);
  final JSONObject currentUser = userQueryService.getCurrentUser(request);
  final String currentUserName =
      null == currentUser ? "" : currentUser.optString(User.USER_NAME);

  // The author was null-checked above, so it may be missing here as well: read the name
  // null-safely instead of dereferencing unconditionally.
  final String authorName = null == author ? "" : author.optString(User.USER_NAME);
  // A missing author must never match an anonymous viewer (both names would be "").
  final boolean viewerIsAuthor = null != author && authorName.equals(currentUserName);

  if (Article.ARTICLE_TYPE_C_DISCUSSION == article.optInt(Article.ARTICLE_TYPE)
      && !viewerIsAuthor) {
    boolean invited = false;
    for (final String userName : userNames) {
      if (userName.equals(currentUserName)) {
        invited = true;
        break;
      }
    }

    if (!invited) {
      String blockContent = langPropsService.get("articleDiscussionLabel");
      blockContent =
          blockContent.replace(
              "{user}",
              "<a href='"
                  + Latkes.getServePath()
                  + "/member/"
                  + authorName
                  + "'>"
                  + authorName
                  + "</a>");
      result.put("html", blockContent);
      return;
    }
  }

  content = Emotions.convert(content);
  content = Markdowns.toHTML(content);
  content = Jsoup.clean(content, Whitelist.none());
  if (content.length() >= length) {
    content = StringUtils.substring(content, 0, length) + " ....";
  }

  result.put("html", content);
}
/** An unknown tag is removed by the relaxed whitelist while the text inside it is kept. */
@Test
public void testDropsUnknownTags() {
  final String withCustomTag = "<p><custom foo=true>Test</custom></p>";

  final String cleaned = Jsoup.clean(withCustomTag, Whitelist.relaxed());

  assertEquals("<p>Test</p>", cleaned);
}
/**
 * An empty alt attribute is preserved by basicWithImages, while the empty src and the unknown
 * attribute are removed.
 */
@Test
public void testHandlesEmptyAttributes() {
  final String image = "<img alt=\"\" src= unknown=''>";

  final String cleaned = Jsoup.clean(image, Whitelist.basicWithImages());

  assertEquals("<img alt=\"\" />", cleaned);
}