@Test public void testClone() { Document doc = Jsoup.parse("<div><p>One<p><span>Two</div>"); Element p = doc.select("p").get(1); Element clone = p.clone(); assertNull(clone.parent()); // should be orphaned assertEquals(0, clone.siblingIndex); assertEquals(1, p.siblingIndex); assertNotNull(p.parent()); clone.append("<span>Three"); assertEquals( "<p><span>Two</span><span>Three</span></p>", TextUtil.stripNewlines(clone.outerHtml())); assertEquals( "<div><p>One</p><p><span>Two</span></p></div>", TextUtil.stripNewlines(doc.body().html())); // not modified doc.body().appendChild(clone); // adopt assertNotNull(clone.parent()); assertEquals( "<div><p>One</p><p><span>Two</span></p></div><p><span>Two</span><span>Three</span></p>", TextUtil.stripNewlines(doc.body().html())); }
/**
 * Builds the document HTML with one row per vacancy.
 *
 * <p>The element whose {@code class} attribute is exactly {@code "vacancy template"} serves as
 * the template. A copy of it (without {@code style}, and with {@code class="vacancy"}) is filled
 * in for every vacancy and inserted immediately before the template. Rows matching
 * {@code tr[class=vacancy]} that are already in the document are removed first.
 *
 * @param vacancies vacancies to render, in output order
 * @return the HTML of the updated document
 * @throws IllegalStateException if the document cannot be loaded or contains no template element
 */
private String getUpdatedFileContent(List<Vacancy> vacancies) {
    final Document document;
    try {
        document = getDocument();
    } catch (IOException e) {
        // Previously the exception was only printed and the method then dereferenced a null
        // document; fail with the real cause attached instead.
        throw new IllegalStateException("Some exception occurred", e);
    }

    Element template = document.select("[class=vacancy template]").first();
    if (template == null) {
        throw new IllegalStateException(
            "No element with class \"vacancy template\" found in the document");
    }

    Element rowPrototype = template.clone();
    rowPrototype.removeAttr("style");
    rowPrototype.removeAttr("class");
    rowPrototype.addClass("vacancy");

    // Drop rows generated earlier; the template itself has a different class value and stays.
    document.select("tr[class=vacancy]").remove();

    for (Vacancy vacancy : vacancies) {
        Element row = rowPrototype.clone();
        row.select("[class=city]").first().text(vacancy.getCity());
        row.select("[class=companyName]").first().text(vacancy.getCompanyName());
        row.select("[class=salary]").first().text(vacancy.getSalary());

        Element titleLink = row.select("[class=title]").select("a[href]").first();
        titleLink.text(vacancy.getTitle());
        titleLink.attr("href", vacancy.getUrl());

        // Inserting before the template keeps the rows in list order above it.
        template.before(row.outerHtml());
    }
    return document.html();
}
/**
 * Verifies that {@code removeAttr} calls can be chained, and that removing attributes the element
 * does not have ("zero", "five") leaves it with no attributes in the end.
 */
@Test
public void testChainedRemoveAttributes() {
    Document parsed = Jsoup.parse("<a one two three four>Text</a>");
    Element anchor = parsed.select("a").first();

    anchor
        .removeAttr("zero")
        .removeAttr("one")
        .removeAttr("two")
        .removeAttr("three")
        .removeAttr("four")
        .removeAttr("five");

    String markup = anchor.outerHtml();
    assertEquals("<a>Text</a>", markup);
}
/**
 * Verifies boolean attribute handling: setting an attribute to {@code true} adds a value-less
 * boolean attribute, and setting one to {@code false} removes it even if it previously had a
 * string value.
 */
@Test
public void testAddBooleanAttribute() {
    Element element = new Element(Tag.valueOf("div"), "");

    element.attr("true", true);
    element.attr("false", "value");
    element.attr("false", false);

    assertTrue(element.hasAttr("true"));
    assertEquals("", element.attr("true"));

    List<Attribute> remaining = element.attributes().asList();
    assertEquals("There should be one attribute", 1, remaining.size());
    Attribute only = remaining.get(0);
    assertTrue("Attribute should be boolean", only instanceof BooleanAttribute);

    assertFalse(element.hasAttr("false"));
    assertEquals("<div true></div>", element.outerHtml());
}
/**
 * Writes every {@code <table-wrap>} element of a paper to its own HTML file.
 *
 * <p>The input is first converted with {@code ExtractFiles.convertHTML}. If that returns
 * {@code null}, nothing is written. Each table is stored as
 * {@code <file_dir>/PMC<pmc><id>.html}, where {@code id} is the element's {@code id} attribute
 * or, when the attribute is missing or empty, {@code "T"} followed by the 1-based position of
 * the table.
 *
 * @param file source file passed to {@code ExtractFiles.convertHTML}
 * @param file_dir directory that receives the per-table HTML files
 * @param paper_dir directory passed to {@code ExtractFiles.convertHTML}
 * @param pmc PMC number used in the conversion call and in the output file names
 * @throws IOException if the converted HTML cannot be read or a table file cannot be written
 */
public static void translateTables(File file, String file_dir, String paper_dir, Integer pmc)
    throws IOException {
    ExtractFiles ext = new ExtractFiles();
    File html = ext.convertHTML(file, paper_dir, pmc + "");
    if (html == null) {
        return;
    }

    Document doc = Jsoup.parse(html, "UTF-8");
    int count = 1;
    for (Element tableWrap : doc.getElementsByTag("table-wrap")) {
        String id = tableWrap.attr("id");
        // jsoup returns "" (not null) for a missing attribute, so test for empty as well;
        // otherwise every id-less table would be written to the same file.
        if (id == null || id.isEmpty()) {
            id = "T" + count;
        }
        File tab = new File(file_dir + File.separator + "PMC" + pmc + id + ".html");
        // Write as UTF-8 to match the charset the document was parsed with, and close the
        // writer even when the write fails. Fully qualified names avoid needing new imports.
        try (java.io.Writer w =
            new java.io.OutputStreamWriter(
                new java.io.FileOutputStream(tab), java.nio.charset.StandardCharsets.UTF_8)) {
            w.write(tableWrap.outerHtml());
        }
        count++;
    }
}
/**
 * Fetches the course page at {@code major.getUrl()} and copies details from it onto
 * {@code major}: length, month of entry, application link, academic requirements, school,
 * tuition fee and, from the linked handbook page, the course structure. Finishes by calling
 * {@code mark(major, true)}.
 *
 * @param major record to populate; its URL is used as the page to fetch
 * @throws Exception if fetching or processing either page fails
 */
public static void getDetails(MajorForCollection major) throws Exception {
    Document coursePage = Jsoup.connect(major.getUrl()).timeout(10000).followRedirects(true).get();

    // Basic facts table: duration and start date.
    Element basicTable = coursePage.select("table.course-page__table-basic").first();
    if (basicTable != null) {
        for (Element row : basicTable.select("tr")) {
            String rowText = row.text();
            // NOTE(review): the parsers receive the text of the whole table, not of the
            // matching row - confirm that this is intended.
            if (rowText.contains("Duration")) {
                major.setLength(getLength(basicTable.text()));
            } else if (rowText.contains("Start date")) {
                major.setMonthOfEntry(getMonthOfEntry(basicTable.text()));
            }
        }
    }

    // The "application fee" field stores the href of the first bordered button.
    Element applyButton = coursePage.select("a.btn.btn-bordered").first();
    if (applyButton != null) {
        major.setApplicationFee(applyButton.attr("href"));
    }

    Element entryRequirements = coursePage.select("#entry-requirements-2").first();
    if (entryRequirements != null) {
        major.setAcademicRequirements(entryRequirements.text());
    }

    // The last link in the course-page row points at the faculty site; known faculty URLs are
    // replaced by a display name, any other URL is kept as is.
    Element facultyLink = coursePage.select("div.course-page.row a").last();
    if (facultyLink != null) {
        major.setSchool(facultyLink.attr("href"));
        switch (major.getSchool()) {
            case "http://www.study.monash/media/links/faculty-websites/design-and-architecture":
                major.setSchool("Monash Art Design & Architecture");
                break;
            case "http://www.study.monash/media/links/faculty-websites/business-and-economics":
                major.setSchool("Monash Business School");
                break;
            case "http://www.study.monash/media/links/faculty-websites/arts":
                major.setSchool("Faculty of Arts, Monash University");
                break;
            case "http://www.study.monash/media/links/faculty-websites/science":
                major.setSchool("Faculty of Science");
                break;
            case "http://www.study.monash/media/links/faculty-websites/medicine":
                major.setSchool("Faculty of Medicine, Nursing and Health Sciences");
                break;
            case "http://www.study.monash/media/links/faculty-websites/education":
                major.setSchool("Faculty of Education - Faculty of Education");
                break;
            case "http://www.study.monash/media/links/faculty-websites/engineering":
                major.setSchool("Faculty of Engineering, Monash University");
                break;
            case "http://www.study.monash/media/links/faculty-websites/information-technology":
                major.setSchool("Faculty of Information Technology - Monash University");
                break;
            case "http://www.study.monash/media/links/faculty-websites/pharmacy":
                major.setSchool("Faculty of Pharmacy and Pharmaceutical Sciences");
                break;
            case "http://www.study.monash/media/links/faculty-websites/law":
                major.setSchool("Faculty of Law");
                break;
            default:
                break;
        }
    }

    Element feesHeading = coursePage.select("#fees").first();
    if (feesHeading != null) {
        major.setTuitionFee(feesHeading.nextElementSibling().text());
    }

    // NOTE(review): if no bordered button was found above and the caller did not pre-set the
    // field, getApplicationFee() may be null here and the comparison would throw - confirm.
    String handbookUrl = major.getApplicationFee();
    boolean excludedHandbook =
        handbookUrl.equals("http://www.monash.edu.au/pubs/handbooks/courses/A6015.html")
            || handbookUrl.equals("http://www.monash.edu.au/pubs/handbooks/courses/2276.html");
    if (!excludedHandbook) {
        Document handbook = WebUtils.getDocument(handbookUrl, WebUtils.METHOD_GET, 10 * 1000);
        Element requirementsBody =
            handbook
                .select("h2.black.pub_heading:containsOwn(Requirements) + div.pub_body_text")
                .first();
        if (requirementsBody != null) {
            major.setStructure(
                replaceSpecialCharacter(html2Str(requirementsBody.outerHtml())).trim());
            String structure = major.getStructure();
            if (structure.contains("Part A.")) {
                // Keep only the text from "Part A." onwards.
                major.setStructure(structure.substring(structure.indexOf("Part A.")));
            } else {
                // NOTE(review): the "Structure" section is consulted only when a
                // "Requirements" section exists but lacks "Part A." - confirm intended nesting.
                Element structureBody =
                    handbook
                        .select("h2.black.pub_heading:containsOwn(Structure) + div.pub_body_text")
                        .first();
                if (structureBody != null) {
                    major.setStructure(
                        replaceSpecialCharacter(html2Str(structureBody.text())).trim());
                }
            }
        }
    }

    mark(major, true);
}
/**
 * Copies list content into the {@code Th-Third*} child nodes of {@code sBThirdNode2}.
 *
 * <p>Each element of {@code listEles} is paired, in order, with the next {@code Th-Third*} node.
 * The first {@code <h3>}, {@code <p>} and {@code <a>} of the element populate the node's
 * {@code tile} child (title, description) and the tile's {@code cta} child (link text, url).
 * If a {@code Th-Third*} node is left over afterwards, it is populated from the first
 * {@code div.rc-persel} element of {@code doc}. Every missing piece is reported by appending
 * the corresponding {@code Constants} message to {@code sb}.
 *
 * @param listEles source list elements; {@code null} is reported and nothing is migrated
 * @param sBThirdNode2 parent of the {@code Th-Third*} nodes
 * @param locale locale passed to {@code FrameworkUtils.getLocaleReference}
 * @param urlMap URL map passed to {@code FrameworkUtils.getLocaleReference}
 * @param catType not used by this method
 * @param type not used by this method
 * @throws RepositoryException if reading or writing the repository nodes fails
 */
private void migrateThird2(
    Elements listEles,
    Node sBThirdNode2,
    String locale,
    Map<String, String> urlMap,
    String catType,
    String type)
    throws RepositoryException {
    if (listEles == null) {
        sb.append(Constants.LIST_COMPONENT_NOT_FOUND);
        return;
    }
    int eleSize = listEles.size();
    NodeIterator thirdNodes =
        sBThirdNode2.hasNode("Th-Third-1") ? sBThirdNode2.getNodes("Th-Third*") : null;
    if (thirdNodes == null) {
        sb.append(Constants.NO_LIST_NODES_FOUND);
        return;
    }
    int size = (int) thirdNodes.getSize();

    for (Element list : listEles) {
        if (!thirdNodes.hasNext()) {
            // More source elements than target nodes: reported once per surplus element.
            sb.append(
                Constants.MISMATCH_IN_LIST_ELEMENT
                    + eleSize
                    + Constants.LIST_NODES_COUNT
                    + size
                    + ".</li>");
            continue;
        }
        Node thirdNode = thirdNodes.nextNode();
        if (!thirdNode.hasNode("tile")) {
            sb.append(Constants.LIST_ITEM_NODE_NOT_FOUND);
            continue;
        }
        Node tileNode = thirdNode.getNode("tile");
        Element title = list.getElementsByTag("h3").first();
        Element description = list.getElementsByTag("p").first();
        Element anchor = list.getElementsByTag("a").first();

        if (title != null) {
            tileNode.setProperty("title", title.text());
        } else {
            sb.append(Constants.LIST_HEADING_COMPONENT_NOT_FOUND);
        }
        if (description != null) {
            tileNode.setProperty("description", description.text());
        } else {
            sb.append(Constants.LIST_INTRO_PARAGRAPH_ELEMENT_NOT_FOUND);
        }
        if (anchor != null) {
            // The title is already set above. The second, unguarded title.text() call that
            // used to follow here threw a NullPointerException when no <h3> was present.
            migrateThird2Cta(tileNode, anchor, locale, urlMap);
        } else {
            sb.append(Constants.LIST_ANCHOR_ELEMENTS_NOT_FOUND);
        }
    }

    if (!thirdNodes.hasNext()) {
        return;
    }

    // A target node is left over: populate it from the first "div.rc-persel" element.
    Element letUsHelp = doc.select("div.rc-persel").first();
    if (letUsHelp == null) {
        sb.append(
            Constants.MISMATCH_IN_LIST_NODES
                + eleSize
                + Constants.LIST_NODES_COUNT
                + size
                + ".</li>");
        return;
    }
    Node thirdNode = thirdNodes.nextNode();
    if (!thirdNode.hasNode("tile")) {
        sb.append(Constants.LIST_ITEM_NODE_NOT_FOUND);
        return;
    }

    boolean check = true;
    Node tileNode = thirdNode.getNode("tile");
    Element title = letUsHelp.getElementsByTag("h3").first();
    Element description = letUsHelp.getElementsByTag("p").first();
    // The first anchor is embedded in the description; the last remaining one feeds the CTA.
    Element dAnchor = letUsHelp.getElementsByTag("a").first();
    if (dAnchor != null) {
        // Guarded: removing unconditionally threw a NullPointerException when the block had
        // no anchor, so the "anchor not found" reporting below was never reached.
        dAnchor.remove();
    }
    Element anchor = letUsHelp.getElementsByTag("a").last();

    if (title != null) {
        tileNode.setProperty("title", title.text());
    } else {
        sb.append(Constants.LIST_HEADING_COMPONENT_NOT_FOUND);
    }

    if (description != null) {
        String anchorTag = "";
        if (dAnchor != null) {
            anchorTag = dAnchor.outerHtml();
        } else {
            // Remember that the missing anchor was reported so it is not reported twice.
            check = false;
            sb.append(Constants.LIST_ANCHOR_ELEMENTS_NOT_FOUND);
        }
        if (description.text().equals("")) {
            description = letUsHelp.getElementsByTag("p").last();
        }
        tileNode.setProperty("description", description.text() + "</br></br>" + anchorTag);
    } else {
        sb.append(Constants.LIST_INTRO_PARAGRAPH_ELEMENT_NOT_FOUND);
    }

    if (anchor != null) {
        migrateThird2Cta(tileNode, anchor, locale, urlMap);
    } else if (check) {
        sb.append(Constants.LIST_ANCHOR_ELEMENTS_NOT_FOUND);
    }
}

/**
 * Writes the link text and the localized URL of {@code anchor} to the {@code cta} child of
 * {@code tileNode}, appending a message to {@code sb} when the child node or the URL is missing.
 */
private void migrateThird2Cta(
    Node tileNode, Element anchor, String locale, Map<String, String> urlMap)
    throws RepositoryException {
    if (!tileNode.hasNode("cta")) {
        sb.append(Constants.LINK_DATA_NODE_FOR_LIST_NOT_FOUND);
        return;
    }
    Node ctaNode = tileNode.getNode("cta");
    ctaNode.setProperty("linktext", anchor.text());

    // Use the absolute URL when available, otherwise the raw href value.
    String aUrl = anchor.absUrl("href");
    if (StringUtils.isBlank(aUrl)) {
        aUrl = anchor.attr("href");
    }
    aUrl = FrameworkUtils.getLocaleReference(aUrl, urlMap, locale, sb);
    if (!aUrl.isEmpty()) {
        ctaNode.setProperty("url", aUrl);
    } else {
        sb.append(Constants.LINK_URL_NOT_FOUND_IN_LIST);
    }
}