/**
 * Parses the detail page behind every URL stored as a key of the entity's
 * {@code sameAs} map and copies what it finds into the entity.
 *
 * <p>For each page that can be fetched and passes {@code validateSite}, the
 * following sections are read: working hours (stored as the schedule), price
 * range and payment options (stored as extra info), services and observations,
 * and the homepage link (stored as homepage and as a new {@code sameAs} entry).
 * The entity description is reset to an empty string after each parsed page.
 *
 * <p>Processing stops at the first page that cannot be fetched or is rejected
 * by {@code validateSite}.
 *
 * @param pEntity entity whose {@code sameAs} URLs are visited and which receives
 *     the extracted information
 */
public void parseIndividualEnt(PersistentEntity pEntity) {
  // Work on a snapshot of the keys: the homepage step calls putToSameAs, and
  // adding to the map while iterating its live entry set can raise
  // ConcurrentModificationException. Only the keys are needed here.
  Utf8[] sourceKeys = pEntity.getSameAs().keySet().toArray(new Utf8[0]);

  for (Utf8 sourceKey : sourceKeys) {
    // Reading individual URLs
    String url = ParserUtils.getUri(sourceKey.toString()).toASCIIString();
    LOGGER.info("Parsing entity from: " + url);

    Document doc = ParserUtils.connectGetUrl(url);
    // A page is unusable when it could not be fetched OR fails validation.
    // NOTE(review): remaining URLs are skipped on the first unusable page
    // (original behavior) - confirm that "continue" is not what is wanted.
    if (doc == null || !validateSite(doc)) {
      break;
    }

    doc.setBaseUri(VejaSaoPauloParser.DEFAULT_VSP_URL);

    // Every section of interest lives under this container.
    Elements infoBox = doc.select("div[class*=information-unwanted]");

    readWorkingHours(infoBox, pEntity);
    readPriceRange(doc, infoBox, pEntity);
    readPaymentInfo(doc, infoBox, pEntity);
    readServices(infoBox, pEntity);
    readHomepage(infoBox, pEntity);

    pEntity.setDescription(new Utf8(""));
  }
}

/** Stores the working-hours paragraphs of the page as the entity's schedule. */
private void readWorkingHours(Elements infoBox, PersistentEntity pEntity) {
  Elements hoursBox = infoBox.select("div[class*=working-hours]");
  if (hoursBox.isEmpty()) {
    return;
  }
  StringBuilder schedule = new StringBuilder();
  for (Element line : hoursBox.select("div[class*=hours]").select("p")) {
    schedule.append(line.text().replace("-", "_")).append(ParserProperties.INFO_SEP);
  }
  pEntity.setSchedule(new Utf8(schedule.toString()));
}

/** Adds "heading + DESC_SEP + price range" to the entity's extra info. */
private void readPriceRange(Document doc, Elements infoBox, PersistentEntity pEntity) {
  Elements priceRange =
      infoBox.select("div[class*=price]").select("p[class*=price-range]");
  if (priceRange.isEmpty()) {
    return;
  }
  StringBuilder price = new StringBuilder();
  price.append(sectionHeading(doc, "div[class*=price]") + ParserProperties.DESC_SEP);
  price.append(priceRange.text());
  pEntity.addToExtraInfo(new Utf8(price.toString()));
}

/** Adds "heading + DESC_SEP + non-blank payment lines" to the entity's extra info. */
private void readPaymentInfo(Document doc, Elements infoBox, PersistentEntity pEntity) {
  Elements paymentLines = infoBox.select("div[class*=payment]").select("p");
  if (paymentLines.isEmpty()) {
    return;
  }
  StringBuilder payment = new StringBuilder();
  payment.append(sectionHeading(doc, "div[class*=payment]") + ParserProperties.DESC_SEP);
  for (Element line : paymentLines) {
    String text = line.text().trim();
    if (!text.equals("")) {
      payment.append(text + ParserProperties.INFO_SEP);
    }
  }
  // The assembled payment text used to be built and then dropped; store it
  // the same way the price range is stored.
  pEntity.addToExtraInfo(new Utf8(payment.toString()));
}

/** Adds service paragraphs to the entity; "observation" paragraphs go to extra info. */
private void readServices(Elements infoBox, PersistentEntity pEntity) {
  Elements serviceLines =
      infoBox
          .select("div[class*=services]")
          .select("div[class*=information-services]")
          .select("p");
  for (Element line : serviceLines) {
    if (line.hasClass("observation")) {
      pEntity.addToExtraInfo(new Utf8("Observation :" + line.text()));
    } else if (!line.text().equals("")) {
      pEntity.addToServices(new Utf8(line.text()));
    }
  }
}

/** Stores the first website link as homepage and registers it in the sameAs map. */
private void readHomepage(Elements infoBox, PersistentEntity pEntity) {
  Elements websiteBox = infoBox.select("div[class*=website]");
  if (websiteBox.isEmpty()) {
    return;
  }
  EylloLink homeLink =
      ParserUtils.detectUrl(
          websiteBox.select("div[class*=information-website]").select("p").select("a").first());
  if (homeLink != null) {
    pEntity.setHomepage(new Utf8(homeLink.getLinkHref()));
    pEntity.putToSameAs(new Utf8(homeLink.getLinkHref()), new Utf8(homeLink.getLinkText()));
  }
}

/** Returns the text of the first h3 inside the matched container, or "" when there is none. */
private String sectionHeading(Document doc, String containerQuery) {
  // first() yields null on an empty selection; avoid the NPE on pages without a heading.
  Element heading = doc.select(containerQuery).select("h3").first();
  return heading == null ? "" : heading.text();
}