/** ********************************** */ public List<ActionType> getActions( FeedSelectors feedSelectors, Document doc, String idPost, boolean json, boolean comments, String source) throws IOException, BadLocationException { List<ActionType> list = new ArrayList<ActionType>(); String selectorAuthor = null, selectorText = null, selectorDate = null; String expressionAuthor = null, expressionText = null, expressionDate = null; String author = null, text = null, id = null, timestamp = null; String patternDate = null; Matcher matcher; Pattern patron; // FileInputStream datos= new FileInputStream (ConDatos); /** ********** */ if (feedSelectors == null || feedSelectors.getSelectors() == null || feedSelectors.getSelectors().isEmpty()) { feedSelectors = dao.retrieve("default"); } if (feedSelectors == null || feedSelectors.getSelectors() == null || feedSelectors.getSelectors().isEmpty()) { return null; } Elements elmts = null; boolean commenters = false; for (FeedSelector feedSelector : feedSelectors.getSelectors()) { if ("commentsAuthor".equals(feedSelector.getType())) { selectorAuthor = feedSelector.getSelector(); if (!feedSelector.getValidator().equals("")) { expressionAuthor = feedSelector.getValidator(); } } if ("commentsText".equals(feedSelector.getType())) { selectorText = feedSelector.getSelector(); if (!feedSelector.getValidator().equals("")) { expressionText = feedSelector.getValidator(); } } if ("commentsDate".equals(feedSelector.getType())) { selectorDate = feedSelector.getSelector(); if (!feedSelector.getValidator().equals("")) { expressionDate = feedSelector.getValidator(); } } if ("comments".equals(feedSelector.getType())) { elmts = doc.select(feedSelector.getSelector()); commenters = true; } if (feedSelector.getFormat() != null && !feedSelector.getFormat().equals("")) { patternDate = feedSelector.getFormat(); } } newEntryComments = new PostType(); newEntryCommentsSolr = new Post(); if (commenters && elmts.size() > 0) { Integer cantCommnents = 0; for (Element comment : elmts) { Logger.getLogger(this.getClass().getName()) .log(Level.INFO, "Comentario: {0}", comment.html()); Boolean validComment = false; if (selectorAuthor != null) { for (Element element : comment.select(selectorAuthor)) { author = element.text(); if (expressionAuthor == null) { break; } else { if (author != null && !"".equals(author)) { patron = Pattern.compile(expressionAuthor); matcher = patron.matcher(author); if (matcher.find()) { author = author.replaceAll(matcher.group(1), ""); break; } } } } } Logger.getLogger(this.getClass().getName()) .log(Level.INFO, "SelectorText; {0}", selectorText); if (selectorText != null) { for (Element element : comment.select(selectorText)) { if (expressionText != null) { String html = element.html(); Logger.getLogger(this.getClass().getName()).log(Level.INFO, "Text HTML: {0}", html); if (html != null && !"".equals(html)) { patron = Pattern.compile("^[^<].*"); matcher = patron.matcher(html); if (matcher.find()) { text = element.text(); Logger.getLogger(this.getClass().getName()) .log(Level.INFO, "Matchea, Text; {0}", text); validComment = true; break; } else { validComment = false; } } else { validComment = false; } } else { text = element.text(); validComment = true; break; } } } if (selectorDate != null) { Logger.getLogger(this.getClass().getName()) .log(Level.INFO, "Selector Fecha: {0}", selectorDate); for (Element element : comment.select(selectorDate)) { timestamp = element.text(); if (expressionDate != null && timestamp != null && !"".equals(timestamp)) { Logger.getLogger(this.getClass().getName()) .log( Level.INFO, "Validador Fecha: {0}, Texto Fecha: {1}", new Object[] {expressionDate, timestamp}); patron = Pattern.compile(expressionDate); matcher = patron.matcher(timestamp); if (matcher.find()) { timestamp = timestamp.replaceAll(matcher.group(1), ""); break; } } else { break; } } } if (validComment && comments) { Logger.getLogger(this.getClass().getName()).log(Level.INFO, "El comentario es válido"); cantCommnents++; if (!json) { newEntryComments.setId(idPost + author + timestamp); newEntryComments.setSourcePost(idPost); newEntryComments.setDocType("comment"); newEntryComments.setFromUser(new User(id, author, null, null, null)); newEntryComments.setText(text); newEntryComments.setCreated(timestamp); newEntryComments.setSource(source); newsList.add(newEntryComments); } else { newEntryCommentsSolr.setId(idPost + author + timestamp); newEntryCommentsSolr.setSourcePost(idPost); newEntryCommentsSolr.setDocType("comment"); newEntryCommentsSolr.setFromUser(new User(id, author, null, null, null)); newEntryCommentsSolr.setText(text); newEntryCommentsSolr.setSource(source); if (patternDate != null) newEntryCommentsSolr.setCreated(getDate(timestamp, patternDate)); newsListSolr.add(newEntryCommentsSolr); } } // comments.add(new Comment(id , new User (id,author,null,null,null) , text , new // Date(timestamp))); } list.add(new ActionType("comments", cantCommnents)); } if (list.isEmpty()) { return new ArrayList(); } else { return new ArrayList(list); } }
public List<LinkType> getLinks(FeedSelectors feedSelectors, Document doc, String host) throws FileNotFoundException, IOException, BadLocationException { Boolean isAvatar = false; List<LinkType> list = new ArrayList<LinkType>(); // FileInputStream datos= new FileInputStream (ConDatos); /** ********** */ if (feedSelectors == null || feedSelectors.getSelectors() == null || feedSelectors.getSelectors().isEmpty()) { feedSelectors = dao.retrieve("default"); } if (feedSelectors == null || feedSelectors.getSelectors() == null || feedSelectors.getSelectors().isEmpty()) { return null; } Elements elmts; for (FeedSelector feedSelector : feedSelectors.getSelectors()) { if ("picture".equals(feedSelector.getType())) { elmts = doc.select(feedSelector.getSelector()); for (Element elmt : elmts) { String link = addHost(elmt.attr("src"), host); Logger.getLogger(this.getClass().getName()).log(Level.INFO, "PICTURE URL: {0}", link); if (checkImgSize(link)) { Logger.getLogger(this.getClass().getName()) .log(Level.INFO, "SIZE VALIDO, AGREGO PICTURE"); list.add(new LinkType("picture", link)); } else { Logger.getLogger(this.getClass().getName()) .log(Level.INFO, "SIZE NO VALIDO: {0}", link); } } } else if ("link".equals(feedSelector.getType())) { elmts = doc.select(feedSelector.getSelector()); for (Element elmt : elmts) { String link = addHost(elmt.attr("href"), host); list.add(new LinkType("link", link)); } } else if ("avatar".equals(feedSelector.getType())) { elmts = doc.select(feedSelector.getSelector()); for (Element elmt : elmts) { String link = addHost(elmt.attr("src"), host); list.add(new LinkType("avatar", link)); isAvatar = true; } } } Logger.getLogger(this.getClass().getName()).log(Level.INFO, "Antes logo"); if (!isAvatar && feedSelectors.getUrlLogo() != null) { Logger.getLogger(this.getClass().getName()) .log(Level.INFO, "Obtengo logo por defecto: {0}", feedSelectors.getUrlLogo()); list.add(new LinkType("avatar", feedSelectors.getUrlLogo())); } if (list.isEmpty()) { return null; } else { return list; } }