コード例 #1
0
  /** ********************************** */
  /**
   * Extracts the comments contained in {@code doc} and reports how many of them were accepted.
   *
   * <p>The selectors come from {@code feedSelectors}; when it is {@code null} or carries no
   * selectors, the set stored under {@code "default"} is loaded through {@code dao}. The selector
   * types read here are {@code comments} (the elements that each hold one comment),
   * {@code commentsAuthor}, {@code commentsText} and {@code commentsDate}. If several selectors
   * share a type, the last one wins; the same applies to the date format, which is taken from
   * whichever selector declares a non-empty format last.
   *
   * <p>For every accepted comment a new entry is appended to {@code newsList} (when {@code json}
   * is {@code false}) or to {@code newsListSolr} (when {@code json} is {@code true}). The fields
   * {@code newEntryComments} / {@code newEntryCommentsSolr} are reset at the start of the call and
   * afterwards reference the most recently appended entry.
   *
   * @param feedSelectors selectors to apply; may be {@code null} to request the default set
   * @param doc parsed document to search
   * @param idPost id of the post the comments belong to; used as prefix of each comment id
   * @param json {@code true} to build {@code Post} entries, {@code false} to build
   *     {@code PostType} entries
   * @param comments when {@code false} no entry is stored and the reported count stays at zero
   * @param source value copied into every stored entry
   * @return {@code null} when no selectors are available even after the default lookup;
   *     otherwise a list that holds one {@code ActionType("comments", count)} when the
   *     {@code comments} selector matched at least one element, and is empty if it did not
   * @throws IOException declared for callers; NOTE(review): not raised directly by the visible
   *     statements, presumably it originates in a helper such as {@code getDate} - confirm
   * @throws BadLocationException declared for callers; same remark as for {@code IOException}
   */
  public List<ActionType> getActions(
      FeedSelectors feedSelectors,
      Document doc,
      String idPost,
      boolean json,
      boolean comments,
      String source)
      throws IOException, BadLocationException {

    final Logger logger = Logger.getLogger(this.getClass().getName());

    if (feedSelectors == null
        || feedSelectors.getSelectors() == null
        || feedSelectors.getSelectors().isEmpty()) {
      feedSelectors = dao.retrieve("default");
    }
    if (feedSelectors == null
        || feedSelectors.getSelectors() == null
        || feedSelectors.getSelectors().isEmpty()) {
      // Deliberately null rather than an empty list: existing callers may use it to tell
      // "no selectors configured" apart from "no comments found".
      return null;
    }

    String selectorAuthor = null;
    String selectorText = null;
    String selectorDate = null;
    String expressionAuthor = null;
    String expressionText = null;
    String expressionDate = null;
    String patternDate = null;
    Elements commentElements = null;

    for (FeedSelector feedSelector : feedSelectors.getSelectors()) {
      // A null validator is treated like an empty one instead of failing with an NPE.
      String validator = feedSelector.getValidator();
      boolean hasValidator = validator != null && !"".equals(validator);

      if ("commentsAuthor".equals(feedSelector.getType())) {
        selectorAuthor = feedSelector.getSelector();
        if (hasValidator) {
          expressionAuthor = validator;
        }
      }
      if ("commentsText".equals(feedSelector.getType())) {
        selectorText = feedSelector.getSelector();
        if (hasValidator) {
          expressionText = validator;
        }
      }
      if ("commentsDate".equals(feedSelector.getType())) {
        selectorDate = feedSelector.getSelector();
        if (hasValidator) {
          expressionDate = validator;
        }
      }
      if ("comments".equals(feedSelector.getType())) {
        commentElements = doc.select(feedSelector.getSelector());
      }
      if (feedSelector.getFormat() != null && !feedSelector.getFormat().equals("")) {
        patternDate = feedSelector.getFormat();
      }
    }

    newEntryComments = new PostType();
    newEntryCommentsSolr = new Post();

    List<ActionType> list = new ArrayList<ActionType>();

    if (commentElements != null && commentElements.size() > 0) {
      // NOTE(review): when a text validator is configured its expression is not applied; the
      // check below only requires the element's HTML to start with something other than '<'.
      // Kept as found - confirm whether the validator itself was meant to be used.
      final Pattern leadingTextPattern = Pattern.compile("^[^<].*");
      // Validator patterns are compiled on first use so that an invalid expression still only
      // fails when an element actually has to be checked against it.
      Pattern authorPattern = null;
      Pattern datePattern = null;
      // No user id can be read from the markup, so every User is created without one.
      final String id = null;
      int acceptedComments = 0;

      for (Element comment : commentElements) {
        logger.log(Level.INFO, "Comentario: {0}", comment.html());

        // Per-comment state: declared inside the loop so that a comment lacking an author,
        // text or date does not inherit the value extracted from the previous comment.
        String author = null;
        String text = null;
        String timestamp = null;
        boolean validComment = false;

        if (selectorAuthor != null) {
          for (Element element : comment.select(selectorAuthor)) {
            author = element.text();
            if (expressionAuthor == null) {
              break;
            }
            if (author != null && !"".equals(author)) {
              if (authorPattern == null) {
                authorPattern = Pattern.compile(expressionAuthor);
              }
              Matcher matcher = authorPattern.matcher(author);
              if (matcher.find()) {
                author = stripValidatorGroup(author, matcher);
                break;
              }
            }
          }
        }

        logger.log(Level.INFO, "SelectorText; {0}", selectorText);
        if (selectorText != null) {
          for (Element element : comment.select(selectorText)) {
            if (expressionText == null) {
              text = element.text();
              validComment = true;
              break;
            }
            String html = element.html();
            logger.log(Level.INFO, "Text HTML: {0}", html);
            if (html != null && !"".equals(html) && leadingTextPattern.matcher(html).find()) {
              text = element.text();
              logger.log(Level.INFO, "Matchea, Text; {0}", text);
              validComment = true;
              break;
            }
          }
        }

        if (selectorDate != null) {
          logger.log(Level.INFO, "Selector Fecha: {0}", selectorDate);
          for (Element element : comment.select(selectorDate)) {
            timestamp = element.text();
            if (expressionDate == null || timestamp == null || "".equals(timestamp)) {
              break;
            }
            logger.log(
                Level.INFO,
                "Validador Fecha: {0}, Texto Fecha: {1}",
                new Object[] {expressionDate, timestamp});
            if (datePattern == null) {
              datePattern = Pattern.compile(expressionDate);
            }
            Matcher matcher = datePattern.matcher(timestamp);
            if (matcher.find()) {
              timestamp = stripValidatorGroup(timestamp, matcher);
              break;
            }
          }
        }

        if (validComment && comments) {
          logger.log(Level.INFO, "El comentario es válido");
          acceptedComments++;
          String commentId = idPost + author + timestamp;
          if (!json) {
            // A fresh entry per comment: re-using one instance would leave every element of
            // newsList pointing at the same object, i.e. at the last comment only.
            PostType entry = new PostType();
            entry.setId(commentId);
            entry.setSourcePost(idPost);
            entry.setDocType("comment");
            entry.setFromUser(new User(id, author, null, null, null));
            entry.setText(text);
            entry.setCreated(timestamp);
            entry.setSource(source);
            newsList.add(entry);
            newEntryComments = entry;
          } else {
            Post entry = new Post();
            entry.setId(commentId);
            entry.setSourcePost(idPost);
            entry.setDocType("comment");
            entry.setFromUser(new User(id, author, null, null, null));
            entry.setText(text);
            entry.setSource(source);
            if (patternDate != null) {
              entry.setCreated(getDate(timestamp, patternDate));
            }
            newsListSolr.add(entry);
            newEntryCommentsSolr = entry;
          }
        }
      }
      list.add(new ActionType("comments", Integer.valueOf(acceptedComments)));
    }

    return list;
  }

  /**
   * Removes every literal occurrence of the matcher's first capturing group from {@code value}.
   *
   * <p>The captured text is removed verbatim (not re-interpreted as a regular expression), and
   * {@code value} is returned unchanged when the expression has no first group or that group
   * did not take part in the match.
   */
  private String stripValidatorGroup(String value, Matcher matcher) {
    if (matcher.groupCount() < 1) {
      return value;
    }
    String captured = matcher.group(1);
    if (captured == null || captured.length() == 0) {
      return value;
    }
    return value.replace(captured, "");
  }
コード例 #2
0
  /**
   * Gathers the picture, link and avatar URLs that the configured selectors find in {@code doc}.
   *
   * <p>When {@code feedSelectors} is {@code null} or has no selectors, the set stored under
   * {@code "default"} is loaded through {@code dao}. Each selector is handled by type:
   *
   * <ul>
   *   <li>{@code picture}: the {@code src} attribute of every match, kept only when
   *       {@code checkImgSize} accepts the resulting URL;
   *   <li>{@code link}: the {@code href} attribute of every match;
   *   <li>{@code avatar}: the {@code src} attribute of every match.
   * </ul>
   *
   * <p>Every attribute value is passed through {@code addHost} together with {@code host}. If no
   * avatar element matched and the selector set declares a logo URL, that URL is appended as the
   * avatar.
   *
   * @param feedSelectors selectors to apply; may be {@code null} to request the default set
   * @param doc parsed document to search
   * @param host host handed to {@code addHost} for every extracted attribute value
   * @return the collected links, or {@code null} when no selectors are available or nothing was
   *     collected
   * @throws FileNotFoundException declared for callers
   * @throws IOException declared for callers
   * @throws BadLocationException declared for callers
   */
  public List<LinkType> getLinks(FeedSelectors feedSelectors, Document doc, String host)
      throws FileNotFoundException, IOException, BadLocationException {

    final Logger log = Logger.getLogger(this.getClass().getName());
    final List<LinkType> links = new ArrayList<LinkType>();
    boolean avatarFound = false;

    // Fall back to the default selector set when none was supplied.
    if (feedSelectors == null
        || feedSelectors.getSelectors() == null
        || feedSelectors.getSelectors().isEmpty()) {
      feedSelectors = dao.retrieve("default");
    }
    // Still nothing to work with: give up.
    if (feedSelectors == null
        || feedSelectors.getSelectors() == null
        || feedSelectors.getSelectors().isEmpty()) {
      return null;
    }

    for (FeedSelector selector : feedSelectors.getSelectors()) {
      if ("picture".equals(selector.getType())) {
        for (Element image : doc.select(selector.getSelector())) {
          String url = addHost(image.attr("src"), host);
          log.log(Level.INFO, "PICTURE URL: {0}", url);
          if (!checkImgSize(url)) {
            log.log(Level.INFO, "SIZE NO VALIDO: {0}", url);
            continue;
          }
          log.log(Level.INFO, "SIZE VALIDO, AGREGO PICTURE");
          links.add(new LinkType("picture", url));
        }
      } else if ("link".equals(selector.getType())) {
        for (Element anchor : doc.select(selector.getSelector())) {
          links.add(new LinkType("link", addHost(anchor.attr("href"), host)));
        }
      } else if ("avatar".equals(selector.getType())) {
        for (Element image : doc.select(selector.getSelector())) {
          links.add(new LinkType("avatar", addHost(image.attr("src"), host)));
          avatarFound = true;
        }
      }
    }

    log.log(Level.INFO, "Antes logo");
    // Use the feed's own logo as avatar when the page did not provide one.
    if (!avatarFound && feedSelectors.getUrlLogo() != null) {
      log.log(Level.INFO, "Obtengo logo por defecto: {0}", feedSelectors.getUrlLogo());
      links.add(new LinkType("avatar", feedSelectors.getUrlLogo()));
    }

    return links.isEmpty() ? null : links;
  }