Example #1
0
    /**
     * Copies {@code source} into the current destination when the whitelist allows it, and counts
     * it as discarded otherwise.
     *
     * <ul>
     *   <li>An element with a safe tag is re-created via {@code createSafeElement}, appended to the
     *       destination, and becomes the new destination; the attributes dropped while creating it
     *       are added to the discard count.
     *   <li>An element with an unsafe tag is counted as discarded, unless it is the root.
     *   <li>A text node is copied with its whole text and the source's base URI.
     *   <li>A data node is copied only when its parent's node name is a safe tag.
     *   <li>Anything else (comments, XML processing instructions, etc.) is counted as discarded.
     * </ul>
     *
     * @param source the node being visited
     * @param depth not used by this method
     */
    public void head(Node source, int depth) {
      if (source instanceof Element) {
        Element original = (Element) source;
        boolean tagAllowed = whitelist.isSafeTag(original.tagName());

        if (tagAllowed) {
          // Safe tag: clone it with only its safe attributes and descend into the clone.
          ElementMeta safeCopy = createSafeElement(original);
          Element copy = safeCopy.el;
          destination.appendChild(copy);

          numDiscarded += safeCopy.numAttribsDiscarded;
          destination = copy;
        } else if (source != root) {
          // Unsafe tag: not added. The root itself is never counted against the discard total.
          numDiscarded++;
        }
        return;
      }

      if (source instanceof TextNode) {
        String wholeText = ((TextNode) source).getWholeText();
        destination.appendChild(new TextNode(wholeText, source.baseUri()));
        return;
      }

      boolean copyableData =
          source instanceof DataNode && whitelist.isSafeTag(source.parent().nodeName());
      if (copyableData) {
        String wholeData = ((DataNode) source).getWholeData();
        destination.appendChild(new DataNode(wholeData, source.baseUri()));
        return;
      }

      // Comments, XML processing instructions, data under unsafe parents, etc.
      numDiscarded++;
    }
Example #2
0
  /**
   * Get the combined data of this element. Data is e.g. the inside of a {@code script} tag.
   *
   * <p>The result concatenates, in child order, the whole data of every direct {@code DataNode}
   * child and the {@code data()} of every child element; other child nodes contribute nothing.
   *
   * @return the data, or empty string if none
   * @see #dataNodes()
   */
  public String data() {
    StringBuilder combined = new StringBuilder();

    for (Node child : childNodes) {
      if (child instanceof DataNode) {
        combined.append(((DataNode) child).getWholeData());
        continue;
      }
      if (child instanceof Element) {
        // Recurse so that data nested deeper in the tree is included as well.
        combined.append(((Element) child).data());
      }
    }
    return combined.toString();
  }
  /** Matches a script that configures {@code liveMatchUpdater}; group 1 is the match id. */
  private static final Pattern MATCH_ID_PATTERN =
      Pattern.compile(
          ".*var liveMatchUpdater = .*parameters: \\{.*id: (\\d*).*\\}.*", Pattern.DOTALL);

  /** Matches a script that declares {@code initialMatchDataForScrappers}; group 2 is its data. */
  private static final Pattern SCRIPT_PATTERN =
      Pattern.compile(
          "(.*)var initialMatchDataForScrappers = \\[\\[(.*), \\[(.*)", Pattern.DOTALL);

  /** Matches the fixture entry (team ids, team names, date/time, elapsed time). */
  private static final Pattern FIXTURE_PATTERN =
      Pattern.compile(
          "\\[(\\d*),(\\d*),'(.*?)','(.*?)','(.*?)','.*?',\\d*,'(.*?)',(.*)\\]", Pattern.DOTALL);

  /** Matches the nested list of match events; group 1 holds one event per line. */
  private static final Pattern MATCH_EVENTS_PATTERN =
      Pattern.compile("\\[\\[(.*?)\\]\\]", Pattern.DOTALL);

  /** Matches a single goal, own-goal or penalty-goal event. */
  private static final Pattern GOAL_PATTERN =
      Pattern.compile(
          ".*\\['(.*?)',('(.*?)')?,'(goal|owngoal|penalty-goal)','.*?',('OG'|'Pen.')?,(\\d*),(\\d*),(\\d*)\\].*",
          Pattern.DOTALL);

  /** Matches a single yellow, second-yellow or red card event. */
  private static final Pattern CARD_PATTERN =
      Pattern.compile(
          ".*\\['(.*?)',,'(yellow|secondyellow|red)',,,(\\d*),(\\d*),(\\d*)\\].*",
          Pattern.DOTALL);

  /** Matches a single substitution event. */
  private static final Pattern SUBSTITUTION_PATTERN =
      Pattern.compile(".*\\['(.*?)','(.*?)','subst',,,(\\d*),(\\d*),(\\d*)\\].*", Pattern.DOTALL);

  /**
   * Extracts match information from the inline {@code script} elements of the current document
   * and records it on {@code matchParserObject}.
   *
   * <p>For every data node of every {@code script} element this method:
   *
   * <ul>
   *   <li>reads the match id from the {@code liveMatchUpdater} configuration; it is stored when the
   *       object's id is 0, and a mismatch with an already-set id is logged as an error;
   *   <li>reads the {@code initialMatchDataForScrappers} array, filling in home team, away team,
   *       date/time and elapsed time where they are still {@code null}, and adding each goal, card
   *       and substitution to the team it resolves to.
   * </ul>
   *
   * <p>An event whose team cannot be resolved is logged as an error and skipped. Afterwards the
   * object is passed to {@code getParserProperties().map(...)}.
   *
   * @param matchParserObject the object to populate; it is modified in place
   * @return a new mutable set containing only {@code matchParserObject}
   * @throws NumberFormatException if a numeric field captured from a script is empty or does not
   *     fit in an {@code int}
   */
  public Set<MatchParserObject> parse(WhoScoredMatchParserObject matchParserObject) {
    Elements scriptElements = getDocument().getElementsByTag("script");

    for (Element scriptElement : scriptElements) {
      for (DataNode node : scriptElement.dataNodes()) {
        // Render the node once; both extractions work on the same text.
        String script = node.toString();
        readMatchId(matchParserObject, script);
        readMatchData(matchParserObject, script);
      }
    }

    getParserProperties().map(matchParserObject);

    Set<MatchParserObject> matchParserObjects = new HashSet<MatchParserObject>();
    matchParserObjects.add(matchParserObject);
    return matchParserObjects;
  }

  /** Stores the match id found in {@code script}, or logs an error if it contradicts the set id. */
  private void readMatchId(WhoScoredMatchParserObject matchParserObject, String script) {
    Matcher matchIdMatcher = MATCH_ID_PATTERN.matcher(script);
    if (!matchIdMatcher.matches()) {
      return;
    }

    int whoScoredId = Integer.parseInt(matchIdMatcher.group(1));
    if (matchParserObject.getWhoScoredId() == 0) {
      matchParserObject.setWhoScoredId(whoScoredId);
    } else if (matchParserObject.getWhoScoredId() != whoScoredId) {
      logger.error(
          "Provided WhoScoredMatchParserObject had whoScoredId={} but parsed document had whoScoredId={}",
          matchParserObject.getWhoScoredId(),
          whoScoredId);
    }
  }

  /** Reads the fixture and the event list from the match data array declared in {@code script}. */
  private void readMatchData(WhoScoredMatchParserObject matchParserObject, String script) {
    Matcher scriptMatcher = SCRIPT_PATTERN.matcher(script);
    if (!scriptMatcher.matches()) {
      return;
    }

    for (String rawVariable : scriptMatcher.group(2).split("\n, ")) {
      String scriptVariable = rawVariable.trim();

      Matcher fixtureMatcher = FIXTURE_PATTERN.matcher(scriptVariable);
      if (fixtureMatcher.matches()) {
        readFixture(matchParserObject, fixtureMatcher);
        continue;
      }

      Matcher matchEventsMatcher = MATCH_EVENTS_PATTERN.matcher(scriptVariable);
      if (matchEventsMatcher.matches()) {
        for (String eventVariable : matchEventsMatcher.group(1).split("\n")) {
          readEvent(matchParserObject, eventVariable);
        }
      }
    }
  }

  /** Fills in teams, date/time and elapsed time from a matched fixture, where still unset. */
  private void readFixture(WhoScoredMatchParserObject matchParserObject, Matcher fixtureMatcher) {
    if (matchParserObject.getHomeTeam() == null) {
      matchParserObject.setHomeTeam(
          new WhoScoredTeamParserObject(
              fixtureMatcher.group(3), Integer.parseInt(fixtureMatcher.group(1))));
    }
    if (matchParserObject.getAwayTeam() == null) {
      matchParserObject.setAwayTeam(
          new WhoScoredTeamParserObject(
              fixtureMatcher.group(4), Integer.parseInt(fixtureMatcher.group(2))));
    }
    if (matchParserObject.getDateTime() == null) {
      matchParserObject.setDateTime(fixtureMatcher.group(5));
    }
    if (matchParserObject.getTimeElapsed() == null) {
      matchParserObject.setTimeElapsed(fixtureMatcher.group(6));
    }
  }

  /** Classifies one event line as goal, card or substitution and records it; others are ignored. */
  private void readEvent(WhoScoredMatchParserObject matchParserObject, String eventVariable) {
    Matcher goalMatcher = GOAL_PATTERN.matcher(eventVariable);
    if (goalMatcher.matches()) {
      addGoal(matchParserObject, goalMatcher);
      return;
    }

    Matcher cardMatcher = CARD_PATTERN.matcher(eventVariable);
    if (cardMatcher.matches()) {
      addCard(matchParserObject, cardMatcher);
      return;
    }

    Matcher substitutionMatcher = SUBSTITUTION_PATTERN.matcher(eventVariable);
    if (substitutionMatcher.matches()) {
      addSubstitution(matchParserObject, substitutionMatcher);
    }
  }

  /** Builds a goal from a matched goal event and adds it to the team it belongs to. */
  private void addGoal(WhoScoredMatchParserObject matchParserObject, Matcher goalMatcher) {
    String goalType = goalMatcher.group(4);
    WhoScoredGoalParserObject goalParserObject =
        new WhoScoredGoalParserObject(
            goalMatcher.group(1),
            Integer.parseInt(goalMatcher.group(7)),
            goalMatcher.group(3), // null when the optional second name is absent
            Integer.parseInt(goalMatcher.group(8)),
            Integer.parseInt(goalMatcher.group(6)),
            goalType.equalsIgnoreCase("penalty-goal"),
            goalType.equalsIgnoreCase("owngoal"));

    WhoScoredTeamParserObject team = matchParserObject.getTeamForGoal(goalParserObject);
    if (team == null) {
      logger.error("Could not find team for goal {}.", goalParserObject);
    } else {
      team.getGoals().add(goalParserObject);
    }
  }

  /** Builds a card from a matched card event and adds it to the carded player's team. */
  private void addCard(WhoScoredMatchParserObject matchParserObject, Matcher cardMatcher) {
    // Anything other than a plain "yellow" (i.e. "secondyellow" or "red") is recorded as RED.
    CardType cardType =
        cardMatcher.group(2).equalsIgnoreCase("yellow") ? CardType.YELLOW : CardType.RED;
    WhoScoredCardParserObject cardParserObject =
        new WhoScoredCardParserObject(
            cardMatcher.group(1),
            Integer.parseInt(cardMatcher.group(4)),
            Integer.parseInt(cardMatcher.group(3)),
            cardType);

    // Same policy as for goals: an unresolvable team must not abort the whole parse.
    WhoScoredTeamParserObject team =
        matchParserObject.getTeamForPlayer(cardParserObject.getPlayerWhoScoredId());
    if (team == null) {
      logger.error("Could not find team for card {}.", cardParserObject);
    } else {
      team.getCards().add(cardParserObject);
    }
  }

  /** Builds a substitution from a matched event and adds it to the outgoing player's team. */
  private void addSubstitution(
      WhoScoredMatchParserObject matchParserObject, Matcher substitutionMatcher) {
    WhoScoredSubstitutionParserObject substitutionParserObject =
        new WhoScoredSubstitutionParserObject(
            substitutionMatcher.group(1),
            Integer.parseInt(substitutionMatcher.group(4)),
            substitutionMatcher.group(2),
            Integer.parseInt(substitutionMatcher.group(5)),
            Integer.parseInt(substitutionMatcher.group(3)));

    // Same policy as for goals: an unresolvable team must not abort the whole parse.
    WhoScoredTeamParserObject team =
        matchParserObject.getTeamForPlayer(substitutionParserObject.getPlayerOutWhoScoredId());
    if (team == null) {
      logger.error("Could not find team for substitution {}.", substitutionParserObject);
    } else {
      team.getSubstitutions().add(substitutionParserObject);
    }
  }