예제 #1
0
  /**
   * Checks {@code Element.dataNodes()} on a parsed document: the {@code <script>} and
   * {@code <style>} elements must each report exactly one data node carrying their raw content,
   * while the {@code <p>} element must report none.
   */
  @Test
  public void testGetDataNodes() {
    Document parsed =
        Jsoup.parse("<script>One Two</script> <style>Three Four</style> <p>Fix Six</p>");

    // <script>: a single data node holding the script body.
    List<DataNode> scriptNodes = parsed.select("script").first().dataNodes();
    assertEquals(1, scriptNodes.size());
    assertEquals("One Two", scriptNodes.get(0).getWholeData());

    // <style>: a single data node holding the stylesheet body.
    List<DataNode> styleNodes = parsed.select("style").first().dataNodes();
    assertEquals(1, styleNodes.size());
    assertEquals("Three Four", styleNodes.get(0).getWholeData());

    // <p>: no data nodes are expected here.
    List<DataNode> paragraphNodes = parsed.select("p").first().dataNodes();
    assertEquals(0, paragraphNodes.size());
  }
  /** Matches a script declaring {@code liveMatchUpdater}; group 1 captures the digits after {@code id: }. */
  private static final Pattern MATCH_ID_PATTERN =
      Pattern.compile(
          ".*var liveMatchUpdater = .*parameters: \\{.*id: (\\d*).*\\}.*", Pattern.DOTALL);

  /** Matches a script declaring {@code initialMatchDataForScrappers}; group 2 is the text that gets split into variables. */
  private static final Pattern SCRIPT_PATTERN =
      Pattern.compile(
          "(.*)var initialMatchDataForScrappers = \\[\\[(.*), \\[(.*)", Pattern.DOTALL);

  /** Matches the fixture array; groups 1/3 feed the home team, 2/4 the away team, 5 the date-time, 6 the elapsed time. */
  private static final Pattern FIXTURE_PATTERN =
      Pattern.compile(
          "\\[(\\d*),(\\d*),'(.*?)','(.*?)','(.*?)','.*?',\\d*,'(.*?)',(.*)\\]", Pattern.DOTALL);

  /** Matches the doubly-bracketed match-events array; group 1 is its content. */
  private static final Pattern MATCH_EVENTS_PATTERN =
      Pattern.compile("\\[\\[(.*?)\\]\\]", Pattern.DOTALL);

  /** Matches a goal event line; group 4 is the event type, group 3 (inside optional group 2) may be {@code null}. */
  private static final Pattern GOAL_PATTERN =
      Pattern.compile(
          ".*\\['(.*?)',('(.*?)')?,'(goal|owngoal|penalty-goal)','.*?',('OG'|'Pen.')?,(\\d*),(\\d*),(\\d*)\\].*",
          Pattern.DOTALL);

  /** Matches a card event line; group 2 is the card type. */
  private static final Pattern CARD_PATTERN =
      Pattern.compile(
          ".*\\['(.*?)',,'(yellow|secondyellow|red)',,,(\\d*),(\\d*),(\\d*)\\].*",
          Pattern.DOTALL);

  /** Matches a substitution event line. */
  private static final Pattern SUBSTITUTION_PATTERN =
      Pattern.compile(".*\\['(.*?)','(.*?)','subst',,,(\\d*),(\\d*),(\\d*)\\].*", Pattern.DOTALL);

  /**
   * Fills the given match object from the inline scripts of the current document.
   *
   * <p>For every data node of every {@code <script>} element this method:
   *
   * <ul>
   *   <li>reads the match id; it is stored when the object's id is {@code 0}, and a mismatch with
   *       an already-set id is logged as an error without overwriting it;
   *   <li>reads the fixture array and sets home team, away team, date-time and elapsed time, each
   *       only if the corresponding getter currently returns {@code null};
   *   <li>reads the match-events array and adds goals, cards and substitutions to the team they
   *       resolve to. An event whose team cannot be resolved is logged and skipped.
   * </ul>
   *
   * <p>Afterwards {@code getParserProperties().map(matchParserObject)} is invoked.
   *
   * @param matchParserObject the object to populate; it is mutated in place
   * @return a new {@link HashSet} containing only {@code matchParserObject}
   * @throws NumberFormatException if a matched numeric field is empty (the patterns use
   *     {@code \d*}) or does not fit in an {@code int}
   */
  public Set<MatchParserObject> parse(WhoScoredMatchParserObject matchParserObject) {
    Elements scriptElements = getDocument().getElementsByTag("script");

    for (Element scriptElement : scriptElements) {
      for (DataNode node : scriptElement.dataNodes()) {
        // Render the node once; both lookups below inspect the same text.
        String scriptText = node.toString();
        applyMatchId(scriptText, matchParserObject);
        applyInitialMatchData(scriptText, matchParserObject);
      }
    }

    getParserProperties().map(matchParserObject);

    Set<MatchParserObject> matchParserObjects = new HashSet<MatchParserObject>();
    matchParserObjects.add(matchParserObject);
    return matchParserObjects;
  }

  /** Stores the match id found in the script text, or logs an error if it contradicts an id already set. */
  private void applyMatchId(String scriptText, WhoScoredMatchParserObject matchParserObject) {
    Matcher matchIdMatcher = MATCH_ID_PATTERN.matcher(scriptText);
    if (!matchIdMatcher.matches()) {
      return;
    }
    int whoScoredId = Integer.parseInt(matchIdMatcher.group(1));
    if (matchParserObject.getWhoScoredId() == 0) {
      matchParserObject.setWhoScoredId(whoScoredId);
    } else if (matchParserObject.getWhoScoredId() != whoScoredId) {
      logger.error(
          "Provided WhoScoredMatchParserObject had whoScoredId={} but parsed document had whoScoredId={}",
          matchParserObject.getWhoScoredId(),
          whoScoredId);
    }
  }

  /** Splits the initial-match-data declaration into variables and applies the fixture or match-events one. */
  private void applyInitialMatchData(
      String scriptText, WhoScoredMatchParserObject matchParserObject) {
    Matcher scriptMatcher = SCRIPT_PATTERN.matcher(scriptText);
    if (!scriptMatcher.matches()) {
      return;
    }
    for (String rawVariable : scriptMatcher.group(2).split("\n, ")) {
      String scriptVariable = rawVariable.trim();

      // The fixture pattern takes precedence; events are only tried when it does not match.
      Matcher fixtureMatcher = FIXTURE_PATTERN.matcher(scriptVariable);
      if (fixtureMatcher.matches()) {
        applyFixture(fixtureMatcher, matchParserObject);
        continue;
      }

      Matcher matchEventsMatcher = MATCH_EVENTS_PATTERN.matcher(scriptVariable);
      if (matchEventsMatcher.matches()) {
        applyMatchEvents(matchEventsMatcher.group(1), matchParserObject);
      }
    }
  }

  /** Copies fixture fields onto the match object, leaving any field that is already non-null untouched. */
  private void applyFixture(Matcher fixtureMatcher, WhoScoredMatchParserObject matchParserObject) {
    if (matchParserObject.getHomeTeam() == null) {
      matchParserObject.setHomeTeam(
          new WhoScoredTeamParserObject(
              fixtureMatcher.group(3), Integer.parseInt(fixtureMatcher.group(1))));
    }
    if (matchParserObject.getAwayTeam() == null) {
      matchParserObject.setAwayTeam(
          new WhoScoredTeamParserObject(
              fixtureMatcher.group(4), Integer.parseInt(fixtureMatcher.group(2))));
    }
    if (matchParserObject.getDateTime() == null) {
      matchParserObject.setDateTime(fixtureMatcher.group(5));
    }
    if (matchParserObject.getTimeElapsed() == null) {
      matchParserObject.setTimeElapsed(fixtureMatcher.group(6));
    }
  }

  /** Processes the match-events text one line at a time as a goal, card or substitution (first match wins). */
  private void applyMatchEvents(String matchEvents, WhoScoredMatchParserObject matchParserObject) {
    for (String eventVariable : matchEvents.split("\n")) {
      Matcher goalMatcher = GOAL_PATTERN.matcher(eventVariable);
      if (goalMatcher.matches()) {
        applyGoal(goalMatcher, matchParserObject);
        continue;
      }

      Matcher cardMatcher = CARD_PATTERN.matcher(eventVariable);
      if (cardMatcher.matches()) {
        applyCard(cardMatcher, matchParserObject);
        continue;
      }

      Matcher substitutionMatcher = SUBSTITUTION_PATTERN.matcher(eventVariable);
      if (substitutionMatcher.matches()) {
        applySubstitution(substitutionMatcher, matchParserObject);
      }
    }
  }

  /** Builds a goal from a matched event line and adds it to the team it resolves to. */
  private void applyGoal(Matcher goalMatcher, WhoScoredMatchParserObject matchParserObject) {
    String goalType = goalMatcher.group(4);
    WhoScoredGoalParserObject goalParserObject =
        new WhoScoredGoalParserObject(
            goalMatcher.group(1),
            Integer.parseInt(goalMatcher.group(7)),
            goalMatcher.group(3), // null when the optional second quoted field is absent
            Integer.parseInt(goalMatcher.group(8)),
            Integer.parseInt(goalMatcher.group(6)),
            goalType.equalsIgnoreCase("penalty-goal"),
            goalType.equalsIgnoreCase("owngoal"));

    WhoScoredTeamParserObject team = matchParserObject.getTeamForGoal(goalParserObject);
    if (team == null) {
      logger.error("Could not find team for goal {}.", goalParserObject);
    } else {
      team.getGoals().add(goalParserObject);
    }
  }

  /** Builds a card from a matched event line and adds it to the carded player's team. */
  private void applyCard(Matcher cardMatcher, WhoScoredMatchParserObject matchParserObject) {
    // Only "yellow" maps to YELLOW; both "secondyellow" and "red" are recorded as RED.
    CardType cardType =
        cardMatcher.group(2).equalsIgnoreCase("yellow") ? CardType.YELLOW : CardType.RED;
    WhoScoredCardParserObject cardParserObject =
        new WhoScoredCardParserObject(
            cardMatcher.group(1),
            Integer.parseInt(cardMatcher.group(4)),
            Integer.parseInt(cardMatcher.group(3)),
            cardType);

    // Same guard as for goals: an unresolved team is logged and skipped, not dereferenced.
    WhoScoredTeamParserObject team =
        matchParserObject.getTeamForPlayer(cardParserObject.getPlayerWhoScoredId());
    if (team == null) {
      logger.error("Could not find team for card {}.", cardParserObject);
    } else {
      team.getCards().add(cardParserObject);
    }
  }

  /** Builds a substitution from a matched event line and adds it to the team of the player going out. */
  private void applySubstitution(
      Matcher substitutionMatcher, WhoScoredMatchParserObject matchParserObject) {
    WhoScoredSubstitutionParserObject substitutionParserObject =
        new WhoScoredSubstitutionParserObject(
            substitutionMatcher.group(1),
            Integer.parseInt(substitutionMatcher.group(4)),
            substitutionMatcher.group(2),
            Integer.parseInt(substitutionMatcher.group(5)),
            Integer.parseInt(substitutionMatcher.group(3)));

    // Same guard as for goals: an unresolved team is logged and skipped, not dereferenced.
    WhoScoredTeamParserObject team =
        matchParserObject.getTeamForPlayer(substitutionParserObject.getPlayerOutWhoScoredId());
    if (team == null) {
      logger.error("Could not find team for substitution {}.", substitutionParserObject);
    } else {
      team.getSubstitutions().add(substitutionParserObject);
    }
  }