/**
 * Verifies {@code Element.dataNodes()} on three sibling elements: a {@code <script>} and a
 * {@code <style>} element are each expected to yield exactly one data node carrying their raw
 * content, while a {@code <p>} element is expected to yield none.
 */
@Test
public void testGetDataNodes() {
    String html = "<script>One Two</script> <style>Three Four</style> <p>Fix Six</p>";
    Document parsed = Jsoup.parse(html);

    // <script>: a single data node holding the script body.
    List<DataNode> fromScript = parsed.select("script").first().dataNodes();
    assertEquals(1, fromScript.size());
    assertEquals("One Two", fromScript.get(0).getWholeData());

    // <style>: a single data node holding the stylesheet body.
    List<DataNode> fromStyle = parsed.select("style").first().dataNodes();
    assertEquals(1, fromStyle.size());
    assertEquals("Three Four", fromStyle.get(0).getWholeData());

    // <p>: the paragraph is expected to expose no data nodes.
    List<DataNode> fromParagraph = parsed.select("p").first().dataNodes();
    assertEquals(0, fromParagraph.size());
}
/**
 * Populates the given match object from the inline {@code <script>} data nodes of the document
 * returned by {@code getDocument()}.
 *
 * <p>Each data node is checked twice: once for the {@code liveMatchUpdater} declaration, from
 * which the WhoScored match id is read, and once for the {@code initialMatchDataForScrappers}
 * array, from which the fixture (teams, date/time, time elapsed) and the match events (goals,
 * cards, substitutions) are read. Fixture values are only written when the corresponding field
 * on {@code matchParserObject} is still unset. Finally the object is handed to
 * {@code getParserProperties().map(...)}.
 *
 * <p>Events whose team cannot be resolved are logged and skipped rather than aborting the parse.
 *
 * @param matchParserObject the match object to fill in; it is mutated in place
 * @return a new mutable set containing only {@code matchParserObject}
 * @throws NumberFormatException if a numeric capture group matched an empty or oversized
 *     digit sequence (the patterns use {@code \d*}, which also matches the empty string)
 */
public Set<MatchParserObject> parse(WhoScoredMatchParserObject matchParserObject) {
    Elements scriptElements = getDocument().getElementsByTag("script");

    Pattern matchIdPattern = Pattern.compile(
            ".*var liveMatchUpdater = .*parameters: \\{.*id: (\\d*).*\\}.*", Pattern.DOTALL);
    Pattern scriptPattern = Pattern.compile(
            "(.*)var initialMatchDataForScrappers = \\[\\[(.*), \\[(.*)", Pattern.DOTALL);
    Pattern fixturePattern = Pattern.compile(
            "\\[(\\d*),(\\d*),'(.*?)','(.*?)','(.*?)','.*?',\\d*,'(.*?)',(.*)\\]", Pattern.DOTALL);
    Pattern matchEventsPattern = Pattern.compile("\\[\\[(.*?)\\]\\]", Pattern.DOTALL);
    Pattern goalPattern = Pattern.compile(
            ".*\\['(.*?)',('(.*?)')?,'(goal|owngoal|penalty-goal)','.*?',('OG'|'Pen.')?,(\\d*),(\\d*),(\\d*)\\].*",
            Pattern.DOTALL);
    Pattern cardPattern = Pattern.compile(
            ".*\\['(.*?)',,'(yellow|secondyellow|red)',,,(\\d*),(\\d*),(\\d*)\\].*", Pattern.DOTALL);
    Pattern substitutionPattern = Pattern.compile(
            ".*\\['(.*?)','(.*?)','subst',,,(\\d*),(\\d*),(\\d*)\\].*", Pattern.DOTALL);

    for (Element scriptElement : scriptElements) {
        for (DataNode node : scriptElement.dataNodes()) {
            // Render the node once; both top-level patterns are matched against the same text.
            String scriptText = node.toString();

            Matcher matchIdMatcher = matchIdPattern.matcher(scriptText);
            if (matchIdMatcher.matches()) {
                applyParsedWhoScoredId(matchParserObject, Integer.parseInt(matchIdMatcher.group(1)));
            }

            Matcher scriptMatcher = scriptPattern.matcher(scriptText);
            if (!scriptMatcher.matches()) {
                continue;
            }

            // Group 2 is the body of the first nested array; its entries are separated by
            // a newline followed by ", ".
            for (String rawVariable : scriptMatcher.group(2).split("\n, ")) {
                String scriptVariable = rawVariable.trim();

                Matcher fixtureMatcher = fixturePattern.matcher(scriptVariable);
                if (fixtureMatcher.matches()) {
                    applyParsedFixture(matchParserObject, fixtureMatcher);
                    continue;
                }

                Matcher matchEventsMatcher = matchEventsPattern.matcher(scriptVariable);
                if (!matchEventsMatcher.matches()) {
                    continue;
                }

                // One event per line inside the [[ ... ]] block.
                for (String eventVariable : matchEventsMatcher.group(1).split("\n")) {
                    Matcher goalMatcher = goalPattern.matcher(eventVariable);
                    Matcher cardMatcher = cardPattern.matcher(eventVariable);
                    Matcher substitutionMatcher = substitutionPattern.matcher(eventVariable);
                    if (goalMatcher.matches()) {
                        addParsedGoal(matchParserObject, goalMatcher);
                    } else if (cardMatcher.matches()) {
                        addParsedCard(matchParserObject, cardMatcher);
                    } else if (substitutionMatcher.matches()) {
                        addParsedSubstitution(matchParserObject, substitutionMatcher);
                    }
                }
            }
        }
    }

    getParserProperties().map(matchParserObject);

    Set<MatchParserObject> matchParserObjects = new HashSet<MatchParserObject>();
    matchParserObjects.add(matchParserObject);
    return matchParserObjects;
}

/** Stores the parsed id when none is set (id == 0); otherwise logs an error on mismatch. */
private void applyParsedWhoScoredId(WhoScoredMatchParserObject matchParserObject, int whoScoredId) {
    if (matchParserObject.getWhoScoredId() == 0) {
        matchParserObject.setWhoScoredId(whoScoredId);
    } else if (matchParserObject.getWhoScoredId() != whoScoredId) {
        // The id already on the object is kept; the discrepancy is only reported.
        logger.error(
                "Provided WhoScoredMatchParserObject had whoScoredId={} but parsed document had whoScoredId={}",
                matchParserObject.getWhoScoredId(), whoScoredId);
    }
}

/** Copies teams, date/time and time elapsed from a matched fixture into fields that are still null. */
private void applyParsedFixture(WhoScoredMatchParserObject matchParserObject, Matcher fixtureMatcher) {
    if (matchParserObject.getHomeTeam() == null) {
        matchParserObject.setHomeTeam(
                new WhoScoredTeamParserObject(
                        fixtureMatcher.group(3), Integer.parseInt(fixtureMatcher.group(1))));
    }
    if (matchParserObject.getAwayTeam() == null) {
        matchParserObject.setAwayTeam(
                new WhoScoredTeamParserObject(
                        fixtureMatcher.group(4), Integer.parseInt(fixtureMatcher.group(2))));
    }
    if (matchParserObject.getDateTime() == null) {
        matchParserObject.setDateTime(fixtureMatcher.group(5));
    }
    if (matchParserObject.getTimeElapsed() == null) {
        matchParserObject.setTimeElapsed(fixtureMatcher.group(6));
    }
}

/** Builds a goal from a matched goal event and adds it to the team resolved by getTeamForGoal. */
private void addParsedGoal(WhoScoredMatchParserObject matchParserObject, Matcher goalMatcher) {
    String goalType = goalMatcher.group(4);
    WhoScoredGoalParserObject goalParserObject = new WhoScoredGoalParserObject(
            goalMatcher.group(1),
            Integer.parseInt(goalMatcher.group(7)),
            goalMatcher.group(3), // null when the optional quoted second field is absent
            Integer.parseInt(goalMatcher.group(8)),
            Integer.parseInt(goalMatcher.group(6)),
            goalType.equalsIgnoreCase("penalty-goal"),
            goalType.equalsIgnoreCase("owngoal"));

    WhoScoredTeamParserObject team = matchParserObject.getTeamForGoal(goalParserObject);
    if (team == null) {
        logger.error("Could not find team for goal {}.", goalParserObject);
    } else {
        team.getGoals().add(goalParserObject);
    }
}

/** Builds a card from a matched card event and adds it to the team of the carded player. */
private void addParsedCard(WhoScoredMatchParserObject matchParserObject, Matcher cardMatcher) {
    // Only the literal "yellow" maps to YELLOW; "secondyellow" and "red" both map to RED.
    CardType cardType =
            cardMatcher.group(2).equalsIgnoreCase("yellow") ? CardType.YELLOW : CardType.RED;
    WhoScoredCardParserObject cardParserObject = new WhoScoredCardParserObject(
            cardMatcher.group(1),
            Integer.parseInt(cardMatcher.group(4)),
            Integer.parseInt(cardMatcher.group(3)),
            cardType);

    // NOTE(review): assumes getTeamForPlayer returns WhoScoredTeamParserObject and may return
    // null for an unknown player, like getTeamForGoal - confirm against its declaration.
    WhoScoredTeamParserObject team =
            matchParserObject.getTeamForPlayer(cardParserObject.getPlayerWhoScoredId());
    if (team == null) {
        logger.error("Could not find team for card {}.", cardParserObject);
    } else {
        team.getCards().add(cardParserObject);
    }
}

/** Builds a substitution from a matched event and adds it to the team of the outgoing player. */
private void addParsedSubstitution(
        WhoScoredMatchParserObject matchParserObject, Matcher substitutionMatcher) {
    WhoScoredSubstitutionParserObject substitutionParserObject =
            new WhoScoredSubstitutionParserObject(
                    substitutionMatcher.group(1),
                    Integer.parseInt(substitutionMatcher.group(4)),
                    substitutionMatcher.group(2),
                    Integer.parseInt(substitutionMatcher.group(5)),
                    Integer.parseInt(substitutionMatcher.group(3)));

    // NOTE(review): same assumption about getTeamForPlayer as for cards - confirm.
    WhoScoredTeamParserObject team =
            matchParserObject.getTeamForPlayer(substitutionParserObject.getPlayerOutWhoScoredId());
    if (team == null) {
        logger.error("Could not find team for substitution {}.", substitutionParserObject);
    } else {
        team.getSubstitutions().add(substitutionParserObject);
    }
}