/** * Parses through an html daybreak file and extracts any found daybreak fragments * * @param daybreakFile * @return */ public static List<DaybreakFragment> extractDaybreakFragments(DaybreakFile daybreakFile) throws IOException { Assert.isTrue( daybreakFile.getDaybreakDocType() != DaybreakDocType.PAGE_FILE, "This parser is not for page files"); List<DaybreakFragment> daybreakFragments = new ArrayList<>(); String fullText = FileUtils.readFileToString(daybreakFile.getFile(), "UTF-8").replaceAll("\\r?\\n", " "); Matcher rowMatcher = rowPattern.matcher(fullText); rowMatcher.find(); // Throw the first two rows away rowMatcher.find(); // They are just headers for the table while (rowMatcher.find()) { // Each row contains 1 bill String text = stripParts .matcher(rowMatcher.group(1)) // Match all non <br> and </td> tags .replaceAll("") // Remove them .replace("</td>", "\n") // convert </td> and <br> to newlines .replace("<br>", "\n") .replace("�", " ") // Replace all instances of � with space ; // Here we are going through each line and trimming excess whitespace String[] lines = text.split("\\n"); String fragmentPrintNo = null; StringBuilder fragmentText = new StringBuilder(); fragmentText.ensureCapacity(text.length()); for (int i = 0; i < lines.length; i++) { if (i == 0) { // The first line should be the bill print number fragmentPrintNo = lines[i].trim(); } fragmentText.append(lines[i].trim()); fragmentText.append('\n'); } // TODO: it is assumed that the daybreak only contains bills from the current session year // todo: perhaps there is another way of getting the session year? BillId fragmentBillId = new BillId(fragmentPrintNo, SessionYear.of(daybreakFile.getReportDate().getYear())); daybreakFragments.add( new DaybreakFragment(fragmentBillId, daybreakFile, fragmentText.toString())); } return daybreakFragments; }
/** * Constructs an incomplete member based on a limited amount of information * * @param lbdcShortName String - The short name of the member as represented in the source data. * @param sessionYear SessionYear - The session year in which this member was active. * @param chamber Chamber * @throws ParseError if the given shortname cannot be parsed * @return Member */ public static Member newMakeshiftMember( String lbdcShortName, SessionYear sessionYear, Chamber chamber) throws ParseError { if (lbdcShortName == null) { throw new ParseError("Attempted to create makeshift member, but lbdcShortName was null!"); } // Assembly members are not already uppercase lbdcShortName = lbdcShortName.toUpperCase().trim(); Member member = new Member(); member.setLbdcShortName(lbdcShortName); member.setSessionYear(sessionYear); member.setChamber(chamber); member.setIncumbent(sessionYear.equals(SessionYear.current())); Matcher shortNameMatcher = shortNamePattern.matcher(lbdcShortName); if (shortNameMatcher.matches()) { member.setLastName(shortNameMatcher.group(1)); if (shortNameMatcher.groupCount() == 3) { member.setFirstName(shortNameMatcher.group(3)); member.setFullName( (member.getFirstName() != null ? member.getFirstName() + " " : "") + member.getLastName()); } else { member.setFullName(member.getLastName()); } } else { throw new ParseError( "Can not create makeshift member: LBDC shortname '" + lbdcShortName + "' does not match specification"); } return member; }
@Test public void testOrdinalMapTests() throws Exception { List<Member> members1 = Lists.newArrayList(); List<Member> members2 = Lists.newArrayList(); members1.add(memberService.getMemberByShortName("BALL", SessionYear.of(2013), Chamber.SENATE)); members1.add( memberService.getMemberByShortName("SAVINO", SessionYear.of(2013), Chamber.SENATE)); members1.add( memberService.getMemberByShortName("MARTINS", SessionYear.of(2013), Chamber.SENATE)); Map<Member, Integer> map1 = Maps.newHashMap(); for (int i = 0; i < members1.size(); i++) { map1.put(members1.get(i), i); } members2.add(memberService.getMemberByShortName("BALL", SessionYear.of(2013), Chamber.SENATE)); members2.add( memberService.getMemberByShortName("MARTINS", SessionYear.of(2013), Chamber.SENATE)); members2.add( memberService.getMemberByShortName("ZELDIN", SessionYear.of(2013), Chamber.SENATE)); Map<Member, Integer> map2 = Maps.newHashMap(); for (int i = 0; i < members2.size(); i++) { map2.put(members2.get(i), i); } MapDifference<Member, Integer> diff = Maps.difference(map1, map2); logger.info("{}", diff); // Map<Integer, String> map1 = // SqlBaseDao.getOridinalMapFromList(Lists.newArrayList("moose", "cow", "sheep"), // 1); // Map<Integer, String> map2 = // SqlBaseDao.getOridinalMapFromList(Lists.newArrayList("loser", "moose", "cow", // "sheep"), 1); // MapDifference<Integer, String> mapDiff = Maps.difference(map1, map2); // logger.info("{}", mapDiff.entriesOnlyOnRight()); }
/**
 * Builds a term filter on the {@code "sessionYear"} field whose value is the year of the given
 * session.
 *
 * @param sessionYear the session to filter on; its {@code getYear()} value is used as the term
 * @return the filter produced by {@code FilterBuilders.termFilter}
 */
FilterBuilder getSessionFilter(SessionYear sessionYear) { return FilterBuilders.termFilter("sessionYear", sessionYear.getYear()); }