/**
   * Reads an HTML daybreak file and turns every table row after the two header rows into a
   * {@link DaybreakFragment}.
   *
   * <p>The file is read as UTF-8 and its line breaks are replaced with spaces before rows are
   * located with {@code rowPattern}. Inside each row, whatever {@code stripParts} matches is
   * removed (per the original comment: every tag other than {@code <br>} and {@code </td>}),
   * the remaining {@code </td>} and {@code <br>} markers become line breaks, and each resulting
   * line is trimmed. The first line of a row is taken as the bill print number.
   *
   * @param daybreakFile the daybreak file to parse; its doc type must not be
   *     {@code DaybreakDocType.PAGE_FILE}
   * @return one fragment per data row, in the order the rows were matched; empty if there are
   *     no rows beyond the headers
   * @throws IOException if the underlying file cannot be read
   */
  public static List<DaybreakFragment> extractDaybreakFragments(DaybreakFile daybreakFile)
      throws IOException {
    boolean notPageFile = daybreakFile.getDaybreakDocType() != DaybreakDocType.PAGE_FILE;
    Assert.isTrue(notPageFile, "This parser is not for page files");

    // Flatten the document onto a single line so a row can be matched as one unit.
    String rawContents = FileUtils.readFileToString(daybreakFile.getFile(), "UTF-8");
    String flattened = rawContents.replaceAll("\\r?\\n", " ");

    List<DaybreakFragment> results = new ArrayList<>();
    Matcher rows = rowPattern.matcher(flattened);

    // The first two rows are only the table headers; consume and discard them.
    for (int skipped = 0; skipped < 2; skipped++) {
      rows.find();
    }

    // Every remaining row describes exactly one bill.
    while (rows.find()) {
      String untagged = stripParts.matcher(rows.group(1)).replaceAll("");
      String rowText =
          untagged
              .replace("</td>", "\n") // cell ends and explicit breaks become newlines
              .replace("<br>", "\n")
              .replace("�", " "); // replace all instances of � with a space

      String[] rowLines = rowText.split("\\n");

      // The print number is expected on the first line; it stays null when split() yields
      // no lines at all.
      String printNo = rowLines.length > 0 ? rowLines[0].trim() : null;

      StringBuilder body = new StringBuilder(rowText.length());
      for (String rowLine : rowLines) {
        body.append(rowLine.trim()).append('\n');
      }

      // TODO: it is assumed that the daybreak only contains bills from the current session year
      // TODO: perhaps there is another way of getting the session year?
      SessionYear session = SessionYear.of(daybreakFile.getReportDate().getYear());
      BillId billId = new BillId(printNo, session);

      results.add(new DaybreakFragment(billId, daybreakFile, body.toString()));
    }

    return results;
  }
// Example no. 2
// 0
  /**
   * Constructs an incomplete member based on a limited amount of information.
   *
   * <p>The short name is upper-cased (locale-independently) and trimmed, then matched against
   * {@code shortNamePattern}. Capture group 1 becomes the last name and, when the pattern
   * declares exactly three groups, capture group 3 becomes the first name. The full name is
   * "first last" when a first name was captured and just the last name otherwise. The member is
   * flagged as incumbent when {@code sessionYear} equals {@code SessionYear.current()}.
   *
   * @param lbdcShortName String - The short name of the member as represented in the source data.
   * @param sessionYear SessionYear - The session year in which this member was active.
   * @param chamber Chamber
   * @throws ParseError if the given shortname is null or cannot be parsed
   * @return Member
   */
  public static Member newMakeshiftMember(
      String lbdcShortName, SessionYear sessionYear, Chamber chamber) throws ParseError {
    if (lbdcShortName == null) {
      throw new ParseError("Attempted to create makeshift member, but lbdcShortName was null!");
    }
    // Assembly members are not already uppercase. Locale.ROOT is used so the result does not
    // depend on the JVM default locale (e.g. Turkish would map "i" to a dotted capital "İ",
    // which could make an otherwise valid short name fail to match).
    lbdcShortName = lbdcShortName.toUpperCase(java.util.Locale.ROOT).trim();
    Member member = new Member();
    member.setLbdcShortName(lbdcShortName);
    member.setSessionYear(sessionYear);
    member.setChamber(chamber);
    member.setIncumbent(sessionYear.equals(SessionYear.current()));

    Matcher shortNameMatcher = shortNamePattern.matcher(lbdcShortName);
    if (!shortNameMatcher.matches()) {
      throw new ParseError(
          "Can not create makeshift member: LBDC shortname '"
              + lbdcShortName
              + "' does not match specification");
    }

    member.setLastName(shortNameMatcher.group(1));
    // groupCount() reports how many capturing groups the pattern declares, not how many took
    // part in this match, so group(3) can still be null here; the null check below covers that.
    if (shortNameMatcher.groupCount() == 3) {
      member.setFirstName(shortNameMatcher.group(3));
      String firstNamePrefix = member.getFirstName() != null ? member.getFirstName() + " " : "";
      member.setFullName(firstNamePrefix + member.getLastName());
    } else {
      member.setFullName(member.getLastName());
    }
    return member;
  }
  /**
   * Looks up two overlapping groups of 2013 senate members, maps each member to its position
   * within its group, and logs the Guava {@code MapDifference} between the two maps. The test
   * makes no assertions; it only exercises the lookups and the diff.
   */
  @Test
  public void testOrdinalMapTests() throws Exception {
    String[] shortNamesA = {"BALL", "SAVINO", "MARTINS"};
    String[] shortNamesB = {"BALL", "MARTINS", "ZELDIN"};

    List<Member> groupA = Lists.newArrayList();
    List<Member> groupB = Lists.newArrayList();

    // First group: fetch the members, then record each one's ordinal position.
    for (String shortName : shortNamesA) {
      groupA.add(
          memberService.getMemberByShortName(shortName, SessionYear.of(2013), Chamber.SENATE));
    }
    Map<Member, Integer> ordinalsA = Maps.newHashMap();
    int positionA = 0;
    for (Member member : groupA) {
      ordinalsA.put(member, positionA++);
    }

    // Second group: same procedure with a partially different roster.
    for (String shortName : shortNamesB) {
      groupB.add(
          memberService.getMemberByShortName(shortName, SessionYear.of(2013), Chamber.SENATE));
    }
    Map<Member, Integer> ordinalsB = Maps.newHashMap();
    int positionB = 0;
    for (Member member : groupB) {
      ordinalsB.put(member, positionB++);
    }

    MapDifference<Member, Integer> difference = Maps.difference(ordinalsA, ordinalsB);
    logger.info("{}", difference);

    // Earlier experiment, left disabled:
    //   Map<Integer, String> map1 =
    //       SqlBaseDao.getOridinalMapFromList(Lists.newArrayList("moose", "cow", "sheep"), 1);
    //   Map<Integer, String> map2 =
    //       SqlBaseDao.getOridinalMapFromList(
    //           Lists.newArrayList("loser", "moose", "cow", "sheep"), 1);
    //   MapDifference<Integer, String> mapDiff = Maps.difference(map1, map2);
    //   logger.info("{}", mapDiff.entriesOnlyOnRight());
  }
 /**
  * Builds a term filter on the {@code sessionYear} field using the year value of the given
  * session.
  *
  * @param sessionYear the session whose year value is used as the filter term
  * @return a term filter for {@code sessionYear}
  */
 FilterBuilder getSessionFilter(SessionYear sessionYear) {
   final String sessionYearField = "sessionYear";
   return FilterBuilders.termFilter(sessionYearField, sessionYear.getYear());
 }