Beispiel #1
0
  /**
   * Parses an HTML judgment file, stores the extracted metadata on {@code scd} and returns the
   * string form of the document body.
   *
   * <p>The text of the page's {@code h1} element(s) is upper-cased and split on a stand-alone
   * "V" / "VS" token (optionally followed by a dot). The first piece is the first participant.
   * The last piece is split on "(": the text before the first "(" is the second participant and
   * the segment after it, with ")" characters removed, is the case id. The decision date is read
   * from the first {@code span.date-display-single} element. Missing pieces are left as empty
   * strings instead of aborting the whole extraction.
   *
   * @param scd court document that receives case id, decision date, heard date (always
   *     {@code null}) and the tab-terminated participant names
   * @param pdfFile path of the HTML file to parse (despite the name, it is parsed as HTML)
   * @param startpage not used by this method
   * @param endpage not used by this method
   * @return the string form of the parsed document body, or {@code null} when {@code pdfFile} is
   *     {@code null} or the file could not be parsed
   */
  String extractContentFromDocument(
      LesothoCourtDocument scd, String pdfFile, int startpage, int endpage) {
    // Validate before building the File: new File((String) null) throws NullPointerException,
    // which previously made this check unreachable.
    if (pdfFile == null) {
      System.out.println("File name is not valid");
      return null;
    }

    String textContent = null;
    String extractedCaseId = "";
    String extractedDecisionDate = "";
    StringBuilder participantsString = new StringBuilder();

    try {
      Document document = Jsoup.parse(new File(pdfFile), "UTF-8");
      String heading = document.select("h1").text().toUpperCase();

      // Split only on a stand-alone "V"/"VS" token. Splitting on the bare letter "V" cut party
      // names and case numbers such as "CIV/APN/..." into pieces.
      String[] sides = heading.split("\\s+VS?\\.?\\s+");
      if (sides.length > 0) {
        String[] lastSideParts = sides[sides.length - 1].split("\\(");

        // Names are trimmed because the separator's surrounding whitespace is not part of them.
        String firstParty = sides[0].trim();
        String secondParty = lastSideParts.length > 0 ? lastSideParts[0].trim() : "";
        participantsString.append(firstParty).append("\t");
        participantsString.append(secondParty).append("\t");

        // The case id is the segment following the first "("; absent when the heading has none.
        if (lastSideParts.length > 1) {
          extractedCaseId = lastSideParts[1].replaceAll("\\)", "");
        }
      }

      // first() yields null when the selector matches nothing.
      Element decDate = document.select("span.date-display-single").first();
      if (decDate != null) {
        extractedDecisionDate = decDate.text();
      }

      textContent = document.body().toString();

      System.out.println(participantsString);
      System.out.println("Decision Date: " + extractedDecisionDate);
      System.out.println("Case Id: " + extractedCaseId);

      scd.setCaseId(extractedCaseId);
      scd.setDecisionDate(extractedDecisionDate);
      scd.setHeardDate(null);
      scd.setParticipantsName(participantsString.toString());

    } catch (Exception e) {
      // Best-effort extraction: report the failure and let the caller see a null result.
      System.out.println("Error in parsing html : " + e.getMessage());
    }
    return textContent;
  }
Beispiel #2
0
  /**
   * Creates a {@code LesothoCourtDocument} for one case file, fills it from the file's content
   * and runs citation extraction over that content.
   *
   * @param caseFile path of the case file handed to {@code extractContentFromDocument}
   * @param sourceFileName source file name passed to the document constructor
   * @return the populated court document
   */
  CourtDocument processCaseDetails(String caseFile, String sourceFileName) {
    final int firstPage = 1;
    final int lastPage = 1;

    LesothoCourtDocument courtDocument =
        new LesothoCourtDocument(
            getCountryName(), getCourtName(), getProcessedUserName(), sourceFileName);

    String bodyText = extractContentFromDocument(courtDocument, caseFile, firstPage, lastPage);

    // NOTE(review): this is the character count of the path string, not the size of the file
    // on disk — confirm that this is what setFileLength expects.
    setFileLength(caseFile.length());

    // The whole document is treated as a single page of content.
    List<String> pages = new ArrayList<String>();
    pages.add(bodyText);
    courtDocument.extractCitations(getCourtName(), pages);

    return courtDocument;
  }