Example #1
0
  /**
   * Writes the text extracted from the document held in {@code pd} to the given file, without any
   * post-processing.
   *
   * <p>The writer is always flushed and closed before this method returns, and {@code pd} is
   * closed whether or not the extraction succeeded. Failures are reported by printing the stack
   * trace; nothing is rethrown to the caller.
   *
   * @param filename path of the text file that receives the extracted text
   */
  public void PDF2Text(String filename) {
    try {
      File output = new File(filename);

      stripper = new PDFTextStripper();

      // try-with-resources guarantees the buffered text is flushed to disk; without the close the
      // tail of the output (or all of it, for short documents) can stay in the buffer and be lost.
      try (BufferedWriter writer =
          new BufferedWriter(new OutputStreamWriter(new FileOutputStream(output)))) {
        wr = writer; // keep the field assignment the original performed
        stripper.writeText(pd, writer);
      }
    } catch (Exception e) {
      e.printStackTrace();
    } finally {
      // Release the document even when extraction failed part-way through.
      if (pd != null) {
        try {
          pd.close();
        } catch (Exception closeError) {
          closeError.printStackTrace();
        }
      }
    }
  }
Example #2
0
  /**
   * Extracts the text of the document held in {@code pd}, splits it into paragraphs and writes
   * every paragraph longer than 200 characters to the given file, one per line (terminated with
   * {@code "\r\n"}).
   *
   * <p>The stripper is configured to emit {@code "&*&"} at each paragraph start and {@code "#%#"}
   * as line and page separator; those markers are then used to split the text and are replaced by
   * a single space inside each written paragraph. {@code pd} is closed before the method returns,
   * whether or not the extraction succeeded. Failures are reported by printing the stack trace;
   * nothing is rethrown to the caller.
   *
   * @param filename path of the text file that receives the filtered paragraphs
   */
  public void PDF2TextPreProssesd(String filename) {
    final String paragraphMarker = "&*&";
    final String lineMarker = "#%#";
    final int minParagraphLength = 200;

    try {
      stripper = new PDFTextStripper();
      stripper.setParagraphStart(paragraphMarker);
      stripper.setLineSeparator(lineMarker);
      stripper.setPageSeparator(lineMarker);
      String fullText = stripper.getText(pd);

      // String.split takes a regex: the raw marker "&*&" would mean "one or more '&'" and break
      // paragraphs at every ampersand in the document, so the marker must be quoted.
      String[] paragraphs = fullText.split(java.util.regex.Pattern.quote(paragraphMarker));

      // try-with-resources closes (and flushes) the writer even if a write fails.
      try (BufferedWriter out = new BufferedWriter(new FileWriter(new File(filename)))) {
        for (String paragraph : paragraphs) {
          // Short fragments are skipped; only paragraphs above the threshold are kept.
          if (paragraph.length() > minParagraphLength) {
            out.write(paragraph.replace(lineMarker, " ") + "\r\n");
          }
        }
      } catch (IOException ex) {
        ex.printStackTrace();
      }
    } catch (Exception e) {
      e.printStackTrace();
    } finally {
      // Release the document even when extraction failed part-way through.
      if (pd != null) {
        try {
          pd.close();
        } catch (Exception closeError) {
          closeError.printStackTrace();
        }
      }
    }
  }