Пример #1
0
  /**
   * Detects the type of the input file from its first two bytes and converts it to text.
   *
   * <p>The two leading bytes are compared against three signatures:
   *
   * <ul>
   *   <li>37, 80 ({@code "%P"}) - handled as {@code "pdf"} via {@code PDF2Text}</li>
   *   <li>80, 75 ({@code "PK"}) - handled as {@code "docx"} via {@code DOCX2Text}</li>
   *   <li>208, 207 ({@code 0xD0 0xCF}) - handled as {@code "doc"} via {@code DOC2Text}</li>
   * </ul>
   *
   * <p>A file matching none of the signatures is ignored: no field is set and no conversion
   * routine is called. Any exception raised while reading or converting is caught and its
   * stack trace printed, so this constructor never propagates a failure to the caller.
   *
   * @param infile path of the document to read
   * @param outfile value forwarded unchanged to the conversion routine for the detected type
   */
  public TextConvertor(String infile, String outfile) {
    try {
      File input = new File(infile);
      String inputPath = input.getAbsolutePath();

      // Read only the two signature bytes and release the file handle right away;
      // read() yields -1 for a missing byte, which matches none of the signatures below.
      int x;
      int y;
      try (FileInputStream header = new FileInputStream(inputPath)) {
        x = header.read();
        y = header.read();
      }

      if (x == 37 && y == 80) {
        filetype = "pdf";
        // The PDF branch loads straight from the File, so no stream is opened here.
        pd = PDDocument.load(input);
        PDF2Text(outfile);
      } else if (x == 80 && y == 75) {
        filetype = "docx";
        // Fresh stream positioned at byte 0; kept open until the conversion has finished.
        try (FileInputStream fis = new FileInputStream(inputPath)) {
          dx = new XWPFDocument(fis);
          DOCX2Text(outfile);
        }
      } else if (x == 208 && y == 207) {
        filetype = "doc";
        // Fresh stream positioned at byte 0; kept open until the conversion has finished.
        try (FileInputStream fis = new FileInputStream(inputPath)) {
          dc = new HWPFDocument(fis);
          DOC2Text(outfile);
        }
      }
    } catch (Exception e) {
      e.printStackTrace();
    }
  }
Пример #2
0
  /**
   * Lists the entry names of a directory that pass the given filter.
   *
   * <p>This is a direct delegation to {@link File#list(FilenameFilter)}; the result of that
   * call is returned untouched, including {@code null} when the path does not denote a
   * directory or an I/O error occurs.
   *
   * @param dirname path of the directory to list
   * @param filter filter applied to each entry name; passed through as-is
   * @return the array produced by {@code File.list(filter)} for {@code dirname}
   */
  public static String[] getFiles(String dirname, FilenameFilter filter) {
    return new File(dirname).list(filter);
  }