/** * Constructor sets the input and output file and convert the pdf, docx and doc files to text . * * @param infile,outfile * @return */ public TextConvertor(String infile, String outfile) { try { File input = new File(infile); // The file from where you would like to extract FileInputStream fis = new FileInputStream(input.getAbsolutePath()); int x = fis.read(); int y = fis.read(); fis = new FileInputStream(input.getAbsolutePath()); if (x == 37 && y == 80) { filetype = "pdf"; pd = PDDocument.load(input); PDF2Text(outfile); } else if (x == 80 && y == 75) { filetype = "docx"; dx = new XWPFDocument(fis); DOCX2Text(outfile); } else if (x == 208 && y == 207) { filetype = "doc"; dc = new HWPFDocument(fis); DOC2Text(outfile); } } catch (Exception e) { e.printStackTrace(); } }
/**
 * Lists the names of the entries in a directory that are accepted by a filter.
 *
 * @param dirname path of the directory to list
 * @param filter  filter applied to each entry name; {@code null} accepts every entry
 * @return the accepted entry names, or an empty array when {@code dirname} does not denote
 *         a listable directory; never {@code null}
 */
public static String[] getFiles(String dirname, FilenameFilter filter) {
    String[] files = new File(dirname).list(filter);
    // File.list yields null for a non-directory or on an I/O error; hand callers an empty
    // array instead so they can iterate the result without a null check.
    return files != null ? files : new String[0];
}