/**
 * Extracts the text of a Word (OOXML) file and hands every line to
 * {@code checkForKeywords}, together with the file's name and the
 * zero-based index of the line.
 *
 * @param fileName the Word file to analyze
 * @throws IOException if the file cannot be opened, read or closed
 */
private void analyzeWordFile(File fileName) throws IOException {
    final String text;
    // try-with-resources releases the file handle and the in-memory document
    // even when extraction fails; previously none of them were closed.
    try (FileInputStream in = new FileInputStream(fileName);
         XWPFDocument doc = new XWPFDocument(in);
         XWPFWordExtractor extractor = new XWPFWordExtractor(doc)) {
        text = extractor.getText();
    }

    // The document is no longer needed once the text has been pulled out,
    // so the keyword scan runs after the resources are released.
    String[] docLines = text.split("\n");
    for (int i = 0; i < docLines.length; i++) {
        checkForKeywords(fileName.getName(), docLines[i], i);
    }
}
/**
 * Opens the file {@code path + filename + ".doc"}, extracts its text and
 * prints it to standard output. Any failure is reported by printing the
 * stack trace; nothing is rethrown.
 *
 * @param path     directory prefix, concatenated as-is with {@code filename}
 * @param filename file name without extension; {@code ".doc"} is appended
 */
public void ReadAll(String path, String filename) {
    // NOTE(review): the ".doc" suffix is passed to XWPFDocument, which reads the
    // OOXML (.docx) format — confirm the files are really OOXML despite the name.
    // try-with-resources closes the stream, document and extractor, which the
    // previous version left open.
    try (FileInputStream fis = new FileInputStream(path + filename + ".doc");
         XWPFDocument xdoc = new XWPFDocument(OPCPackage.open(fis));
         XWPFWordExtractor extractor = new XWPFWordExtractor(xdoc)) {
        System.out.println(extractor.getText());
    } catch (Exception ex) {
        ex.printStackTrace();
    }
}
/**
 * Loads a Word (OOXML) document from the classpath, relative to
 * {@code TestUtil}, and returns its extracted text.
 *
 * @param resourceName name of the resource, resolved via
 *                     {@code TestUtil.class.getResourceAsStream}
 * @return the document text, or {@code null} if anything goes wrong while
 *         loading or extracting (the stack trace is printed in that case)
 */
private String sampleDocx(String resourceName) {
    String content = null;
    // All three resources are closed automatically, so the former
    // @SuppressWarnings("resource") is no longer needed.
    try (InputStream input = new BOMInputStream(TestUtil.class.getResourceAsStream(resourceName));
         XWPFDocument document = new XWPFDocument(OPCPackage.open(input));
         XWPFWordExtractor wordExtractor = new XWPFWordExtractor(document)) {
        content = wordExtractor.getText();
    } catch (Exception exep) {
        exep.printStackTrace();
    }
    return content;
}
/**
 * Extracts the text of a Word (OOXML) file and returns it split into lines.
 *
 * @param file the Word file to read
 * @return the document text split on {@code "\n"}; if reading fails the
 *         stack trace is printed and the list holds a single empty string
 */
public ArrayList<String> getText(File file) {
    String text = "";
    // try-with-resources closes the file stream (previously never closed) and
    // guarantees the extractor is closed even if getText() throws.
    try (FileInputStream in = new FileInputStream(file);
         XWPFDocument docx = new XWPFDocument(in);
         XWPFWordExtractor we = new XWPFWordExtractor(docx)) {
        text = we.getText();
    } catch (IOException e) {
        e.printStackTrace();
    }
    return new ArrayList<String>(Arrays.asList(text.split("\n")));
}
/** * save the converted text (without any processing) to the given file. * * @param filename * @return */ public void DOCX2Text(String filename) { try { File output = new File(filename); // The text file where you are going to store the extracted data XWPFWordExtractor ex = new XWPFWordExtractor(dx); String fileData = ex.getText(); wr = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(output))); wr.write(fileData); wr.close(); } catch (Exception e) { e.printStackTrace(); } }
/**
 * Extracts the text from a Word (OOXML) document and returns it as a
 * character array.
 *
 * @param inputStream an input stream pointing to the Word document to be read
 * @return the extracted document text as a {@code char[]}
 * @throws IOException if the document cannot be read or closed
 */
private static char[] loadMSWordDocx(InputStream inputStream) throws IOException {
    // Close the document and extractor once the text is extracted; previously
    // both were left open. The stream itself is not explicitly closed here.
    try (XWPFDocument docx = new XWPFDocument(inputStream);
         XWPFWordExtractor extractor = new XWPFWordExtractor(docx)) {
        return extractor.getText().toCharArray();
    }
}