/**
 * Extracts the text of a .docx file and passes every line to {@code checkForKeywords}.
 *
 * <p>The text is split on {@code "\n"}; each line is reported together with the file's
 * name and the zero-based index of the line.
 *
 * @param fileName the Word (OOXML) file to analyze
 * @throws IOException if the file cannot be opened or parsed
 */
private void analyzeWordFile(File fileName) throws IOException {
  String text;
  // Close both the file stream and the document so no file handle or package
  // memory is leaked, even when parsing fails half-way.
  try (FileInputStream in = new FileInputStream(fileName);
      XWPFDocument doc = new XWPFDocument(in)) {
    XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
    text = extractor.getText();
  }

  String[] docLines = text.split("\n");
  String name = fileName.getName();
  for (int i = 0; i < docLines.length; i++) {
    checkForKeywords(name, docLines[i], i);
  }
}
 public void ReadAll(String path, String filename) {
   try {
     FileInputStream fis = new FileInputStream(path + filename + ".doc");
     XWPFDocument xdoc = new XWPFDocument(OPCPackage.open(fis));
     XWPFWordExtractor extractor = new XWPFWordExtractor(xdoc);
     System.out.println(extractor.getText());
   } catch (Exception ex) {
     ex.printStackTrace();
   }
 }
 private String sampleDocx(String resourceName) {
   String content = null;
   try {
     InputStream input = new BOMInputStream(TestUtil.class.getResourceAsStream(resourceName));
     XWPFDocument document = new XWPFDocument(OPCPackage.open(input));
     @SuppressWarnings("resource")
     XWPFWordExtractor wordExtractor = new XWPFWordExtractor(document);
     content = wordExtractor.getText();
   } catch (Exception exep) {
     exep.printStackTrace();
   }
   return content;
 }
  public ArrayList<String> getText(File file) {

    String text = "";

    try {

      XWPFDocument docx = new XWPFDocument(new FileInputStream(file));
      XWPFWordExtractor we = new XWPFWordExtractor(docx);
      text = we.getText();
      we.close();

    } catch (IOException e) {
      e.printStackTrace();
    }

    return new ArrayList<String>(Arrays.asList(text.split("\n")));
  }
  /**
   * save the converted text (without any processing) to the given file.
   *
   * @param filename
   * @return
   */
  public void DOCX2Text(String filename) {
    try {
      File output =
          new File(filename); // The text file where you are going to store the extracted data

      XWPFWordExtractor ex = new XWPFWordExtractor(dx);

      String fileData = ex.getText();

      wr = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(output)));
      wr.write(fileData);

      wr.close();
    } catch (Exception e) {
      e.printStackTrace();
    }
  }
// Example #6
// 0
 /**
  * Extracts the text from a Word (.docx) document and returns it as a character array.
  *
  * <p>The document built from the stream is closed before returning. This method does not
  * explicitly close {@code inputStream}; the caller remains responsible for it.
  *
  * @param inputStream An input stream pointing to the Word document to be read.
  * @return the extracted document text as a {@code char[]}
  * @throws IOException if the document cannot be read or parsed
  */
 private static char[] loadMSWordDocx(InputStream inputStream) throws IOException {
   // Close the document so its package resources are released on every path.
   try (XWPFDocument docx = new XWPFDocument(inputStream)) {
     XWPFWordExtractor extractor = new XWPFWordExtractor(docx);
     return extractor.getText().toCharArray();
   }
 }