Example #1
0
  /**
   * Writes a new POIFS (OLE2) container to the file at {@code path}.
   *
   * <p>NOTE(review): {@code content} is converted to bytes but never stored in the container,
   * because the step that would create the "WordDocument" entry is disabled below. Confirm
   * whether that step should be restored.
   *
   * @param path destination file path
   * @param content text intended for the document; currently only converted to bytes
   * @return {@code true} if the file was written, {@code false} if an {@link IOException} occurred
   */
  public boolean writeWordFile(String path, String content) {
    boolean w = false;
    try {

      // byte b[] = content.getBytes("ISO-8859-1");
      byte b[] = content.getBytes();

      ByteArrayInputStream bais = new ByteArrayInputStream(b);

      POIFSFileSystem fs = new POIFSFileSystem();
      // DirectoryEntry directory = fs.getRoot();

      // DocumentEntry de = directory.createDocument("WordDocument",
      // bais);

      FileOutputStream ostream = new FileOutputStream(path);
      try {
        fs.writeFilesystem(ostream);
      } finally {
        // release the file handle even when writing fails
        ostream.close();
      }
      bais.close();

      // only reached when the container was written and the file closed without error
      w = true;

    } catch (IOException e) {
      e.printStackTrace();
    }

    return w;
  }
 /**
  * Prepares the fixture: checks that a fresh {@code ModelFactory} has a non-null listener list,
  * resets the shared fields and opens the "Workbook" stream of a workbook serialised in memory.
  */
 protected void setUp() throws Exception {
   final ModelFactory probe = new ModelFactory();
   assertTrue("listeners member cannot be null", probe.listeners != null);

   models = new ArrayList(3);
   factory = new ModelFactory();
   book = new HSSFWorkbook();

   // Serialise the workbook, then re-open the bytes as a POIFS to reach its record stream.
   final byte[] workbookBytes = setupRunFile(book).toByteArray();
   final POIFSFileSystem fileSystem =
       new POIFSFileSystem(new ByteArrayInputStream(workbookBytes));
   in = fileSystem.createDocumentInputStream("Workbook");
 }
  /** Checks that the properties read from {@code doc} can be written into a fresh POIFS. */
  public void testWriteProperties() throws Exception {
    final POIFSFileSystem target = new POIFSFileSystem();

    // Load the properties, then push them into the empty file system.
    doc.readProperties();
    doc.writeProperties(target);

    // Both property streams must now be openable from the target.
    final String[] expectedStreams = {"\005SummaryInformation", "\005DocumentSummaryInformation"};
    for (int i = 0; i < expectedStreams.length; i++) {
      assertNotNull(target.createDocumentInputStream(expectedStreams[i]));
    }
  }
Example #4
0
  /**
   * Setup is used to get the document ready. Gets the DocumentSummaryInformation and the
   * SummaryInformation to reasonable values.
   *
   * <p>Each property set is read from the root directory of a freshly created POIFS; when its
   * stream is missing, a new empty property set is created instead. Any other failure while
   * reading fails the test.
   */
  public void setUp() {
    bout = new ByteArrayOutputStream();
    poifs = new POIFSFileSystem();
    dir = poifs.getRoot();
    dsi = null;
    try {
      DocumentEntry dsiEntry =
          (DocumentEntry) dir.getEntry(DocumentSummaryInformation.DEFAULT_STREAM_NAME);
      DocumentInputStream dis = new DocumentInputStream(dsiEntry);
      PropertySet ps = new PropertySet(dis);
      dis.close();
      dsi = new DocumentSummaryInformation(ps);

    } catch (FileNotFoundException ex) {
      /* There is no document summary information yet. We have to create a
       * new one. */
      dsi = PropertySetFactory.newDocumentSummaryInformation();
      assertNotNull(dsi);
    } catch (IOException e) {
      e.printStackTrace();
      fail();
    } catch (NoPropertySetStreamException e) {
      e.printStackTrace();
      fail();
    } catch (MarkUnsupportedException e) {
      e.printStackTrace();
      fail();
    } catch (UnexpectedPropertySetTypeException e) {
      e.printStackTrace();
      fail();
    }
    assertNotNull(dsi);
    try {
      DocumentEntry siEntry = (DocumentEntry) dir.getEntry(SummaryInformation.DEFAULT_STREAM_NAME);
      DocumentInputStream dis = new DocumentInputStream(siEntry);
      PropertySet ps = new PropertySet(dis);
      dis.close();
      si = new SummaryInformation(ps);

    } catch (FileNotFoundException ex) {
      /* There is no summary information yet. We have to create a
       * new one. */
      si = PropertySetFactory.newSummaryInformation();
      assertNotNull(si);
    } catch (IOException e) {
      e.printStackTrace();
      fail();
    } catch (NoPropertySetStreamException e) {
      e.printStackTrace();
      fail();
    } catch (MarkUnsupportedException e) {
      e.printStackTrace();
      fail();
    } catch (UnexpectedPropertySetTypeException e) {
      e.printStackTrace();
      fail();
    }
    // Verify the summary information this second section produced (not dsi again).
    assertNotNull(si);
  }
  /**
   * Read an excel file and spit out what we find.
   *
   * <p>Opens the SQL Server connection stored in {@code con}, then feeds every record of the
   * file's "Workbook" stream to a {@code LoadServiceFromExcel} listener. The connection and both
   * streams are released even when processing fails.
   *
   * @param args Expect one argument that is the file to read.
   * @throws IOException When there is an error processing the file.
   */
  public static void main(String[] args) throws IOException {

    try {
      Class.forName("com.microsoft.sqlserver.jdbc.SQLServerDriver");
      // NOTE(review): host, database and user are hard-coded in the URL below; consider moving
      // them to configuration.
      con =
          DriverManager.getConnection(
              "jdbc:sqlserver://10.135.128.227:1433;DatabaseName=RTBWTC;user=i264678;password=;SelectMethod=cursor ");
      System.out.println("connected");

      // create a new file input stream with the input file specified
      // at the command line
      FileInputStream fin = new FileInputStream(args[0]);
      try {
        // create a new org.apache.poi.poifs.filesystem.Filesystem
        POIFSFileSystem poifs = new POIFSFileSystem(fin);
        // get the Workbook (excel part) stream in a InputStream
        InputStream din = poifs.createDocumentInputStream("Workbook");
        try {
          // construct out HSSFRequest object
          HSSFRequest req = new HSSFRequest();
          // lazy listen for ALL records with the listener shown above
          req.addListenerForAllRecords(new LoadServiceFromExcel());
          // create our event factory
          HSSFEventFactory factory = new HSSFEventFactory();
          // process our events based on the document input stream
          factory.processEvents(req, din);
        } finally {
          // close the document input stream (don't want to leak these!)
          din.close();
        }
      } finally {
        // close the file input stream whether or not processing succeeded
        fin.close();
      }
      System.out.println("done.");
    } catch (ClassNotFoundException e) {
      e.printStackTrace();
    } catch (SQLException e) {
      e.printStackTrace();
    } finally {
      // con is still null when the driver could not be loaded or the connection failed
      if (con != null) {
        try {
          con.close();
        } catch (SQLException e) {
          e.printStackTrace();
        }
      }
    }
  }
  /**
   * Round-trips the properties of {@code doc} through an in-memory POIFS image and then re-runs
   * the read checks against the re-opened file system.
   */
  public void testWriteReadProperties() throws Exception {
    // Serialise the properties into an in-memory file system image.
    final ByteArrayOutputStream sink = new ByteArrayOutputStream();
    final POIFSFileSystem written = new POIFSFileSystem();
    doc.readProperties();
    doc.writeProperties(written);
    written.writeFilesystem(sink);

    // Re-open the image as a new file system.
    final byte[] image = sink.toByteArray();
    final POIFSFileSystem reopened = new POIFSFileSystem(new ByteArrayInputStream(image));

    // Point the document at the re-opened file system and read the properties back.
    doc.filesystem = reopened;
    doc.readProperties();

    // The existing read test performs the actual value checks.
    testReadProperties();
  }
Example #7
0
  /**
   * Runs the example program. The application expects one or two arguments:
   *
   * <ol>
   *   <li>
   *       <p>The first argument is the disk file name of the POI filesystem to copy.
   *   <li>
   *       <p>The second argument is optional. If it is given, it is the name of a disk file the
   *       copy of the POI filesystem will be written to. If it is not given, the copy will be
   *       written to a temporary file which will be deleted at the end of the program.
   * </ol>
   *
   * <p>With any other number of arguments a usage message is printed to standard error and the
   * program exits with status 1.
   *
   * @param args Command-line arguments.
   * @exception MarkUnsupportedException if a POI document stream does not support the mark()
   *     operation.
   * @exception NoPropertySetStreamException if the application tries to create a property set from
   *     a POI document stream that is not a property set stream.
   * @exception IOException if any I/O exception occurs.
   * @exception UnsupportedEncodingException if a character encoding is not supported.
   */
  public static void main(final String[] args)
      throws NoPropertySetStreamException, MarkUnsupportedException, UnsupportedEncodingException,
          IOException {
    String originalFileName = null;
    String copyFileName = null;

    /* Check the command-line arguments. */
    if (args.length == 1) {
      originalFileName = args[0];
      File f = TempFile.createTempFile("CopyOfPOIFileSystem-", ".ole2");
      f.deleteOnExit();
      copyFileName = f.getAbsolutePath();
    } else if (args.length == 2) {
      originalFileName = args[0];
      copyFileName = args[1];
    } else {
      /* The leading blank keeps the class name and the argument list apart. */
      System.err.println("Usage: " + CopyCompare.class.getName() + " originPOIFS [copyPOIFS]");
      System.exit(1);
    }

    /* Read the origin POIFS using the eventing API. The real work is done
     * in the class CopyFile which is registered here as a POIFSReader. */
    final POIFSReader r = new POIFSReader();
    final CopyFile cf = new CopyFile(copyFileName);
    r.registerListener(cf);
    final FileInputStream originalIn = new FileInputStream(originalFileName);
    try {
      r.read(originalIn);
    } finally {
      /* Release the file handle whether or not reading succeeded. */
      originalIn.close();
    }

    /* Write the new POIFS to disk. */
    cf.close();

    /* Read all documents from the original POI file system and compare them
     * with the equivalent document from the copy. */
    final POIFSFileSystem opfs = new POIFSFileSystem(new FileInputStream(originalFileName));
    final POIFSFileSystem cpfs = new POIFSFileSystem(new FileInputStream(copyFileName));

    final DirectoryEntry oRoot = opfs.getRoot();
    final DirectoryEntry cRoot = cpfs.getRoot();
    final StringBuffer messages = new StringBuffer();
    if (equal(oRoot, cRoot, messages)) {
      System.out.println("Equal");
    } else {
      System.out.println("Not equal: " + messages.toString());
    }
  }
Example #8
0
  /**
   * Imports an Excel 2003 (BIFF format) file. For the XML-based format SAX could be used instead
   * (untested).
   *
   * <p>Every record of the "Workbook" stream is passed to an {@code Excel2003Listener} that
   * collects rows into {@code dataList}; rows still in the list afterwards are saved in one final
   * batch. Both input streams are closed even when parsing fails.
   */
  @Test
  public void testImportExcel2003() throws Exception {

    long beginTime = System.currentTimeMillis();

    String fileName = "D:\\Backup\\Book1.xls";

    List<ExcelData> dataList = Lists.newArrayList();

    // input stream over the file
    InputStream fis = new BufferedInputStream(new FileInputStream(fileName));
    try {
      // create the org.apache.poi.poifs.filesystem.Filesystem
      POIFSFileSystem poifs = new POIFSFileSystem(fis);
      // get the Workbook (excel part) stream from the file system
      InputStream din = poifs.createDocumentInputStream("Workbook");
      try {
        // construct the HSSFRequest
        HSSFRequest req = new HSSFRequest();

        // register the listener for all records
        req.addListenerForAllRecords(new Excel2003Listener(dataList));
        // create the event factory
        HSSFEventFactory factory = new HSSFEventFactory();
        // process the events from the document input stream
        factory.processEvents(req, din);
      } finally {
        // close the document stream
        din.close();
      }
    } finally {
      // close the file input stream
      fis.close();
    }

    System.out.println(dataList.size());
    // save whatever is left over (fewer rows than a full batch)
    if (dataList.size() > 0) {
      doBatchSave(dataList);
    }

    long endTime = System.currentTimeMillis();
    log.info("耗时(秒):" + (endTime - beginTime) / 1000);
  }
Example #9
0
  /**
   * Builds the matching HSSFWorkbook / XSSFWorkbook for the supplied InputStream, which may be
   * password protected.
   *
   * <p>The stream MUST either support mark/reset or be wrapped as a {@link PushbackInputStream};
   * a stream that reports no mark support is wrapped here with an 8-byte pushback buffer. Reading
   * from an {@link InputStream} needs more memory than reading from a {@link File}, so prefer
   * {@link #create(File)} where possible.
   *
   * <p>To release resources properly the returned Workbook should be closed after use.
   *
   * @throws EncryptedDocumentException If the wrong password is given for a protected file
   * @throws EmptyFileException If an empty stream is given
   * @throws IllegalArgumentException If the stream has neither an OLE2 nor an OOXML header
   */
  public static Workbook create(InputStream inp, String password)
      throws IOException, InvalidFormatException, EncryptedDocumentException {
    // Streams without mark/reset get a pushback wrapper so the header can be peeked.
    final InputStream source = inp.markSupported() ? inp : new PushbackInputStream(inp, 8);

    // Peek at the first eight bytes; this also ensures there is some data at all.
    final byte[] signature = IOUtils.peekFirst8Bytes(source);

    if (POIFSFileSystem.hasPOIFSHeader(signature)) {
      // OLE2 container: delegate to the file-system based factory method.
      return create(new NPOIFSFileSystem(source), password);
    } else if (POIXMLDocument.hasOOXMLHeader(source)) {
      // OOXML package.
      return new XSSFWorkbook(OPCPackage.open(source));
    } else {
      throw new IllegalArgumentException(
          "Your InputStream was neither an OLE2 stream, nor an OOXML stream");
    }
  }
Example #10
0
    /**
     * Ensures that the directory hierarchy for a document in a POI filesystem is in place. When a
     * document is to be created somewhere in a POI filesystem its directory must be created first.
     * This method creates all directories between the POI filesystem root and the directory the
     * document should belong to which do not yet exist.
     *
     * <p>Unfortunately POI does not offer a simple method to interrogate the POIFS whether a
     * certain child node (file or directory) exists in a directory. However, since we always start
     * with an empty POIFS which contains the root directory only and since each directory in the
     * POIFS is created by this method we can maintain the POIFS's directory hierarchy ourselves:
     * The {@link DirectoryEntry} of each directory created is stored in a {@link Map}. The
     * directories' path names map to the corresponding {@link DirectoryEntry} instances.
     *
     * @param poiFs The POI filesystem the directory hierarchy is created in, if needed.
     * @param path The document's path. This method creates those directory components of this
     *     hierarchy which do not yet exist.
     * @return The directory entry of the document path's parent. The caller should use this {@link
     *     DirectoryEntry} to create documents in it.
     * @throws RuntimeException wrapping the {@link IOException} if a directory cannot be created.
     */
    public DirectoryEntry getPath(final POIFSFileSystem poiFs, final POIFSDocumentPath path) {
      try {
        /* Check whether this directory has already been created. */
        final String s = path.toString();
        DirectoryEntry de = (DirectoryEntry) paths.get(s);
        if (de != null) {
          /* Yes: return the corresponding DirectoryEntry. */
          return de;
        }

        /* No: We have to create the directory - or return the root's
         * DirectoryEntry. */
        final int l = path.length();
        if (l == 0) {
          /* Get the root directory. It does not have to be created
           * since it always exists in a POIFS. */
          de = poiFs.getRoot();
        } else {
          /* Create a subordinate directory. The first step is to
           * ensure that the parent directory exists: */
          de = getPath(poiFs, path.getParent());
          /* Now create the target directory: */
          de = de.createDirectory(path.getComponent(l - 1));
        }
        paths.put(s, de);
        return de;
      } catch (IOException ex) {
        /* This exception will be thrown if the directory already
         * exists. However, since we have full control about directory
         * creation we can ensure that this will never happen. */
        ex.printStackTrace(System.err);
        /* Chain the original exception so its stack trace is kept; the
         * message is still ex.toString(), exactly as before. */
        throw new RuntimeException(ex);
      }
    }
  /**
   * Builds the workbook for a report, based either on the report's template or on an empty
   * workbook, and adds summary and document-summary property streams to it.
   *
   * <p>The summary information always receives the creation time; application, author, version,
   * title, subject and comments are added when the report description supplies a non-null value.
   * The document summary information receives company and category under the same rule.
   *
   * @param report the report providing the optional template and the description values
   * @param ctx the EL context handed to the description getters
   * @return a workbook created from the assembled POIFS
   * @throws Exception if the file system or the property streams cannot be built
   */
  protected HSSFWorkbook makeWorkbook(final Report report, final ELContext ctx) throws Exception {
    final POIFSFileSystem fs;
    if (report.getTemplate() != null) {
      fs = new POIFSFileSystem(new ByteArrayInputStream(report.getTemplate()));
    } else {
      fs = new POIFSFileSystem();
      // The empty workbook is only needed when there is no template, so build it here.
      final byte[] emptyWorkbookData = new HSSFWorkbook().getBytes();
      fs.createDocument(new ByteArrayInputStream(emptyWorkbookData), "Workbook");
    }

    final MutablePropertySet siProperties = new MutablePropertySet();
    final MutableSection siSection = (MutableSection) siProperties.getSections().get(0);
    siSection.setFormatID(SectionIDMap.SUMMARY_INFORMATION_ID);
    final MutableProperty created = new MutableProperty();
    created.setID(PropertyIDMap.PID_CREATE_DTM);
    created.setType(Variant.VT_FILETIME);
    created.setValue(new Date());
    siSection.setProperty(created);

    // The getters are called in the same order as before; null values are skipped by the helper.
    setStringProperty(
        siSection, PropertyIDMap.PID_APPNAME, report.getDescription().getApplication(ctx));
    setStringProperty(siSection, PropertyIDMap.PID_AUTHOR, report.getDescription().getAuthor(ctx));
    setStringProperty(
        siSection, PropertyIDMap.PID_REVNUMBER, report.getDescription().getVersion(ctx));
    setStringProperty(siSection, PropertyIDMap.PID_TITLE, report.getDescription().getTitle(ctx));
    setStringProperty(
        siSection, PropertyIDMap.PID_SUBJECT, report.getDescription().getSubject(ctx));
    setStringProperty(
        siSection, PropertyIDMap.PID_COMMENTS, report.getDescription().getComments(ctx));

    final MutablePropertySet dsiProperties = new MutablePropertySet();
    final MutableSection dsiSection = (MutableSection) dsiProperties.getSections().get(0);
    dsiSection.setFormatID(SectionIDMap.DOCUMENT_SUMMARY_INFORMATION_ID[0]);
    setStringProperty(
        dsiSection, PropertyIDMap.PID_COMPANY, report.getDescription().getCompany(ctx));
    setStringProperty(
        dsiSection, PropertyIDMap.PID_CATEGORY, report.getDescription().getCategory(ctx));

    fs.createDocument(siProperties.toInputStream(), SummaryInformation.DEFAULT_STREAM_NAME);
    fs.createDocument(
        dsiProperties.toInputStream(), DocumentSummaryInformation.DEFAULT_STREAM_NAME);
    return new HSSFWorkbook(fs, true);
  }

  /** Adds a VT_LPWSTR property with the given ID to the section; does nothing for a null value. */
  private static void setStringProperty(
      final MutableSection section, final long id, final String value) {
    if (value == null) {
      return;
    }
    final MutableProperty p = new MutableProperty();
    p.setID(id);
    p.setType(Variant.VT_LPWSTR);
    p.setValue(value);
    section.setProperty(p);
  }
Example #12
0
  /**
   * Extracts the text pieces of a Word binary document and joins them, each followed by a single
   * space.
   *
   * <p>The "WordDocument" stream and one table stream ("1Table" or "0Table") are loaded from the
   * POIFS; {@code findText} then fills the list of text pieces, which are decoded from the
   * "WordDocument" bytes.
   *
   * <p>NOTE(review): pieces flagged as Unicode are decoded as "UTF-8" and all others as "big5" —
   * confirm these charsets match the documents being processed. The return values of the
   * {@code read} calls are not checked.
   *
   * @param in stream positioned at the start of an OLE2 Word document
   * @return the concatenated text of all pieces
   * @throws IOException if the document cannot be read
   */
  public String extractText(InputStream in) throws IOException {
    POIFSFileSystem fileSystem = new POIFSFileSystem(in);

    // Load the whole "WordDocument" stream into memory.
    DocumentEntry mainEntry = (DocumentEntry) fileSystem.getRoot().getEntry("WordDocument");
    DocumentInputStream stream = fileSystem.createDocumentInputStream("WordDocument");
    byte[] header = new byte[mainEntry.getSize()];
    stream.read(header);
    stream.close();

    // Get the information from the document header: the flag word at offset 0xa ...
    int flags = LittleEndian.getShort(header, 0xa);
    // ... and the piece table offset at 0x1a2.
    int complexOffset = LittleEndian.getInt(header, 0x1a2);

    // Bit 0x200 of the flag word selects which of the two table streams is used.
    String tableName = ((flags & 0x200) != 0) ? "1Table" : "0Table";

    // Load the selected table stream.
    DocumentEntry tableEntry = (DocumentEntry) fileSystem.getRoot().getEntry(tableName);
    byte[] tableStream = new byte[tableEntry.getSize()];
    stream = fileSystem.createDocumentInputStream(tableName);
    stream.read(tableStream);
    stream.close();

    // Collect the text pieces described by the table stream.
    ArrayList<WordTextPiece> pieces = new ArrayList<WordTextPiece>();
    int multiple = findText(tableStream, complexOffset, pieces);

    StringBuilder result = new StringBuilder();
    for (WordTextPiece piece : pieces) {
      int start = piece.getStart();
      int length = piece.getLength();

      String decoded =
          piece.usesUnicode()
              ? new String(header, start, length * multiple, "UTF-8")
              : new String(header, start, length, "big5");
      result.append(decoded).append(" ");
    }
    return result.toString();
  }
Example #13
0
 /**
  * Writes the POI file system to a disk file.
  *
  * <p>The output stream is closed even when writing the file system fails.
  *
  * @throws FileNotFoundException if the destination file cannot be opened for writing
  * @throws IOException if writing the file system or closing the file fails
  */
 public void close() throws FileNotFoundException, IOException {
   out = new FileOutputStream(dstName);
   try {
     poiFs.writeFilesystem(out);
   } finally {
     // release the file handle whether or not the write succeeded
     out.close();
   }
 }
Example #14
0
  /**
   * Writes out the word file that is represented by an instance of this class.
   *
   * <p>The individual structures are serialised into a main ("WordDocument") stream and a table
   * stream while their offsets and lengths are recorded in the FileInformationBlock, which is
   * written last into the space reserved at the start of the main stream. The streams are then
   * placed into a new POIFS that is written to {@code out}. Afterwards {@code directory},
   * {@code _tableStream} and {@code _dataStream} of this instance are replaced with the newly
   * written state.
   *
   * @param out The OutputStream to write to.
   * @throws IOException If there is an unexpected IOException from the passed in OutputStream.
   */
  public void write(OutputStream out) throws IOException {
    // initialize our streams for writing.
    HWPFFileSystem docSys = new HWPFFileSystem();
    HWPFOutputStream wordDocumentStream = docSys.getStream(STREAM_WORD_DOCUMENT);
    HWPFOutputStream tableStream = docSys.getStream(STREAM_TABLE_1);
    // HWPFOutputStream dataStream = docSys.getStream("Data");
    // running start offset (within the table stream) of the next structure to be written
    int tableOffset = 0;

    // FileInformationBlock fib = (FileInformationBlock)_fib.clone();
    // clear the offsets and sizes in our FileInformationBlock.
    _fib.clearOffsetsSizes();

    // determine the FileInformationBLock size
    int fibSize = _fib.getSize();
    // round up to the next multiple of the block size; a size that is already an exact
    // multiple still grows by one full block
    fibSize +=
        POIFSConstants.SMALLER_BIG_BLOCK_SIZE - (fibSize % POIFSConstants.SMALLER_BIG_BLOCK_SIZE);

    // preserve space for the FileInformationBlock because we will be writing
    // it after we write everything else.
    byte[] placeHolder = new byte[fibSize];
    wordDocumentStream.write(placeHolder);
    int mainOffset = wordDocumentStream.getOffset();

    // write out the StyleSheet.
    _fib.setFcStshf(tableOffset);
    _ss.writeTo(tableStream);
    _fib.setLcbStshf(tableStream.getOffset() - tableOffset);
    tableOffset = tableStream.getOffset();

    // get fcMin and fcMac because we will be writing the actual text with the
    // complex table.
    int fcMin = mainOffset;

    /*
     * clx (encoding of the sprm lists for a complex file and piece table
     * for a any file) Written immediately after the end of the previously
     * recorded structure. This is recorded in all Word documents
     *
     * Microsoft Office Word 97-2007 Binary File Format (.doc)
     * Specification; Page 23 of 210
     */

    // write out the Complex table, includes text.
    _fib.setFcClx(tableOffset);
    _cft.writeTo(wordDocumentStream, tableStream);
    _fib.setLcbClx(tableStream.getOffset() - tableOffset);
    tableOffset = tableStream.getOffset();
    // main-stream offset reached after the complex table (and its text) was written
    int fcMac = wordDocumentStream.getOffset();

    /*
     * dop (document properties record) Written immediately after the end of
     * the previously recorded structure. This is recorded in all Word
     * documents
     *
     * Microsoft Office Word 97-2007 Binary File Format (.doc)
     * Specification; Page 23 of 210
     */

    // write out the DocumentProperties.
    _fib.setFcDop(tableOffset);
    _dop.writeTo(tableStream);
    _fib.setLcbDop(tableStream.getOffset() - tableOffset);
    tableOffset = tableStream.getOffset();

    /*
     * plcfBkmkf (table recording beginning CPs of bookmarks) Written
     * immediately after the sttbfBkmk, if the document contains bookmarks.
     *
     * Microsoft Office Word 97-2007 Binary File Format (.doc)
     * Specification; Page 24 of 210
     */
    if (_bookmarksTables != null) {
      _bookmarksTables.writePlcfBkmkf(_fib, tableStream);
      tableOffset = tableStream.getOffset();
    }

    /*
     * plcfBkmkl (table recording limit CPs of bookmarks) Written
     * immediately after the plcfBkmkf, if the document contains bookmarks.
     *
     * Microsoft Office Word 97-2007 Binary File Format (.doc)
     * Specification; Page 24 of 210
     */
    if (_bookmarksTables != null) {
      _bookmarksTables.writePlcfBkmkl(_fib, tableStream);
      tableOffset = tableStream.getOffset();
    }

    /*
     * plcfbteChpx (bin table for CHP FKPs) Written immediately after the
     * previously recorded table. This is recorded in all Word documents.
     *
     * Microsoft Office Word 97-2007 Binary File Format (.doc)
     * Specification; Page 24 of 210
     */

    // write out the CHPBinTable.
    _fib.setFcPlcfbteChpx(tableOffset);
    _cbt.writeTo(wordDocumentStream, tableStream, fcMin, _cft.getTextPieceTable());
    _fib.setLcbPlcfbteChpx(tableStream.getOffset() - tableOffset);
    tableOffset = tableStream.getOffset();

    /*
     * plcfbtePapx (bin table for PAP FKPs) Written immediately after the
     * plcfbteChpx. This is recorded in all Word documents.
     *
     * Microsoft Office Word 97-2007 Binary File Format (.doc)
     * Specification; Page 24 of 210
     */

    // write out the PAPBinTable.
    _fib.setFcPlcfbtePapx(tableOffset);
    _pbt.writeTo(wordDocumentStream, tableStream, _cft.getTextPieceTable());
    _fib.setLcbPlcfbtePapx(tableStream.getOffset() - tableOffset);
    tableOffset = tableStream.getOffset();

    /*
     * plcfendRef (endnote reference position table) Written immediately
     * after the previously recorded table if the document contains endnotes
     *
     * plcfendTxt (endnote text position table) Written immediately after
     * the plcfendRef if the document contains endnotes
     *
     * Microsoft Office Word 97-2007 Binary File Format (.doc)
     * Specification; Page 24 of 210
     */
    _endnotesTables.writeRef(_fib, tableStream);
    _endnotesTables.writeTxt(_fib, tableStream);
    tableOffset = tableStream.getOffset();

    /*
     * plcffld*** (table of field positions and statuses for annotation
     * subdocument) Written immediately after the previously recorded table,
     * if the ******* subdocument contains fields.
     *
     * Microsoft Office Word 97-2007 Binary File Format (.doc)
     * Specification; Page 24 of 210
     */

    if (_fieldsTables != null) {
      _fieldsTables.write(_fib, tableStream);
      tableOffset = tableStream.getOffset();
    }

    /*
     * plcffndRef (footnote reference position table) Written immediately
     * after the stsh if the document contains footnotes
     *
     * plcffndTxt (footnote text position table) Written immediately after
     * the plcffndRef if the document contains footnotes
     *
     * Microsoft Office Word 97-2007 Binary File Format (.doc)
     * Specification; Page 24 of 210
     */
    _footnotesTables.writeRef(_fib, tableStream);
    _footnotesTables.writeTxt(_fib, tableStream);
    tableOffset = tableStream.getOffset();

    /*
     * plcfsed (section table) Written immediately after the previously
     * recorded table. Recorded in all Word documents
     *
     * Microsoft Office Word 97-2007 Binary File Format (.doc)
     * Specification; Page 25 of 210
     */

    // write out the SectionTable.
    _fib.setFcPlcfsed(tableOffset);
    _st.writeTo(wordDocumentStream, tableStream);
    _fib.setLcbPlcfsed(tableStream.getOffset() - tableOffset);
    tableOffset = tableStream.getOffset();

    // write out the list tables
    if (_lt != null) {
      /*
       * plcflst (list formats) Written immediately after the end of the
       * previously recorded, if there are any lists defined in the
       * document. This begins with a short count of LSTF structures
       * followed by those LSTF structures. This is immediately followed
       * by the allocated data hanging off the LSTFs. This data consists
       * of the array of LVLs for each LSTF. (Each LVL consists of an LVLF
       * followed by two grpprls and an XST.)
       *
       * Microsoft Office Word 97-2007 Binary File Format (.doc)
       * Specification; Page 25 of 210
       */
      _lt.writeListDataTo(_fib, tableStream);
      tableOffset = tableStream.getOffset();

      /*
       * plflfo (more list formats) Written immediately after the end of
       * the plcflst and its accompanying data, if there are any lists
       * defined in the document. This consists first of a PL of LFO
       * records, followed by the allocated data (if any) hanging off the
       * LFOs. The allocated data consists of the array of LFOLVLFs for
       * each LFO (and each LFOLVLF is immediately followed by some LVLs).
       *
       * Microsoft Office Word 97-2007 Binary File Format (.doc)
       * Specification; Page 26 of 210
       */
      _fib.setFcPlfLfo(tableStream.getOffset());
      _lt.writeListOverridesTo(tableStream);
      _fib.setLcbPlfLfo(tableStream.getOffset() - tableOffset);
      tableOffset = tableStream.getOffset();
    }

    /*
     * sttbfBkmk (table of bookmark name strings) Written immediately after
     * the previously recorded table, if the document contains bookmarks.
     *
     * Microsoft Office Word 97-2007 Binary File Format (.doc)
     * Specification; Page 27 of 210
     */
    if (_bookmarksTables != null) {
      _bookmarksTables.writeSttbfBkmk(_fib, tableStream);
      tableOffset = tableStream.getOffset();
    }

    /*
     * sttbSavedBy (last saved by string table) Written immediately after
     * the previously recorded table.
     *
     * Microsoft Office Word 97-2007 Binary File Format (.doc)
     * Specification; Page 27 of 210
     */

    // write out the saved-by table.
    if (_sbt != null) {
      _fib.setFcSttbSavedBy(tableOffset);
      _sbt.writeTo(tableStream);
      _fib.setLcbSttbSavedBy(tableStream.getOffset() - tableOffset);

      tableOffset = tableStream.getOffset();
    }

    // write out the revision mark authors table.
    if (_rmat != null) {
      _fib.setFcSttbfRMark(tableOffset);
      _rmat.writeTo(tableStream);
      _fib.setLcbSttbfRMark(tableStream.getOffset() - tableOffset);

      tableOffset = tableStream.getOffset();
    }

    // write out the FontTable.
    _fib.setFcSttbfffn(tableOffset);
    _ft.writeTo(tableStream);
    _fib.setLcbSttbfffn(tableStream.getOffset() - tableOffset);
    tableOffset = tableStream.getOffset();

    // set some variables in the FileInformationBlock.
    _fib.getFibBase().setFcMin(fcMin);
    _fib.getFibBase().setFcMac(fcMac);
    _fib.setCbMac(wordDocumentStream.getOffset());

    // make sure that the table, doc and data streams use big blocks.
    // (each buffer shorter than 4096 bytes is copied into a zero-filled 4096-byte array)
    byte[] mainBuf = wordDocumentStream.toByteArray();
    if (mainBuf.length < 4096) {
      byte[] tempBuf = new byte[4096];
      System.arraycopy(mainBuf, 0, tempBuf, 0, mainBuf.length);
      mainBuf = tempBuf;
    }

    // Table1 stream will be used
    _fib.getFibBase().setFWhichTblStm(true);

    // write out the FileInformationBlock.
    // _fib.serialize(mainBuf, 0);
    _fib.writeTo(mainBuf, tableStream);

    // the table buffer is taken only now, after the FileInformationBlock write above had
    // access to the table stream
    byte[] tableBuf = tableStream.toByteArray();
    if (tableBuf.length < 4096) {
      byte[] tempBuf = new byte[4096];
      System.arraycopy(tableBuf, 0, tempBuf, 0, tableBuf.length);
      tableBuf = tempBuf;
    }

    // a missing data stream is replaced by 4096 zero bytes
    byte[] dataBuf = _dataStream;
    if (dataBuf == null) {
      dataBuf = new byte[4096];
    }
    if (dataBuf.length < 4096) {
      byte[] tempBuf = new byte[4096];
      System.arraycopy(dataBuf, 0, tempBuf, 0, dataBuf.length);
      dataBuf = tempBuf;
    }

    // create new document preserving order of entries
    POIFSFileSystem pfs = new POIFSFileSystem();
    // each flag records that the corresponding stream has already been emitted, so it is
    // written at most once
    boolean docWritten = false;
    boolean dataWritten = false;
    boolean objectPoolWritten = false;
    boolean tableWritten = false;
    boolean propertiesWritten = false;
    // walk the current directory: regenerated streams are written at the position of their
    // first occurrence, every other entry is copied unchanged
    for (Iterator<Entry> iter = directory.getEntries(); iter.hasNext(); ) {
      Entry entry = iter.next();
      if (entry.getName().equals(STREAM_WORD_DOCUMENT)) {
        if (!docWritten) {
          pfs.createDocument(new ByteArrayInputStream(mainBuf), STREAM_WORD_DOCUMENT);
          docWritten = true;
        }
      } else if (entry.getName().equals(STREAM_OBJECT_POOL)) {
        if (!objectPoolWritten) {
          _objectPool.writeTo(pfs.getRoot());
          objectPoolWritten = true;
        }
      } else if (entry.getName().equals(STREAM_TABLE_0) || entry.getName().equals(STREAM_TABLE_1)) {
        // either table stream name results in a single Table1 stream in the output
        if (!tableWritten) {
          pfs.createDocument(new ByteArrayInputStream(tableBuf), STREAM_TABLE_1);
          tableWritten = true;
        }
      } else if (entry.getName().equals(SummaryInformation.DEFAULT_STREAM_NAME)
          || entry.getName().equals(DocumentSummaryInformation.DEFAULT_STREAM_NAME)) {
        // both property streams are handled by one writeProperties call
        if (!propertiesWritten) {
          writeProperties(pfs);
          propertiesWritten = true;
        }
      } else if (entry.getName().equals(STREAM_DATA)) {
        if (!dataWritten) {
          pfs.createDocument(new ByteArrayInputStream(dataBuf), STREAM_DATA);
          dataWritten = true;
        }
      } else {
        EntryUtils.copyNodeRecursively(entry, pfs.getRoot());
      }
    }

    // append every regenerated stream that the current directory did not contain
    if (!docWritten) pfs.createDocument(new ByteArrayInputStream(mainBuf), STREAM_WORD_DOCUMENT);
    if (!tableWritten) pfs.createDocument(new ByteArrayInputStream(tableBuf), STREAM_TABLE_1);
    if (!propertiesWritten) writeProperties(pfs);
    if (!dataWritten) pfs.createDocument(new ByteArrayInputStream(dataBuf), STREAM_DATA);
    if (!objectPoolWritten) _objectPool.writeTo(pfs.getRoot());

    pfs.writeFilesystem(out);
    this.directory = pfs.getRoot();

    /*
     * since we updated all references in FIB and etc, using new arrays to
     * access data
     */
    // NOTE(review): directory was already assigned a few lines above; this second
    // assignment looks redundant.
    this.directory = pfs.getRoot();
    // the stored table stream is the unpadded stream content, not the padded tableBuf
    this._tableStream = tableStream.toByteArray();
    this._dataStream = dataBuf;
  }
 /**
  * Loads a Word document from a POIFSFileSystem by delegating to the constructor that takes the
  * file system's root directory.
  *
  * @param pfilesystem The POIFSFileSystem that contains the Word document.
  * @throws IOException If there is an unexpected IOException from the passed in POIFSFileSystem.
  */
 public HWPFDocumentCore(POIFSFileSystem pfilesystem) throws IOException {
   this(pfilesystem.getRoot());
 }
Example #16
0
    /**
     * Saves one embedded document as a file below {@code extractDir}.
     *
     * <p>The file name is taken from {@link Metadata#RESOURCE_NAME_KEY}; when that is missing a
     * {@code "file" + count} name is generated. If the name has no dot and a media type was
     * detected, the extension registered for that type is appended, and the embedded relationship
     * id (when present) is prepended.
     *
     * <p>Because the name originates from the document being parsed it is treated as untrusted:
     * the resolved target must lie inside {@code extractDir}, otherwise nothing is written and an
     * {@link IOException} is thrown. Failures while copying the content are logged and ignored,
     * as before.
     *
     * @param inputStream content of the embedded document
     * @param contentHandler not used by this method
     * @param metadata metadata of the embedded document; supplies the name and relationship id
     * @param outputHtml not used by this method
     * @throws SAXException declared by the signature; nothing in this body throws it directly
     * @throws IOException if the target path resolves outside {@code extractDir}, the parent
     *     directory cannot be created, or closing the output file fails
     */
    public void parseEmbedded(
        InputStream inputStream,
        ContentHandler contentHandler,
        Metadata metadata,
        boolean outputHtml)
        throws SAXException, IOException {
      String name = metadata.get(Metadata.RESOURCE_NAME_KEY);

      if (name == null) {
        name = "file" + count++;
      }

      MediaType contentType = detector.detect(inputStream, metadata);

      if (name.indexOf('.') == -1 && contentType != null) {
        try {
          name += config.getMimeRepository().forName(contentType.toString()).getExtension();
        } catch (MimeTypeException e) {
          // The extension is only a convenience; keep the bare name and report the problem
          // through the same logger used for the other recoverable failures below.
          logger.warn("Unable to determine file extension for " + contentType, e);
        }
      }

      String relID = metadata.get(Metadata.EMBEDDED_RELATIONSHIP_ID);
      if (relID != null && !name.startsWith(relID)) {
        name = relID + "_" + name;
      }

      File outputFile = new File(extractDir, name);

      // The name comes from the parsed document, so it may contain "../" sequences or other
      // tricks. Resolve "." the same way the output file is resolved, canonicalize both, and
      // require the extraction directory to be an ancestor of the target.
      File baseDir = new File(extractDir, ".").getCanonicalFile();
      boolean insideExtractDir = false;
      for (File ancestor = outputFile.getCanonicalFile().getParentFile();
          ancestor != null;
          ancestor = ancestor.getParentFile()) {
        if (ancestor.equals(baseDir)) {
          insideExtractDir = true;
          break;
        }
      }
      if (!insideExtractDir) {
        throw new IOException(
            "refusing to write embedded file \"" + name + "\" outside of \"" + baseDir + "\"");
      }

      // getParentFile() returns null for a path without a parent component.
      File parent = outputFile.getParentFile();
      if (parent != null && !parent.exists()) {
        if (!parent.mkdirs()) {
          throw new IOException("unable to create directory \"" + parent + "\"");
        }
      }
      System.out.println("Extracting '" + name + "' (" + contentType + ") to " + outputFile);

      FileOutputStream os = null;

      try {
        os = new FileOutputStream(outputFile);

        // instanceof is false for null, so no separate null check on the container is needed.
        Object container = null;
        if (inputStream instanceof TikaInputStream) {
          container = ((TikaInputStream) inputStream).getOpenContainer();
        }

        if (container instanceof DirectoryEntry) {
          // An already-opened OLE2 container is re-serialized as a complete file system.
          POIFSFileSystem fs = new POIFSFileSystem();
          copy((DirectoryEntry) container, fs.getRoot());
          fs.writeFilesystem(os);
        } else {
          IOUtils.copy(inputStream, os);
        }
      } catch (Exception e) {
        // Best effort: a single embedded file that cannot be saved must not abort the parse.
        logger.warn("Ignoring unexpected exception trying to save embedded file " + name, e);
      } finally {
        if (os != null) {
          os.close();
        }
      }
    }
Example #17
0
  /**
   * Persists the current property sets and reloads them from the serialized bytes.
   *
   * <p>{@code dsi} and {@code si} are written into {@code dir}, the file system is serialized into
   * {@code bout}, and a new POIFSFileSystem is parsed from those bytes. Both fields are then
   * repopulated from the parsed copy, so a test can compare what was read back against what it
   * wrote. A missing SummaryInformation stream is replaced by a newly created one; every other
   * problem fails the test.
   */
  public void closeAndReOpen() {
    // Step 1: store both property sets in the in-memory document tree.
    try {
      dsi.write(dir, DocumentSummaryInformation.DEFAULT_STREAM_NAME);
      si.write(dir, SummaryInformation.DEFAULT_STREAM_NAME);
    } catch (WritingNotSupportedException e) {
      e.printStackTrace();
      fail();
    } catch (IOException e) {
      e.printStackTrace();
      fail();
    }

    // Forget the written instances; from here on only re-read data may be assigned.
    dsi = null;
    si = null;

    // Step 2: serialize the whole file system into the byte buffer.
    try {
      poifs.writeFilesystem(bout);
      bout.flush();
    } catch (IOException e) {
      e.printStackTrace();
      fail();
    }

    // Step 3: parse the bytes back. The reopened file system and its root are kept in locals;
    // the poifs and dir members are left untouched.
    InputStream serialized = new ByteArrayInputStream(bout.toByteArray());
    assertNotNull(serialized);
    POIFSFileSystem reopened = null;
    try {
      reopened = new POIFSFileSystem(serialized);
      serialized.close();
    } catch (IOException e) {
      e.printStackTrace();
      fail();
    }
    assertNotNull(reopened);
    DirectoryEntry reopenedRoot = reopened.getRoot();

    // Step 4: the document summary information must be present in the reopened file.
    try {
      dsi =
          new DocumentSummaryInformation(
              reloadPropertySet(reopenedRoot, DocumentSummaryInformation.DEFAULT_STREAM_NAME));
    } catch (FileNotFoundException ex) {
      fail();
    } catch (IOException e) {
      e.printStackTrace();
      fail();
    } catch (NoPropertySetStreamException e) {
      e.printStackTrace();
      fail();
    } catch (MarkUnsupportedException e) {
      e.printStackTrace();
      fail();
    } catch (UnexpectedPropertySetTypeException e) {
      e.printStackTrace();
      fail();
    }

    // Step 5: the summary information is optional; create an empty one when it is absent.
    try {
      si =
          new SummaryInformation(
              reloadPropertySet(reopenedRoot, SummaryInformation.DEFAULT_STREAM_NAME));
    } catch (FileNotFoundException ex) {
      si = PropertySetFactory.newSummaryInformation();
      assertNotNull(si);
    } catch (IOException e) {
      e.printStackTrace();
      fail();
    } catch (NoPropertySetStreamException e) {
      e.printStackTrace();
      fail();
    } catch (MarkUnsupportedException e) {
      e.printStackTrace();
      fail();
    } catch (UnexpectedPropertySetTypeException e) {
      e.printStackTrace();
      fail();
    }
  }

  /**
   * Reads the named stream below {@code root} into a PropertySet and closes the stream afterwards.
   *
   * @param root directory that holds the stream
   * @param streamName name of the property set stream to read
   * @return the property set parsed from the stream
   */
  private PropertySet reloadPropertySet(DirectoryEntry root, String streamName)
      throws IOException,
          NoPropertySetStreamException,
          MarkUnsupportedException,
          UnexpectedPropertySetTypeException {
    DocumentEntry entry = (DocumentEntry) root.getEntry(streamName);
    DocumentInputStream stream = new DocumentInputStream(entry);
    PropertySet props = new PropertySet(stream);
    stream.close();
    return props;
  }
Example #18
0
  /**
   * Embeds an OLE object, supplied as a POIFS file system, into this presentation.
   *
   * <p>The passed file system may be modified: if its root storage class id equals a freshly
   * constructed {@code ClassID}, a class id is chosen from {@code getOleMap()} based on which
   * entry the root contains, and a {@code "\u0001Ole"} stream is added when none exists.
   *
   * @param poiData the file system holding the object to embed
   * @return the id assigned by {@code addToObjListAtom}, i.e. the 0-based index of the embedded
   *     object
   * @throws IllegalArgumentException if the class id is unset and no entry known to
   *     {@code getOleMap()} is present in the root
   * @throws HSLFException if preparing or serializing the file system raises an IOException
   */
  public int addEmbed(POIFSFileSystem poiData) {
    DirectoryNode root = poiData.getRoot();

    // A storage whose class id was never set needs one derived from its contents.
    ClassID unset = new ClassID();
    if (unset.equals(root.getStorageClsid())) {
      ClassID detected = null;
      for (Map.Entry<String, ClassID> candidate : getOleMap().entrySet()) {
        if (root.hasEntry(candidate.getKey())) {
          detected = candidate.getValue();
          break;
        }
      }
      if (detected == null) {
        throw new IllegalArgumentException("Unsupported embedded document");
      }
      root.setStorageClsid(detected);
    }

    ExEmbed embedRecord = new ExEmbed();
    // Strip the child records at positions 2..4 so the type of the OLE object does not have
    // to be specified in several places. The array is a snapshot taken before any removal.
    Record[] kids = embedRecord.getChildRecords();
    for (int idx = 2; idx <= 4; idx++) {
      embedRecord.removeChild(kids[idx]);
    }

    embedRecord.getExEmbedAtom().setCantLockServerB(true);

    ExOleObjAtom oleAtom = embedRecord.getExOleObjAtom();
    oleAtom.setDrawAspect(ExOleObjAtom.DRAW_ASPECT_VISIBLE);
    oleAtom.setType(ExOleObjAtom.TYPE_EMBEDDED);
    // The sub type is intentionally left alone (no setSubType call).
    // Per MS-PPT ExOleObjAtom the options should be ignored, but Libre Office sets them.
    oleAtom.setOptions(1226240);

    ExOleObjStg storage = new ExOleObjStg();
    try {
      final String oleStreamName = "\u0001Ole";
      if (!root.hasEntry(oleStreamName)) {
        // Content copied from a sample Libre Office document. That sample also carried other
        // records (e.g. CompObj, OlePresXXX), which do not seem to be required.
        byte[] oleBytes = {1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
        poiData.createDocument(new ByteArrayInputStream(oleBytes), oleStreamName);
      }

      ByteArrayOutputStream serialized = new ByteArrayOutputStream();
      poiData.writeFilesystem(serialized);
      storage.setData(serialized.toByteArray());
    } catch (IOException e) {
      throw new HSLFException(e);
    }

    // Register the storage and point both the storage and the atom at its persist id.
    int persistId = addPersistentObject(storage);
    storage.setPersistId(persistId);
    oleAtom.setObjStgDataRef(persistId);

    int objectId = addToObjListAtom(embedRecord);
    oleAtom.setObjID(objectId);
    return objectId;
  }
 /**
  * Creates an extractor from a POIFS file system by delegating to the constructor that takes the
  * file system's root directory together with the file system itself.
  *
  * @param fs the POIFS file system whose root directory is passed on
  * @throws IOException if the delegated constructor fails with it
  */
 public ExcelExtractor(POIFSFileSystem fs) throws IOException {
   this(fs.getRoot(), fs);
 }
Example #20
0
 /**
  * Convenience overload that forwards to {@code getDataStream} with the root directory of the
  * given file system.
  *
  * @param fs the POIFS file system whose root directory is passed on
  * @return the stream returned by the root-directory overload
  * @throws IOException if the delegated call fails with it
  * @throws GeneralSecurityException if the delegated call fails with it
  */
 public InputStream getDataStream(POIFSFileSystem fs)
     throws IOException, GeneralSecurityException {
   return getDataStream(fs.getRoot());
 }
 /**
  * Processes a file into essentially record events. This overload only unwraps the file system
  * and delegates to the variant that takes its root directory.
  *
  * @param req an Instance of HSSFRequest which has your registered listeners
  * @param fs a POIFS filesystem containing your workbook
  * @return numeric user-specified result code.
  * @throws IOException if the delegated call fails with it
  * @throws HSSFUserException if the delegated call fails with it
  */
 public short abortableProcessWorkbookEvents(HSSFRequest req, POIFSFileSystem fs)
     throws IOException, HSSFUserException {
   return abortableProcessWorkbookEvents(req, fs.getRoot());
 }
 /**
  * Processes a file into essentially record events. This overload only unwraps the file system
  * and delegates to the variant that takes its root directory.
  *
  * @param req an Instance of HSSFRequest which has your registered listeners
  * @param fs a POIFS filesystem containing your workbook
  * @throws IOException if the delegated call fails with it
  */
 public void processWorkbookEvents(HSSFRequest req, POIFSFileSystem fs) throws IOException {
   processWorkbookEvents(req, fs.getRoot());
 }