Ejemplo n.º 1
0
  /**
   * Walks every record of the stream obtained from the POIFS file and prints it to standard
   * output: one header line with the SID, the payload size and (when the record factory knows
   * the SID) the record class name, followed by the formatted payload if it is not empty.
   *
   * @throws IOException if the file cannot be opened or read
   */
  public void run() throws IOException {
    NPOIFSFileSystem poifs = new NPOIFSFileSystem(new File(file), true);
    try {
      InputStream workbookStream = BiffViewer.getPOIFSInputStream(poifs);
      try {
        RecordInputStream records = new RecordInputStream(workbookStream);

        while (records.hasNextRecord()) {
          int sid = records.getNextSid();
          records.nextRecord();

          // Must be sampled before the payload is consumed below.
          int size = records.available();
          Class<? extends Record> recordClass = RecordFactory.getRecordClass(sid);

          String header = formatSID(sid) + " - " + formatSize(size) + " bytes";
          if (recordClass != null) {
            // Strip the common package prefix so only the short class name is shown.
            header += "  \t" + recordClass.getName().replace("org.apache.poi.hssf.record.", "");
          }
          System.out.println(header);

          byte[] payload = records.readRemainder();
          if (payload.length > 0) {
            System.out.print("   ");
            System.out.println(formatData(payload));
          }
        }
      } finally {
        workbookStream.close();
      }
    } finally {
      poifs.close();
    }
  }
Ejemplo n.º 2
0
 /**
  * Checks that {@code SlideShowFactory.create} returns a slide show when given a file, a stream
  * and an already opened NPOIFS filesystem, both without and with a password.
  *
  * @param file identifier of the unprotected sample, as understood by the {@code fromFile} and
  *     {@code fromStream} helpers
  * @param protectedFile identifier of the password-protected sample
  * @param password password used to open {@code protectedFile}
  * @throws Exception if any of the samples cannot be opened
  */
 public void testFactory(String file, String protectedFile, String password) throws Exception {
   SlideShow<?, ?> ss;
   // from file
   ss = SlideShowFactory.create(fromFile(file));
   assertNotNull(ss);
   // from stream
   ss = SlideShowFactory.create(fromStream(file));
   assertNotNull(ss);
   // from NPOIFS (skipped when the name contains "pptx")
   if (!file.contains("pptx")) {
     NPOIFSFileSystem npoifs = new NPOIFSFileSystem(fromFile(file));
     try {
       ss = SlideShowFactory.create(npoifs);
       assertNotNull(ss);
     } finally {
       // Close even when create() throws or the assertion fails, so the file handle is released.
       npoifs.close();
     }
   }
   // from protected file
   ss = SlideShowFactory.create(fromFile(protectedFile), password);
   assertNotNull(ss);
   // from protected stream
   ss = SlideShowFactory.create(fromStream(protectedFile), password);
   assertNotNull(ss);
   // from protected NPOIFS
   NPOIFSFileSystem npoifs = new NPOIFSFileSystem(fromFile(protectedFile));
   try {
     ss = SlideShowFactory.create(npoifs, password);
     assertNotNull(ss);
   } finally {
     // Same reasoning as above: never leak the filesystem on a failing test.
     npoifs.close();
   }
 }
Ejemplo n.º 3
0
 /**
  * Loads a workbook from disk. Paths containing {@code ".xlsx"} are read as XSSF workbooks from
  * a file stream; every other path is opened as a POIFS container and read as an HSSF workbook.
  *
  * <p>The underlying file resource is closed before this method returns, including when the
  * workbook constructor throws.
  *
  * @param filePath path of the spreadsheet file to load
  * @return the loaded workbook
  * @throws IOException if the file cannot be opened or read
  */
 public static Workbook getWorkBook(String filePath) throws IOException {
   if (!filePath.contains(".xlsx")) {
     NPOIFSFileSystem fs = new NPOIFSFileSystem(new File(filePath));
     try {
       return new HSSFWorkbook(fs.getRoot(), false);
     } finally {
       // Runs after the workbook has been built, and also when building it fails.
       fs.close();
     }
   } else {
     FileInputStream fileInputStream = new FileInputStream(filePath);
     try {
       return new XSSFWorkbook(fileInputStream);
     } finally {
       fileInputStream.close();
     }
   }
 }
Ejemplo n.º 4
0
  /**
   * Exercises {@code EntryUtils.areDocumentsIdentical}: documents compare as identical only when
   * both their names and their contents match, regardless of the parent directory, and the
   * comparison also works between entries of a POIFS and an NPOIFS filesystem.
   *
   * @throws Exception if the in-memory filesystems cannot be created, written or re-read
   */
  public void testAreDocumentsIdentical() throws Exception {
    POIFSFileSystem fs = new POIFSFileSystem();
    DirectoryEntry dirA = fs.createDirectory("DirA");
    DirectoryEntry dirB = fs.createDirectory("DirB");

    DocumentEntry entryA1 = dirA.createDocument("Entry1", new ByteArrayInputStream(dataSmallA));
    DocumentEntry entryA1b = dirA.createDocument("Entry1b", new ByteArrayInputStream(dataSmallA));
    DocumentEntry entryA2 = dirA.createDocument("Entry2", new ByteArrayInputStream(dataSmallB));
    DocumentEntry entryB1 = dirB.createDocument("Entry1", new ByteArrayInputStream(dataSmallA));

    // Names must match
    assertEquals(false, entryA1.getName().equals(entryA1b.getName()));
    assertEquals(false, EntryUtils.areDocumentsIdentical(entryA1, entryA1b));

    // Contents must match
    assertEquals(false, EntryUtils.areDocumentsIdentical(entryA1, entryA2));

    // Parents don't matter if contents + names are the same
    assertEquals(false, entryA1.getParent().equals(entryB1.getParent()));
    assertEquals(true, EntryUtils.areDocumentsIdentical(entryA1, entryB1));

    // Can work with NPOIFS + POIFS: round-trip the filesystem through a byte array
    ByteArrayOutputStream tmpO = new ByteArrayOutputStream();
    fs.writeFilesystem(tmpO);
    ByteArrayInputStream tmpI = new ByteArrayInputStream(tmpO.toByteArray());
    NPOIFSFileSystem nfs = new NPOIFSFileSystem(tmpI);
    try {
      DirectoryEntry dN1 = (DirectoryEntry) nfs.getRoot().getEntry("DirA");
      DirectoryEntry dN2 = (DirectoryEntry) nfs.getRoot().getEntry("DirB");
      DocumentEntry eNA1 = (DocumentEntry) dN1.getEntry(entryA1.getName());
      DocumentEntry eNA2 = (DocumentEntry) dN1.getEntry(entryA2.getName());
      DocumentEntry eNB1 = (DocumentEntry) dN2.getEntry(entryB1.getName());

      // NPOIFS against NPOIFS
      assertEquals(false, EntryUtils.areDocumentsIdentical(eNA1, eNA2));
      assertEquals(true, EntryUtils.areDocumentsIdentical(eNA1, eNB1));

      // NPOIFS against POIFS
      assertEquals(false, EntryUtils.areDocumentsIdentical(eNA1, entryA1b));
      assertEquals(false, EntryUtils.areDocumentsIdentical(eNA1, entryA2));

      assertEquals(true, EntryUtils.areDocumentsIdentical(eNA1, entryA1));
      assertEquals(true, EntryUtils.areDocumentsIdentical(eNA1, entryB1));
    } finally {
      // The original never closed the re-read filesystem; release it even on a failed assertion.
      nfs.close();
    }
  }
Ejemplo n.º 5
0
  /**
   * Collects the names of the top-level entries of the POIFS container behind the given stream.
   *
   * <p>The opened filesystem is registered on the stream as its open container so a later
   * parsing step can reuse it; it is therefore not closed here.
   *
   * @param stream the document stream to inspect
   * @return the top-level names, or an empty set when POI fails to read the container
   * @throws IOException if the stream cannot be spooled to a file
   */
  private static Set<String> getTopLevelNames(TikaInputStream stream) throws IOException {
    // Work on a (possibly temporary) file so the current position of the stream is left untouched
    File backingFile = stream.getFile();

    Set<String> names;
    try {
      NPOIFSFileSystem poifs = new NPOIFSFileSystem(backingFile, true);

      // Keep a reference to the already opened POI file system to
      // optimize a possible later parsing process
      stream.setOpenContainer(poifs);

      names = getTopLevelNames(poifs.getRoot());
    } catch (IOException e) {
      // Parse error in POI, so we don't know the file type
      names = Collections.emptySet();
    } catch (RuntimeException e) {
      // Another problem in POI
      names = Collections.emptySet();
    }
    return names;
  }
Ejemplo n.º 6
0
 /**
  * Reads a POIFS file into memory and writes the filesystem back out to a second file.
  *
  * <p>Exits with status 1 after printing a usage message when the argument count is not two.
  *
  * @param args names of the files; args[0] is the input file, args[1] is the output file
  * @exception IOException if either file cannot be opened, read or written
  */
 public static void main(String args[]) throws IOException {
   boolean wrongArgumentCount = args.length != 2;
   if (wrongArgumentCount) {
     System.err.println("two arguments required: input filename and output filename");
     System.exit(1);
   }
   String inputName = args[0];
   FileInputStream source = new FileInputStream(inputName);
   try {
     // The output file is opened (and thereby created) before the input is parsed.
     FileOutputStream target = new FileOutputStream(args[1]);
     try {
       NPOIFSFileSystem filesystem = new NPOIFSFileSystem(source);
       try {
         filesystem.writeFilesystem(target);
       } finally {
         filesystem.close();
       }
     } finally {
       target.close();
     }
   } finally {
     source.close();
   }
 }
Ejemplo n.º 7
0
  /**
   * Parses the bytes of an embedded object: records the version and the non-empty class, topic
   * and item names in the metadata, then extracts the object's payload.
   *
   * @param bytes raw bytes of the object data
   * @param metadata incoming metadata; the version and the non-empty names are added to it
   * @param unknownFilenameCount counter handed to the embedded-POIFS handler
   * @return {@code null} when the format id is not 2 (i.e. not an embedded object); otherwise
   *     the result of the package or POIFS handler when one applies, else the raw payload bytes
   * @throws IOException if the header fields or the payload cannot be read
   */
  protected byte[] parse(byte[] bytes, Metadata metadata, AtomicInteger unknownFilenameCount)
      throws IOException {
    ByteArrayInputStream is = new ByteArrayInputStream(bytes);
    long version = readUInt(is);
    metadata.add(RTFMetadata.EMB_APP_VERSION, Long.toString(version));

    // 2 is an embedded object. 1 is a link.
    long formatId = readUInt(is);
    if (formatId != 2L) {
      return null;
    }

    // All three names are read before any of them is recorded.
    String className = readLengthPrefixedAnsiString(is).trim();
    String topicName = readLengthPrefixedAnsiString(is).trim();
    String itemName = readLengthPrefixedAnsiString(is).trim();

    if (!className.isEmpty()) {
      metadata.add(RTFMetadata.EMB_CLASS, className);
    }
    if (!topicName.isEmpty()) {
      metadata.add(RTFMetadata.EMB_TOPIC, topicName);
    }
    if (!itemName.isEmpty()) {
      metadata.add(RTFMetadata.EMB_ITEM, itemName);
    }

    long dataSz = readUInt(is);

    // readBytes tests for reading too many bytes
    byte[] embObjBytes = readBytes(is, dataSz);

    String normalizedClass = className.toLowerCase(Locale.ROOT);
    if ("package".equals(normalizedClass)) {
      return handlePackage(embObjBytes, metadata);
    }
    if ("pbrush".equals(normalizedClass)) {
      // simple bitmap bytes
      return embObjBytes;
    }

    ByteArrayInputStream embIs = new ByteArrayInputStream(embObjBytes);
    if (NPOIFSFileSystem.hasPOIFSHeader(embIs)) {
      try {
        return handleEmbeddedPOIFS(embIs, metadata, unknownFilenameCount);
      } catch (IOException e) {
        // Deliberately swallowed: fall through and return the raw payload instead.
      }
    }
    return embObjBytes;
  }
Ejemplo n.º 8
0
  /**
   * Creates a Workbook from the given NPOIFSFileSystem, which may be password protected.
   *
   * <p>If the filesystem holds the default encryption entry it is treated as an encrypted OOXML
   * file: the supplied password (or, failing that, the default password) is verified and the
   * decrypted package is opened. Otherwise the filesystem is read as a regular HSSF workbook,
   * with the password (if any) set as the current user password while the workbook is built.
   *
   * @param fs the filesystem to read the workbook from
   * @param password the password to try, or {@code null} if none was supplied
   * @return the workbook read from the filesystem
   * @throws IOException if reading fails, or wrapping a {@link GeneralSecurityException} raised
   *     while verifying the password or decrypting
   * @throws InvalidFormatException declared for the package-opening path
   * @throws EncryptedDocumentException if the document is encrypted and neither the supplied
   *     nor the default password is accepted
   */
  private static Workbook create(NPOIFSFileSystem fs, String password)
      throws IOException, InvalidFormatException {
    DirectoryNode root = fs.getRoot();

    // Encrypted OOXML files go inside OLE2 containers, is this one?
    if (root.hasEntry(Decryptor.DEFAULT_POIFS_ENTRY)) {
      EncryptionInfo info = new EncryptionInfo(fs);
      Decryptor d = Decryptor.getInstance(info);

      boolean passwordCorrect = false;
      InputStream stream = null;
      try {
        if (password != null && d.verifyPassword(password)) {
          passwordCorrect = true;
        }
        // Fall back to the default password when the supplied one is absent or rejected.
        if (!passwordCorrect && d.verifyPassword(Decryptor.DEFAULT_PASSWORD)) {
          passwordCorrect = true;
        }
        if (passwordCorrect) {
          stream = d.getDataStream(root);
        }
      } catch (GeneralSecurityException e) {
        throw new IOException(e);
      }

      if (!passwordCorrect) {
        if (password != null) {
          throw new EncryptedDocumentException("Password incorrect");
        } else {
          throw new EncryptedDocumentException(
              "The supplied spreadsheet is protected, but no password was supplied");
        }
      }

      OPCPackage pkg = OPCPackage.open(stream);
      return create(pkg);
    }

    // If we get here, it isn't an encrypted XLSX file
    // So, treat it as a regular HSSF XLS one
    if (password != null) {
      Biff8EncryptionKey.setCurrentUserPassword(password);
    }
    try {
      return new HSSFWorkbook(root, true);
    } finally {
      // Always clear the password, even when the workbook constructor throws; otherwise it
      // would stay set and be picked up by later, unrelated workbook loads.
      Biff8EncryptionKey.setCurrentUserPassword(null);
    }
  }
Ejemplo n.º 9
0
 /**
  * Convenience overload: takes the root of the given filesystem and delegates to the
  * {@code getDataStream} overload that accepts that root.
  *
  * @param fs the filesystem whose root is passed on
  * @return whatever the delegated {@code getDataStream} call returns
  * @throws IOException propagated from the delegated call
  * @throws GeneralSecurityException propagated from the delegated call
  */
 public InputStream getDataStream(NPOIFSFileSystem fs)
     throws IOException, GeneralSecurityException {
   return getDataStream(fs.getRoot());
 }
Ejemplo n.º 10
0
  /**
   * Extracts the payload of an embedded object stored as a POIFS container.
   *
   * <ul>
   *   <li>If the root has a "Package" entry, that entry's bytes are returned.
   *   <li>If the detected type is OLE10_NATIVE, the unwrapped data buffer is returned
   *       ({@code null} when the record is not valid).
   *   <li>If the detected type is COMP_OBJ, the bytes of the "CONTENTS" entry (or "Contents" as
   *       a fallback) are returned.
   *   <li>Otherwise the whole input is returned and a generated resource name and the detected
   *       content type are set on the metadata.
   * </ul>
   *
   * <p>The last case calls {@code is.reset()}, so the stream must support reset and is
   * presumably expected to rewind to the start of the container - confirm against callers.
   *
   * @param is stream positioned on the POIFS data
   * @param metadata metadata updated in the fallback case
   * @param unknownFilenameCount counter used to number generated file names
   * @return the extracted bytes; can be {@code null}
   * @throws IOException if the input is not actually POIFS or cannot be read
   */
  private byte[] handleEmbeddedPOIFS(
      InputStream is, Metadata metadata, AtomicInteger unknownFilenameCount) throws IOException {

    NPOIFSFileSystem fs = null;
    byte[] ret = null;
    try {

      fs = new NPOIFSFileSystem(is);

      DirectoryNode root = fs.getRoot();

      if (root == null) {
        return ret;
      }

      if (root.hasEntry("Package")) {
        Entry ooxml = root.getEntry("Package");
        TikaInputStream stream =
            TikaInputStream.get(new DocumentInputStream((DocumentEntry) ooxml));
        try {
          ByteArrayOutputStream out = new ByteArrayOutputStream();

          IOUtils.copy(stream, out);
          ret = out.toByteArray();
        } finally {
          // The original never closed this stream; release it (and the wrapped document
          // stream) once the bytes have been copied.
          stream.close();
        }
      } else {
        // try poifs
        POIFSDocumentType type = POIFSDocumentType.detectType(root);
        if (type == POIFSDocumentType.OLE10_NATIVE) {
          try {
            // Try to un-wrap the OLE10Native record:
            Ole10Native ole = Ole10Native.createFromEmbeddedOleObject(root);
            ret = ole.getDataBuffer();
          } catch (Ole10NativeException ex) {
            // Not a valid OLE10Native record, skip it
          }
        } else if (type == POIFSDocumentType.COMP_OBJ) {

          // The entry name is looked up in upper case first, then in mixed case.
          DocumentEntry contentsEntry;
          try {
            contentsEntry = (DocumentEntry) root.getEntry("CONTENTS");
          } catch (FileNotFoundException ioe) {
            contentsEntry = (DocumentEntry) root.getEntry("Contents");
          }

          DocumentInputStream inp = null;
          try {
            inp = new DocumentInputStream(contentsEntry);
            ret = new byte[contentsEntry.getSize()];
            inp.readFully(ret);
          } finally {
            if (inp != null) {
              inp.close();
            }
          }
        } else {

          // Unknown container type: hand back the complete input under a generated name.
          ByteArrayOutputStream out = new ByteArrayOutputStream();
          is.reset();
          IOUtils.copy(is, out);
          ret = out.toByteArray();
          metadata.set(
              Metadata.RESOURCE_NAME_KEY,
              "file_" + unknownFilenameCount.getAndIncrement() + "." + type.getExtension());
          metadata.set(Metadata.CONTENT_TYPE, type.getType().toString());
        }
      }
    } finally {
      if (fs != null) {
        fs.close();
      }
    }
    return ret;
  }