コード例 #1
0
ファイル: ReadMetadata.java プロジェクト: Martum/pdfbox-test
  /**
   * Parses a PDF from the given stream and prints information taken from its metadata stream.
   *
   * <p>If the document catalog has a metadata stream, it is parsed as XML and the text of the
   * first {@code foaf:cuit} element inside the first {@code foaf:SEmployee} element is printed to
   * standard output, followed by a {@code ---} separator line and the metadata as a string. When
   * either element is absent a notice is written to standard error instead of the CUIT, and the
   * separator and metadata dump are still printed. A document without metadata prints nothing.
   *
   * <p>An encrypted document only produces a warning on standard error; processing continues.
   * The PDF document is always closed before returning; {@code in} itself is not closed here.
   *
   * @param in stream positioned at the start of the PDF data
   * @throws Exception if the PDF cannot be parsed or the metadata is not well-formed XML (a
   *     metadata stream containing a DOCTYPE declaration is rejected)
   */
  private static void extract(InputStream in) throws Exception {
    PDDocument document = null;
    try {
      PDFParser parser = new PDFParser(in);
      parser.parse();
      document = parser.getPDDocument();
      if (document.isEncrypted()) {
        System.err.println("Document is Encrypted!");
      }
      PDDocumentCatalog cat = document.getDocumentCatalog();
      PDMetadata metadata = cat.getMetadata();
      if (metadata != null) {
        // The XML comes from inside the PDF, so treat it as untrusted: forbid DTDs and
        // entity expansion to rule out XXE / entity-expansion attacks.
        DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
        dbFactory.setFeature(javax.xml.XMLConstants.FEATURE_SECURE_PROCESSING, true);
        dbFactory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
        dbFactory.setExpandEntityReferences(false);
        DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();

        // Load the metadata into a DOM, making sure the stream is released afterwards.
        Document doc;
        InputStream metadataStream = metadata.createInputStream();
        try {
          doc = dBuilder.parse(metadataStream);
        } finally {
          if (metadataStream != null) {
            try {
              metadataStream.close();
            } catch (Exception closeFailure) {
              // Best effort: report it, but do not mask a parse failure from the try body.
              System.err.println("Could not close metadata stream: " + closeFailure);
            }
          }
        }

        // Look up the first SEmployee element and, inside it, the first cuit element.
        // NodeList.item(0) returns null on an empty list, so check lengths before dereferencing.
        String cuit = null;
        NodeList employees = doc.getElementsByTagName("foaf:SEmployee");
        if (employees.getLength() > 0) {
          Element employee = (Element) employees.item(0);
          NodeList cuits = employee.getElementsByTagName("foaf:cuit");
          if (cuits.getLength() > 0) {
            cuit = cuits.item(0).getTextContent();
          }
        }

        if (cuit != null) {
          System.out.println(cuit);
        } else {
          System.err.println("No foaf:cuit element found inside foaf:SEmployee");
        }

        System.out.println("---");
        System.out.println(metadata.getInputStreamAsString());
      }
    } finally {
      if (document != null) {
        try {
          document.close();
        } catch (Exception closeFailure) {
          // Best effort: closing must not hide an exception thrown by the try body.
          System.err.println("Could not close PDF document: " + closeFailure);
        }
      }
    }
  }