@Test public void testParsePages() throws Exception { InputStream input = IWorkParserTest.class.getResourceAsStream("/test-documents/testPages.pages"); Metadata metadata = new Metadata(); ContentHandler handler = new BodyContentHandler(); iWorkParser.parse(input, handler, metadata, parseContext); // Make sure enough keys came through // (Exact numbers will vary based on composites) assertTrue("Insufficient metadata found " + metadata.size(), metadata.size() >= 50); List<String> metadataKeys = Arrays.asList(metadata.names()); assertTrue( "Metadata not found in " + metadataKeys, metadataKeys.contains(Metadata.CONTENT_TYPE)); assertTrue( "Metadata not found in " + metadataKeys, metadataKeys.contains(Metadata.PAGE_COUNT.getName())); assertTrue( "Metadata not found in " + metadataKeys, metadataKeys.contains(TikaCoreProperties.CREATOR.getName())); assertTrue( "Metadata not found in " + metadataKeys, metadataKeys.contains(TikaCoreProperties.TITLE.getName())); assertTrue( "Metadata not found in " + metadataKeys, metadataKeys.contains(Metadata.LAST_MODIFIED.getName())); assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(Metadata.LANGUAGE)); // Check the metadata values assertEquals("application/vnd.apple.pages", metadata.get(Metadata.CONTENT_TYPE)); assertEquals("Tika user", metadata.get(TikaCoreProperties.CREATOR)); assertEquals("Apache tika", metadata.get(TikaCoreProperties.TITLE)); assertEquals("2010-05-09T21:34:38+0200", metadata.get(Metadata.CREATION_DATE)); assertEquals("2010-05-09T23:50:36+0200", metadata.get(Metadata.LAST_MODIFIED)); assertEquals("en", metadata.get(TikaCoreProperties.LANGUAGE)); assertEquals("2", metadata.get(Metadata.PAGE_COUNT)); String content = handler.toString(); // text on page 1 assertContains("Sample pages document", content); assertContains("Some plain text to parse.", content); assertContains("Cell one", content); assertContains("Cell two", content); assertContains("Cell three", content); assertContains("Cell four", content); assertContains("Cell five", content); assertContains("Cell six", content); assertContains("Cell seven", content); assertContains("Cell eight", content); assertContains("Cell nine", content); assertContains("Both Pages 1.x and Keynote 2.x", content); // ... // text on page 2 assertContains("A second page....", content); assertContains("Extensible Markup Language", content); // ... }
@Test public void testParseKeynote() throws Exception { InputStream input = IWorkParserTest.class.getResourceAsStream("/test-documents/testKeynote.key"); Metadata metadata = new Metadata(); ContentHandler handler = new BodyContentHandler(); iWorkParser.parse(input, handler, metadata, parseContext); // Make sure enough keys came through // (Exact numbers will vary based on composites) assertTrue("Insufficient metadata found " + metadata.size(), metadata.size() >= 6); List<String> metadataKeys = Arrays.asList(metadata.names()); assertTrue( "Metadata not found in " + metadataKeys, metadataKeys.contains(Metadata.CONTENT_TYPE)); assertTrue( "Metadata not found in " + metadataKeys, metadataKeys.contains(Metadata.SLIDE_COUNT.getName())); // assertTrue("Metadata not found in " + metadataKeys, // metadataKeys.contains(Office.SLIDE_COUNT.getName())); assertTrue( "Metadata not found in " + metadataKeys, metadataKeys.contains(TikaCoreProperties.CREATOR.getName())); assertTrue( "Metadata not found in " + metadataKeys, metadataKeys.contains(TikaCoreProperties.TITLE.getName())); // Check the metadata values assertEquals("application/vnd.apple.keynote", metadata.get(Metadata.CONTENT_TYPE)); assertEquals("3", metadata.get(Metadata.SLIDE_COUNT)); assertEquals("1024", metadata.get(KeynoteContentHandler.PRESENTATION_WIDTH)); assertEquals("768", metadata.get(KeynoteContentHandler.PRESENTATION_HEIGHT)); assertEquals("Tika user", metadata.get(TikaCoreProperties.CREATOR)); assertEquals("Apache tika", metadata.get(TikaCoreProperties.TITLE)); String content = handler.toString(); assertContains("A sample presentation", content); assertContains("For the Apache Tika project", content); assertContains("Slide 1", content); assertContains("Some random text for the sake of testability.", content); assertContains("A nice comment", content); assertContains("A nice note", content); // test table data assertContains("Cell one", content); assertContains("Cell two", content); assertContains("Cell three", content); assertContains("Cell four", content); assertContains("Cell 5", content); assertContains("Cell six", content); assertContains("7", content); assertContains("Cell eight", content); assertContains("5/5/1985", content); }
@Test public void testParseNumbers() throws Exception { InputStream input = IWorkParserTest.class.getResourceAsStream("/test-documents/testNumbers.numbers"); Metadata metadata = new Metadata(); ContentHandler handler = new BodyContentHandler(); iWorkParser.parse(input, handler, metadata, parseContext); // Make sure enough keys came through // (Exact numbers will vary based on composites) assertTrue("Insufficient metadata found " + metadata.size(), metadata.size() >= 8); List<String> metadataKeys = Arrays.asList(metadata.names()); assertTrue( "Metadata not found in " + metadataKeys, metadataKeys.contains(Metadata.CONTENT_TYPE)); assertTrue( "Metadata not found in " + metadataKeys, metadataKeys.contains(Metadata.PAGE_COUNT.getName())); assertTrue( "Metadata not found in " + metadataKeys, metadataKeys.contains(TikaCoreProperties.CREATOR.getName())); assertTrue( "Metadata not found in " + metadataKeys, metadataKeys.contains(TikaCoreProperties.COMMENTS.getName())); assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(Metadata.TITLE)); assertTrue( "Metadata not found in " + metadataKeys, metadataKeys.contains(TikaCoreProperties.TITLE.getName())); // Check the metadata values assertEquals("2", metadata.get(Metadata.PAGE_COUNT)); assertEquals("Tika User", metadata.get(TikaCoreProperties.CREATOR)); assertEquals("Account checking", metadata.get(TikaCoreProperties.TITLE)); assertEquals("a comment", metadata.get(TikaCoreProperties.COMMENTS)); String content = handler.toString(); assertContains("Category", content); assertContains("Home", content); assertContains("-226", content); assertContains("-137.5", content); assertContains("Checking Account: 300545668", content); assertContains("4650", content); assertContains("Credit Card", content); assertContains("Groceries", content); assertContains("-210", content); assertContains("Food", content); assertContains("Try adding your own account transactions to this table.", content); }