/**
 * Transforms the geotagged "Sony DSC-HX5V.jpg" sample and verifies the created/modified dates,
 * the location coordinates and the thumbnail size of the resulting metacard.
 */
@Test()
public void testSonyDSCHXV5()
    throws IOException, CatalogTransformerException, UnsupportedQueryException,
        SourceUnavailableException, FederationException, ParseException {
  File file = new File(TEST_DATA_PATH + "Sony DSC-HX5V.jpg");

  Metacard metacard;
  // Close the file as soon as the transformer is done so the test does not leak a handle,
  // even when the transform throws.
  try (FileInputStream fis = FileUtils.openInputStream(file)) {
    metacard = createTransformer().transform(fis);
  }

  assertNotNull(metacard);

  // Created date: the Date accessors are offset-based (year since 1900, zero-based month).
  assertNotNull(metacard.getCreatedDate());
  assertThat(metacard.getCreatedDate().getYear() + 1900, is(2010));
  assertThat(metacard.getCreatedDate().getMonth() + 1, is(7));
  assertThat(metacard.getCreatedDate().getDate(), is(14));
  assertThat(metacard.getCreatedDate().getHours(), is(11));
  assertThat(metacard.getCreatedDate().getMinutes(), is(0));
  assertThat(metacard.getCreatedDate().getSeconds(), is(23));

  // Modified date is expected to carry the same timestamp as the created date.
  assertNotNull(metacard.getModifiedDate());
  assertThat(metacard.getModifiedDate().getYear() + 1900, is(2010));
  assertThat(metacard.getModifiedDate().getMonth() + 1, is(7));
  assertThat(metacard.getModifiedDate().getDate(), is(14));
  assertThat(metacard.getModifiedDate().getHours(), is(11));
  assertThat(metacard.getModifiedDate().getMinutes(), is(0));
  assertThat(metacard.getModifiedDate().getSeconds(), is(23));

  // The location is WKT; parse it and compare the coordinates with a small tolerance.
  WKTReader reader = new WKTReader();
  Geometry geometry = reader.read(metacard.getLocation());
  assertEquals(-104.303846389, geometry.getCoordinate().x, 0.00001);
  assertEquals(39.5698783333, geometry.getCoordinate().y, 0.00001);

  byte[] thumbnail = metacard.getThumbnail();
  assertNotNull(thumbnail);
  assertThat(thumbnail.length, is(11490));
}
/**
 * Normalizes a metacard that is about to be written as part of an update.
 *
 * <p>When the request updates by id and the metacard's id differs from the update key, the
 * metacard id is overwritten with the key. Any of the created, modified or effective dates that
 * are {@code null} are set to {@code now}.
 *
 * @param updateRequest the request being processed; only its attribute name is consulted
 * @param anUpdate the update entry whose key identifies the record being updated
 * @param aMetacard the metacard to adjust in place
 * @param now timestamp used for any missing date
 */
protected void applyUpdateOperationRules(
    UpdateRequest updateRequest,
    Entry<Serializable, Metacard> anUpdate,
    Metacard aMetacard,
    Date now) {

  boolean updatingById = UpdateRequest.UPDATE_BY_ID.equals(updateRequest.getAttributeName());
  if (updatingById) {
    Serializable updateKey = anUpdate.getKey();
    boolean idMatchesKey = updateKey.toString().equals(aMetacard.getId());
    if (!idMatchesKey) {
      LOGGER.info(
          "{} in metacard must match the Update {}, overwriting metacard {} [{}] with the update identifier [{}]",
          Metacard.ID,
          Metacard.ID,
          Metacard.ID,
          aMetacard.getId(),
          updateKey);
      aMetacard.setAttribute(new AttributeImpl(Metacard.ID, updateKey));
    }
  }

  boolean createdMissing = aMetacard.getCreatedDate() == null;
  if (createdMissing) {
    LOGGER.info(
        "{} date should match the original metacard. Changing date to current timestamp so it is at least not null.",
        Metacard.CREATED);
    aMetacard.setAttribute(new AttributeImpl(Metacard.CREATED, now));
  }

  boolean modifiedMissing = aMetacard.getModifiedDate() == null;
  if (modifiedMissing) {
    aMetacard.setAttribute(new AttributeImpl(Metacard.MODIFIED, now));
  }

  boolean effectiveMissing = aMetacard.getEffectiveDate() == null;
  if (effectiveMissing) {
    aMetacard.setAttribute(new AttributeImpl(Metacard.EFFECTIVE, now));
  }
}
/**
 * Transforms the geotagged "Apple iPhone 4.jpg" sample and verifies the created date, that the
 * modified date equals the created date, and the location coordinates of the resulting metacard.
 */
@Test()
public void testIPhone()
    throws IOException, CatalogTransformerException, UnsupportedQueryException,
        SourceUnavailableException, FederationException, ParseException {
  File file = new File(TEST_DATA_PATH + "Apple iPhone 4.jpg");

  Metacard metacard;
  // Close the file as soon as the transformer is done so the test does not leak a handle,
  // even when the transform throws.
  try (FileInputStream fis = FileUtils.openInputStream(file)) {
    metacard = createTransformer().transform(fis);
  }

  assertNotNull(metacard);

  // Created date: the Date accessors are offset-based (year since 1900, zero-based month).
  assertNotNull(metacard.getCreatedDate());
  assertThat(metacard.getCreatedDate().getYear() + 1900, is(2011));
  assertThat(metacard.getCreatedDate().getMonth() + 1, is(1));
  assertThat(metacard.getCreatedDate().getDate(), is(13));
  assertThat(metacard.getCreatedDate().getHours(), is(14));
  assertThat(metacard.getCreatedDate().getMinutes(), is(33));
  assertThat(metacard.getCreatedDate().getSeconds(), is(39));

  assertEquals(metacard.getCreatedDate(), metacard.getModifiedDate());

  // The location is WKT; parse it and compare the coordinates with a small tolerance.
  WKTReader reader = new WKTReader();
  Geometry geometry = reader.read(metacard.getLocation());
  assertEquals(12.488833, geometry.getCoordinate().x, 0.00001);
  assertEquals(41.853, geometry.getCoordinate().y, 0.00001);
}
@Test public void testGeoTaggedJpeg() throws Exception { InputStream stream = Thread.currentThread().getContextClassLoader().getResourceAsStream("testJPEG_GEO.jpg"); /* * The dates in testJPED_GEO.jpg do not contain timezones. If no timezone is specified, * the Tika input transformer assumes the local time zone. Set the system timezone to UTC * so we can do assertions. */ TimeZone defaultTimeZone = TimeZone.getDefault(); TimeZone.setDefault(TimeZone.getTimeZone("UTC")); Metacard metacard = transform(stream); assertNotNull(metacard); assertNotNull(metacard.getMetadata()); assertThat( metacard.getMetadata(), containsString("<meta name=\"Model\" content=\"Canon EOS 40D\"/>")); assertThat(metacard.getContentTypeName(), is("image/jpeg")); assertThat(convertDate(metacard.getCreatedDate()), is("2009-08-11 09:09:45 UTC")); assertThat(convertDate(metacard.getModifiedDate()), is("2009-10-02 23:02:49 UTC")); assertThat( (String) metacard.getAttribute(Metacard.GEOGRAPHY).getValue(), is("POINT(-54.1234 12.54321)")); // Reset timezone back to local time zone. TimeZone.setDefault(defaultTimeZone); }
@Test public void testOpenOffice() throws Exception { InputStream stream = Thread.currentThread().getContextClassLoader().getResourceAsStream("testOpenOffice2.odt"); /* * The dates in testOpenOffice2.odt do not contain timezones. If no timezone is specified, * the Tika input transformer assumes the local time zone. Set the system timezone to UTC * so we can do assertions. */ TimeZone defaultTimeZone = TimeZone.getDefault(); TimeZone.setDefault(TimeZone.getTimeZone("UTC")); Metacard metacard = transform(stream); assertNotNull(metacard); assertThat(metacard.getTitle(), is("Test OpenOffice2 Document")); assertThat(convertDate(metacard.getCreatedDate()), is("2007-09-14 11:06:08 UTC")); assertThat(convertDate(metacard.getModifiedDate()), is("2013-02-13 06:52:10 UTC")); assertNotNull(metacard.getMetadata()); assertThat( metacard.getMetadata(), containsString("This is a sample Open Office document, written in NeoOffice 2.2.1")); assertThat(metacard.getContentTypeName(), is("application/vnd.oasis.opendocument.text")); // Reset timezone back to local time zone. TimeZone.setDefault(defaultTimeZone); }
/**
 * Transforms the "testPPT.ppt" sample and verifies the title, created/modified dates, metadata
 * and content type of the resulting metacard.
 */
@Test
public void testPpt() throws Exception {
  Metacard metacard;
  // try-with-resources releases the classpath stream even when the transform throws.
  try (InputStream stream =
      Thread.currentThread().getContextClassLoader().getResourceAsStream("testPPT.ppt")) {
    metacard = transform(stream);
  }

  assertNotNull(metacard);
  assertThat(metacard.getTitle(), is("Sample Powerpoint Slide"));
  assertThat(convertDate(metacard.getCreatedDate()), is("2007-09-14 17:33:12 UTC"));
  assertThat(convertDate(metacard.getModifiedDate()), is("2007-09-14 19:16:39 UTC"));
  assertNotNull(metacard.getMetadata());
  assertThat(metacard.getMetadata(), containsString("Created with Microsoft"));
  assertThat(metacard.getContentTypeName(), is("application/vnd.ms-powerpoint"));
}
/**
 * Transforms the "testPDF.pdf" sample and verifies the title, created/modified dates, metadata
 * and content type of the resulting metacard.
 */
@Test
public void testPDF() throws Exception {
  Metacard metacard;
  // try-with-resources releases the classpath stream even when the transform throws.
  try (InputStream stream =
      Thread.currentThread().getContextClassLoader().getResourceAsStream("testPDF.pdf")) {
    metacard = transform(stream);
  }

  assertNotNull(metacard);
  assertThat(metacard.getTitle(), is("Apache Tika - Apache Tika"));
  assertThat(convertDate(metacard.getCreatedDate()), is("2007-09-15 09:02:31 UTC"));
  assertThat(convertDate(metacard.getModifiedDate()), is("2007-09-15 09:02:31 UTC"));
  assertNotNull(metacard.getMetadata());
  assertThat(
      metacard.getMetadata(), containsString("<meta name=\"xmpTPg:NPages\" content=\"1\"/>"));
  assertThat(metacard.getContentTypeName(), is("application/pdf"));
}
/**
 * Transforms the "testEXCEL.xls" sample and verifies the title, created/modified dates, metadata
 * and content type of the resulting metacard.
 */
@Test
public void testXls() throws Exception {
  Metacard metacard;
  // try-with-resources releases the classpath stream even when the transform throws.
  try (InputStream stream =
      Thread.currentThread().getContextClassLoader().getResourceAsStream("testEXCEL.xls")) {
    metacard = transform(stream);
  }

  assertNotNull(metacard);
  assertThat(metacard.getTitle(), is("Simple Excel document"));
  assertThat(convertDate(metacard.getCreatedDate()), is("2007-10-01 16:13:56 UTC"));
  assertThat(convertDate(metacard.getModifiedDate()), is("2007-10-01 16:31:43 UTC"));
  assertNotNull(metacard.getMetadata());
  assertThat(
      metacard.getMetadata(),
      containsString("Written and saved in Microsoft Excel X for Mac Service Release 1."));
  assertThat(metacard.getContentTypeName(), is("application/vnd.ms-excel"));
}
/**
 * Transforms the "testMP4.m4a" sample and verifies the title, created/modified dates, metadata
 * and content type of the resulting metacard.
 */
@Test
public void testMp4() throws Exception {
  Metacard metacard;
  // try-with-resources releases the classpath stream even when the transform throws.
  try (InputStream stream =
      Thread.currentThread().getContextClassLoader().getResourceAsStream("testMP4.m4a")) {
    metacard = transform(stream);
  }

  assertNotNull(metacard);
  assertThat(metacard.getTitle(), is("Test Title"));
  assertThat(convertDate(metacard.getCreatedDate()), is("2012-01-28 18:39:18 UTC"));
  assertThat(convertDate(metacard.getModifiedDate()), is("2012-01-28 18:40:25 UTC"));
  assertNotNull(metacard.getMetadata());
  assertThat(
      metacard.getMetadata(),
      containsString("<meta name=\"xmpDM:artist\" content=\"Test Artist\"/>"));
  assertThat(metacard.getContentTypeName(), is("audio/mp4"));
}
/**
 * Transforms the "testWORD.docx" sample and verifies the title, created/modified dates, metadata
 * and content type of the resulting metacard.
 */
@Test
public void testWordDoc() throws Exception {
  Metacard metacard;
  // try-with-resources releases the classpath stream even when the transform throws.
  try (InputStream stream =
      Thread.currentThread().getContextClassLoader().getResourceAsStream("testWORD.docx")) {
    metacard = transform(stream);
  }

  assertNotNull(metacard);
  assertThat(metacard.getTitle(), is("Sample Word Document"));
  assertThat(convertDate(metacard.getCreatedDate()), is("2008-12-11 16:04:00 UTC"));
  assertThat(convertDate(metacard.getModifiedDate()), is("2010-11-12 16:21:00 UTC"));
  assertNotNull(metacard.getMetadata());
  assertThat(
      metacard.getMetadata(),
      containsString("<p>This is a sample Microsoft Word Document.</p>"));
  assertThat(
      metacard.getContentTypeName(),
      is("application/vnd.openxmlformats-officedocument.wordprocessingml.document"));
}
/**
 * Transforms the "testEXCEL.xlsx" sample and verifies the title, created/modified dates,
 * metadata and content type of the resulting metacard.
 */
@Test
public void testXlsx() throws Exception {
  Metacard metacard;
  // try-with-resources releases the classpath stream even when the transform throws.
  try (InputStream stream =
      Thread.currentThread().getContextClassLoader().getResourceAsStream("testEXCEL.xlsx")) {
    metacard = transform(stream);
  }

  assertNotNull(metacard);
  assertThat(metacard.getTitle(), is("Simple Excel document"));
  assertThat(convertDate(metacard.getCreatedDate()), is("2007-10-01 16:13:56 UTC"));
  assertThat(convertDate(metacard.getModifiedDate()), is("2008-12-11 16:02:17 UTC"));
  assertNotNull(metacard.getMetadata());
  assertThat(
      metacard.getMetadata(),
      containsString("Sample Excel Worksheet - Numbers and their Squares"));
  assertThat(
      metacard.getContentTypeName(),
      is("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"));
}
/**
 * Transforms the "testPPT.pptx" sample and verifies the title, created/modified dates, metadata
 * and content type of the resulting metacard.
 */
@Test
public void testPptx() throws Exception {
  Metacard metacard;
  // try-with-resources releases the classpath stream even when the transform throws.
  try (InputStream stream =
      Thread.currentThread().getContextClassLoader().getResourceAsStream("testPPT.pptx")) {
    metacard = transform(stream);
  }

  assertNotNull(metacard);
  assertThat(metacard.getTitle(), is("Attachment Test"));
  assertThat(convertDate(metacard.getCreatedDate()), is("2010-05-04 06:43:54 UTC"));
  assertThat(convertDate(metacard.getModifiedDate()), is("2010-06-29 06:34:35 UTC"));
  assertNotNull(metacard.getMetadata());
  assertThat(
      metacard.getMetadata(),
      containsString("content as every other file being tested for tika content parsing"));
  assertThat(
      metacard.getContentTypeName(),
      is("application/vnd.openxmlformats-officedocument.presentationml.presentation"));
}
/**
 * Checks a metacard against a map of expected values.
 *
 * <p>The basic metacard fields (id, title, modified date, location) and the CSW type are compared
 * directly; the remaining CSW/OWS attributes are compared through
 * {@code assertListStringAttribute}. Expected values are looked up by attribute name.
 *
 * @param mc the metacard under test
 * @param expectedValues expected value per attribute name; list-valued attributes are stored as
 *     {@code String[]}
 */
private void assertMetacard(Metacard mc, Map<String, Object> expectedValues) {
  // Identifier
  String expectedId = (String) expectedValues.get(Metacard.ID);
  assertThat(mc.getId(), equalTo(expectedId));
  String[] expectedIdentifiers =
      (String[]) expectedValues.get(CswRecordMetacardType.CSW_IDENTIFIER);
  assertListStringAttribute(mc, CswRecordMetacardType.CSW_IDENTIFIER, expectedIdentifiers);

  // Title
  String expectedTitle = (String) expectedValues.get(Metacard.TITLE);
  assertThat(mc.getTitle(), equalTo(expectedTitle));
  String[] expectedCswTitles = (String[]) expectedValues.get(CswRecordMetacardType.CSW_TITLE);
  assertListStringAttribute(mc, CswRecordMetacardType.CSW_TITLE, expectedCswTitles);

  // Modified
  Date expectedModified = (Date) expectedValues.get(Metacard.MODIFIED);
  assertThat(mc.getModifiedDate(), equalTo(expectedModified));
  String[] expectedCswModified =
      (String[]) expectedValues.get(CswRecordMetacardType.CSW_MODIFIED);
  assertListStringAttribute(mc, CswRecordMetacardType.CSW_MODIFIED, expectedCswModified);

  // Plain list-valued attributes, checked in the same order as before.
  String[] listAttributeNames = {
    CswRecordMetacardType.CSW_SUBJECT,
    CswRecordMetacardType.CSW_ABSTRACT,
    CswRecordMetacardType.CSW_RIGHTS,
    CswRecordMetacardType.CSW_LANGUAGE
  };
  for (String attributeName : listAttributeNames) {
    assertListStringAttribute(mc, attributeName, (String[]) expectedValues.get(attributeName));
  }

  // Type is a single string value rather than a list.
  String actualType = (String) mc.getAttribute(CswRecordMetacardType.CSW_TYPE).getValue();
  String expectedType = (String) expectedValues.get(CswRecordMetacardType.CSW_TYPE);
  assertThat(actualType, equalTo(expectedType));

  String[] expectedFormats = (String[]) expectedValues.get(CswRecordMetacardType.CSW_FORMAT);
  assertListStringAttribute(mc, CswRecordMetacardType.CSW_FORMAT, expectedFormats);

  // Geography
  String expectedLocation = (String) expectedValues.get(Metacard.GEOGRAPHY);
  assertThat(mc.getLocation(), equalTo(expectedLocation));
  String[] expectedBoundingBox =
      (String[]) expectedValues.get(CswRecordMetacardType.OWS_BOUNDING_BOX);
  assertListStringAttribute(mc, CswRecordMetacardType.OWS_BOUNDING_BOX, expectedBoundingBox);
}
/**
 * Prepares a metacard for creation.
 *
 * <p>The metacard id is always replaced with a freshly generated random UUID with the dashes
 * removed. Any of the created, modified or effective dates that are {@code null} are set to
 * {@code now}.
 *
 * @param createRequest the create request being processed (not consulted here)
 * @param aMetacard the metacard to adjust in place
 * @param now timestamp used for any missing date
 */
protected void applyCreatedOperationRules(
    CreateRequest createRequest, Metacard aMetacard, Date now) {
  LOGGER.debug("Applying standard rules on CreateRequest");

  // Always assign a new id, regardless of what the incoming metacard carried.
  String generatedId = UUID.randomUUID().toString().replaceAll("-", "");
  aMetacard.setAttribute(new AttributeImpl(Metacard.ID, generatedId));

  boolean createdMissing = aMetacard.getCreatedDate() == null;
  if (createdMissing) {
    aMetacard.setAttribute(new AttributeImpl(Metacard.CREATED, now));
  }

  boolean modifiedMissing = aMetacard.getModifiedDate() == null;
  if (modifiedMissing) {
    aMetacard.setAttribute(new AttributeImpl(Metacard.MODIFIED, now));
  }

  boolean effectiveMissing = aMetacard.getEffectiveDate() == null;
  if (effectiveMissing) {
    aMetacard.setAttribute(new AttributeImpl(Metacard.EFFECTIVE, now));
  }
}
/**
 * Queries the catalog and prints the matching metacards as a table on the console.
 *
 * <p>The filter is built from {@code cqlFilter} when it is set; otherwise it is an any-text
 * "like" filter on {@code searchPhrase} (defaulting to {@code "*"}), case sensitive or not
 * depending on {@code caseSensitive}. For every result the id, modified date, title (truncated
 * to {@code TITLE_MAX_LENGTH}) and an excerpt of the metadata around the first occurrence of the
 * search phrase are printed.
 *
 * @return always {@code null}
 * @throws Exception any failure raised while building the filter, querying the catalog or
 *     reading a metacard's metadata is propagated unchanged
 */
@Override
protected Object doExecute() throws Exception {
  String formatString =
      "%1$-33s %2$-26s %3$-" + TITLE_MAX_LENGTH + "s %4$-" + EXCERPT_MAX_LENGTH + "s%n";

  CatalogFacade catalogProvider = getCatalog();

  Filter filter = null;
  if (cqlFilter != null) {
    filter = CQL.toFilter(cqlFilter);
  } else {
    if (searchPhrase == null) {
      searchPhrase = "*";
    }
    if (caseSensitive) {
      filter =
          getFilterBuilder()
              .attribute(Metacard.ANY_TEXT)
              .is()
              .like()
              .caseSensitiveText(searchPhrase);
    } else {
      filter = getFilterBuilder().attribute(Metacard.ANY_TEXT).is().like().text(searchPhrase);
    }
  }

  QueryImpl query = new QueryImpl(filter);
  query.setRequestsTotalResultsCount(true);
  if (numberOfItems > -1) {
    query.setPageSize(numberOfItems);
  }

  long start = System.currentTimeMillis();
  SourceResponse response = catalogProvider.query(new QueryRequestImpl(query));
  long end = System.currentTimeMillis();

  int size = 0;
  if (response.getResults() != null) {
    size = response.getResults().size();
  }

  console.println();
  console.printf(
      " %d result(s) out of %s%d%s in %3.3f seconds",
      (size),
      Ansi.ansi().fg(Ansi.Color.CYAN).toString(),
      response.getHits(),
      Ansi.ansi().reset().toString(),
      (end - start) / MILLISECONDS_PER_SECOND);
  // The summary line above has no newline; this empty row terminates it.
  console.printf(formatString, "", "", "", "");
  printHeaderMessage(String.format(formatString, ID, DATE, TITLE, EXCERPT));

  // The result list may be null (see the size computation above); iterate only when present
  // so an empty response prints just the header instead of failing.
  if (response.getResults() != null) {
    for (Result result : response.getResults()) {
      Metacard metacard = result.getMetacard();

      String title = (metacard.getTitle() != null ? metacard.getTitle() : "N/A");
      String excerpt = "N/A";
      String modifiedDate = "";

      // searchPhrase is null only when a CQL filter was supplied without a phrase.
      if (searchPhrase != null) {
        if (metacard.getMetadata() != null) {
          XPathHelper helper = new XPathHelper(metacard.getMetadata());
          String indexedText = helper.getDocument().getDocumentElement().getTextContent();
          // Flatten line breaks so the excerpt stays on one table row.
          indexedText = indexedText.replaceAll("\\r\\n|\\r|\\n", " ");

          // Wildcards are not part of the literal text to look for.
          String normalizedSearchPhrase = searchPhrase.replaceAll("\\*", "");

          int index = -1;
          if (caseSensitive) {
            index = indexedText.indexOf(normalizedSearchPhrase);
          } else {
            index = indexedText.toLowerCase().indexOf(normalizedSearchPhrase.toLowerCase());
          }

          if (index != -1) {
            // Characters of context shown on each side of the match. Clamped to zero: a
            // phrase longer than the excerpt budget would otherwise yield a negative length
            // and make the substring calls below throw.
            int contextLength =
                Math.max(0, (EXCERPT_MAX_LENGTH - normalizedSearchPhrase.length() - 8) / 2);
            int matchEnd = index + normalizedSearchPhrase.length();

            excerpt = "..." + indexedText.substring(Math.max(index - contextLength, 0), index);
            // Highlight the matched text in green.
            excerpt = excerpt + Ansi.ansi().fg(Ansi.Color.GREEN).toString();
            excerpt = excerpt + indexedText.substring(index, matchEnd);
            excerpt = excerpt + Ansi.ansi().reset().toString();
            excerpt =
                excerpt
                    + indexedText.substring(
                        matchEnd, Math.min(indexedText.length(), matchEnd + contextLength))
                    + "...";
          }
        }
      }

      if (metacard.getModifiedDate() != null) {
        modifiedDate =
            new DateTime(metacard.getModifiedDate().getTime()).toString(DATETIME_FORMATTER);
      }

      console.printf(
          formatString,
          metacard.getId(),
          modifiedDate,
          title.substring(0, Math.min(title.length(), TITLE_MAX_LENGTH)),
          excerpt);
    }
  }

  return null;
}