private void parseImage(Image image, File file) throws Exception { try { // Detects the file type BodyContentHandler handler = new BodyContentHandler(); Metadata metadata = new Metadata(); FileInputStream inputStream = new FileInputStream(file); ParseContext parseContext = new ParseContext(); // Parser AutoDetectParser parser = new AutoDetectParser(); parser.parse(inputStream, handler, metadata, parseContext); // Image field setting String date; if (metadata.getDate(metadata.ORIGINAL_DATE) != null) { date = metadata.getDate(metadata.ORIGINAL_DATE).toString(); } else if (metadata.getDate(TikaCoreProperties.CREATED) != null) { date = metadata.getDate(TikaCoreProperties.CREATED).toString(); } else if (metadata.getDate(DublinCore.CREATED) != null) { date = metadata.getDate(DublinCore.CREATED).toString(); } else if (metadata.getDate(TikaCoreProperties.METADATA_DATE) != null) { date = metadata.getDate(TikaCoreProperties.METADATA_DATE).toString(); } else if (metadata.getDate(DublinCore.MODIFIED) != null) { date = metadata.getDate(DublinCore.MODIFIED).toString(); } else { // Current date+time metadata.set(Metadata.DATE, new Date()); date = metadata.get(Metadata.DATE); } image.setLongitude(metadata.get(Geographic.LONGITUDE)); image.setLatitude(metadata.get(Geographic.LATITUDE)); ImageOperations.setMetadataParsingFinished(); if (date != null) { image.setDate(date.toString()); } else { image.setDate(null); } image.setLongitude(image.getLongitude()); image.setLatitude(image.getLatitude()); aPII.reverseGeocode(image); ImageOperations.setReverseGeocodeFinished(); ImageOperations iO = new ImageOperations(); iO.doOCR(image, file); ImageOperations.setOcrFinished(); } catch (IOException e) { System.out.println(e.getMessage()); } catch (TikaException te) { System.out.println(te.getMessage()); } catch (SAXException se) { System.out.println(se.getMessage()); } catch (InterruptedException ie) { System.out.println(ie.getMessage()); } catch (IM4JavaException je) { je.printStackTrace(); } }
/**
 * {@inheritDoc}
 *
 * <p>Produces at most one Solr document for the content item named by the event's
 * {@code "path"} property. Every content property that {@code index(...)} maps to a
 * field name is added with the values from {@code convertToIndex(...)}; when the
 * item has a body stream, the text Tika extracts from it is added as
 * {@code "content"}. Repository, access and I/O failures are logged at WARN and
 * yield an empty result.
 *
 * @see org.sakaiproject.nakamura.api.solr.IndexingHandler#getDocuments(org.sakaiproject.nakamura.api.solr.RepositorySession,
 *      org.osgi.service.event.Event)
 */
public Collection<SolrInputDocument> getDocuments(
    RepositorySession repositorySession, Event event) {
  LOGGER.debug("GetDocuments for {} ", event);
  final String path = (String) event.getProperty("path");
  if (ignorePath(path)) {
    return Collections.emptyList();
  }

  final List<SolrInputDocument> documents = Lists.newArrayList();
  if (path != null) {
    try {
      final ContentManager contentManager =
          repositorySession.adaptTo(Session.class).getContentManager();
      final Content content = contentManager.get(path);
      if (content != null) {
        final SolrInputDocument solrDocument = new SolrInputDocument();

        // Copy every property that has an index mapping; unmapped ones are skipped.
        final Map<String, Object> contentProperties = content.getProperties();
        for (Entry<String, Object> property : contentProperties.entrySet()) {
          final String fieldName = index(property);
          if (fieldName == null) {
            continue;
          }
          for (Object value : convertToIndex(property)) {
            solrDocument.addField(fieldName, value);
          }
        }

        // Full-text body, when the item has one. A Tika failure only costs the
        // "content" field; the document is still indexed.
        // NOTE(review): the stream is not closed here - presumably
        // tika.parseToString closes it; confirm against the Tika facade in use.
        final InputStream body = contentManager.getInputStream(path);
        if (body != null) {
          try {
            solrDocument.addField("content", tika.parseToString(body));
          } catch (TikaException e) {
            LOGGER.warn(e.getMessage(), e);
          }
        }

        solrDocument.addField(_DOC_SOURCE_OBJECT, content);
        documents.add(solrDocument);
      }
    } catch (ClientPoolException e) {
      LOGGER.warn(e.getMessage(), e);
    } catch (StorageClientException e) {
      LOGGER.warn(e.getMessage(), e);
    } catch (AccessDeniedException e) {
      LOGGER.warn(e.getMessage(), e);
    } catch (IOException e) {
      LOGGER.warn(e.getMessage(), e);
    }
  }
  LOGGER.debug("Got documents {} ", documents);
  return documents;
}
/**
 * Parses the product's file with Tika's auto-detecting parser and converts the
 * resulting Tika metadata with {@code transform}.
 *
 * @param product the product whose file is parsed
 * @return the metadata produced by {@code transform} from the Tika metadata
 * @throws MetExtractionException if the file cannot be found, read or parsed; the
 *                                underlying exception is attached as the cause
 */
private Metadata getMetadataFromTika(Product product) throws MetExtractionException {
    // try-with-resources: the stream was previously never closed, leaking a file
    // handle on both the success and the failure path.
    try (FileInputStream inputStream = new FileInputStream(getProductFile(product))) {
        org.apache.tika.metadata.Metadata tikaMetadata = new org.apache.tika.metadata.Metadata();
        Parser parser = new AutoDetectParser();
        // Only the metadata is wanted, so SAX content events go to a no-op handler.
        parser.parse(inputStream, new DefaultHandler(), tikaMetadata, new ParseContext());
        return transform(tikaMetadata);
    } catch (FileNotFoundException e) {
        throw metExtractionFailure("Unable to find file: Reason: " + e.getMessage(), e);
    } catch (TikaException e) {
        throw metExtractionFailure("Unable to parse the document: Reason: " + e.getMessage(), e);
    } catch (SAXException e) {
        throw metExtractionFailure(
            " Unable to process the SAX events : Reason: " + e.getMessage(), e);
    } catch (IOException e) {
        throw metExtractionFailure(
            "Unable to read the document stream: Reason: " + e.getMessage(), e);
    }
}

/**
 * Builds a {@code MetExtractionException} carrying {@code message} with
 * {@code cause} attached, so the original stack trace is not lost.
 */
private static MetExtractionException metExtractionFailure(String message, Throwable cause) {
    MetExtractionException failure = new MetExtractionException(message);
    // initCause is used so that only the (String) constructor is required.
    failure.initCause(cause);
    return failure;
}