예제 #1
0
  /**
   * Populates {@code image} with metadata extracted from {@code file}, then runs reverse geocoding
   * and OCR on it.
   *
   * <p>The file is parsed with Tika's {@link AutoDetectParser}. The image date is the first
   * non-null value among {@code Metadata.ORIGINAL_DATE}, {@code TikaCoreProperties.CREATED},
   * {@code DublinCore.CREATED}, {@code TikaCoreProperties.METADATA_DATE} and
   * {@code DublinCore.MODIFIED}; if none is present the current date and time is used. Longitude
   * and latitude are copied from the {@code Geographic} metadata properties. After each stage the
   * matching {@code ImageOperations.set...Finished()} flag is raised.
   *
   * <p>{@link IOException}, {@code TikaException}, {@code SAXException},
   * {@link InterruptedException} and {@code IM4JavaException} are reported on the console and not
   * rethrown; processing simply stops at the failing stage.
   *
   * @param image the image record to fill in
   * @param file the image file to parse and OCR
   * @throws Exception for any failure other than the exception types listed above
   */
  private void parseImage(Image image, File file) throws Exception {
    try {
      BodyContentHandler handler = new BodyContentHandler();
      Metadata metadata = new Metadata();
      ParseContext parseContext = new ParseContext();
      AutoDetectParser parser = new AutoDetectParser();

      // The stream is only needed while Tika parses; close it before the slower
      // geocoding and OCR steps so the file handle is never leaked.
      try (FileInputStream inputStream = new FileInputStream(file)) {
        parser.parse(inputStream, handler, metadata, parseContext);
      }

      // Pick the most specific date available, looking each property up only once.
      Date extractedDate = metadata.getDate(Metadata.ORIGINAL_DATE);
      if (extractedDate == null) {
        extractedDate = metadata.getDate(TikaCoreProperties.CREATED);
      }
      if (extractedDate == null) {
        extractedDate = metadata.getDate(DublinCore.CREATED);
      }
      if (extractedDate == null) {
        extractedDate = metadata.getDate(TikaCoreProperties.METADATA_DATE);
      }
      if (extractedDate == null) {
        extractedDate = metadata.getDate(DublinCore.MODIFIED);
      }

      String date;
      if (extractedDate != null) {
        date = extractedDate.toString();
      } else {
        // No date in the file: fall back to the current date and time.
        // NOTE(review): this branch yields Tika's own string rendering of the date, which
        // presumably differs from the Date.toString() format used above — confirm that
        // consumers of Image.getDate() accept both.
        metadata.set(Metadata.DATE, new Date());
        date = metadata.get(Metadata.DATE);
      }

      image.setLongitude(metadata.get(Geographic.LONGITUDE));
      image.setLatitude(metadata.get(Geographic.LATITUDE));
      ImageOperations.setMetadataParsingFinished();

      // date may still be null here only if the metadata lookup above returned null.
      image.setDate(date);

      aPII.reverseGeocode(image);
      ImageOperations.setReverseGeocodeFinished();

      ImageOperations iO = new ImageOperations();
      iO.doOCR(image, file);
      ImageOperations.setOcrFinished();

    } catch (IOException e) {
      System.out.println(e.getMessage());
    } catch (TikaException te) {
      System.out.println(te.getMessage());
    } catch (SAXException se) {
      System.out.println(se.getMessage());
    } catch (InterruptedException ie) {
      // Restore the interrupt flag so callers further up the stack can still observe it.
      Thread.currentThread().interrupt();
      System.out.println(ie.getMessage());
    } catch (IM4JavaException je) {
      je.printStackTrace();
    }
  }
  /**
   * {@inheritDoc}
   *
   * <p>Reads the {@code path} property of the event and, unless {@code ignorePath} rejects it,
   * builds at most one Solr document for the content item stored at that path. The document
   * receives one field per indexable content property, a {@code content} field holding the text
   * Tika extracts from the item's body stream (when there is one), and the content object itself
   * under {@code _DOC_SOURCE_OBJECT}. Storage, access and I/O failures are logged as warnings and
   * result in an empty collection.
   *
   * @see
   *     org.sakaiproject.nakamura.api.solr.IndexingHandler#getDocuments(org.sakaiproject.nakamura.api.solr.RepositorySession,
   *     org.osgi.service.event.Event)
   */
  public Collection<SolrInputDocument> getDocuments(
      RepositorySession repositorySession, Event event) {
    LOGGER.debug("GetDocuments for {} ", event);
    final String path = (String) event.getProperty("path");
    if (ignorePath(path)) {
      return Collections.emptyList();
    }

    final List<SolrInputDocument> result = Lists.newArrayList();
    if (path != null) {
      try {
        final Session sparseSession = repositorySession.adaptTo(Session.class);
        final ContentManager manager = sparseSession.getContentManager();
        final Content item = manager.get(path);
        if (item != null) {
          final SolrInputDocument solrDoc = new SolrInputDocument();

          // Copy every property that maps to an index field; unmapped ones are skipped.
          final Map<String, Object> fields = item.getProperties();
          for (Entry<String, Object> property : fields.entrySet()) {
            final String fieldName = index(property);
            if (fieldName == null) {
              continue;
            }
            for (Object value : convertToIndex(property)) {
              solrDoc.addField(fieldName, value);
            }
          }

          // Full-text extraction is best effort: a Tika failure is logged and the
          // document is still indexed without its "content" field.
          // NOTE(review): assuming `tika` is an org.apache.tika.Tika facade, whose
          // parseToString(InputStream) is documented to close the stream — confirm.
          final InputStream body = manager.getInputStream(path);
          if (body != null) {
            try {
              solrDoc.addField("content", tika.parseToString(body));
            } catch (TikaException e) {
              LOGGER.warn(e.getMessage(), e);
            }
          }

          solrDoc.addField(_DOC_SOURCE_OBJECT, item);
          result.add(solrDoc);
        }
      } catch (ClientPoolException e) {
        LOGGER.warn(e.getMessage(), e);
      } catch (StorageClientException e) {
        LOGGER.warn(e.getMessage(), e);
      } catch (AccessDeniedException e) {
        LOGGER.warn(e.getMessage(), e);
      } catch (IOException e) {
        LOGGER.warn(e.getMessage(), e);
      }
    }
    LOGGER.debug("Got documents {} ", result);
    return result;
  }
예제 #3
0
  /**
   * Extracts metadata from the file backing {@code product} using Tika's auto-detecting parser
   * and converts it with {@code transform}.
   *
   * @param product the product whose file (resolved through {@code getProductFile}) is parsed
   * @return the result of {@code transform} applied to the metadata Tika collected
   * @throws MetExtractionException if the file cannot be found, read or parsed; the original
   *     exception is attached as the cause
   */
  private Metadata getMetadataFromTika(Product product) throws MetExtractionException {
    try {
      File file = getProductFile(product);
      org.apache.tika.metadata.Metadata tikaMetadata = new org.apache.tika.metadata.Metadata();
      Parser parser = new AutoDetectParser();

      // Close the stream as soon as parsing is done, whether it succeeds or fails.
      try (FileInputStream inputStream = new FileInputStream(file)) {
        parser.parse(inputStream, new DefaultHandler(), tikaMetadata, new ParseContext());
      }
      return transform(tikaMetadata);

    } catch (FileNotFoundException e) {
      throw extractionFailure("Unable to find file: Reason: " + e.getMessage(), e);
    } catch (TikaException e) {
      throw extractionFailure("Unable to parse the document: Reason: " + e.getMessage(), e);
    } catch (SAXException e) {
      throw extractionFailure(
          " Unable to process the SAX events : Reason: " + e.getMessage(), e);
    } catch (IOException e) {
      throw extractionFailure(
          "Unable to read the document stream: Reason: " + e.getMessage(), e);
    }
  }

  /**
   * Builds a {@code MetExtractionException} with the given message and records {@code cause} on
   * it so the original stack trace is not lost.
   */
  private MetExtractionException extractionFailure(String message, Throwable cause) {
    MetExtractionException failure = new MetExtractionException(message);
    // initCause is used so that no particular constructor overload has to exist.
    failure.initCause(cause);
    return failure;
  }