/**
 * Builds a {@link WebDocument} from a page by running the page's raw bytes through Tika.
 *
 * <p>The page's content type, URL and content encoding are handed to Tika as metadata hints
 * before parsing. The returned document is constructed from a constant {@code 0}, the
 * {@code Metadata.TITLE} value Tika reports, the extracted text, and the page URL.
 *
 * @param page the page whose content bytes are parsed
 * @return the extracted document, or {@code null} when parsing raised an {@link IOException}
 *     or a {@code TikaException} (the stack trace is printed in that case)
 */
public static WebDocument getDocument(Page page) {
    InputStream stream = new ByteArrayInputStream(page.getContentBytes());
    try {
      // Resolved once: the same URL string feeds two metadata keys and the document itself.
      String url = page.getURL().toString();
      String contentType = page.getContentType();

      Metadata metadata = new Metadata();
      metadata.set(Metadata.CONTENT_TYPE, contentType);
      metadata.set(Metadata.CONTENT_LOCATION, url);
      metadata.set(Metadata.LOCATION, url);
      metadata.set(Metadata.MIME_TYPE_MAGIC, contentType);
      metadata.set(Metadata.CONTENT_ENCODING, page.getContentEncoding());
      metadata.set(Metadata.TIKA_MIME_FILE, contentType);

      String text = getTika().parseToString(stream, metadata);

      // The stream is closed exactly once, by the finally block below.
      return new WebDocument(0, metadata.get(Metadata.TITLE), text, url);
    } catch (IOException e) {
      e.printStackTrace();
    } catch (TikaException e) {
      e.printStackTrace();
    } finally {
      try {
        stream.close();
      } catch (IOException e) {
        e.printStackTrace();
      }
    }

    // Reached only when one of the catch blocks above handled a failure.
    return null;
  }
// Example no. 2
// 0
  /**
   * Parses the attachment stream with Tika's auto-detecting parser and copies selected
   * metadata (author, creation date, keywords) into {@code eventResult}.
   *
   * <p>Each detail is stored under the key looked up in {@code currentProperties}; a detail is
   * only set when Tika reported a non-null value for it. Parse and I/O failures are printed and
   * otherwise ignored, so whatever metadata was collected before the failure is still applied.
   *
   * @param response the item response (not read by this method)
   * @param stream the data handler whose input stream is parsed
   */
  private void manageDetails(final GetItemResponse response, final DataHandler stream) {
    ContentHandler contenthandler = new BodyContentHandler();
    Metadata metadata = new Metadata();
    Parser parser = new AutoDetectParser();
    ParseContext context = new ParseContext();

    InputStream is = null;
    try {
      is = stream.getInputStream();
      parser.parse(is, contenthandler, metadata, context);
    } catch (IOException e) {
      e.printStackTrace();
    } catch (SAXException e) {
      e.printStackTrace();
    } catch (TikaException e) {
      e.printStackTrace();
    } finally {
      // Close on every path (including parse failures). The stream is not reset afterwards:
      // resetting a closed stream cannot succeed and nothing reads from it again.
      if (is != null) {
        try {
          is.close();
        } catch (IOException e) {
          e.printStackTrace();
        }
      }
    }

    String contentAuthorValue = metadata.get(Metadata.AUTHOR);
    String contentAuthorKey = currentProperties.getProperty(KpeopleLabel.getCorePropertiesAuthor());
    if (contentAuthorValue != null) {
      eventResult.setDetail(contentAuthorKey, contentAuthorValue);
    }

    String contentCreationDateValue = metadata.get(Metadata.CREATION_DATE);
    String contentCreationDateKey =
        currentProperties.getProperty(KpeopleLabel.getCorePropertiesCreationDate());
    if (contentCreationDateValue != null) {
      eventResult.setDetail(contentCreationDateKey, contentCreationDateValue);
    }

    String contentKeywordsValue = metadata.get(Metadata.KEYWORDS);
    String contentKeywordsKey =
        currentProperties.getProperty(KpeopleLabel.getCorePropertiesKeywords());
    if (contentKeywordsValue != null) {
      eventResult.setDetail(contentKeywordsKey, contentKeywordsValue);
    }
  }
// Example no. 3
// 0
  /**
   * Parses an MP3 file with Tika's {@code Mp3Parser} and prints its metadata to standard output.
   *
   * <p>All metadata name/value pairs are listed first, followed by a short summary of title,
   * artist, composer, genre and album. Any I/O or parse failure is reported by printing the
   * stack trace.
   *
   * @param args optional; {@code args[0]} is the path of the MP3 file to read. When no argument
   *     is given, the Windows sample track {@code Kalimba.mp3} is used.
   */
  public static void main(String[] args) {
    String fileLocation =
        (args != null && args.length > 0)
            ? args[0]
            : "C:\\Users\\Public\\Music\\Sample Music\\Kalimba.mp3";

    InputStream input = null;
    try {
      input = new FileInputStream(new File(fileLocation));
      ContentHandler handler = new DefaultHandler();
      Metadata metadata = new Metadata();
      Parser parser = new Mp3Parser();
      ParseContext parseCtx = new ParseContext();
      parser.parse(input, handler, metadata, parseCtx);

      // List all metadata
      String[] metadataNames = metadata.names();

      for (String name : metadataNames) {
        System.out.println(name + ": " + metadata.get(name));
      }

      // Retrieve the necessary info from metadata.
      // The key names used below (title, xmpDM:artist, ...) may differ depending on the Tika
      // version in use; consult the full listing printed above if a value comes back null.
      System.out.println("----------------------------------------------");
      System.out.println("Title: " + metadata.get("title"));
      System.out.println("Artists: " + metadata.get("xmpDM:artist"));
      System.out.println("Composer : " + metadata.get("xmpDM:composer"));
      System.out.println("Genre : " + metadata.get("xmpDM:genre"));
      System.out.println("Album : " + metadata.get("xmpDM:album"));

    } catch (IOException e) {
      // Also covers FileNotFoundException (a subclass) when the file cannot be opened.
      e.printStackTrace();
    } catch (SAXException e) {
      e.printStackTrace();
    } catch (TikaException e) {
      e.printStackTrace();
    } finally {
      // Release the file handle even when parsing fails.
      if (input != null) {
        try {
          input.close();
        } catch (IOException e) {
          e.printStackTrace();
        }
      }
    }
  }