/**
 * Verifies that the commented JPEG fixture yields a metacard with the expected title, keyword
 * metadata, content type and created date.
 */
@Test
  public void testCommentedJpeg() throws Exception {
    /*
     * The dates in testJPEG_commented.jpg do not contain timezones. If no timezone is specified,
     * the Tika input transformer assumes the local time zone.  Set the system timezone to UTC
     * so we can do assertions.
     */
    TimeZone defaultTimeZone = TimeZone.getDefault();
    TimeZone.setDefault(TimeZone.getTimeZone("UTC"));

    // try-with-resources closes the fixture stream; the finally block restores the JVM-wide
    // default time zone even when an assertion fails, so other tests are not affected.
    try (InputStream stream =
        Thread.currentThread()
            .getContextClassLoader()
            .getResourceAsStream("testJPEG_commented.jpg")) {
      Metacard metacard = transform(stream);
      assertNotNull(metacard);
      assertThat(metacard.getTitle(), is("Tosteberga \u00C4ngar"));
      assertNotNull(metacard.getMetadata());
      assertThat(
          metacard.getMetadata(),
          containsString("<meta name=\"Keywords\" content=\"bird watching\"/>"));
      assertThat(metacard.getContentTypeName(), is("image/jpeg"));
      assertThat(convertDate(metacard.getCreatedDate()), is("2010-07-28 11:02:00 UTC"));
    } finally {
      // Reset timezone back to local time zone.
      TimeZone.setDefault(defaultTimeZone);
    }
  }
  /**
   * Verifies that the geo-tagged JPEG fixture yields a metacard with the expected camera model
   * metadata, content type, created/modified dates and point geography.
   */
  @Test
  public void testGeoTaggedJpeg() throws Exception {
    /*
     * The dates in testJPEG_GEO.jpg do not contain timezones. If no timezone is specified,
     * the Tika input transformer assumes the local time zone.  Set the system timezone to UTC
     * so we can do assertions.
     */
    TimeZone defaultTimeZone = TimeZone.getDefault();
    TimeZone.setDefault(TimeZone.getTimeZone("UTC"));

    // try-with-resources closes the fixture stream; the finally block restores the JVM-wide
    // default time zone even when an assertion fails, so other tests are not affected.
    try (InputStream stream =
        Thread.currentThread()
            .getContextClassLoader()
            .getResourceAsStream("testJPEG_GEO.jpg")) {
      Metacard metacard = transform(stream);
      assertNotNull(metacard);
      assertNotNull(metacard.getMetadata());
      assertThat(
          metacard.getMetadata(),
          containsString("<meta name=\"Model\" content=\"Canon EOS 40D\"/>"));
      assertThat(metacard.getContentTypeName(), is("image/jpeg"));
      assertThat(convertDate(metacard.getCreatedDate()), is("2009-08-11 09:09:45 UTC"));
      assertThat(convertDate(metacard.getModifiedDate()), is("2009-10-02 23:02:49 UTC"));
      assertThat(
          (String) metacard.getAttribute(Metacard.GEOGRAPHY).getValue(),
          is("POINT(-54.1234 12.54321)"));
    } finally {
      // Reset timezone back to local time zone.
      TimeZone.setDefault(defaultTimeZone);
    }
  }
  /**
   * Verifies that the OpenOffice text fixture yields a metacard with the expected title,
   * created/modified dates, body text in the metadata and content type.
   */
  @Test
  public void testOpenOffice() throws Exception {
    /*
     * The dates in testOpenOffice2.odt do not contain timezones. If no timezone is specified,
     * the Tika input transformer assumes the local time zone.  Set the system timezone to UTC
     * so we can do assertions.
     */
    TimeZone defaultTimeZone = TimeZone.getDefault();
    TimeZone.setDefault(TimeZone.getTimeZone("UTC"));

    // try-with-resources closes the fixture stream; the finally block restores the JVM-wide
    // default time zone even when an assertion fails, so other tests are not affected.
    try (InputStream stream =
        Thread.currentThread()
            .getContextClassLoader()
            .getResourceAsStream("testOpenOffice2.odt")) {
      Metacard metacard = transform(stream);
      assertNotNull(metacard);
      assertThat(metacard.getTitle(), is("Test OpenOffice2 Document"));
      assertThat(convertDate(metacard.getCreatedDate()), is("2007-09-14 11:06:08 UTC"));
      assertThat(convertDate(metacard.getModifiedDate()), is("2013-02-13 06:52:10 UTC"));
      assertNotNull(metacard.getMetadata());
      assertThat(
          metacard.getMetadata(),
          containsString("This is a sample Open Office document, written in NeoOffice 2.2.1"));
      assertThat(metacard.getContentTypeName(), is("application/vnd.oasis.opendocument.text"));
    } finally {
      // Reset timezone back to local time zone.
      TimeZone.setDefault(defaultTimeZone);
    }
  }
 /**
  * Verifies that the Groovy source fixture yields a metacard whose metadata contains the source
  * comment and whose content type contains {@code text/plain}.
  */
 @Test
 public void testGroovySource() throws Exception {
   // try-with-resources closes the fixture stream even if an assertion fails.
   try (InputStream stream =
       Thread.currentThread()
           .getContextClassLoader()
           .getResourceAsStream("testGROOVY.groovy")) {
     Metacard metacard = transform(stream);
     assertNotNull(metacard);
     assertNotNull(metacard.getMetadata());
     assertThat(metacard.getMetadata(), containsString("this is a comment"));
     assertThat(metacard.getContentTypeName(), containsString("text/plain"));
   }
 }
 /**
  * Verifies that the AIFF audio fixture yields a metacard whose metadata mentions
  * {@code PCM_SIGNED} and whose content type is {@code audio/x-aiff}.
  */
 @Test
 public void testAudioAiff() throws Exception {
   // try-with-resources closes the fixture stream even if an assertion fails.
   try (InputStream stream =
       Thread.currentThread()
           .getContextClassLoader()
           .getResourceAsStream("testAIFF.aif")) {
     Metacard metacard = transform(stream);
     assertNotNull(metacard);
     assertNotNull(metacard.getMetadata());
     assertThat(metacard.getMetadata(), containsString("PCM_SIGNED"));
     assertThat(metacard.getContentTypeName(), is("audio/x-aiff"));
   }
 }
 /**
  * Verifies that the MIDI audio fixture yields a metacard whose metadata mentions {@code PPQ}
  * and whose content type is {@code audio/midi}.
  */
 @Test
 public void testAudioMidi() throws Exception {
   // try-with-resources closes the fixture stream even if an assertion fails.
   try (InputStream stream =
       Thread.currentThread()
           .getContextClassLoader()
           .getResourceAsStream("testMID.mid")) {
     Metacard metacard = transform(stream);
     assertNotNull(metacard);
     assertNotNull(metacard.getMetadata());
     assertThat(metacard.getMetadata(), containsString("PPQ"));
     assertThat(metacard.getContentTypeName(), is("audio/midi"));
   }
 }
 /**
  * Verifies that the C++ source fixture yields a metacard whose metadata contains the source
  * text and whose content type contains {@code text/plain}.
  */
 @Test
 public void testCppSource() throws Exception {
   // try-with-resources closes the fixture stream even if an assertion fails.
   try (InputStream stream =
       Thread.currentThread()
           .getContextClassLoader()
           .getResourceAsStream("testCPP.cpp")) {
     Metacard metacard = transform(stream);
     assertNotNull(metacard);
     assertNotNull(metacard.getMetadata());
     assertThat(metacard.getMetadata(), containsString("Hello world example"));
     assertThat(metacard.getContentTypeName(), containsString("text/plain"));
   }
 }
 /**
  * Verifies that the XML fixture yields a metacard with the expected title, created date,
  * author text in the metadata and content type {@code application/xml}.
  */
 @Test
 public void testXml() throws Exception {
   // try-with-resources closes the fixture stream even if an assertion fails.
   try (InputStream stream =
       Thread.currentThread()
           .getContextClassLoader()
           .getResourceAsStream("testXML.xml")) {
     Metacard metacard = transform(stream);
     assertNotNull(metacard);
     assertThat(metacard.getTitle(), is("Test Document"));
     assertThat(convertDate(metacard.getCreatedDate()), is("2000-12-01 00:00:00 UTC"));
     assertNotNull(metacard.getMetadata());
     assertThat(metacard.getMetadata(), containsString("John Smith"));
     assertThat(metacard.getContentTypeName(), is("application/xml"));
   }
 }
 /**
  * Verifies that the Java source fixture yields a metacard whose metadata contains the class
  * name and whose content type contains {@code text/plain}.
  */
 @Test
 public void testJavaSource() throws Exception {
   // try-with-resources closes the fixture stream even if an assertion fails.
   try (InputStream stream =
       Thread.currentThread()
           .getContextClassLoader()
           .getResourceAsStream("testpackage/testJAVA.java")) {
     Metacard metacard = transform(stream);
     assertNotNull(metacard);
     assertNotNull(metacard.getMetadata());
     assertThat(metacard.getMetadata(), containsString("HelloWorld"));
     assertThat(metacard.getContentTypeName(), containsString("text/plain"));
   }
 }
 /**
  * Verifies that the PNG fixture yields a metacard whose metadata reports lossless compression
  * and whose content type is {@code image/png}.
  */
 @Test
 public void testPng() throws Exception {
   // try-with-resources closes the fixture stream even if an assertion fails.
   try (InputStream stream =
       Thread.currentThread()
           .getContextClassLoader()
           .getResourceAsStream("testPNG.png")) {
     Metacard metacard = transform(stream);
     assertNotNull(metacard);
     assertNotNull(metacard.getMetadata());
     assertThat(
         metacard.getMetadata(),
         containsString("<meta name=\"Compression Lossless\" content=\"true\"/>"));
     assertThat(metacard.getContentTypeName(), is("image/png"));
   }
 }
 /**
  * Verifies that the BMP fixture yields a metacard whose metadata reports the {@code BI_RGB}
  * compression type and whose content type is {@code image/x-ms-bmp}.
  */
 @Test
 public void testBmp() throws Exception {
   // try-with-resources closes the fixture stream even if an assertion fails.
   try (InputStream stream =
       Thread.currentThread()
           .getContextClassLoader()
           .getResourceAsStream("testBMP.bmp")) {
     Metacard metacard = transform(stream);
     assertNotNull(metacard);
     assertNotNull(metacard.getMetadata());
     assertThat(
         metacard.getMetadata(),
         containsString("<meta name=\"Compression CompressionTypeName\" content=\"BI_RGB\"/>"));
     assertThat(metacard.getContentTypeName(), is("image/x-ms-bmp"));
   }
 }
 /**
  * Verifies that the TIFF fixture yields a metacard whose metadata reports 8 bits per sample
  * and whose content type is {@code image/tiff}.
  */
 @Test
 public void testTiff() throws Exception {
   // try-with-resources closes the fixture stream even if an assertion fails.
   try (InputStream stream =
       Thread.currentThread()
           .getContextClassLoader()
           .getResourceAsStream("testTIFF.tif")) {
     Metacard metacard = transform(stream);
     assertNotNull(metacard);
     assertNotNull(metacard.getMetadata());
     assertThat(
         metacard.getMetadata(),
         containsString("<meta name=\"tiff:BitsPerSample\" content=\"8\"/>"));
     assertThat(metacard.getContentTypeName(), is("image/tiff"));
   }
 }
 /**
  * Verifies that the MP3 fixture yields a metacard with the expected title, artist metadata
  * and content type {@code audio/mpeg}.
  */
 @Test
 public void testMp3() throws Exception {
   // try-with-resources closes the fixture stream even if an assertion fails.
   try (InputStream stream =
       Thread.currentThread()
           .getContextClassLoader()
           .getResourceAsStream("testMP3id3v1_v2.mp3")) {
     Metacard metacard = transform(stream);
     assertNotNull(metacard);
     assertThat(metacard.getTitle(), is("Test Title"));
     assertNotNull(metacard.getMetadata());
     assertThat(
         metacard.getMetadata(),
         containsString("<meta name=\"xmpDM:artist\" content=\"Test Artist\"/>"));
     assertThat(metacard.getContentTypeName(), is("audio/mpeg"));
   }
 }
 /**
  * Verifies that the PowerPoint (.ppt) fixture yields a metacard with the expected title,
  * created/modified dates, metadata text and content type.
  */
 @Test
 public void testPpt() throws Exception {
   // try-with-resources closes the fixture stream even if an assertion fails.
   try (InputStream stream =
       Thread.currentThread()
           .getContextClassLoader()
           .getResourceAsStream("testPPT.ppt")) {
     Metacard metacard = transform(stream);
     assertNotNull(metacard);
     assertThat(metacard.getTitle(), is("Sample Powerpoint Slide"));
     assertThat(convertDate(metacard.getCreatedDate()), is("2007-09-14 17:33:12 UTC"));
     assertThat(convertDate(metacard.getModifiedDate()), is("2007-09-14 19:16:39 UTC"));
     assertNotNull(metacard.getMetadata());
     assertThat(metacard.getMetadata(), containsString("Created with Microsoft"));
     assertThat(metacard.getContentTypeName(), is("application/vnd.ms-powerpoint"));
   }
 }
 /**
  * Verifies that the compiled Java class fixture yields a metacard with the class name as
  * title, a known constant name in the metadata and content type {@code application/java-vm}.
  */
 @Test
 public void testJavaClass() throws Exception {
   // try-with-resources closes the fixture stream even if an assertion fails.
   try (InputStream stream =
       Thread.currentThread()
           .getContextClassLoader()
           .getResourceAsStream("CatalogFrameworkImpl.class")) {
     Metacard metacard = transform(stream);
     assertNotNull(metacard);
     assertThat(metacard.getTitle(), is("CatalogFrameworkImpl"));
     assertNotNull(metacard.getMetadata());
     assertThat(metacard.getMetadata(), containsString("DEFAULT_RESOURCE_NOT_FOUND_MESSAGE"));
     assertThat(metacard.getContentTypeName(), is("application/java-vm"));
   }
 }
 /**
  * Verifies that the PDF fixture yields a metacard with the expected title, created/modified
  * dates, page-count metadata and content type {@code application/pdf}.
  */
 @Test
 public void testPDF() throws Exception {
   // try-with-resources closes the fixture stream even if an assertion fails.
   try (InputStream stream =
       Thread.currentThread()
           .getContextClassLoader()
           .getResourceAsStream("testPDF.pdf")) {
     Metacard metacard = transform(stream);
     assertNotNull(metacard);
     assertThat(metacard.getTitle(), is("Apache Tika - Apache Tika"));
     assertThat(convertDate(metacard.getCreatedDate()), is("2007-09-15 09:02:31 UTC"));
     assertThat(convertDate(metacard.getModifiedDate()), is("2007-09-15 09:02:31 UTC"));
     assertNotNull(metacard.getMetadata());
     assertThat(
         metacard.getMetadata(), containsString("<meta name=\"xmpTPg:NPages\" content=\"1\"/>"));
     assertThat(metacard.getContentTypeName(), is("application/pdf"));
   }
 }
 /**
  * Verifies that the MP4 audio fixture yields a metacard with the expected title,
  * created/modified dates, artist metadata and content type {@code audio/mp4}.
  */
 @Test
 public void testMp4() throws Exception {
   // try-with-resources closes the fixture stream even if an assertion fails.
   try (InputStream stream =
       Thread.currentThread()
           .getContextClassLoader()
           .getResourceAsStream("testMP4.m4a")) {
     Metacard metacard = transform(stream);
     assertNotNull(metacard);
     assertThat(metacard.getTitle(), is("Test Title"));
     assertThat(convertDate(metacard.getCreatedDate()), is("2012-01-28 18:39:18 UTC"));
     assertThat(convertDate(metacard.getModifiedDate()), is("2012-01-28 18:40:25 UTC"));
     assertNotNull(metacard.getMetadata());
     assertThat(
         metacard.getMetadata(),
         containsString("<meta name=\"xmpDM:artist\" content=\"Test Artist\"/>"));
     assertThat(metacard.getContentTypeName(), is("audio/mp4"));
   }
 }
 /**
  * Verifies that the Excel (.xls) fixture yields a metacard with the expected title,
  * created/modified dates, sheet text in the metadata and content type.
  */
 @Test
 public void testXls() throws Exception {
   // try-with-resources closes the fixture stream even if an assertion fails.
   try (InputStream stream =
       Thread.currentThread()
           .getContextClassLoader()
           .getResourceAsStream("testEXCEL.xls")) {
     Metacard metacard = transform(stream);
     assertNotNull(metacard);
     assertThat(metacard.getTitle(), is("Simple Excel document"));
     assertThat(convertDate(metacard.getCreatedDate()), is("2007-10-01 16:13:56 UTC"));
     assertThat(convertDate(metacard.getModifiedDate()), is("2007-10-01 16:31:43 UTC"));
     assertNotNull(metacard.getMetadata());
     assertThat(
         metacard.getMetadata(),
         containsString("Written and saved in Microsoft Excel X for Mac Service Release 1."));
     assertThat(metacard.getContentTypeName(), is("application/vnd.ms-excel"));
   }
 }
 /**
  * Asserts that the metacard carries the default title, content type name and content type
  * version, and that its metadata equals the sample metadata once all whitespace has been
  * stripped from both sides.
  *
  * @param metacard the metacard to check
  */
 protected void verifyBasics(Metacard metacard) {
   assertEquals(DEFAULT_TITLE, metacard.getTitle());
   assertEquals(DEFAULT_TYPE, metacard.getContentTypeName());
   assertEquals(DEFAULT_VERSION, metacard.getContentTypeVersion());

   // Compare metadata ignoring whitespace so formatting differences do not fail the check.
   String expectedCompact = sampleMetadata().replaceAll("\\s", "");
   String actualCompact = metacard.getMetadata().replaceAll("\\s", "");
   assertEquals(expectedCompact, actualCompact);
 }
 /**
  * Verifies that the Word (.docx) fixture yields a metacard with the expected title,
  * created/modified dates, body paragraph in the metadata and content type.
  */
 @Test
 public void testWordDoc() throws Exception {
   // try-with-resources closes the fixture stream even if an assertion fails.
   try (InputStream stream =
       Thread.currentThread()
           .getContextClassLoader()
           .getResourceAsStream("testWORD.docx")) {
     Metacard metacard = transform(stream);
     assertNotNull(metacard);
     assertThat(metacard.getTitle(), is("Sample Word Document"));
     assertThat(convertDate(metacard.getCreatedDate()), is("2008-12-11 16:04:00 UTC"));
     assertThat(convertDate(metacard.getModifiedDate()), is("2010-11-12 16:21:00 UTC"));
     assertNotNull(metacard.getMetadata());
     assertThat(
         metacard.getMetadata(),
         containsString("<p>This is a sample Microsoft Word Document.</p>"));
     assertThat(
         metacard.getContentTypeName(),
         is("application/vnd.openxmlformats-officedocument.wordprocessingml.document"));
   }
 }
 /**
  * Verifies that the PowerPoint (.pptx) fixture yields a metacard with the expected title,
  * created/modified dates, slide text in the metadata and content type.
  */
 @Test
 public void testPptx() throws Exception {
   // try-with-resources closes the fixture stream even if an assertion fails.
   try (InputStream stream =
       Thread.currentThread()
           .getContextClassLoader()
           .getResourceAsStream("testPPT.pptx")) {
     Metacard metacard = transform(stream);
     assertNotNull(metacard);
     assertThat(metacard.getTitle(), is("Attachment Test"));
     assertThat(convertDate(metacard.getCreatedDate()), is("2010-05-04 06:43:54 UTC"));
     assertThat(convertDate(metacard.getModifiedDate()), is("2010-06-29 06:34:35 UTC"));
     assertNotNull(metacard.getMetadata());
     assertThat(
         metacard.getMetadata(),
         containsString("content as every other file being tested for tika content parsing"));
     assertThat(
         metacard.getContentTypeName(),
         is("application/vnd.openxmlformats-officedocument.presentationml.presentation"));
   }
 }
 /**
  * Verifies that the Excel (.xlsx) fixture yields a metacard with the expected title,
  * created/modified dates, worksheet text in the metadata and content type.
  */
 @Test
 public void testXlsx() throws Exception {
   // try-with-resources closes the fixture stream even if an assertion fails.
   try (InputStream stream =
       Thread.currentThread()
           .getContextClassLoader()
           .getResourceAsStream("testEXCEL.xlsx")) {
     Metacard metacard = transform(stream);
     assertNotNull(metacard);
     assertThat(metacard.getTitle(), is("Simple Excel document"));
     assertThat(convertDate(metacard.getCreatedDate()), is("2007-10-01 16:13:56 UTC"));
     assertThat(convertDate(metacard.getModifiedDate()), is("2008-12-11 16:02:17 UTC"));
     assertNotNull(metacard.getMetadata());
     assertThat(
         metacard.getMetadata(),
         containsString("Sample Excel Worksheet - Numbers and their Squares"));
     assertThat(
         metacard.getContentTypeName(),
         is("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"));
   }
 }
示例#23
0
  /**
   * Queries the catalog and prints a table of the matching metacards to the console.
   *
   * <p>The filter is parsed from {@code cqlFilter} when it is set; otherwise it is a "like" match
   * of {@code searchPhrase} (defaulting to {@code "*"}) against {@link Metacard#ANY_TEXT},
   * honoring {@code caseSensitive}. For every result the ID, modified date, truncated title and,
   * when the search phrase occurs in the metacard's metadata text, a highlighted excerpt are
   * printed.
   *
   * @return always {@code null}
   * @throws Exception if building the filter, running the query or reading a result's metadata
   *     fails
   */
  @Override
  protected Object doExecute() throws Exception {

    String formatString =
        "%1$-33s %2$-26s %3$-" + TITLE_MAX_LENGTH + "s %4$-" + EXCERPT_MAX_LENGTH + "s%n";

    CatalogFacade catalogProvider = getCatalog();

    Filter filter = null;
    if (cqlFilter != null) {
      filter = CQL.toFilter(cqlFilter);
    } else {
      if (searchPhrase == null) {
        searchPhrase = "*";
      }
      if (caseSensitive) {
        filter =
            getFilterBuilder()
                .attribute(Metacard.ANY_TEXT)
                .is()
                .like()
                .caseSensitiveText(searchPhrase);
      } else {
        filter = getFilterBuilder().attribute(Metacard.ANY_TEXT).is().like().text(searchPhrase);
      }
    }

    QueryImpl query = new QueryImpl(filter);

    query.setRequestsTotalResultsCount(true);

    if (numberOfItems > -1) {
      query.setPageSize(numberOfItems);
    }

    // Time only the catalog round trip so the reported duration excludes console rendering.
    long start = System.currentTimeMillis();

    SourceResponse response = catalogProvider.query(new QueryRequestImpl(query));

    long end = System.currentTimeMillis();

    int size = 0;
    if (response.getResults() != null) {
      size = response.getResults().size();
    }

    console.println();
    console.printf(
        " %d result(s) out of %s%d%s in %3.3f seconds",
        size,
        Ansi.ansi().fg(Ansi.Color.CYAN).toString(),
        response.getHits(),
        Ansi.ansi().reset().toString(),
        (end - start) / MILLISECONDS_PER_SECOND);
    console.printf(formatString, "", "", "", "");
    printHeaderMessage(String.format(formatString, ID, DATE, TITLE, EXCERPT));

    // The result list was treated as nullable when counting above; apply the same guard before
    // iterating so a response without results prints only the summary and header.
    if (response.getResults() == null) {
      return null;
    }

    for (Result result : response.getResults()) {
      Metacard metacard = result.getMetacard();

      String title = (metacard.getTitle() != null ? metacard.getTitle() : "N/A");
      String excerpt = "N/A";
      String modifiedDate = "";

      // searchPhrase stays null when a CQL filter was supplied; no excerpt is built then.
      if (searchPhrase != null) {
        if (metacard.getMetadata() != null) {
          XPathHelper helper = new XPathHelper(metacard.getMetadata());
          String indexedText = helper.getDocument().getDocumentElement().getTextContent();
          // Collapse line breaks so the excerpt stays on a single table row.
          indexedText = indexedText.replaceAll("\\r\\n|\\r|\\n", " ");

          // Strip wildcard characters; only the literal part of the phrase can be located.
          String normalizedSearchPhrase = searchPhrase.replaceAll("\\*", "");

          int index = -1;

          if (caseSensitive) {
            index = indexedText.indexOf(normalizedSearchPhrase);
          } else {
            index = indexedText.toLowerCase().indexOf(normalizedSearchPhrase.toLowerCase());
          }

          if (index != -1) {
            int matchEnd = index + normalizedSearchPhrase.length();
            // Clamp to zero: a phrase longer than the excerpt column would otherwise produce a
            // negative context length and make the substring bounds below cross over.
            int contextLength =
                Math.max(0, (EXCERPT_MAX_LENGTH - normalizedSearchPhrase.length() - 8) / 2);
            excerpt =
                "..."
                    + indexedText.substring(Math.max(index - contextLength, 0), index)
                    + Ansi.ansi().fg(Ansi.Color.GREEN).toString()
                    + indexedText.substring(index, matchEnd)
                    + Ansi.ansi().reset().toString()
                    + indexedText.substring(
                        matchEnd, Math.min(indexedText.length(), matchEnd + contextLength))
                    + "...";
          }
        }
      }

      if (metacard.getModifiedDate() != null) {
        modifiedDate =
            new DateTime(metacard.getModifiedDate().getTime()).toString(DATETIME_FORMATTER);
      }

      console.printf(
          formatString,
          metacard.getId(),
          modifiedDate,
          title.substring(0, Math.min(title.length(), TITLE_MAX_LENGTH)),
          excerpt);
    }

    return null;
  }
示例#24
0
 /**
  * Builds a mocked {@link Metacard} whose {@code getMetadata()} is stubbed to return the value
  * of {@code getSample()}.
  *
  * @return the stubbed mock metacard
  */
 private Metacard getMockMetacard() {
   final Metacard stubbedMetacard = mock(Metacard.class);
   when(stubbedMetacard.getMetadata()).thenReturn(getSample());
   return stubbedMetacard;
 }