public void parse(
      InputStream stream, ContentHandler baseHandler, Metadata metadata, ParseContext context)
      throws IOException, SAXException, TikaException {

    // The metadata and content entries may come in either order, so the
    //  endDocument call is intercepted until every entry has been handled
    EndDocumentShieldingContentHandler shieldedHandler =
        new EndDocumentShieldingContentHandler(baseHandler);

    // Walk the archive entry by entry and dispatch on the entry name
    ZipInputStream archive = new ZipInputStream(stream);
    for (ZipEntry current = archive.getNextEntry();
        current != null;
        current = archive.getNextEntry()) {
      String entryName = current.getName();
      if (entryName.equals("mimetype")) {
        // The entry body is the content type itself
        metadata.set(Metadata.CONTENT_TYPE, IOUtils.toString(archive, "UTF-8"));
      } else if (entryName.equals("meta.xml")) {
        meta.parse(archive, new DefaultHandler(), metadata, context);
      } else if (entryName.endsWith("content.xml")) {
        content.parse(archive, shieldedHandler, metadata, context);
      }
    }

    // Everything has been read: forward the intercepted endDocument, if there was one
    if (shieldedHandler.getEndDocumentWasCalled()) {
      shieldedHandler.reallyEndDocument();
    }
  }
  /**
   * Runs the configured ImageMagick program once per document to convert
   * {@code <pdfOutputPath>/<docName>.pdf[0]} into {@code <outputPath>/<docName>.png}.
   *
   * <p>Each conversion is given at most 30 seconds; a process that has not finished by then is
   * destroyed and an error is logged. A failure to start or run one conversion is logged and the
   * remaining documents are still processed. If the calling thread is interrupted while waiting,
   * the interrupt status is restored and the remaining documents are skipped.
   *
   * @param documents the documents to convert
   * @throws Exception declared by the overridden method
   */
  @Override
  public final void write(List<? extends Document> documents) throws Exception {

    for (Document doc : documents) {
      String[] cmd = {
        imageMagickProg,
        "-density",
        thumbnailDensity,
        "-depth",
        "8",
        "-quality",
        "85",
        "-resize",
        "1600x800",
        pdfOutputPath + File.separator + doc.getDocName() + ".pdf[0]",
        outputPath + File.separator + doc.getDocName() + ".png"
      };

      Process process = null;
      InputStream processInputStream = null;
      try {
        process = new ProcessBuilder(cmd).start();
        // Nothing is ever written to the child's stdin, so close it straight away
        IOUtils.closeQuietly(process.getOutputStream());
        processInputStream = process.getInputStream();
        logStream(processInputStream);
        if (!process.waitFor(30, TimeUnit.SECONDS)) {
          LOG.error(
              "Conversion of document " + doc.getDocName() + " did not finish within 30 seconds");
        }
      } catch (InterruptedException e) {
        // Restore the interrupt status for the caller and stop: any further wait would be
        // interrupted immediately as well. The finally block below still runs.
        Thread.currentThread().interrupt();
        LOG.error("Interrupted while converting document " + doc.getDocName(), e);
        return;
      } catch (IOException e) {
        // Best effort: report the failure and carry on with the next document
        LOG.error("Unable to run the conversion for document " + doc.getDocName(), e);
      } finally {
        if (process != null) {
          IOUtils.closeQuietly(processInputStream);
          // Harmless when the process has already exited; kills it after a timeout or failure
          process.destroy();
        }
      }
    }
  }
  /**
   * Unmarshals a GetRecordsResponse whose single record carries a bounding box with empty
   * LowerCorner and UpperCorner elements, and checks that the record counts and the single
   * metacard are still produced.
   */
  @Test
  public void testUnmarshalGetRecordsResponseConversionWithEmptyBoundingBox() {
    XStream xstream = new XStream(new WstxDriver());
    xstream.setClassLoader(this.getClass().getClassLoader());

    GetRecordsResponseConverter converter = new GetRecordsResponseConverter(mockProvider);
    xstream.registerConverter(converter);
    xstream.alias("GetRecordsResponse", CswRecordCollection.class);

    String responseXml =
        "<csw:GetRecordsResponse xmlns:csw=\"http://www.opengis.net/cat/csw\">\r\n"
            + "  <csw:SearchStatus status=\"subset\" timestamp=\"2013-05-01T02:13:36+0200\"/>\r\n"
            + "  <csw:SearchResults elementSet=\"full\" nextRecord=\"11\" numberOfRecordsMatched=\"479\" numberOfRecordsReturned=\"10\" recordSchema=\"csw:Record\">\r\n"
            + "    <csw:Record xmlns:csw=\"http://www.opengis.net/cat/csw\">\r\n"
            + "      <dc:identifier xmlns:dc=\"http://purl.org/dc/elements/1.1/\">{8C1F6297-EC96-4302-A01E-14988C9149FD}</dc:identifier>\r\n"
            + "      <dc:title xmlns:dc=\"http://purl.org/dc/elements/1.1/\">title 1</dc:title>\r\n"
            + "      <dct:modified xmlns:dct=\"http://purl.org/dc/terms/\">2008-12-15</dct:modified>\r\n"
            + "      <dc:subject xmlns:dc=\"http://purl.org/dc/elements/1.1/\">subject 1</dc:subject>\r\n"
            + "      <dc:subject xmlns:dc=\"http://purl.org/dc/elements/1.1/\">second subject</dc:subject>\r\n"
            + "      <dct:abstract xmlns:dct=\"http://purl.org/dc/terms/\">abstract 1</dct:abstract>\r\n"
            + "      <dc:rights xmlns:dc=\"http://purl.org/dc/elements/1.1/\">copyright 1</dc:rights>\r\n"
            + "      <dc:rights xmlns:dc=\"http://purl.org/dc/elements/1.1/\">copyright 2</dc:rights>\r\n"
            + "      <dc:language xmlns:dc=\"http://purl.org/dc/elements/1.1/\">english</dc:language>      \r\n"
            + "      <ows:BoundingBox xmlns:ows=\"http://www.opengis.net/ows\" crs=\"EPSG:RD_New (28992)\">\r\n"
            + "        <ows:LowerCorner></ows:LowerCorner>\r\n"
            + "        <ows:UpperCorner></ows:UpperCorner>\r\n"
            + "      </ows:BoundingBox>   \r\n"
            + "      <dc:type xmlns:dc=\"http://purl.org/dc/elements/1.1/\">dataset</dc:type>\r\n"
            + "      <dc:format xmlns:dc=\"http://purl.org/dc/elements/1.1/\">Shapefile</dc:format> \r\n"
            + "    </csw:Record>\r\n"
            + "  </csw:SearchResults>\r\n"
            + "</csw:GetRecordsResponse>";

    // Feed the response through XStream as a stream, then release the stream
    InputStream xmlStream = IOUtils.toInputStream(responseXml);
    CswRecordCollection collection = (CswRecordCollection) xstream.fromXML(xmlStream);
    IOUtils.closeQuietly(xmlStream);

    // Counts come from the SearchResults attributes
    assertThat(collection.getNumberOfRecordsReturned(), is(10L));
    assertThat(collection.getNumberOfRecordsMatched(), is(479L));

    // Exactly one csw:Record was supplied
    List<Metacard> records = collection.getCswRecords();
    assertThat(records, not(nullValue()));
    assertThat(records.size(), equalTo(1));
  }
 /**
  * Reads the named sample file from the test resources, parses its bytes with {@code TikaImpl}
  * and asserts that the extracted text is neither null nor empty.
  *
  * @param filename name of the file inside the sample-files resource directory
  * @throws Exception if the resource cannot be read or parsing fails
  */
 protected void assertParseable(String filename) throws Exception {
   String resourcePath =
       "/org/elasticsearch/index/mapper/attachment/test/sample-files/" + filename;
   try (InputStream sample = VariousDocTests.class.getResourceAsStream(resourcePath)) {
     byte[] raw = IOUtils.toByteArray(sample);
     String extracted = TikaImpl.parse(raw, new Metadata(), -1);
     assertThat(extracted, not(isEmptyOrNullString()));
     logger.debug("extracted content: {}", extracted);
   }
 }
Exemple #5
0
  /**
   * Works out the media type of a zip-based container by looking at the names of its entries.
   *
   * <p>Entries are examined in archive order and the first recognised marker decides the result:
   * a {@code mimetype} entry (its content is the type), {@code _rels/.rels} or
   * {@code [Content_Types].xml} (Office Open XML, inspected through POI), {@code
   * buildVersionHistory.plist} (iWork) or {@code META-INF/} (Java archive). When no marker is
   * found the plain zip type is returned.
   *
   * <p>The zip file opened here is always closed before returning. The {@code OPCPackage} opened
   * for an OOXML document is handed to {@code input} as its open container and is therefore not
   * closed by this method.
   *
   * @param input the stream whose backing file is inspected
   * @param metadata not used by this method
   * @return the detected media type, or {@code MediaType.APPLICATION_ZIP} if nothing more specific
   *     was recognised
   * @throws IOException if the file cannot be read, or if it looks like OOXML but is invalid
   */
  public MediaType detect(TikaInputStream input, Metadata metadata) throws IOException {
    ZipFile zip = new ZipFile(input.getFile());
    try {
      for (ZipEntry entry : Collections.list(zip.entries())) {
        // Is it an Open Document file?
        if (entry.getName().equals("mimetype")) {
          InputStream stream = zip.getInputStream(entry);
          try {
            return fromString(IOUtils.toString(stream, "UTF-8"));
          } finally {
            stream.close();
          }
        } else if (entry.getName().equals("_rels/.rels")
            || entry.getName().equals("[Content_Types].xml")) {
          // Office Open XML File
          // Ask POI to open and investigate it for us
          try {
            OPCPackage pkg = OPCPackage.open(input.getFile().toString());
            input.setOpenContainer(pkg);

            PackageRelationshipCollection core =
                pkg.getRelationshipsByType(ExtractorFactory.CORE_DOCUMENT_REL);
            if (core.size() != 1) {
              throw new IOException(
                  "Invalid OOXML Package received - expected 1 core document, found "
                      + core.size());
            }

            // Get the type of the core document part
            PackagePart corePart = pkg.getPart(core.getRelationship(0));
            String coreType = corePart.getContentType();

            // Turn that into the type of the overall document
            String docType = coreType.substring(0, coreType.lastIndexOf('.'));
            return fromString(docType);
          } catch (InvalidFormatException e) {
            // Keep the original exception as the cause so the stack trace is not lost
            throw new IOException(
                "Office Open XML File detected, but corrupted - " + e.getMessage(), e);
          }
        } else if (entry.getName().equals("buildVersionHistory.plist")) {
          // This is an iWork document

          // Reset and ask; the reopened file is the one closed by the finally block
          zip.close();
          zip = new ZipFile(input.getFile());
          return IWorkPackageParser.identifyType(zip);
        } else if (entry.getName().equals("META-INF/")) {
          // Java Jar
          return MediaType.application("java-archive");
        }
      }

      return MediaType.APPLICATION_ZIP;
    } finally {
      // Closing an already closed ZipFile has no effect, so this is safe on every path
      zip.close();
    }
  }
 /**
  * Parses the named sample file with {@code TikaImpl} and expects the parse to fail with an
  * exception whose message contains {@code expectedMessage}.
  *
  * @param filename name of the file inside the sample-files resource directory
  * @param expectedMessage text that must occur in the exception message
  * @throws Exception the caught exception itself when its message is null or does not contain
  *     the expected text
  */
 void assertException(String filename, String expectedMessage) throws Exception {
   String resourcePath =
       "/org/elasticsearch/index/mapper/attachment/test/sample-files/" + filename;
   try (InputStream sample = VariousDocTests.class.getResourceAsStream(resourcePath)) {
     byte[] raw = IOUtils.toByteArray(sample);
     TikaImpl.parse(raw, new Metadata(), -1);
     fail("expected exception");
   } catch (Exception e) {
     String message = e.getMessage();
     boolean isExpected = message != null && message.contains(expectedMessage);
     if (!isExpected) {
       // unexpected
       throw e;
     }
     // otherwise ok: this is the failure the caller asked for
   }
 }
  /**
   * Encodes the specified calendar events into iCal formatted text.
   *
   * <p>Each event becomes a {@code VEvent} carrying a generated UID, description, classification
   * and priority, plus recurrence rule and exception dates, location, URL, categories and
   * attendees when the event has them.
   *
   * @param events the events to encode; must be neither null nor empty
   * @return the iCal text, decoded from the output buffer as UTF-8
   * @throws IllegalArgumentException if {@code events} is null or empty
   * @throws EncodingException if an event URL or attendee is not a valid URI, or if writing the
   *     calendar fails
   */
  @Override
  @SuppressWarnings("unchecked")
  public String encode(List<CalendarEvent> events) {

    if (events == null || events.isEmpty()) {
      throw new IllegalArgumentException("The calendar events must be defined to encode them");
    }

    // Calendar header properties
    Calendar calendarIcs = new Calendar();
    calendarIcs.getProperties().add(new ProdId("-//Silverpeas//iCal4j 1.1//FR"));
    calendarIcs.getProperties().add(Version.VERSION_2_0);
    calendarIcs.getProperties().add(CalScale.GREGORIAN);

    List<VEvent> vEvents = new ArrayList<VEvent>();
    for (CalendarEvent source : events) {
      Date begin = anICal4JDateCodec().encode(source.getStartDate());
      Date end = anICal4JDateCodec().encode(source.getEndDate());

      // An all-day event starting and ending on the same date only carries its start date
      VEvent vEvent =
          (source.isOnAllDay() && begin.equals(end))
              ? new VEvent(begin, source.getTitle())
              : new VEvent(begin, end, source.getTitle());

      // Unique identifier
      vEvent.getProperties().add(generator.generateUid());

      // Recurrence rule and its exception dates, for recurring events only
      if (source.isRecurring()) {
        CalendarEventRecurrence recurrence = source.getRecurrence();
        Recur rule = anICal4JRecurrenceCodec().encode(recurrence);
        vEvent.getProperties().add(new RRule(rule));
        vEvent.getProperties().add(exceptionDatesFrom(recurrence));
      }

      // Description, classification and priority are always written
      vEvent.getProperties().add(new Description(source.getDescription()));
      vEvent.getProperties().add(new Clazz(source.getAccessLevel()));
      vEvent.getProperties().add(new Priority(source.getPriority()));

      // Location, when one is set
      if (!source.getLocation().isEmpty()) {
        vEvent.getProperties().add(new Location(source.getLocation()));
      }

      // URL, when one is set
      if (source.getUrl() != null) {
        try {
          vEvent.getProperties().add(new Url(source.getUrl().toURI()));
        } catch (URISyntaxException ex) {
          throw new EncodingException(ex.getMessage(), ex);
        }
      }

      // Categories, when there is at least one
      TextList categories = new TextList(source.getCategories().asArray());
      if (!categories.isEmpty()) {
        vEvent.getProperties().add(new Categories(categories));
      }

      // Attendees
      for (String attendeeUri : source.getAttendees().asList()) {
        try {
          vEvent.getProperties().add(new Attendee(attendeeUri));
        } catch (URISyntaxException ex) {
          throw new EncodingException("Malformed attendee URI: " + attendeeUri, ex);
        }
      }

      vEvents.add(vEvent);
    }
    calendarIcs.getComponents().addAll(vEvents);

    // Serialise the calendar into an in-memory buffer and return it as text
    ByteArrayOutputStream buffer = new ByteArrayOutputStream(10240);
    try {
      new CalendarOutputter().output(calendarIcs, buffer);
      return buffer.toString(CharEncoding.UTF_8);
    } catch (Exception ex) {
      throw new EncodingException(
          "The encoding of the events in iCal formatted text has failed!", ex);
    } finally {
      IOUtils.closeQuietly(buffer);
    }
  }
  /**
   * Unmarshals a GetRecordsResponse that declares its namespaces on the root element and checks
   * that the converter hands the csw, dc, dct and ows namespace declarations to the provider
   * through the unmarshalling context.
   */
  @Test
  public void testUnmarshalParseXmlNamespaces() throws XmlPullParserException {

    XStream xstream = new XStream(new WstxDriver());
    xstream.setClassLoader(this.getClass().getClassLoader());

    xstream.registerConverter(new GetRecordsResponseConverter(mockProvider));
    xstream.alias("csw:GetRecordsResponse", CswRecordCollection.class);

    String responseXml =
        "<?xml version='1.0' encoding='UTF-8'?>"
            + "<csw:GetRecordsResponse "
            + "xmlns:dc=\"http://purl.org/dc/elements/1.1/\" "
            + "xmlns:dct=\"http://purl.org/dc/terms/\" "
            + "xmlns:ows=\"http://www.opengis.net/ows\" "
            + "xmlns:csw=\"http://www.opengis.net/cat/csw/2.0.2\" "
            + "version=\"2.0.2\"><csw:SearchStatus "
            + "timestamp=\"2014-11-11T10:53:32.152-06:00\"/>"
            + "<csw:SearchResults numberOfRecordsMatched=\"1\" "
            + "numberOfRecordsReturned=\"1\" nextRecord=\"0\" "
            + "recordSchema=\"http://www.opengis.net/cat/csw/2.0.2\">"
            + "<csw:Record>\n"
            + "<dc:identifier>0a2e1b1d2a3755e70a96d61e6bddbc5d</dc:identifier>"
            + "<dct:bibliographicCitation>0a2e1b1d2a3755e70a96d61e6bddbc5d</dct:bibliographicCitation>"
            + "<dc:title>US woman attacks Gauguin painting</dc:title>"
            + "<dct:alternative>US woman attacks Gauguin painting</dct:alternative>"
            + "<dc:type>video</dc:type><dc:date>2011-04-06T04:49:20.230-05:00</dc:date>"
            + "<dct:modified>2011-04-06T04:49:20.230-05:00</dct:modified>"
            + "<dct:created>2011-04-06T04:49:20.230-05:00</dct:created>"
            + "<dct:dateAccepted>2011-04-06T04:48:26.180-05:00</dct:dateAccepted>"
            + "<dct:dateCopyrighted>2011-04-06T04:48:26.180-05:00</dct:dateCopyrighted><"
            + "dct:dateSubmitted>2011-04-06T04:49:20.230-05:00</dct:dateSubmitted>"
            + "<dct:issued>2011-04-06T04:49:20.230-05:00</dct:issued>"
            + "<dc:publisher>ddf.distribution</dc:publisher>"
            + "<ows:BoundingBox crs=\"urn:x-ogc:def:crs:EPSG:6.11:4326\">\n"
            + "    <ows:LowerCorner>-50.5056430529222 84.0285103635943</ows:LowerCorner>"
            + "<ows:UpperCorner>-50.5056430529222 84.0285103635943</ows:UpperCorner>"
            + "</ows:BoundingBox></csw:Record><"
            + "/csw:SearchResults>"
            + "</csw:GetRecordsResponse>";
    InputStream xmlStream = IOUtils.toInputStream(responseXml);

    ArgumentCaptor<UnmarshallingContext> contextCaptor =
        ArgumentCaptor.forClass(UnmarshallingContext.class);

    // Drive the converter through an XPP reader rather than XStream's own driver
    HierarchicalStreamReader pullReader =
        new XppReader(
            new InputStreamReader(xmlStream), XmlPullParserFactory.newInstance().newPullParser());
    xstream.unmarshal(pullReader, null, null);
    IOUtils.closeQuietly(xmlStream);

    // The provider must have been invoked exactly once; grab the context it was given
    verify(mockProvider, times(1))
        .unmarshal(any(HierarchicalStreamReader.class), contextCaptor.capture());

    UnmarshallingContext capturedContext = contextCaptor.getValue();

    assertThat(capturedContext, notNullValue());
    assertThat(capturedContext.get(CswConstants.NAMESPACE_DECLARATIONS), is(Map.class));
    Map<String, String> declared =
        (Map) capturedContext.get(CswConstants.NAMESPACE_DECLARATIONS);

    // Every key is the xmlns marker, the delimiter and then the namespace prefix
    String keyStart = CswConstants.XMLNS + CswConstants.NAMESPACE_DELIMITER;
    assertThat(
        declared.get(keyStart + CswConstants.CSW_NAMESPACE_PREFIX),
        is(CswConstants.CSW_OUTPUT_SCHEMA));
    assertThat(
        declared.get(keyStart + CswConstants.DUBLIN_CORE_NAMESPACE_PREFIX),
        is(CswConstants.DUBLIN_CORE_SCHEMA));
    assertThat(
        declared.get(keyStart + CswConstants.DUBLIN_CORE_TERMS_NAMESPACE_PREFIX),
        is(CswConstants.DUBLIN_CORE_TERMS_SCHEMA));
    assertThat(
        declared.get(keyStart + CswConstants.OWS_NAMESPACE_PREFIX),
        is(CswConstants.OWS_NAMESPACE));
  }
  /**
   * Runs the complete unmarshalling chain: the GetRecordsResponseConverter is registered with a
   * real CswTransformProvider whose input manager mock hands back a CswRecordConverter, so the two
   * records in the response are turned into metacards whose attributes are then verified.
   */
  @Test
  public void testUnmarshalGetRecordsResponseFull() {

    XStream xstream = new XStream(new WstxDriver());
    xstream.setClassLoader(this.getClass().getClassLoader());

    CswTransformProvider transformProvider = new CswTransformProvider(null, mockInputManager);

    when(mockInputManager.getTransformerBySchema(anyString())).thenReturn(new CswRecordConverter());

    xstream.registerConverter(new GetRecordsResponseConverter(transformProvider));
    xstream.alias("GetRecordsResponse", CswRecordCollection.class);

    String responseXml =
        "<csw:GetRecordsResponse xmlns:csw=\"http://www.opengis.net/cat/csw\">\r\n"
            + "  <csw:SearchStatus status=\"subset\" timestamp=\"2013-05-01T02:13:36+0200\"/>\r\n"
            + "  <csw:SearchResults elementSet=\"full\" nextRecord=\"11\" numberOfRecordsMatched=\"479\" numberOfRecordsReturned=\"10\" recordSchema=\"csw:Record\">\r\n"
            + "    <csw:Record xmlns:csw=\"http://www.opengis.net/cat/csw\">\r\n"
            + "      <dc:identifier xmlns:dc=\"http://purl.org/dc/elements/1.1/\">{8C1F6297-EC96-4302-A01E-14988C9149FD}</dc:identifier>\r\n"
            + "      <dc:title xmlns:dc=\"http://purl.org/dc/elements/1.1/\">title 1</dc:title>\r\n"
            + "      <dct:modified xmlns:dct=\"http://purl.org/dc/terms/\">2008-12-15</dct:modified>\r\n"
            + "      <dc:subject xmlns:dc=\"http://purl.org/dc/elements/1.1/\">subject 1</dc:subject>\r\n"
            + "      <dc:subject xmlns:dc=\"http://purl.org/dc/elements/1.1/\">second subject</dc:subject>\r\n"
            + "      <dct:abstract xmlns:dct=\"http://purl.org/dc/terms/\">abstract 1</dct:abstract>\r\n"
            + "      <dc:rights xmlns:dc=\"http://purl.org/dc/elements/1.1/\">copyright 1</dc:rights>\r\n"
            + "      <dc:rights xmlns:dc=\"http://purl.org/dc/elements/1.1/\">copyright 2</dc:rights>\r\n"
            + "      <dc:language xmlns:dc=\"http://purl.org/dc/elements/1.1/\">english</dc:language>      \r\n"
            + "      <ows:BoundingBox xmlns:ows=\"http://www.opengis.net/ows\" crs=\"EPSG:RD_New (28992)\">\r\n"
            + "        <ows:LowerCorner>5.121 52.139</ows:LowerCorner>\r\n"
            + "        <ows:UpperCorner>4.468 52.517</ows:UpperCorner>\r\n"
            + "      </ows:BoundingBox>   \r\n"
            + "      <dc:type xmlns:dc=\"http://purl.org/dc/elements/1.1/\">dataset</dc:type>\r\n"
            + "      <dc:format xmlns:dc=\"http://purl.org/dc/elements/1.1/\">Shapefile</dc:format> \r\n"
            + "    </csw:Record>\r\n"
            + "    <csw:Record xmlns:csw=\"http://www.opengis.net/cat/csw\">\r\n"
            + "      <dc:identifier xmlns:dc=\"http://purl.org/dc/elements/1.1/\">{23362852-F370-4369-B0B2-BE74B2859614}</dc:identifier>\r\n"
            + "      <dc:title xmlns:dc=\"http://purl.org/dc/elements/1.1/\">mc2 title</dc:title>\r\n"
            + "      <dct:modified xmlns:dct=\"http://purl.org/dc/terms/\">2010-12-15</dct:modified>\r\n"
            + "      <dc:subject xmlns:dc=\"http://purl.org/dc/elements/1.1/\">first subject</dc:subject>\r\n"
            + "      <dc:subject xmlns:dc=\"http://purl.org/dc/elements/1.1/\">subject 2</dc:subject>\r\n"
            + "      <dct:abstract xmlns:dct=\"http://purl.org/dc/terms/\">mc2 abstract</dct:abstract>\r\n"
            + "      <dc:rights xmlns:dc=\"http://purl.org/dc/elements/1.1/\">first copyright</dc:rights>\r\n"
            + "      <dc:rights xmlns:dc=\"http://purl.org/dc/elements/1.1/\">second copyright</dc:rights>\r\n"
            + "      <dc:language xmlns:dc=\"http://purl.org/dc/elements/1.1/\">english</dc:language>\r\n"
            + "      <ows:BoundingBox xmlns:ows=\"http://www.opengis.net/ows\" crs=\"EPSG:RD_New (28992)\">\r\n"
            + "        <ows:LowerCorner>6.121 53.139</ows:LowerCorner>\r\n"
            + "        <ows:UpperCorner>5.468 53.517</ows:UpperCorner>\r\n"
            + "      </ows:BoundingBox>\r\n"
            + "      <dc:type xmlns:dc=\"http://purl.org/dc/elements/1.1/\">dataset 2</dc:type>\r\n"
            + "      <dc:format xmlns:dc=\"http://purl.org/dc/elements/1.1/\">Shapefile 2</dc:format>\r\n"
            + "    </csw:Record>\r\n"
            + "  </csw:SearchResults>\r\n"
            + "</csw:GetRecordsResponse>";
    InputStream xmlStream = IOUtils.toInputStream(responseXml);
    CswRecordCollection collection = (CswRecordCollection) xstream.fromXML(xmlStream);
    IOUtils.closeQuietly(xmlStream);

    List<Metacard> records = collection.getCswRecords();
    assertThat(records, not(nullValue()));
    assertThat(records.size(), equalTo(2));

    // ---- first record ----
    Metacard firstCard = records.get(0);
    assertThat(firstCard, not(nullValue()));

    String firstId = "{8C1F6297-EC96-4302-A01E-14988C9149FD}";
    String firstModified = "2008-12-15";
    String firstPolygon =
        "POLYGON((52.139 5.121, 52.517 5.121, 52.517 4.468, 52.139 4.468, 52.139 5.121))";

    Map<String, Object> firstExpected = new HashMap<String, Object>();
    firstExpected.put(Metacard.ID, firstId);
    firstExpected.put(CswRecordMetacardType.CSW_IDENTIFIER, new String[] {firstId});
    firstExpected.put(Metacard.TITLE, "title 1");
    firstExpected.put(CswRecordMetacardType.CSW_TITLE, new String[] {"title 1"});
    firstExpected.put(CswRecordMetacardType.CSW_MODIFIED, new String[] {firstModified});
    firstExpected.put(
        Metacard.MODIFIED,
        ISODateTimeFormat.dateOptionalTimeParser().parseDateTime(firstModified).toDate());
    firstExpected.put(
        CswRecordMetacardType.CSW_SUBJECT, new String[] {"subject 1", "second subject"});
    firstExpected.put(CswRecordMetacardType.CSW_ABSTRACT, new String[] {"abstract 1"});
    firstExpected.put(
        CswRecordMetacardType.CSW_RIGHTS, new String[] {"copyright 1", "copyright 2"});
    firstExpected.put(CswRecordMetacardType.CSW_LANGUAGE, new String[] {"english"});
    firstExpected.put(CswRecordMetacardType.CSW_TYPE, "dataset");
    firstExpected.put(CswRecordMetacardType.CSW_FORMAT, new String[] {"Shapefile"});
    firstExpected.put(Metacard.GEOGRAPHY, firstPolygon);
    firstExpected.put(CswRecordMetacardType.OWS_BOUNDING_BOX, new String[] {firstPolygon});
    assertMetacard(firstCard, firstExpected);

    firstExpected.clear();

    // ---- second record ----
    Metacard secondCard = records.get(1);
    assertThat(secondCard, not(nullValue()));

    String secondId = "{23362852-F370-4369-B0B2-BE74B2859614}";
    String secondModified = "2010-12-15";
    String secondPolygon =
        "POLYGON((53.139 6.121, 53.517 6.121, 53.517 5.468, 53.139 5.468, 53.139 6.121))";

    Map<String, Object> secondExpected = new HashMap<String, Object>();
    secondExpected.put(Metacard.ID, secondId);
    secondExpected.put(CswRecordMetacardType.CSW_IDENTIFIER, new String[] {secondId});
    secondExpected.put(Metacard.TITLE, "mc2 title");
    secondExpected.put(CswRecordMetacardType.CSW_TITLE, new String[] {"mc2 title"});
    secondExpected.put(CswRecordMetacardType.CSW_MODIFIED, new String[] {secondModified});
    secondExpected.put(
        Metacard.MODIFIED,
        ISODateTimeFormat.dateOptionalTimeParser().parseDateTime(secondModified).toDate());
    secondExpected.put(
        CswRecordMetacardType.CSW_SUBJECT, new String[] {"first subject", "subject 2"});
    secondExpected.put(CswRecordMetacardType.CSW_ABSTRACT, new String[] {"mc2 abstract"});
    secondExpected.put(
        CswRecordMetacardType.CSW_RIGHTS, new String[] {"first copyright", "second copyright"});
    secondExpected.put(CswRecordMetacardType.CSW_LANGUAGE, new String[] {"english"});
    secondExpected.put(CswRecordMetacardType.CSW_TYPE, "dataset 2");
    secondExpected.put(CswRecordMetacardType.CSW_FORMAT, new String[] {"Shapefile 2"});
    secondExpected.put(Metacard.GEOGRAPHY, secondPolygon);
    secondExpected.put(CswRecordMetacardType.OWS_BOUNDING_BOX, new String[] {secondPolygon});
    assertMetacard(secondCard, secondExpected);

    secondExpected.clear();
  }
Exemple #10
0
    /**
     * Saves one embedded document as a file below {@code extractDir}.
     *
     * <p>The file name is taken from the resource-name metadata entry, or generated from a running
     * counter when there is none. If the name has no dot and a content type was detected, the
     * extension registered for that type is appended. If a relationship id is present and the
     * name does not already start with it, the id is prepended.
     *
     * <p>Both the name and the relationship id come from the document being processed, so the
     * resulting path is checked against the extraction directory; an entry that would be written
     * outside of it is skipped with a warning. Failures while writing the file are logged and
     * ignored so that one bad attachment does not stop the extraction.
     *
     * @param inputStream the embedded document's content
     * @param contentHandler not used by this method
     * @param metadata metadata of the embedded document (resource name, relationship id)
     * @param outputHtml not used by this method
     * @throws SAXException declared for the caller's contract; not thrown directly here
     * @throws IOException if type detection fails, the target path cannot be resolved, the target
     *     directory cannot be created or the output file cannot be closed
     */
    public void parseEmbedded(
        InputStream inputStream,
        ContentHandler contentHandler,
        Metadata metadata,
        boolean outputHtml)
        throws SAXException, IOException {
      String name = metadata.get(Metadata.RESOURCE_NAME_KEY);

      if (name == null) {
        name = "file" + count++;
      }

      MediaType contentType = detector.detect(inputStream, metadata);

      if (name.indexOf('.') == -1 && contentType != null) {
        try {
          name += config.getMimeRepository().forName(contentType.toString()).getExtension();
        } catch (MimeTypeException e) {
          // The extension is only a convenience; keep the bare name
          logger.warn("Unable to determine a file extension for content type " + contentType, e);
        }
      }

      String relID = metadata.get(Metadata.EMBEDDED_RELATIONSHIP_ID);
      if (relID != null && !name.startsWith(relID)) {
        name = relID + "_" + name;
      }

      File outputFile = new File(extractDir, name);

      // Refuse names such as "../../x" that would resolve outside the extraction directory
      File extractRoot = new File(extractDir, ".").getCanonicalFile();
      if (!outputFile.getCanonicalFile().toPath().startsWith(extractRoot.toPath())) {
        logger.warn(
            "Skipping embedded file '" + name + "': it resolves outside of " + extractRoot);
        return;
      }

      File parent = outputFile.getParentFile();
      if (parent != null && !parent.exists() && !parent.mkdirs()) {
        throw new IOException("unable to create directory \"" + parent + "\"");
      }
      System.out.println("Extracting '" + name + "' (" + contentType + ") to " + outputFile);

      FileOutputStream os = null;

      try {
        os = new FileOutputStream(outputFile);

        if (inputStream instanceof TikaInputStream
            && ((TikaInputStream) inputStream).getOpenContainer() instanceof DirectoryEntry) {
          // The embedded document is an open POIFS directory: rebuild it as its own file system
          TikaInputStream tin = (TikaInputStream) inputStream;
          POIFSFileSystem fs = new POIFSFileSystem();
          copy((DirectoryEntry) tin.getOpenContainer(), fs.getRoot());
          fs.writeFilesystem(os);
        } else {
          IOUtils.copy(inputStream, os);
        }
      } catch (Exception e) {
        logger.warn("Ignoring unexpected exception trying to save embedded file " + name, e);
      } finally {
        if (os != null) {
          os.close();
        }
      }
    }