Пример #1
0
 /**
  * Compares two records field by field, restricted to the given field names.
  *
  * @param record1 the record whose field values are used as the left-hand side of the comparison
  * @param record2 the record whose field values are used as the right-hand side of the comparison
  * @param fieldsToCheck names of the fields to compare
  * @return {@code false} as soon as one listed field differs between the records;
  *     {@code true} if every listed field compares equal (including when the list is empty)
  */
 private boolean areRecordFieldsEqual(Record record1, Record record2, List<String> fieldsToCheck) {
   for (String fieldName : fieldsToCheck) {
     Object expectedValues = record1.get(fieldName);
     Object actualValues = record2.get(fieldName);
     boolean sameValues = expectedValues.equals(actualValues);
     if (!sameValues) {
       // Stop at the first mismatch; the remaining fields cannot change the outcome.
       return false;
     }
   }
   return true;
 }
Пример #2
0
  /**
   * Decodes the single {@code ATTACHMENT_BODY} value of the record as an Avro data file and keeps
   * the first datum it yields in {@code temporaryRecord}.
   *
   * <p>{@code temporaryRecord} is reset to {@code null} on entry, so it stays {@code null}
   * whenever this method returns {@code false}.
   *
   * @param record the record expected to carry exactly one {@code byte[]} attachment body
   * @return {@code true} if a datum was read; {@code false} if the record does not have exactly
   *     one attachment body or if reading it fails for any reason (the failure is logged)
   */
  @Override
  public boolean process(Record record) {
    temporaryRecord = null;
    LOGGER.debug("Record received: {}", record);

    List fields = record.get(Fields.ATTACHMENT_BODY);
    if (fields.size() != 1) {
      LOGGER.error("Record [ {} ] had incorrect number of fields - [{}]", record, fields.size());
      return false;
    }

    // Declared outside the try so the finally block can release it on every path.
    DataFileReader<T> dataFileReader = null;
    try {
      byte[] byteArray = (byte[]) fields.get(0);
      SeekableByteArrayInput inputStream = new SeekableByteArrayInput(byteArray);
      DatumReader<T> userDatumReader = new SpecificDatumReader<T>(this.recordClass);
      dataFileReader = new DataFileReader<T>(inputStream, userDatumReader);
      temporaryRecord = dataFileReader.next();
    } catch (Exception e) {
      LOGGER.error("Unable to process {}, exception: {}", record, e);
      return false;
    } finally {
      if (dataFileReader != null) {
        try {
          dataFileReader.close();
        } catch (Exception e) {
          // A close failure must not change the result that was already determined above.
          LOGGER.warn("Unable to close Avro reader for {}", record, e);
        }
      }
    }

    return true;
  }
 /**
  * Serializes every {@code ATTACHMENT_BODY} value of {@code src} to {@code dst} without an Avro
  * container file around it.
  *
  * <p>The encoder is created lazily from the schema of the first attachment: a JSON encoder when
  * {@code format} is {@code Format.containerlessJSON}, otherwise a binary encoder. If the record
  * has no attachments, nothing is written and the method returns normally.
  *
  * @param src record whose attachment bodies are expected to be {@code GenericContainer} instances
  * @param dst stream that receives the encoded data; it is flushed through the encoder but not
  *     closed here
  * @throws MorphlineRuntimeException if encoding or flushing fails with an {@code IOException}
  */
 private void writeContainerless(Record src, OutputStream dst) {
   try {
     GenericDatumWriter datumWriter = new GenericDatumWriter();
     Encoder encoder = null;
     Schema schema = null;
     for (Object attachment : src.get(Fields.ATTACHMENT_BODY)) {
       Preconditions.checkNotNull(attachment);
       GenericContainer datum = (GenericContainer) attachment;
       schema = getSchema(datum, schema);
       assert schema != null;
       datumWriter.setSchema(schema);
       if (encoder == null) { // init
         if (format == Format.containerlessJSON) {
           encoder = EncoderFactory.get().jsonEncoder(schema, dst);
         } else {
           encoder = EncoderFactory.get().binaryEncoder(dst, null);
         }
         assert encoder != null;
       }
       datumWriter.write(datum, encoder);
     }
     // The encoder only exists once at least one attachment has been seen; with no attachments
     // there is nothing to flush, and calling flush() would throw a NullPointerException.
     if (encoder != null) {
       encoder.flush();
     }
   } catch (IOException e) {
     throw new MorphlineRuntimeException(e);
   }
 }
 /**
  * Writes every {@code ATTACHMENT_BODY} value of {@code src} to {@code dst} as an Avro container
  * file.
  *
  * <p>The container writer is created when the first attachment is seen, using that attachment's
  * schema, the configured {@code codecFactory} (if any) and all {@code metadata} entries. If the
  * record has no attachments, no writer is created and nothing is written.
  *
  * @param src record whose attachment bodies are expected to be {@code GenericContainer} instances
  * @param dst stream that receives the container file
  * @throws MorphlineRuntimeException if writing or flushing fails with an {@code IOException}
  */
 private void writeContainer(Record src, OutputStream dst) {
   DataFileWriter containerWriter = null;
   try {
     Schema currentSchema = null;
     for (Object body : src.get(Fields.ATTACHMENT_BODY)) {
       Preconditions.checkNotNull(body);
       GenericContainer container = (GenericContainer) body;
       currentSchema = getSchema(container, currentSchema);
       assert currentSchema != null;
       if (containerWriter == null) {
         // First attachment: set up the container writer before anything is appended.
         // The writer is assigned before it is configured so that the finally block
         // still closes it if configuration or creation fails.
         containerWriter = new DataFileWriter(new GenericDatumWriter(currentSchema));
         if (codecFactory != null) {
           containerWriter.setCodec(codecFactory);
         }
         for (Map.Entry<String, String> metaEntry : metadata.entrySet()) {
           containerWriter.setMeta(metaEntry.getKey(), metaEntry.getValue());
         }
         containerWriter.create(currentSchema, dst);
       }
       containerWriter.append(container);
     }
     if (containerWriter != null) {
       containerWriter.flush();
     }
   } catch (IOException e) {
     throw new MorphlineRuntimeException(e);
   } finally {
     Closeables.closeQuietly(containerWriter);
   }
 }
Пример #5
0
    /**
     * Determines the MIME type of the record's first attachment body and stores it in
     * {@code ATTACHMENT_MIME_TYPE}, then hands the record to the superclass.
     *
     * <p>Detection is skipped when {@code preserveExisting} is set and the record already has an
     * {@code ATTACHMENT_MIME_TYPE} field, or when the record has no attachment body.
     *
     * @param record the record to inspect and update
     * @return the result of {@code super.doProcess(record)}
     */
    @Override
    protected boolean doProcess(Record record) {
      // An existing MIME type must be left untouched when the command is configured to preserve it.
      boolean keepExistingMimeType =
          preserveExisting && record.getFields().containsKey(Fields.ATTACHMENT_MIME_TYPE);

      if (!keepExistingMimeType) {
        List bodies = record.get(Fields.ATTACHMENT_BODY);
        if (!bodies.isEmpty()) {
          Object body = bodies.get(0);
          Preconditions.checkNotNull(body);
          InputStream stream =
              (body instanceof byte[])
                  ? new ByteArrayInputStream((byte[]) body)
                  : (InputStream) body;

          Metadata hints = new Metadata();

          // The resource name (roughly, the filename) lets Tika take it into account
          // when guessing the MIME type.
          String resourceName = (String) record.getFirstValue(Fields.ATTACHMENT_NAME);
          if (resourceName != null) {
            hints.add(Metadata.RESOURCE_NAME_KEY, resourceName);
          }

          // The stream's charset is passed to Tika as a hint for better auto detection.
          String charset = (String) record.getFirstValue(Fields.ATTACHMENT_CHARSET);
          if (charset != null) {
            hints.add(Metadata.CONTENT_ENCODING, charset);
          }

          if (includeMetaData) {
            // Forward every record field as an additional metadata entry.
            for (Entry<String, Object> field : record.getFields().entries()) {
              hints.add(field.getKey(), field.getValue().toString());
            }
          }

          String detectedType = getMediaType(stream, hints, excludeParameters);
          record.replaceValues(Fields.ATTACHMENT_MIME_TYPE, detectedType);
        }
      }
      return super.doProcess(record);
    }