/**
 * Returns true iff both records hold equal value lists for every field name in
 * {@code fieldsToCheck}; comparison delegates to {@code List.equals} per field.
 */
private boolean areRecordFieldsEqual(Record record1, Record record2, List<String> fieldsToCheck) {
  boolean allEqual = true;
  for (String fieldName : fieldsToCheck) {
    Object values1 = record1.get(fieldName);
    Object values2 = record2.get(fieldName);
    if (!values1.equals(values2)) {
      allEqual = false;
      break; // first mismatch decides the outcome
    }
  }
  return allEqual;
}
@Override
public boolean process(Record record) {
  // Deserializes the single ATTACHMENT_BODY byte[] (an Avro container file)
  // into temporaryRecord. Returns false on any malformed input.
  temporaryRecord = null;
  LOGGER.debug("Record received: {}", record);
  List fields = record.get(Fields.ATTACHMENT_BODY);
  if (fields.size() != 1) {
    LOGGER.error("Record [ {} ] had incorrect number of fields - [{}]", record, fields.size());
    return false;
  }
  try {
    byte[] byteArray = (byte[]) fields.get(0);
    SeekableByteArrayInput inputStream = new SeekableByteArrayInput(byteArray);
    DatumReader<T> userDatumReader = new SpecificDatumReader<T>(this.recordClass);
    // try-with-resources: the original leaked the DataFileReader on every call.
    try (DataFileReader<T> dataFileReader = new DataFileReader<T>(inputStream, userDatumReader)) {
      temporaryRecord = dataFileReader.next();
    }
  } catch (Exception e) {
    // Pass the Throwable as the trailing argument with no placeholder so SLF4J
    // logs the full stack trace (a "{}" placeholder would swallow it via toString).
    LOGGER.error("Unable to process {}, exception: ", record, e);
    return false;
  }
  return true;
}
private void writeContainerless(Record src, OutputStream dst) { try { GenericDatumWriter datumWriter = new GenericDatumWriter(); Encoder encoder = null; Schema schema = null; for (Object attachment : src.get(Fields.ATTACHMENT_BODY)) { Preconditions.checkNotNull(attachment); GenericContainer datum = (GenericContainer) attachment; schema = getSchema(datum, schema); assert schema != null; datumWriter.setSchema(schema); if (encoder == null) { // init if (format == Format.containerlessJSON) { encoder = EncoderFactory.get().jsonEncoder(schema, dst); } else { encoder = EncoderFactory.get().binaryEncoder(dst, null); } assert encoder != null; } datumWriter.write(datum, encoder); } encoder.flush(); } catch (IOException e) { throw new MorphlineRuntimeException(e); } }
// Writes all ATTACHMENT_BODY datums to dst as a single Avro container file.
// The writer is created lazily on the first datum (its schema is needed for
// create()); codec and metadata are applied before create(), as DataFileWriter
// requires. NOTE(review): assumes all attachments share one compatible schema
// (getSchema is fed the previous schema) — confirm against callers.
private void writeContainer(Record src, OutputStream dst) {
  DataFileWriter dataFileWriter = null;
  try {
    try {
      Schema schema = null;
      for (Object attachment : src.get(Fields.ATTACHMENT_BODY)) {
        Preconditions.checkNotNull(attachment);
        GenericContainer datum = (GenericContainer) attachment;
        schema = getSchema(datum, schema);
        assert schema != null;
        if (dataFileWriter == null) { // init on first datum, once schema is known
          GenericDatumWriter datumWriter = new GenericDatumWriter(schema);
          dataFileWriter = new DataFileWriter(datumWriter);
          if (codecFactory != null) {
            dataFileWriter.setCodec(codecFactory);
          }
          // container-file metadata entries must be set before create()
          for (Map.Entry<String, String> entry : metadata.entrySet()) {
            dataFileWriter.setMeta(entry.getKey(), entry.getValue());
          }
          dataFileWriter.create(schema, dst);
        }
        dataFileWriter.append(datum);
      }
      // writer stays null when there were no attachments; nothing to flush then
      if (dataFileWriter != null) {
        dataFileWriter.flush();
      }
    } catch (IOException e) {
      throw new MorphlineRuntimeException(e);
    }
  } finally {
    // quiet close so a close() failure never masks the primary exception
    Closeables.closeQuietly(dataFileWriter);
  }
}
@Override protected boolean doProcess(Record record) { if (preserveExisting && record .getFields() .containsKey( Fields.ATTACHMENT_MIME_TYPE)) {; // we must preserve the existing MIME type } else { List attachments = record.get(Fields.ATTACHMENT_BODY); if (attachments.size() > 0) { Object attachment = attachments.get(0); Preconditions.checkNotNull(attachment); InputStream stream; if (attachment instanceof byte[]) { stream = new ByteArrayInputStream((byte[]) attachment); } else { stream = (InputStream) attachment; } Metadata metadata = new Metadata(); // If you specify the resource name (the filename, roughly) with this // parameter, then Tika can use it in guessing the right MIME type String resourceName = (String) record.getFirstValue(Fields.ATTACHMENT_NAME); if (resourceName != null) { metadata.add(Metadata.RESOURCE_NAME_KEY, resourceName); } // Provide stream's charset as hint to Tika for better auto detection String charset = (String) record.getFirstValue(Fields.ATTACHMENT_CHARSET); if (charset != null) { metadata.add(Metadata.CONTENT_ENCODING, charset); } if (includeMetaData) { for (Entry<String, Object> entry : record.getFields().entries()) { metadata.add(entry.getKey(), entry.getValue().toString()); } } String mimeType = getMediaType(stream, metadata, excludeParameters); record.replaceValues(Fields.ATTACHMENT_MIME_TYPE, mimeType); } } return super.doProcess(record); }