public void indexFile(Document doc) throws IOException { String data64 = org.elasticsearch.common.Base64.encodeFromFile(doc.getContentFilepath()); File file = new File(doc.getContentFilepath()); InputStream fileReader = new FileInputStream(file); double bytes = file.length(); int indexedChars = 1000000; Metadata metadata = new Metadata(); byte[] buffer = new byte[1024 * 8]; ByteArrayOutputStream bos = new ByteArrayOutputStream(); int i; while (-1 != (i = fileReader.read(buffer))) { bos.write(buffer, 0, i); } byte[] data = bos.toByteArray(); String parsedContent; try { // Set the maximum length of strings returned by the parseToString method, -1 sets no limit parsedContent = tika().parseToString(new BytesStreamInput(data, false), metadata, indexedChars); } catch (IOException | TikaException e) { e.printStackTrace(); parsedContent = ""; } XContentBuilder source = jsonBuilder() .startObject() .field("file", data64) .field("filename", doc.getRootFileName()) .field("title", doc.getTitle()) .field("author", doc.getAuthor()) .field("created_date", doc.getDateCreated()) .field("content_type", FilenameUtils.getExtension(doc.getRootFileName())) .field("content_length", bytes) .field("content", parsedContent) .endObject(); IndexResponse idxResp = mClient .prepareIndex() .setIndex(idxName) .setType(idxType) .setId(doc.getHash()) .setSource(source) .setRefresh(true) .execute() .actionGet(); }