/**
 * Resolves the character encoding to use when importing the given data file.
 *
 * <p>If the table definition carries an explicit {@code "encoding"} property, that value wins.
 * Otherwise the encoding is detected from the file itself; on any I/O or detection failure the
 * method logs the error and falls back to {@code "UTF-8"}.
 *
 * @param tableDefinition definition that may carry an explicit {@code "encoding"} property
 * @param filePath the data file whose encoding should be detected
 * @return the configured, detected, or default ({@code "UTF-8"}) encoding name
 */
protected String getDataImportEncoding(TableDefinition tableDefinition, File filePath) {
    String encoding = tableDefinition.getProperty("encoding");
    if (encoding == null) {
        try {
            encoding = FileUtils.getCharacterEncoding(filePath);
        } catch (IOException e) {
            // UnsupportedEncodingException is a subclass of IOException, so a single
            // catch covers both failure modes the detection can raise.
            logger.error("Unable to read the encoding. using default encoding UTF-8", e);
            encoding = "UTF-8";
        }
    }
    return encoding;
}
/**
 * Returns the quote character configured for this table as a one-character string.
 *
 * <p>The {@code "quotechar"} property stores the character's numeric code point (e.g. {@code "34"}
 * for a double quote). An empty property value means "no quoting" and yields the empty string.
 *
 * @param tableDefinition definition that may carry a {@code "quotechar"} property
 * @return the quote character as a string, or {@code ""} when quoting is disabled
 * @throws NumberFormatException if the property value is non-empty but not a valid integer
 */
protected final String getQuoteCharacter(TableDefinition tableDefinition) {
    String quoteCharVal = tableDefinition.getProperty("quotechar", this.getDefaultQuoteCharVal());
    if (quoteCharVal.isEmpty()) {
        return "";
    }
    // parseInt avoids the needless Integer boxing of valueOf(...).intValue().
    return Character.toString((char) Integer.parseInt(quoteCharVal));
}
private void blockDataFile(DataFile dataFile, RetailerSite retailerSite, int blockSize) { File filePath = null; PreprocessorHandlerContext context = null; try { dataFile.setStartTime(new Date()); // TODO srfisk 2012/07/31 Only split CSV, might split images zip // when they are very large. if (dataFile.getImportType().compareTo(ImportType.images_zip.getTableName()) != 0) { filePath = configurationService.getFileSystem().getDirectory(dataFile.getFilePath(), true); TableDefinition tableDefinition = this.getTableDefinition(retailerSite, dataFile); ImportType importType = ImportType.getTypeFromTableName(dataFile.getImportType()); String encoding = this.getDataImportEncoding(tableDefinition, filePath); String separator = Character.toString( (char) Integer.valueOf(tableDefinition.getProperty("separator", "9")).intValue()); String quotechar = this.getQuoteCharacter(tableDefinition); int blockCount = 1; int totalRowCount = 0; boolean noSplit = false; boolean done = false; context = this.createContext( retailerSite, dataFile, filePath, tableDefinition, encoding, separator, quotechar); while (!done) { context.startWriters(blockCount); context.writeHeader(); Row row = null; while ((row = context.reader.readNext()) != null && context.rowCount < blockSize) { context.writeRow(row); } context.closeWriters(); blockCount++; if (context.isEmptyBlock()) { done = true; continue; } context.createDataFiles( retailerSite, importType.getTableName(), importType.getPriority()); context.inQueueDataFiles(); } dataFile.setHost(null); dataFile.setNumRows(totalRowCount); if (!noSplit) { dataFile.setEndDate(new Date()); dataFile.setEndTime(new Date()); dataFile.setStatus(this.getEndStatus()); } else { dataFile.setStatus(FileStatus.IN_QUEUE); } } else { dataFile.setHost(null); dataFile.setStatus(FileStatus.IN_QUEUE); } dataFileRepository.update(dataFile); } catch (IOException e) { logger.error(String.format("Failed to reading %s data file", filePath), e); dataFile.setStatus(FileStatus.ERROR_READ); 
dataFileRepository.update(dataFile); } catch (CSVReaderException e) { logger.error(String.format("Failed to reading %s data file", filePath), e); dataFile.setStatus(FileStatus.ERROR_WRITE); dataFileRepository.update(dataFile); } catch (CSVWriterException e) { logger.error(String.format("Failed to reading %s data file", filePath), e); dataFile.setStatus(FileStatus.ERROR_READ); dataFileRepository.update(dataFile); } catch (Throwable t) { logger.error(String.format("Failed to unknown %s data file", filePath), t); dataFile.setStatus(FileStatus.ERROR_UNKNOWN); dataFileRepository.update(dataFile); } finally { if (context != null) { try { context.cleanup(); } catch (IOException e) { // Ignore, did everything we could to close it. } } } }