protected int countRows(RetailerSite retailerSite, DataFile dataFile) { File filePath = null; try { filePath = configurationService.getFileSystem().getDirectory(dataFile.getFilePath(), true); TableDefinition tableDefinition = this.getTableDefinition(retailerSite, dataFile); String encoding = getDataImportEncoding(tableDefinition, filePath); LineNumberReader lnr = new LineNumberReader(new InputStreamReader(new FileInputStream(filePath), encoding)); lnr.readLine(); // Skip header int numRows = 0; while (lnr.readLine() != null) { numRows++; } lnr.close(); return numRows; } catch (Exception e) { logger.error(String.format("Failed to reading %s data file", filePath), e); dataFile.setStatus(FileStatus.ERROR_READ); dataFileRepository.update(dataFile); return -1; } }
/**
 * Marks {@code dataFile} for import without splitting it into blocks.
 *
 * <p>For non-image-zip imports the row count is computed and import statistics
 * are initialized; image zips are treated as a single "row". In both cases the
 * file is persisted with status {@code FileStatus.IN_QUEUE}.
 *
 * @param dataFile     file to queue
 * @param retailerSite site used to resolve the table definition for row counting
 */
protected void noSplit(DataFile dataFile, RetailerSite retailerSite) {
    if (dataFile.getImportType().compareTo(ImportType.images_zip.getTableName()) != 0) {
        int numRows = this.countRows(retailerSite, dataFile);
        if (numRows < 0) {
            // countRows() already set ERROR_READ and persisted the file; the
            // original fell through here and clobbered that status with
            // IN_QUEUE, queueing a file that could not be read.
            return;
        }
        dataFile.setHost(null);
        dataFile.setNumRows(numRows);
        this.getDataFileStatisticService().initializeStatistics(dataFile);
        this.getDataFileStatisticService().startImport(dataFile);
    } else {
        // Image zips are not row-counted; treat the archive as one unit.
        dataFile.setNumRows(1);
        dataFile.setHost(null);
    }
    dataFile.setStatus(FileStatus.IN_QUEUE);
    dataFileRepository.update(dataFile);
}
private void blockDataFile(DataFile dataFile, RetailerSite retailerSite, int blockSize) { File filePath = null; PreprocessorHandlerContext context = null; try { dataFile.setStartTime(new Date()); // TODO srfisk 2012/07/31 Only split CSV, might split images zip // when they are very large. if (dataFile.getImportType().compareTo(ImportType.images_zip.getTableName()) != 0) { filePath = configurationService.getFileSystem().getDirectory(dataFile.getFilePath(), true); TableDefinition tableDefinition = this.getTableDefinition(retailerSite, dataFile); ImportType importType = ImportType.getTypeFromTableName(dataFile.getImportType()); String encoding = this.getDataImportEncoding(tableDefinition, filePath); String separator = Character.toString( (char) Integer.valueOf(tableDefinition.getProperty("separator", "9")).intValue()); String quotechar = this.getQuoteCharacter(tableDefinition); int blockCount = 1; int totalRowCount = 0; boolean noSplit = false; boolean done = false; context = this.createContext( retailerSite, dataFile, filePath, tableDefinition, encoding, separator, quotechar); while (!done) { context.startWriters(blockCount); context.writeHeader(); Row row = null; while ((row = context.reader.readNext()) != null && context.rowCount < blockSize) { context.writeRow(row); } context.closeWriters(); blockCount++; if (context.isEmptyBlock()) { done = true; continue; } context.createDataFiles( retailerSite, importType.getTableName(), importType.getPriority()); context.inQueueDataFiles(); } dataFile.setHost(null); dataFile.setNumRows(totalRowCount); if (!noSplit) { dataFile.setEndDate(new Date()); dataFile.setEndTime(new Date()); dataFile.setStatus(this.getEndStatus()); } else { dataFile.setStatus(FileStatus.IN_QUEUE); } } else { dataFile.setHost(null); dataFile.setStatus(FileStatus.IN_QUEUE); } dataFileRepository.update(dataFile); } catch (IOException e) { logger.error(String.format("Failed to reading %s data file", filePath), e); dataFile.setStatus(FileStatus.ERROR_READ); 
dataFileRepository.update(dataFile); } catch (CSVReaderException e) { logger.error(String.format("Failed to reading %s data file", filePath), e); dataFile.setStatus(FileStatus.ERROR_WRITE); dataFileRepository.update(dataFile); } catch (CSVWriterException e) { logger.error(String.format("Failed to reading %s data file", filePath), e); dataFile.setStatus(FileStatus.ERROR_READ); dataFileRepository.update(dataFile); } catch (Throwable t) { logger.error(String.format("Failed to unknown %s data file", filePath), t); dataFile.setStatus(FileStatus.ERROR_UNKNOWN); dataFileRepository.update(dataFile); } finally { if (context != null) { try { context.cleanup(); } catch (IOException e) { // Ignore, did everything we could to close it. } } } }