protected TableDefinition getFicstarTableDefinition(DataFile dataFile) throws IOException { RetailerSite retailerSite = dataFile.getRetailerSite(); File schemaFile = this.getFicstarSchemaFile(); DataImportConfig dataImportConfig = applicationContext.getBean("dataImportConfig", DataImportConfig.class); dataImportConfig.setConfigFilePath(schemaFile.getAbsolutePath()); dataImportConfig.afterPropertiesSet(); // now the config is ready. Schema schema = dataImportConfig.getSchema(); TableDefinition tableDefinition = schema.getDefinitionByDestination(dataFile.getImportType()); Assert.notNull( tableDefinition, "cannot find tableDefinition for :=" + dataFile.getImportType() + " in schema :=" + schemaFile.getAbsolutePath()); String csvFilePath = FilenameUtils.normalize( configurationService.getFileSystem().getFileSystemAsString() + "/" + dataFile.getFilePath()); tableDefinition.setProperty("path", csvFilePath); tableDefinition.setProperty("relativePath", dataFile.getFilePath()); tableDefinition.setProperty("isPathAbsolute", "true"); // some feeds like google dont have siteName == RetailerSiteName tableDefinition.setProperty("columns.siteName.defaultValue", retailerSite.getSiteName()); return tableDefinition; }
protected int countRows(RetailerSite retailerSite, DataFile dataFile) { File filePath = null; try { filePath = configurationService.getFileSystem().getDirectory(dataFile.getFilePath(), true); TableDefinition tableDefinition = this.getTableDefinition(retailerSite, dataFile); String encoding = getDataImportEncoding(tableDefinition, filePath); LineNumberReader lnr = new LineNumberReader(new InputStreamReader(new FileInputStream(filePath), encoding)); lnr.readLine(); // Skip header int numRows = 0; while (lnr.readLine() != null) { numRows++; } lnr.close(); return numRows; } catch (Exception e) { logger.error(String.format("Failed to reading %s data file", filePath), e); dataFile.setStatus(FileStatus.ERROR_READ); dataFileRepository.update(dataFile); return -1; } }
/**
 * Creates the preprocessing context for one feed file and opens a reader on it.
 *
 * <p>The feed file's base name (file name without extension) is split at its first {@code '-'}:
 * the part before it becomes the context's type, the part after it becomes its base name. The
 * {@code retailerSite} and {@code tableDefinition} arguments are not used by this constructor.
 *
 * @param retailerSite the retailer site owning the data file
 * @param dataFile the data file being preprocessed
 * @param fullDataFilePath the feed file on disk
 * @param tableDefinition the table definition for the data file
 * @param encoding the character encoding used to read the feed file
 * @param separator the column separator
 * @param quoteChar the quote character
 * @throws IOException declared for the reader construction
 * @throws CSVReaderException declared for the reader construction
 * @throws RuntimeException if the feed file's base name contains no {@code '-'}
 */
public PreprocessorHandlerContext(
        RetailerSite retailerSite,
        DataFile dataFile,
        File fullDataFilePath,
        TableDefinition tableDefinition,
        String encoding,
        String separator,
        String quoteChar)
        throws IOException, CSVReaderException {
    // What the data file itself records.
    this.dataFile = dataFile;
    this.srcFile = dataFile.getSrcFile();
    this.dataFilePath = dataFile.getFilePath();
    this.fullDataFilePath = fullDataFilePath;

    // Decompose "<type>-<base>.<ext>" taken from the file name on disk.
    String fileName = fullDataFilePath.getName();
    String stem = FilenameUtils.getBaseName(fileName);
    int dash = stem.indexOf('-');
    if (dash < 0) {
        throw new RuntimeException("Protocol error, invalid feed file name: " + stem);
    }
    this.type = stem.substring(0, dash);
    this.base = stem.substring(dash + 1);
    this.ext = FilenameUtils.getExtension(fileName);
    this.path = fullDataFilePath.getParentFile().getPath();

    // CSV dialect, then the reader that uses it.
    this.encoding = encoding;
    this.separator = separator;
    this.quoteChar = quoteChar;
    this.reader = new CVSTranslatorReader(fullDataFilePath, encoding, separator, quoteChar);
}
/**
 * Picks the preprocessing strategy configured for the data file's retailer site and runs it.
 *
 * <p>The retailer site is looked up by name, then its {@code DataFilesDownloadManager}
 * properties are consulted in this order: {@code <siteName>.blocking} (using
 * {@code <siteName>.blockSize}), then {@code <siteName>.splitting} (using
 * {@code <siteName>.splitSize}). When neither is enabled the file is handled without splitting.
 *
 * @param dataFile the data file to preprocess
 * @throws RuntimeException if blocking or splitting is enabled but its size property resolves
 *     to {@code -1} (the default used when the property is absent)
 * @see com.dell.acs.dataimport.preprocessor.PreprocessorHandler#preprocess(com.dell.acs.persistence.domain.DataFile)
 */
@Override
public void preprocess(DataFile dataFile) {
    RetailerSite retailerSite =
            retailerSiteRepository.getByName(dataFile.getRetailerSite().getSiteName(), true);

    // Blocking takes precedence over splitting.
    boolean blockingEnabled =
            configurationService.getBooleanProperty(
                    DataFilesDownloadManager.class,
                    retailerSite.getSiteName() + ".blocking",
                    false);
    if (blockingEnabled) {
        int blockSize =
                configurationService.getIntegerProperty(
                        DataFilesDownloadManager.class,
                        retailerSite.getSiteName() + ".blockSize",
                        -1);
        if (blockSize == -1) {
            throw new RuntimeException(
                    "Protocol Error, Invalid configuration for block size discovered!");
        }
        this.blockDataFile(dataFile, retailerSite, blockSize);
        return;
    }

    boolean splittingEnabled =
            configurationService.getBooleanProperty(
                    DataFilesDownloadManager.class,
                    retailerSite.getSiteName() + ".splitting",
                    false);
    if (!splittingEnabled) {
        this.noSplit(dataFile, retailerSite);
        return;
    }

    int splitSize =
            configurationService.getIntegerProperty(
                    DataFilesDownloadManager.class,
                    retailerSite.getSiteName() + ".splitSize",
                    -1);
    if (splitSize == -1) {
        throw new RuntimeException(
                "Protocol Error, Invalid configuration for split size discovered!");
    }
    this.splitDataFile(dataFile, retailerSite, splitSize);
}
/**
 * Queues a data file for import as a single unit, without blocking or splitting it.
 *
 * <p>For every import type except the images zip, the file's rows are counted, its statistics
 * are initialised and the import is started. An images zip is given a row count of 1. In both
 * cases the file ends up with no host and status {@code FileStatus.IN_QUEUE}.
 *
 * <p>If the row count cannot be determined ({@code countRows} returns a negative value) the
 * file is left as {@code countRows} persisted it, i.e. with status
 * {@code FileStatus.ERROR_READ}, and is not queued. Previously the error status was overwritten
 * with {@code IN_QUEUE} and {@code -1} was stored as the row count.
 *
 * @param dataFile the data file to queue
 * @param retailerSite the retailer site the data file belongs to
 */
protected void noSplit(DataFile dataFile, RetailerSite retailerSite) {
    if (dataFile.getImportType().compareTo(ImportType.images_zip.getTableName()) != 0) {
        int numRows = this.countRows(retailerSite, dataFile);
        if (numRows < 0) {
            // countRows has already logged the failure and persisted ERROR_READ;
            // do not clobber that status by queueing an unreadable file.
            return;
        }
        dataFile.setHost(null);
        dataFile.setNumRows(numRows);
        this.getDataFileStatisticService().initializeStatistics(dataFile);
        this.getDataFileStatisticService().startImport(dataFile);
    } else {
        dataFile.setNumRows(1);
        dataFile.setHost(null);
    }
    dataFile.setStatus(FileStatus.IN_QUEUE);
    dataFileRepository.update(dataFile);
}
private void blockDataFile(DataFile dataFile, RetailerSite retailerSite, int blockSize) { File filePath = null; PreprocessorHandlerContext context = null; try { dataFile.setStartTime(new Date()); // TODO srfisk 2012/07/31 Only split CSV, might split images zip // when they are very large. if (dataFile.getImportType().compareTo(ImportType.images_zip.getTableName()) != 0) { filePath = configurationService.getFileSystem().getDirectory(dataFile.getFilePath(), true); TableDefinition tableDefinition = this.getTableDefinition(retailerSite, dataFile); ImportType importType = ImportType.getTypeFromTableName(dataFile.getImportType()); String encoding = this.getDataImportEncoding(tableDefinition, filePath); String separator = Character.toString( (char) Integer.valueOf(tableDefinition.getProperty("separator", "9")).intValue()); String quotechar = this.getQuoteCharacter(tableDefinition); int blockCount = 1; int totalRowCount = 0; boolean noSplit = false; boolean done = false; context = this.createContext( retailerSite, dataFile, filePath, tableDefinition, encoding, separator, quotechar); while (!done) { context.startWriters(blockCount); context.writeHeader(); Row row = null; while ((row = context.reader.readNext()) != null && context.rowCount < blockSize) { context.writeRow(row); } context.closeWriters(); blockCount++; if (context.isEmptyBlock()) { done = true; continue; } context.createDataFiles( retailerSite, importType.getTableName(), importType.getPriority()); context.inQueueDataFiles(); } dataFile.setHost(null); dataFile.setNumRows(totalRowCount); if (!noSplit) { dataFile.setEndDate(new Date()); dataFile.setEndTime(new Date()); dataFile.setStatus(this.getEndStatus()); } else { dataFile.setStatus(FileStatus.IN_QUEUE); } } else { dataFile.setHost(null); dataFile.setStatus(FileStatus.IN_QUEUE); } dataFileRepository.update(dataFile); } catch (IOException e) { logger.error(String.format("Failed to reading %s data file", filePath), e); dataFile.setStatus(FileStatus.ERROR_READ); 
dataFileRepository.update(dataFile); } catch (CSVReaderException e) { logger.error(String.format("Failed to reading %s data file", filePath), e); dataFile.setStatus(FileStatus.ERROR_WRITE); dataFileRepository.update(dataFile); } catch (CSVWriterException e) { logger.error(String.format("Failed to reading %s data file", filePath), e); dataFile.setStatus(FileStatus.ERROR_READ); dataFileRepository.update(dataFile); } catch (Throwable t) { logger.error(String.format("Failed to unknown %s data file", filePath), t); dataFile.setStatus(FileStatus.ERROR_UNKNOWN); dataFileRepository.update(dataFile); } finally { if (context != null) { try { context.cleanup(); } catch (IOException e) { // Ignore, did everything we could to close it. } } } }