@Test public void test_db_load_mysql() { ArbitrateConfigRegistry.regist(configClientService); dbLoadAction = (DbLoadAction) TestedObject.getSpringBeanFactory().getBean("dbLoadAction"); final Channel channel = new Channel(); channel.setId(1L); final Pipeline pipeline = new Pipeline(); pipeline.setId(100L); List<DataMediaPair> pairs = generatorDataMediaPairForMysql(20); pipeline.setPairs(pairs); pipeline.getParameters().merge(new SystemParameter()); pipeline.getParameters().merge(new ChannelParameter()); // pipeline.getParameters().setChannelInfo("LJH_DEMO"); // final Pipeline oppositePipeline = new Pipeline(); // oppositePipeline.setId(101L); channel.setPipelines(Arrays.asList(pipeline)); final Node currentNode = new Node(); currentNode.setId(1L); new NonStrictExpectations() { { configClientService.findChannel(anyLong); returns(channel); configClientService.findPipeline(anyLong); returns(pipeline); configClientService.currentNode(); returns(currentNode); } }; Identity identity = new Identity(); identity.setChannelId(100L); identity.setPipelineId(100L); identity.setProcessId(100L); RowBatch rowBatch = new RowBatch(); rowBatch.setIdentity(identity); List<EventData> eventDatas = generatorEventDataForMysql(0, 20, EventType.INSERT); for (EventData eventData : eventDatas) { rowBatch.merge(eventData); } eventDatas = generatorEventDataForMysql(10, 10, EventType.INSERT); for (EventData eventData : eventDatas) { rowBatch.merge(eventData); } eventDatas = generatorEventDataForMysql(19, 1, EventType.DELETE); for (EventData eventData : eventDatas) { rowBatch.merge(eventData); } WeightController controller = new WeightController(1); dbLoadAction.load(rowBatch, controller); }
// ======================== help method =================== // 保存对应的dbBatch private HttpPipeKey saveDbBatch(DbBatch dbBatch) { RowBatch rowBatch = dbBatch.getRowBatch(); // 转化为proto对象 BatchProto.RowBatch.Builder rowBatchBuilder = BatchProto.RowBatch.newBuilder(); rowBatchBuilder.setIdentity(build(rowBatch.getIdentity())); // 处理具体的字段rowData for (EventData eventData : rowBatch.getDatas()) { BatchProto.RowData.Builder rowDataBuilder = BatchProto.RowData.newBuilder(); rowDataBuilder.setPairId(eventData.getPairId()); rowDataBuilder.setTableId(eventData.getTableId()); if (eventData.getSchemaName() != null) { rowDataBuilder.setSchemaName(eventData.getSchemaName()); } rowDataBuilder.setTableName(eventData.getTableName()); rowDataBuilder.setEventType(eventData.getEventType().getValue()); rowDataBuilder.setExecuteTime(eventData.getExecuteTime()); // add by ljh at 2012-10-31 if (eventData.getSyncMode() != null) { rowDataBuilder.setSyncMode(eventData.getSyncMode().getValue()); } if (eventData.getSyncConsistency() != null) { rowDataBuilder.setSyncConsistency(eventData.getSyncConsistency().getValue()); } // 构造key column for (EventColumn keyColumn : eventData.getKeys()) { rowDataBuilder.addKeys(buildColumn(keyColumn)); } // 构造old key column if (CollectionUtils.isEmpty(eventData.getOldKeys()) == false) { for (EventColumn keyColumn : eventData.getOldKeys()) { rowDataBuilder.addOldKeys(buildColumn(keyColumn)); } } // 构造其他 column for (EventColumn column : eventData.getColumns()) { rowDataBuilder.addColumns(buildColumn(column)); } rowDataBuilder.setRemedy(eventData.isRemedy()); rowDataBuilder.setSize(eventData.getSize()); rowBatchBuilder.addRows(rowDataBuilder.build()); // 添加一条rowData记录 } // 处理下FileBatch FileBatch fileBatch = dbBatch.getFileBatch(); BatchProto.FileBatch.Builder fileBatchBuilder = null; fileBatchBuilder = BatchProto.FileBatch.newBuilder(); fileBatchBuilder.setIdentity(build(fileBatch.getIdentity())); // 构造对应的proto对象 for (FileData fileData : fileBatch.getFiles()) { BatchProto.FileData.Builder fileDataBuilder = BatchProto.FileData.newBuilder(); fileDataBuilder.setPairId(fileData.getPairId()); fileDataBuilder.setTableId(fileData.getTableId()); if (fileData.getNameSpace() != null) { fileDataBuilder.setNamespace(fileData.getNameSpace()); } if (fileData.getPath() != null) { fileDataBuilder.setPath(fileData.getPath()); } fileDataBuilder.setEventType(fileData.getEventType().getValue()); fileDataBuilder.setSize(fileData.getSize()); fileDataBuilder.setLastModifiedTime(fileData.getLastModifiedTime()); fileBatchBuilder.addFiles(fileDataBuilder.build()); // 添加一条fileData记录 } // 处理构造对应的文件url String filename = buildFileName(rowBatch.getIdentity(), ClassUtils.getShortClassName(dbBatch.getClass())); // 写入数据 File file = new File(htdocsDir, filename); OutputStream output = null; try { output = new BufferedOutputStream(new FileOutputStream(file)); com.alibaba.otter.node.etl.model.protobuf.BatchProto.RowBatch rowBatchProto = rowBatchBuilder.build(); output.write(ByteUtils.int2bytes(rowBatchProto.getSerializedSize())); // 输出大小 rowBatchProto.writeTo(output); // 输出row batch com.alibaba.otter.node.etl.model.protobuf.BatchProto.FileBatch fileBatchProto = fileBatchBuilder.build(); output.write(ByteUtils.int2bytes(fileBatchProto.getSerializedSize())); // 输出大小 fileBatchProto.writeTo(output); // 输出file batch output.flush(); } catch (IOException e) { throw new PipeException("write_byte_error", e); } finally { IOUtils.closeQuietly(output); } HttpPipeKey key = new HttpPipeKey(); key.setUrl(remoteUrlBuilder.getUrl(rowBatch.getIdentity().getPipelineId(), filename)); key.setDataType(PipeDataType.DB_BATCH); key.setIdentity(rowBatch.getIdentity()); Pipeline pipeline = configClientService.findPipeline(rowBatch.getIdentity().getPipelineId()); if (pipeline.getParameters().getUseFileEncrypt()) { // 加密处理 EncryptedData encryptedData = encryptFile(file); key.setKey(encryptedData.getKey()); key.setCrc(encryptedData.getCrc()); } return key; }
// 处理对应的dbBatch private DbBatch getDbBatch(HttpPipeKey key) { String dataUrl = key.getUrl(); Pipeline pipeline = configClientService.findPipeline(key.getIdentity().getPipelineId()); DataRetriever dataRetriever = dataRetrieverFactory.createRetriever( pipeline.getParameters().getRetriever(), dataUrl, downloadDir); File archiveFile = null; try { dataRetriever.connect(); dataRetriever.doRetrieve(); archiveFile = dataRetriever.getDataAsFile(); } catch (Exception e) { dataRetriever.abort(); throw new PipeException("download_error", e); } finally { dataRetriever.disconnect(); } // 处理下有加密的数据 if (StringUtils.isNotEmpty(key.getKey()) && StringUtils.isNotEmpty(key.getCrc())) { decodeFile(archiveFile, key.getKey(), key.getCrc()); } InputStream input = null; JSONReader reader = null; try { input = new BufferedInputStream(new FileInputStream(archiveFile)); DbBatch dbBatch = new DbBatch(); byte[] lengthBytes = new byte[4]; input.read(lengthBytes); int length = ByteUtils.bytes2int(lengthBytes); BatchProto.RowBatch rowbatchProto = BatchProto.RowBatch.parseFrom(new LimitedInputStream(input, length)); // 构造原始的model对象 RowBatch rowBatch = new RowBatch(); rowBatch.setIdentity(build(rowbatchProto.getIdentity())); for (BatchProto.RowData rowDataProto : rowbatchProto.getRowsList()) { EventData eventData = new EventData(); eventData.setPairId(rowDataProto.getPairId()); eventData.setTableId(rowDataProto.getTableId()); eventData.setTableName(rowDataProto.getTableName()); eventData.setSchemaName(rowDataProto.getSchemaName()); eventData.setEventType(EventType.valuesOf(rowDataProto.getEventType())); eventData.setExecuteTime(rowDataProto.getExecuteTime()); // add by ljh at 2012-10-31 if (StringUtils.isNotEmpty(rowDataProto.getSyncMode())) { eventData.setSyncMode(SyncMode.valuesOf(rowDataProto.getSyncMode())); } if (StringUtils.isNotEmpty(rowDataProto.getSyncConsistency())) { eventData.setSyncConsistency(SyncConsistency.valuesOf(rowDataProto.getSyncConsistency())); } // 处理主键 List<EventColumn> keys = new ArrayList<EventColumn>(); for (BatchProto.Column columnProto : rowDataProto.getKeysList()) { keys.add(buildColumn(columnProto)); } eventData.setKeys(keys); // 处理old主键 if (CollectionUtils.isEmpty(rowDataProto.getOldKeysList()) == false) { List<EventColumn> oldKeys = new ArrayList<EventColumn>(); for (BatchProto.Column columnProto : rowDataProto.getOldKeysList()) { oldKeys.add(buildColumn(columnProto)); } eventData.setOldKeys(oldKeys); } // 处理具体的column value List<EventColumn> columns = new ArrayList<EventColumn>(); for (BatchProto.Column columnProto : rowDataProto.getColumnsList()) { columns.add(buildColumn(columnProto)); } eventData.setColumns(columns); eventData.setRemedy(rowDataProto.getRemedy()); eventData.setSize(rowDataProto.getSize()); // 添加到总记录 rowBatch.merge(eventData); } dbBatch.setRowBatch(rowBatch); input.read(lengthBytes); length = ByteUtils.bytes2int(lengthBytes); BatchProto.FileBatch filebatchProto = BatchProto.FileBatch.parseFrom(new LimitedInputStream(input, length)); // 构造原始的model对象 FileBatch fileBatch = new FileBatch(); fileBatch.setIdentity(build(filebatchProto.getIdentity())); for (BatchProto.FileData fileDataProto : filebatchProto.getFilesList()) { FileData fileData = new FileData(); fileData.setPairId(fileDataProto.getPairId()); fileData.setTableId(fileDataProto.getTableId()); fileData.setEventType(EventType.valuesOf(fileDataProto.getEventType())); fileData.setLastModifiedTime(fileDataProto.getLastModifiedTime()); fileData.setNameSpace(fileDataProto.getNamespace()); fileData.setPath(fileDataProto.getPath()); fileData.setSize(fileDataProto.getSize()); // 添加到filebatch中 fileBatch.getFiles().add(fileData); } dbBatch.setFileBatch(fileBatch); return dbBatch; } catch (IOException e) { throw new PipeException("deserial_error", e); } finally { IOUtils.closeQuietly(reader); } }
@Override public void extract(DbBatch dbBatch) throws ExtractException { Assert.notNull(dbBatch); Assert.notNull(dbBatch.getRowBatch()); // 读取配置 Pipeline pipeline = getPipeline(dbBatch.getRowBatch().getIdentity().getPipelineId()); boolean mustDb = pipeline.getParameters().getSyncConsistency().isMedia(); boolean isRow = pipeline.getParameters().getSyncMode().isRow(); // 如果是行记录是必须进行数据库反查 // 读取一次配置 adjustPoolSize(pipeline.getParameters().getExtractPoolSize()); // 调整下线程池,Extractor会被池化处理 ExecutorCompletionService completionService = new ExecutorCompletionService(executor); // 进行并发提交 ExtractException exception = null; // 每个表进行处理 List<DataItem> items = new ArrayList<DataItem>(); List<Future> futures = new ArrayList<Future>(); List<EventData> eventDatas = dbBatch.getRowBatch().getDatas(); for (EventData eventData : eventDatas) { if (eventData.getEventType().isDdl()) { continue; } DataItem item = new DataItem(eventData); // 针对row模式,需要去检查一下当前是否已经包含row记录的所有字段,如果发现字段不足,则执行一次数据库查询 boolean flag = mustDb || (eventData.getSyncConsistency() != null && eventData.getSyncConsistency().isMedia()); // 增加一种case, 针对oracle erosa有时侯结果记录只有主键,没有变更字段,需要做一次反查 if (!flag && CollectionUtils.isEmpty(eventData.getUpdatedColumns())) { DataMedia dataMedia = ConfigHelper.findDataMedia(pipeline, eventData.getTableId()); if (dataMedia.getSource().getType().isOracle()) { flag |= true; eventData.setRemedy(true); // 针对这类数据,也统一视为补救的操作,可能erosa解析时反查数据库也不存在记录 } } if (isRow && !flag) { // 提前判断一次,避免进入多线程进行竞争 // 针对view视图的情况,会有后续再判断一次 flag = checkNeedDbForRowMode(pipeline, eventData); } if (flag && (eventData.getEventType().isInsert() || eventData.getEventType().isUpdate())) { // 判断是否需要反查 Future future = completionService.submit(new DatabaseExtractWorker(pipeline, item), null); // 提交进行并行查询 if (future.isDone()) { // 立即判断一次,因为使用了CallerRun可能当场跑出结果,针对有异常时快速响应,而不是等跑完所有的才抛异常 try { future.get(); } catch (InterruptedException e) { cancel(futures); // 取消完之后立马退出 throw new ExtractException(e); } catch (ExecutionException e) { cancel(futures); // 取消完之后立马退出 throw new ExtractException(e); } } futures.add(future); // 记录一下添加的任务 } items.add(item); // 按顺序添加 } // 开始处理结果 int index = 0; while (index < futures.size()) { // 循环处理发出去的所有任务 try { Future future = completionService.take(); // 它也可能被打断 future.get(); } catch (InterruptedException e) { exception = new ExtractException(e); break; // 如何一个future出现了异常,就退出 } catch (ExecutionException e) { exception = new ExtractException(e); break; // 如何一个future出现了异常,就退出 } index++; } if (index < futures.size()) { // 小于代表有错误,需要对未完成的记录进行cancel操作,对已完成的结果进行收集,做重复录入过滤记录 cancel(futures); throw exception; } else { // 全部成功分支, 构造返回结果也要保证原始的顺序 for (int i = 0; i < items.size(); i++) { DataItem item = items.get(i); if (item.filter) { // 忽略需要被过滤的数据,比如数据库反查时记录已经不存在 eventDatas.remove(item.getEventData()); } } } }
public void run() { try { MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(pipeline.getId())); Thread.currentThread() .setName(String.format(WORKER_NAME_FORMAT, pipeline.getId(), pipeline.getName())); // 获取数据表信息 DataMedia dataMedia = ConfigHelper.findDataMedia(pipeline, eventData.getTableId()); DbDialect dbDialect = dbDialectFactory.getDbDialect(pipeline.getId(), (DbMediaSource) dataMedia.getSource()); Table table = dbDialect.findTable(eventData.getSchemaName(), eventData.getTableName()); TableData keyTableData = buildTableData(table, eventData.getKeys()); // oracle类型特殊处理下 if (dbDialect instanceof OracleDialect) { keyTableData.columnTypes = getOraclePkTypes(table, keyTableData.columnNames); } boolean needAll = pipeline.getParameters().getSyncMode().isRow() || (eventData.getSyncMode() != null && eventData.getSyncMode().isRow()); // 增加一种case, 针对oracle erosa有时侯结果记录只有主键,没有变更字段,需要做一次反查,获取所有字段 needAll |= CollectionUtils.isEmpty(eventData.getUpdatedColumns()) && dataMedia.getSource().getType().isOracle(); List<DataMediaPair> mediaParis = ConfigHelper.findDataMediaPairByMediaId(pipeline, dataMedia.getId()); List<String> viewColumnNames = buildMaxColumnsFromColumnPairs(mediaParis, eventData.getKeys()); // TODO 后续版本测试下 // if (needAll) { // boolean needDb = checkNeedDbForRowMode(table, // viewColumnNames, eventData); // if (needAll && !needDb) {// 不需要进行反查 // item.setFilter(false); // return; // } // } // modified by ljh at 2012-11-04 // 反查数据时只反查带update=true标识的数据,因为update=false的记录可能只是进行filter需要用到的数据,不需要反查 TableData columnTableData = buildTableData(table, eventData.getUpdatedColumns(), needAll, viewColumnNames); if (columnTableData.columnNames.length == 0) { // 全主键,不需要进行反查 } else { List<String> newColumnValues = select( dbDialect, eventData.getSchemaName(), eventData.getTableName(), keyTableData, columnTableData); if (newColumnValues == null) { // miss from db // 设置为filter=true,可能存在丢数据的风险. // 比如针对源库发生主备切换,otter反查的是备库,查询不到对应的记录 // item.setFilter(true); // 针对需要自定义反查数据库的,允许忽略 // a. 自由门触发的数据,不存在时可以忽略 // b. 回环补救算法触发的数据,不存在时可以忽略 boolean needFilter = eventData.isRemedy() || pipeline.getParameters().getSkipNoRow(); item.setFilter(needFilter); // 判断主键是否有变更,如果变更了,就原样返回item int index = 0; for (EventColumn oldKey : eventData.getOldKeys()) { if (!oldKey.equals(eventData.getKeys().get(index))) { item.setFilter(false); break; } } } else { // 构造反查的返回结果 List<EventColumn> newEventColumns = new ArrayList<EventColumn>(); for (int i = 0; i < newColumnValues.size(); i++) { EventColumn column = new EventColumn(); column.setIndex(columnTableData.indexs[i]); column.setColumnName(columnTableData.columnNames[i]); column.setColumnType(columnTableData.columnTypes[i]); column.setNull(newColumnValues.get(i) == null); column.setColumnValue(newColumnValues.get(i)); column.setUpdate(true); newEventColumns.add(column); } // 处理下columns中不在反查字段内的字段列表 for (EventColumn column : eventData.getColumns()) { boolean override = false; for (EventColumn newEventColumn : newEventColumns) { if (StringUtils.equalsIgnoreCase( newEventColumn.getColumnName(), column.getColumnName())) { override = true; break; } } if (!override) { // 针对newcolumns不存在的记录进行添加 newEventColumns.add(column); } } Collections.sort(newEventColumns, new EventColumnIndexComparable()); // 重新排个序 eventData.setColumns(newEventColumns); } } } catch (InterruptedException e) { // ignore } finally { Thread.currentThread().setName(WORKER_NAME); MDC.remove(OtterConstants.splitPipelineLogFileKey); } }