// Fetch and rebuild the DbBatch referenced by the given pipe key
private DbBatch getDbBatch(HttpPipeKey key) {
    String dataUrl = key.getUrl();
    Pipeline pipeline = configClientService.findPipeline(key.getIdentity().getPipelineId());
    DataRetriever dataRetriever = dataRetrieverFactory.createRetriever(pipeline.getParameters().getRetriever(),
                                                                       dataUrl, downloadDir);
    File archiveFile = null;
    try {
        dataRetriever.connect();
        dataRetriever.doRetrieve();
        archiveFile = dataRetriever.getDataAsFile();
    } catch (Exception e) {
        dataRetriever.abort();
        throw new PipeException("download_error", e);
    } finally {
        dataRetriever.disconnect();
    }

    // Decrypt the downloaded file if the key carries encryption parameters
    if (StringUtils.isNotEmpty(key.getKey()) && StringUtils.isNotEmpty(key.getCrc())) {
        decodeFile(archiveFile, key.getKey(), key.getCrc());
    }

    InputStream input = null;
    try {
        input = new BufferedInputStream(new FileInputStream(archiveFile));
        DbBatch dbBatch = new DbBatch();
        byte[] lengthBytes = new byte[4];
        input.read(lengthBytes); // 4-byte length prefix of the RowBatch frame
        int length = ByteUtils.bytes2int(lengthBytes);
        BatchProto.RowBatch rowbatchProto = BatchProto.RowBatch.parseFrom(new LimitedInputStream(input, length));
        // Rebuild the original RowBatch model from the proto message
        RowBatch rowBatch = new RowBatch();
        rowBatch.setIdentity(build(rowbatchProto.getIdentity()));
        for (BatchProto.RowData rowDataProto : rowbatchProto.getRowsList()) {
            EventData eventData = new EventData();
            eventData.setPairId(rowDataProto.getPairId());
            eventData.setTableId(rowDataProto.getTableId());
            eventData.setTableName(rowDataProto.getTableName());
            eventData.setSchemaName(rowDataProto.getSchemaName());
            eventData.setEventType(EventType.valuesOf(rowDataProto.getEventType()));
            eventData.setExecuteTime(rowDataProto.getExecuteTime());
            // add by ljh at 2012-10-31
            if (StringUtils.isNotEmpty(rowDataProto.getSyncMode())) {
                eventData.setSyncMode(SyncMode.valuesOf(rowDataProto.getSyncMode()));
            }
            if (StringUtils.isNotEmpty(rowDataProto.getSyncConsistency())) {
                eventData.setSyncConsistency(SyncConsistency.valuesOf(rowDataProto.getSyncConsistency()));
            }
            // Primary key columns
            List<EventColumn> keys = new ArrayList<EventColumn>();
            for (BatchProto.Column columnProto : rowDataProto.getKeysList()) {
                keys.add(buildColumn(columnProto));
            }
            eventData.setKeys(keys);
            // Old primary key columns (present when the key itself changed)
            if (!CollectionUtils.isEmpty(rowDataProto.getOldKeysList())) {
                List<EventColumn> oldKeys = new ArrayList<EventColumn>();
                for (BatchProto.Column columnProto : rowDataProto.getOldKeysList()) {
                    oldKeys.add(buildColumn(columnProto));
                }
                eventData.setOldKeys(oldKeys);
            }
            // Regular column values
            List<EventColumn> columns = new ArrayList<EventColumn>();
            for (BatchProto.Column columnProto : rowDataProto.getColumnsList()) {
                columns.add(buildColumn(columnProto));
            }
            eventData.setColumns(columns);
            eventData.setRemedy(rowDataProto.getRemedy());
            eventData.setSize(rowDataProto.getSize());
            // Merge into the batch
            rowBatch.merge(eventData);
        }
        dbBatch.setRowBatch(rowBatch);

        input.read(lengthBytes); // 4-byte length prefix of the FileBatch frame
        length = ByteUtils.bytes2int(lengthBytes);
        BatchProto.FileBatch filebatchProto = BatchProto.FileBatch.parseFrom(new LimitedInputStream(input, length));
        // Rebuild the original FileBatch model from the proto message
        FileBatch fileBatch = new FileBatch();
        fileBatch.setIdentity(build(filebatchProto.getIdentity()));
        for (BatchProto.FileData fileDataProto : filebatchProto.getFilesList()) {
            FileData fileData = new FileData();
            fileData.setPairId(fileDataProto.getPairId());
            fileData.setTableId(fileDataProto.getTableId());
            fileData.setEventType(EventType.valuesOf(fileDataProto.getEventType()));
            fileData.setLastModifiedTime(fileDataProto.getLastModifiedTime());
            fileData.setNameSpace(fileDataProto.getNamespace());
            fileData.setPath(fileDataProto.getPath());
            fileData.setSize(fileDataProto.getSize());
            // Add to the file batch
            fileBatch.getFiles().add(fileData);
        }
        dbBatch.setFileBatch(fileBatch);
        return dbBatch;
    } catch (IOException e) {
        throw new PipeException("deserial_error", e);
    } finally {
        IOUtils.closeQuietly(input);
    }
}
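// Illustrative sketch, not part of the original class: the archive consumed above (and
// produced by saveDbBatch below) consists of two length-prefixed protobuf frames, each a
// 4-byte size followed by the serialized message, first BatchProto.RowBatch and then
// BatchProto.FileBatch. The helper below shows one such frame being read with plain JDK
// streams; it assumes a big-endian 4-byte prefix (DataInputStream.readInt semantics),
// which may or may not match ByteUtils.bytes2int in this codebase.
private static byte[] readFrameSketch(java.io.DataInputStream in) throws java.io.IOException {
    int length = in.readInt();          // 4-byte length prefix
    byte[] payload = new byte[length];  // allocate the frame body
    in.readFully(payload);              // read exactly 'length' bytes
    return payload;                     // e.g. BatchProto.RowBatch.parseFrom(payload)
}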
// ======================== help method ===================

// Serialize the DbBatch to a local file and build the pipe key that references it
private HttpPipeKey saveDbBatch(DbBatch dbBatch) {
    RowBatch rowBatch = dbBatch.getRowBatch();
    // Convert to the proto representation
    BatchProto.RowBatch.Builder rowBatchBuilder = BatchProto.RowBatch.newBuilder();
    rowBatchBuilder.setIdentity(build(rowBatch.getIdentity()));
    // Convert each rowData entry
    for (EventData eventData : rowBatch.getDatas()) {
        BatchProto.RowData.Builder rowDataBuilder = BatchProto.RowData.newBuilder();
        rowDataBuilder.setPairId(eventData.getPairId());
        rowDataBuilder.setTableId(eventData.getTableId());
        if (eventData.getSchemaName() != null) {
            rowDataBuilder.setSchemaName(eventData.getSchemaName());
        }
        rowDataBuilder.setTableName(eventData.getTableName());
        rowDataBuilder.setEventType(eventData.getEventType().getValue());
        rowDataBuilder.setExecuteTime(eventData.getExecuteTime());
        // add by ljh at 2012-10-31
        if (eventData.getSyncMode() != null) {
            rowDataBuilder.setSyncMode(eventData.getSyncMode().getValue());
        }
        if (eventData.getSyncConsistency() != null) {
            rowDataBuilder.setSyncConsistency(eventData.getSyncConsistency().getValue());
        }
        // Key columns
        for (EventColumn keyColumn : eventData.getKeys()) {
            rowDataBuilder.addKeys(buildColumn(keyColumn));
        }
        // Old key columns
        if (!CollectionUtils.isEmpty(eventData.getOldKeys())) {
            for (EventColumn keyColumn : eventData.getOldKeys()) {
                rowDataBuilder.addOldKeys(buildColumn(keyColumn));
            }
        }
        // Remaining columns
        for (EventColumn column : eventData.getColumns()) {
            rowDataBuilder.addColumns(buildColumn(column));
        }
        rowDataBuilder.setRemedy(eventData.isRemedy());
        rowDataBuilder.setSize(eventData.getSize());
        rowBatchBuilder.addRows(rowDataBuilder.build()); // append one rowData record
    }
    // Convert the FileBatch
    FileBatch fileBatch = dbBatch.getFileBatch();
    BatchProto.FileBatch.Builder fileBatchBuilder = BatchProto.FileBatch.newBuilder();
    fileBatchBuilder.setIdentity(build(fileBatch.getIdentity()));
    // Build the corresponding proto objects
    for (FileData fileData : fileBatch.getFiles()) {
        BatchProto.FileData.Builder fileDataBuilder = BatchProto.FileData.newBuilder();
        fileDataBuilder.setPairId(fileData.getPairId());
        fileDataBuilder.setTableId(fileData.getTableId());
        if (fileData.getNameSpace() != null) {
            fileDataBuilder.setNamespace(fileData.getNameSpace());
        }
        if (fileData.getPath() != null) {
            fileDataBuilder.setPath(fileData.getPath());
        }
        fileDataBuilder.setEventType(fileData.getEventType().getValue());
        fileDataBuilder.setSize(fileData.getSize());
        fileDataBuilder.setLastModifiedTime(fileData.getLastModifiedTime());
        fileBatchBuilder.addFiles(fileDataBuilder.build()); // append one fileData record
    }
    // Build the target file name and write the data
    String filename = buildFileName(rowBatch.getIdentity(), ClassUtils.getShortClassName(dbBatch.getClass()));
    File file = new File(htdocsDir, filename);
    OutputStream output = null;
    try {
        output = new BufferedOutputStream(new FileOutputStream(file));
        BatchProto.RowBatch rowBatchProto = rowBatchBuilder.build();
        output.write(ByteUtils.int2bytes(rowBatchProto.getSerializedSize())); // write size prefix
        rowBatchProto.writeTo(output); // write row batch
        BatchProto.FileBatch fileBatchProto = fileBatchBuilder.build();
        output.write(ByteUtils.int2bytes(fileBatchProto.getSerializedSize())); // write size prefix
        fileBatchProto.writeTo(output); // write file batch
        output.flush();
    } catch (IOException e) {
        throw new PipeException("write_byte_error", e);
    } finally {
        IOUtils.closeQuietly(output);
    }

    HttpPipeKey key = new HttpPipeKey();
    key.setUrl(remoteUrlBuilder.getUrl(rowBatch.getIdentity().getPipelineId(), filename));
    key.setDataType(PipeDataType.DB_BATCH);
    key.setIdentity(rowBatch.getIdentity());
    Pipeline pipeline = configClientService.findPipeline(rowBatch.getIdentity().getPipelineId());
    if (pipeline.getParameters().getUseFileEncrypt()) {
        // Encrypt the file and record the key/crc on the pipe key
        EncryptedData encryptedData = encryptFile(file);
        key.setKey(encryptedData.getKey());
        key.setCrc(encryptedData.getCrc());
    }
    return key;
}
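// Illustrative sketch, not part of the original class: how the two methods above are
// typically paired. The sending node calls saveDbBatch to serialize the batch under
// htdocsDir and obtain an HttpPipeKey; that key travels to the target node through the
// pipe, which then calls getDbBatch to download, optionally decrypt, and rebuild the
// batch. The method name roundTripSketch is hypothetical.
private DbBatch roundTripSketch(DbBatch dbBatch) {
    HttpPipeKey key = saveDbBatch(dbBatch); // serialize, write the file, optionally encrypt
    // in the real flow, 'key' is transferred to the remote node here
    return getDbBatch(key);                 // download, decrypt if needed, deserialize
}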
@Override
public void extract(DbBatch dbBatch) throws ExtractException {
    Assert.notNull(dbBatch);
    Assert.notNull(dbBatch.getRowBatch());
    // Load the pipeline configuration once
    Pipeline pipeline = getPipeline(dbBatch.getRowBatch().getIdentity().getPipelineId());
    boolean mustDb = pipeline.getParameters().getSyncConsistency().isMedia();
    boolean isRow = pipeline.getParameters().getSyncMode().isRow(); // row mode requires a database back-query
    // Resize the worker pool; extractors are pooled and reused
    adjustPoolSize(pipeline.getParameters().getExtractPoolSize());
    ExecutorCompletionService completionService = new ExecutorCompletionService(executor); // submit work concurrently

    ExtractException exception = null;
    // Process each record
    List<DataItem> items = new ArrayList<DataItem>();
    List<Future> futures = new ArrayList<Future>();
    List<EventData> eventDatas = dbBatch.getRowBatch().getDatas();
    for (EventData eventData : eventDatas) {
        if (eventData.getEventType().isDdl()) {
            continue;
        }

        DataItem item = new DataItem(eventData);
        // In row mode, check whether the record already carries all columns; if any are missing, run a database query
        boolean flag = mustDb || (eventData.getSyncConsistency() != null && eventData.getSyncConsistency().isMedia());

        // Extra case: with Oracle, erosa sometimes emits records with only the primary key and no changed columns, so a back-query is needed
        if (!flag && CollectionUtils.isEmpty(eventData.getUpdatedColumns())) {
            DataMedia dataMedia = ConfigHelper.findDataMedia(pipeline, eventData.getTableId());
            if (dataMedia.getSource().getType().isOracle()) {
                flag = true;
                eventData.setRemedy(true); // treat such records as remedy operations; the row may not exist even when erosa back-queried the database
            }
        }

        if (isRow && !flag) {
            // Check once up front to avoid contention across worker threads; view-based tables are checked again later
            flag = checkNeedDbForRowMode(pipeline, eventData);
        }

        if (flag && (eventData.getEventType().isInsert() || eventData.getEventType().isUpdate())) { // only inserts/updates need a back-query
            Future future = completionService.submit(new DatabaseExtractWorker(pipeline, item), null); // submit for parallel querying
            if (future.isDone()) {
                // Check immediately: with CallerRunsPolicy the task may have already run in place, so fail fast instead of waiting for the whole batch
                try {
                    future.get();
                } catch (InterruptedException e) {
                    cancel(futures); // cancel the rest and exit at once
                    throw new ExtractException(e);
                } catch (ExecutionException e) {
                    cancel(futures); // cancel the rest and exit at once
                    throw new ExtractException(e);
                }
            }
            futures.add(future); // remember the submitted task
        }
        items.add(item); // keep the original order
    }

    // Collect the results of all submitted tasks
    int index = 0;
    while (index < futures.size()) {
        try {
            Future future = completionService.take(); // may also be interrupted
            future.get();
        } catch (InterruptedException e) {
            exception = new ExtractException(e);
            break; // exit as soon as any future fails
        } catch (ExecutionException e) {
            exception = new ExtractException(e);
            break; // exit as soon as any future fails
        }
        index++;
    }

    if (index < futures.size()) {
        // A failure occurred: cancel the unfinished tasks; completed results are kept so duplicates can be filtered on retry
        cancel(futures);
        throw exception;
    } else {
        // All succeeded: build the result while preserving the original order
        for (int i = 0; i < items.size(); i++) {
            DataItem item = items.get(i);
            if (item.filter) { // drop filtered records, e.g. rows that no longer existed during the back-query
                eventDatas.remove(item.getEventData());
            }
        }
    }
}
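// Illustrative sketch, not part of the original listing: extract() relies on a cancel(futures)
// helper that is not shown in this section. A minimal version, assuming it only needs to stop
// whatever has not completed yet, could look like the following (the name cancelSketch is
// hypothetical):
private void cancelSketch(List<Future> futures) {
    for (Future future : futures) {
        if (!future.isDone()) {
            future.cancel(true); // interrupt back-queries that are still running
        }
    }
}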