Beispiel #1
0
  // ======================== help method ===================
  // 保存对应的dbBatch
  private HttpPipeKey saveDbBatch(DbBatch dbBatch) {
    RowBatch rowBatch = dbBatch.getRowBatch();
    // 转化为proto对象
    BatchProto.RowBatch.Builder rowBatchBuilder = BatchProto.RowBatch.newBuilder();
    rowBatchBuilder.setIdentity(build(rowBatch.getIdentity()));
    // 处理具体的字段rowData
    for (EventData eventData : rowBatch.getDatas()) {
      BatchProto.RowData.Builder rowDataBuilder = BatchProto.RowData.newBuilder();
      rowDataBuilder.setPairId(eventData.getPairId());
      rowDataBuilder.setTableId(eventData.getTableId());
      if (eventData.getSchemaName() != null) {
        rowDataBuilder.setSchemaName(eventData.getSchemaName());
      }
      rowDataBuilder.setTableName(eventData.getTableName());
      rowDataBuilder.setEventType(eventData.getEventType().getValue());
      rowDataBuilder.setExecuteTime(eventData.getExecuteTime());
      // add by ljh at 2012-10-31
      if (eventData.getSyncMode() != null) {
        rowDataBuilder.setSyncMode(eventData.getSyncMode().getValue());
      }
      if (eventData.getSyncConsistency() != null) {
        rowDataBuilder.setSyncConsistency(eventData.getSyncConsistency().getValue());
      }

      // 构造key column
      for (EventColumn keyColumn : eventData.getKeys()) {
        rowDataBuilder.addKeys(buildColumn(keyColumn));
      }
      // 构造old key column
      if (CollectionUtils.isEmpty(eventData.getOldKeys()) == false) {
        for (EventColumn keyColumn : eventData.getOldKeys()) {
          rowDataBuilder.addOldKeys(buildColumn(keyColumn));
        }
      }

      // 构造其他 column
      for (EventColumn column : eventData.getColumns()) {
        rowDataBuilder.addColumns(buildColumn(column));
      }

      rowDataBuilder.setRemedy(eventData.isRemedy());
      rowDataBuilder.setSize(eventData.getSize());
      rowBatchBuilder.addRows(rowDataBuilder.build()); // 添加一条rowData记录
    }

    // 处理下FileBatch
    FileBatch fileBatch = dbBatch.getFileBatch();
    BatchProto.FileBatch.Builder fileBatchBuilder = null;
    fileBatchBuilder = BatchProto.FileBatch.newBuilder();
    fileBatchBuilder.setIdentity(build(fileBatch.getIdentity()));
    // 构造对应的proto对象
    for (FileData fileData : fileBatch.getFiles()) {
      BatchProto.FileData.Builder fileDataBuilder = BatchProto.FileData.newBuilder();
      fileDataBuilder.setPairId(fileData.getPairId());
      fileDataBuilder.setTableId(fileData.getTableId());
      if (fileData.getNameSpace() != null) {
        fileDataBuilder.setNamespace(fileData.getNameSpace());
      }
      if (fileData.getPath() != null) {
        fileDataBuilder.setPath(fileData.getPath());
      }
      fileDataBuilder.setEventType(fileData.getEventType().getValue());
      fileDataBuilder.setSize(fileData.getSize());
      fileDataBuilder.setLastModifiedTime(fileData.getLastModifiedTime());

      fileBatchBuilder.addFiles(fileDataBuilder.build()); // 添加一条fileData记录
    }
    // 处理构造对应的文件url
    String filename =
        buildFileName(rowBatch.getIdentity(), ClassUtils.getShortClassName(dbBatch.getClass()));
    // 写入数据
    File file = new File(htdocsDir, filename);
    OutputStream output = null;
    try {
      output = new BufferedOutputStream(new FileOutputStream(file));
      com.alibaba.otter.node.etl.model.protobuf.BatchProto.RowBatch rowBatchProto =
          rowBatchBuilder.build();
      output.write(ByteUtils.int2bytes(rowBatchProto.getSerializedSize())); // 输出大小
      rowBatchProto.writeTo(output); // 输出row batch

      com.alibaba.otter.node.etl.model.protobuf.BatchProto.FileBatch fileBatchProto =
          fileBatchBuilder.build();
      output.write(ByteUtils.int2bytes(fileBatchProto.getSerializedSize())); // 输出大小
      fileBatchProto.writeTo(output); // 输出file batch
      output.flush();
    } catch (IOException e) {
      throw new PipeException("write_byte_error", e);
    } finally {
      IOUtils.closeQuietly(output);
    }

    HttpPipeKey key = new HttpPipeKey();
    key.setUrl(remoteUrlBuilder.getUrl(rowBatch.getIdentity().getPipelineId(), filename));
    key.setDataType(PipeDataType.DB_BATCH);
    key.setIdentity(rowBatch.getIdentity());
    Pipeline pipeline = configClientService.findPipeline(rowBatch.getIdentity().getPipelineId());
    if (pipeline.getParameters().getUseFileEncrypt()) {
      // 加密处理
      EncryptedData encryptedData = encryptFile(file);
      key.setKey(encryptedData.getKey());
      key.setCrc(encryptedData.getCrc());
    }

    return key;
  }
Beispiel #2
0
  @Override
  public void extract(DbBatch dbBatch) throws ExtractException {
    Assert.notNull(dbBatch);
    Assert.notNull(dbBatch.getRowBatch());
    // 读取配置
    Pipeline pipeline = getPipeline(dbBatch.getRowBatch().getIdentity().getPipelineId());
    boolean mustDb = pipeline.getParameters().getSyncConsistency().isMedia();
    boolean isRow = pipeline.getParameters().getSyncMode().isRow(); // 如果是行记录是必须进行数据库反查
    // 读取一次配置
    adjustPoolSize(pipeline.getParameters().getExtractPoolSize()); // 调整下线程池,Extractor会被池化处理
    ExecutorCompletionService completionService = new ExecutorCompletionService(executor);

    // 进行并发提交
    ExtractException exception = null;
    // 每个表进行处理
    List<DataItem> items = new ArrayList<DataItem>();
    List<Future> futures = new ArrayList<Future>();
    List<EventData> eventDatas = dbBatch.getRowBatch().getDatas();
    for (EventData eventData : eventDatas) {
      if (eventData.getEventType().isDdl()) {
        continue;
      }

      DataItem item = new DataItem(eventData);
      // 针对row模式,需要去检查一下当前是否已经包含row记录的所有字段,如果发现字段不足,则执行一次数据库查询
      boolean flag =
          mustDb
              || (eventData.getSyncConsistency() != null
                  && eventData.getSyncConsistency().isMedia());

      // 增加一种case, 针对oracle erosa有时侯结果记录只有主键,没有变更字段,需要做一次反查
      if (!flag && CollectionUtils.isEmpty(eventData.getUpdatedColumns())) {
        DataMedia dataMedia = ConfigHelper.findDataMedia(pipeline, eventData.getTableId());
        if (dataMedia.getSource().getType().isOracle()) {
          flag |= true;
          eventData.setRemedy(true); // 针对这类数据,也统一视为补救的操作,可能erosa解析时反查数据库也不存在记录
        }
      }

      if (isRow && !flag) {
        // 提前判断一次,避免进入多线程进行竞争
        // 针对view视图的情况,会有后续再判断一次
        flag = checkNeedDbForRowMode(pipeline, eventData);
      }

      if (flag
          && (eventData.getEventType().isInsert()
              || eventData.getEventType().isUpdate())) { // 判断是否需要反查
        Future future =
            completionService.submit(new DatabaseExtractWorker(pipeline, item), null); // 提交进行并行查询
        if (future.isDone()) {
          // 立即判断一次,因为使用了CallerRun可能当场跑出结果,针对有异常时快速响应,而不是等跑完所有的才抛异常
          try {
            future.get();
          } catch (InterruptedException e) {
            cancel(futures); // 取消完之后立马退出
            throw new ExtractException(e);
          } catch (ExecutionException e) {
            cancel(futures); // 取消完之后立马退出
            throw new ExtractException(e);
          }
        }

        futures.add(future); // 记录一下添加的任务
      }

      items.add(item); // 按顺序添加
    }

    // 开始处理结果
    int index = 0;
    while (index < futures.size()) { // 循环处理发出去的所有任务
      try {
        Future future = completionService.take(); // 它也可能被打断
        future.get();
      } catch (InterruptedException e) {
        exception = new ExtractException(e);
        break; // 如何一个future出现了异常,就退出
      } catch (ExecutionException e) {
        exception = new ExtractException(e);
        break; // 如何一个future出现了异常,就退出
      }

      index++;
    }

    if (index < futures.size()) {
      // 小于代表有错误,需要对未完成的记录进行cancel操作,对已完成的结果进行收集,做重复录入过滤记录
      cancel(futures);
      throw exception;
    } else {
      // 全部成功分支, 构造返回结果也要保证原始的顺序
      for (int i = 0; i < items.size(); i++) {
        DataItem item = items.get(i);
        if (item.filter) { // 忽略需要被过滤的数据,比如数据库反查时记录已经不存在
          eventDatas.remove(item.getEventData());
        }
      }
    }
  }