Пример #1
0
  // 处理对应的dbBatch
  /**
   * Downloads the archive referenced by {@code key} and deserializes it into a {@link DbBatch}.
   *
   * <p>The archive is read as two consecutive sections, a row batch followed by a file batch. Each
   * section is preceded by a 4-byte length that is decoded with {@code ByteUtils.bytes2int}. When
   * the key carries both a key and a CRC, the downloaded file is first handed to {@code
   * decodeFile}.
   *
   * @param key pipe key providing the download URL, the batch identity and the optional key/CRC
   * @return the batch rebuilt from the downloaded archive
   * @throws PipeException with {@code download_error} when the retrieval fails, or with {@code
   *     deserial_error} when the archive cannot be read or ends prematurely
   */
  private DbBatch getDbBatch(HttpPipeKey key) {
    String dataUrl = key.getUrl();
    Pipeline pipeline = configClientService.findPipeline(key.getIdentity().getPipelineId());
    DataRetriever dataRetriever =
        dataRetrieverFactory.createRetriever(
            pipeline.getParameters().getRetriever(), dataUrl, downloadDir);
    File archiveFile = null;
    try {
      dataRetriever.connect();
      dataRetriever.doRetrieve();
      archiveFile = dataRetriever.getDataAsFile();
    } catch (Exception e) {
      dataRetriever.abort();
      throw new PipeException("download_error", e);
    } finally {
      dataRetriever.disconnect();
    }

    // The payload is only decoded when both the key and the CRC are present.
    if (StringUtils.isNotEmpty(key.getKey()) && StringUtils.isNotEmpty(key.getCrc())) {
      decodeFile(archiveFile, key.getKey(), key.getCrc());
    }

    InputStream input = null;
    try {
      input = new BufferedInputStream(new FileInputStream(archiveFile));
      DbBatch dbBatch = new DbBatch();

      // Section 1: length-prefixed row batch.
      int length = readSectionLength(input);
      BatchProto.RowBatch rowbatchProto =
          BatchProto.RowBatch.parseFrom(new LimitedInputStream(input, length));
      // Rebuild the original model objects from the protobuf messages.
      RowBatch rowBatch = new RowBatch();
      rowBatch.setIdentity(build(rowbatchProto.getIdentity()));
      for (BatchProto.RowData rowDataProto : rowbatchProto.getRowsList()) {
        EventData eventData = new EventData();
        eventData.setPairId(rowDataProto.getPairId());
        eventData.setTableId(rowDataProto.getTableId());
        eventData.setTableName(rowDataProto.getTableName());
        eventData.setSchemaName(rowDataProto.getSchemaName());
        eventData.setEventType(EventType.valuesOf(rowDataProto.getEventType()));
        eventData.setExecuteTime(rowDataProto.getExecuteTime());
        // Sync mode and consistency are only set when the message carries a non-empty value.
        if (StringUtils.isNotEmpty(rowDataProto.getSyncMode())) {
          eventData.setSyncMode(SyncMode.valuesOf(rowDataProto.getSyncMode()));
        }
        if (StringUtils.isNotEmpty(rowDataProto.getSyncConsistency())) {
          eventData.setSyncConsistency(SyncConsistency.valuesOf(rowDataProto.getSyncConsistency()));
        }
        // Primary keys.
        List<EventColumn> keys = new ArrayList<EventColumn>();
        for (BatchProto.Column columnProto : rowDataProto.getKeysList()) {
          keys.add(buildColumn(columnProto));
        }
        eventData.setKeys(keys);
        // Old primary keys; left unset when the message has none.
        if (!CollectionUtils.isEmpty(rowDataProto.getOldKeysList())) {
          List<EventColumn> oldKeys = new ArrayList<EventColumn>();
          for (BatchProto.Column columnProto : rowDataProto.getOldKeysList()) {
            oldKeys.add(buildColumn(columnProto));
          }
          eventData.setOldKeys(oldKeys);
        }
        // Remaining column values.
        List<EventColumn> columns = new ArrayList<EventColumn>();
        for (BatchProto.Column columnProto : rowDataProto.getColumnsList()) {
          columns.add(buildColumn(columnProto));
        }
        eventData.setColumns(columns);

        eventData.setRemedy(rowDataProto.getRemedy());
        eventData.setSize(rowDataProto.getSize());
        // Add the record to the batch.
        rowBatch.merge(eventData);
      }
      dbBatch.setRowBatch(rowBatch);

      // Section 2: length-prefixed file batch.
      length = readSectionLength(input);
      BatchProto.FileBatch filebatchProto =
          BatchProto.FileBatch.parseFrom(new LimitedInputStream(input, length));
      FileBatch fileBatch = new FileBatch();
      fileBatch.setIdentity(build(filebatchProto.getIdentity()));
      for (BatchProto.FileData fileDataProto : filebatchProto.getFilesList()) {
        FileData fileData = new FileData();
        fileData.setPairId(fileDataProto.getPairId());
        fileData.setTableId(fileDataProto.getTableId());
        fileData.setEventType(EventType.valuesOf(fileDataProto.getEventType()));
        fileData.setLastModifiedTime(fileDataProto.getLastModifiedTime());
        fileData.setNameSpace(fileDataProto.getNamespace());
        fileData.setPath(fileDataProto.getPath());
        fileData.setSize(fileDataProto.getSize());
        fileBatch.getFiles().add(fileData);
      }
      dbBatch.setFileBatch(fileBatch);
      return dbBatch;
    } catch (IOException e) {
      throw new PipeException("deserial_error", e);
    } finally {
      // Close the archive stream itself; the previous code closed a reader that was always null
      // and therefore leaked the underlying file handle.
      IOUtils.closeQuietly(input);
    }
  }

  /**
   * Reads the 4-byte length prefix that precedes each serialized section of the archive.
   *
   * <p>A single {@code read(byte[])} call may return fewer bytes than requested, so this loops
   * until the whole prefix has been read.
   *
   * @param input stream positioned at the start of a length prefix
   * @return the prefix decoded with {@code ByteUtils.bytes2int}
   * @throws IOException if the stream ends before four bytes are available
   */
  private int readSectionLength(InputStream input) throws IOException {
    byte[] lengthBytes = new byte[4];
    int offset = 0;
    while (offset < lengthBytes.length) {
      int count = input.read(lengthBytes, offset, lengthBytes.length - offset);
      if (count < 0) {
        throw new IOException("unexpected end of stream while reading batch length");
      }
      offset += count;
    }
    return ByteUtils.bytes2int(lengthBytes);
  }
Пример #2
0
  // ======================== help method ===================
  // 保存对应的dbBatch
  /**
   * Serializes {@code dbBatch} into a file below {@code htdocsDir} and returns the key that
   * describes where it can be fetched.
   *
   * <p>The file holds the row batch followed by the file batch, each written as a 4-byte
   * serialized size (via {@code ByteUtils.int2bytes}) and then the protobuf message itself. When
   * the pipeline parameters enable file encryption, the written file is passed to {@code
   * encryptFile} and the resulting key and CRC are stored on the returned pipe key.
   *
   * @param dbBatch batch whose row batch and file batch are written out
   * @return pipe key carrying the URL, data type, identity and optional key/CRC
   * @throws PipeException with {@code write_byte_error} when writing the file fails
   */
  private HttpPipeKey saveDbBatch(DbBatch dbBatch) {
    RowBatch rows = dbBatch.getRowBatch();

    // Convert the row batch into its protobuf builder.
    BatchProto.RowBatch.Builder rowsProto = BatchProto.RowBatch.newBuilder();
    rowsProto.setIdentity(build(rows.getIdentity()));
    for (EventData event : rows.getDatas()) {
      BatchProto.RowData.Builder row = BatchProto.RowData.newBuilder();
      row.setPairId(event.getPairId()).setTableId(event.getTableId());
      if (event.getSchemaName() != null) {
        row.setSchemaName(event.getSchemaName());
      }
      row.setTableName(event.getTableName())
          .setEventType(event.getEventType().getValue())
          .setExecuteTime(event.getExecuteTime());
      // Sync mode and consistency are optional on the event.
      if (event.getSyncMode() != null) {
        row.setSyncMode(event.getSyncMode().getValue());
      }
      if (event.getSyncConsistency() != null) {
        row.setSyncConsistency(event.getSyncConsistency().getValue());
      }

      // Key columns.
      for (EventColumn current : event.getKeys()) {
        row.addKeys(buildColumn(current));
      }
      // Old key columns, when present.
      if (!CollectionUtils.isEmpty(event.getOldKeys())) {
        for (EventColumn previous : event.getOldKeys()) {
          row.addOldKeys(buildColumn(previous));
        }
      }
      // All other columns.
      for (EventColumn other : event.getColumns()) {
        row.addColumns(buildColumn(other));
      }

      row.setRemedy(event.isRemedy()).setSize(event.getSize());
      rowsProto.addRows(row.build());
    }

    // Convert the file batch into its protobuf builder.
    FileBatch files = dbBatch.getFileBatch();
    BatchProto.FileBatch.Builder filesProto = BatchProto.FileBatch.newBuilder();
    filesProto.setIdentity(build(files.getIdentity()));
    for (FileData source : files.getFiles()) {
      BatchProto.FileData.Builder entry = BatchProto.FileData.newBuilder();
      entry.setPairId(source.getPairId()).setTableId(source.getTableId());
      if (source.getNameSpace() != null) {
        entry.setNamespace(source.getNameSpace());
      }
      if (source.getPath() != null) {
        entry.setPath(source.getPath());
      }
      entry
          .setEventType(source.getEventType().getValue())
          .setSize(source.getSize())
          .setLastModifiedTime(source.getLastModifiedTime());
      filesProto.addFiles(entry.build());
    }

    // Derive the target file name from the batch identity and the batch class name.
    String filename =
        buildFileName(rows.getIdentity(), ClassUtils.getShortClassName(dbBatch.getClass()));
    File target = new File(htdocsDir, filename);
    OutputStream sink = null;
    try {
      sink = new BufferedOutputStream(new FileOutputStream(target));

      // Row batch section: serialized size, then the message.
      BatchProto.RowBatch rowsMessage = rowsProto.build();
      sink.write(ByteUtils.int2bytes(rowsMessage.getSerializedSize()));
      rowsMessage.writeTo(sink);

      // File batch section: serialized size, then the message.
      BatchProto.FileBatch filesMessage = filesProto.build();
      sink.write(ByteUtils.int2bytes(filesMessage.getSerializedSize()));
      filesMessage.writeTo(sink);

      sink.flush();
    } catch (IOException e) {
      throw new PipeException("write_byte_error", e);
    } finally {
      IOUtils.closeQuietly(sink);
    }

    HttpPipeKey pipeKey = new HttpPipeKey();
    pipeKey.setUrl(remoteUrlBuilder.getUrl(rows.getIdentity().getPipelineId(), filename));
    pipeKey.setDataType(PipeDataType.DB_BATCH);
    pipeKey.setIdentity(rows.getIdentity());

    Pipeline pipeline = configClientService.findPipeline(rows.getIdentity().getPipelineId());
    if (pipeline.getParameters().getUseFileEncrypt()) {
      // Encrypt the written file and publish the key/CRC alongside the URL.
      EncryptedData encrypted = encryptFile(target);
      pipeKey.setKey(encrypted.getKey());
      pipeKey.setCrc(encrypted.getCrc());
    }
    return pipeKey;
  }
Пример #3
0
  /**
   * Walks the events of {@code dbBatch}'s row batch, submits a {@code DatabaseExtractWorker} for
   * every insert/update event that needs a database look-up, waits for all submitted workers and
   * finally removes the events whose {@code DataItem} was marked as filtered.
   *
   * <p>DDL events are skipped entirely. A look-up is requested when the pipeline or the event uses
   * media consistency, when the event has no updated columns and its source is Oracle (such
   * events are additionally flagged as remedy), or when the pipeline is in row mode and {@code
   * checkNeedDbForRowMode} says so.
   *
   * <p>If the calling thread is interrupted while waiting, the outstanding tasks are cancelled,
   * the thread's interrupt status is restored and an {@link ExtractException} is thrown.
   *
   * @param dbBatch batch to process; it and its row batch must not be {@code null}
   * @throws ExtractException if a worker fails or the wait is interrupted
   */
  @Override
  public void extract(DbBatch dbBatch) throws ExtractException {
    Assert.notNull(dbBatch);
    Assert.notNull(dbBatch.getRowBatch());
    // Read the pipeline configuration once.
    Pipeline pipeline = getPipeline(dbBatch.getRowBatch().getIdentity().getPipelineId());
    boolean mustDb = pipeline.getParameters().getSyncConsistency().isMedia();
    boolean isRow = pipeline.getParameters().getSyncMode().isRow();
    // Resize the pool to the configured size before submitting work.
    adjustPoolSize(pipeline.getParameters().getExtractPoolSize());
    ExecutorCompletionService completionService = new ExecutorCompletionService(executor);

    ExtractException exception = null;
    boolean interrupted = false;
    List<DataItem> items = new ArrayList<DataItem>();
    List<Future> futures = new ArrayList<Future>();
    List<EventData> eventDatas = dbBatch.getRowBatch().getDatas();
    for (EventData eventData : eventDatas) {
      if (eventData.getEventType().isDdl()) {
        continue;
      }

      DataItem item = new DataItem(eventData);
      boolean needsLookup =
          mustDb
              || (eventData.getSyncConsistency() != null
                  && eventData.getSyncConsistency().isMedia());

      // Extra case: Oracle sources sometimes deliver only the primary key without any changed
      // columns, so a look-up is required and the event is treated as a remedy operation.
      if (!needsLookup && CollectionUtils.isEmpty(eventData.getUpdatedColumns())) {
        DataMedia dataMedia = ConfigHelper.findDataMedia(pipeline, eventData.getTableId());
        if (dataMedia.getSource().getType().isOracle()) {
          needsLookup = true;
          eventData.setRemedy(true);
        }
      }

      if (isRow && !needsLookup) {
        // Decide up front to avoid contention inside the worker threads; views are checked
        // again later.
        needsLookup = checkNeedDbForRowMode(pipeline, eventData);
      }

      if (needsLookup
          && (eventData.getEventType().isInsert() || eventData.getEventType().isUpdate())) {
        Future future = completionService.submit(new DatabaseExtractWorker(pipeline, item), null);
        if (future.isDone()) {
          // With a caller-runs policy the task may already have finished; check right away so a
          // failure surfaces immediately instead of after everything has been submitted.
          try {
            future.get();
          } catch (InterruptedException e) {
            cancel(futures);
            // Preserve the interrupt for callers further up the stack.
            Thread.currentThread().interrupt();
            throw new ExtractException(e);
          } catch (ExecutionException e) {
            cancel(futures);
            throw new ExtractException(e);
          }
        }

        futures.add(future);
      }

      items.add(item); // keep the original order
    }

    // Wait for every submitted task; stop at the first failure.
    int index = 0;
    while (index < futures.size()) {
      try {
        Future future = completionService.take(); // may be interrupted
        future.get();
      } catch (InterruptedException e) {
        interrupted = true;
        exception = new ExtractException(e);
        break;
      } catch (ExecutionException e) {
        exception = new ExtractException(e);
        break;
      }

      index++;
    }

    if (index < futures.size()) {
      // A task failed or the wait was interrupted: cancel whatever is still outstanding.
      cancel(futures);
      if (interrupted) {
        // Restore the interrupt status that catching InterruptedException cleared.
        Thread.currentThread().interrupt();
      }
      throw exception;
    }

    // Everything succeeded: drop the events marked as filtered (for example because the record
    // no longer existed when it was looked up), leaving the remaining order untouched.
    for (DataItem item : items) {
      if (item.filter) {
        eventDatas.remove(item.getEventData());
      }
    }
  }