Пример #1
0
  /**
   * Trains an SVM regression model by calling the database function
   * {@code alpine_miner_online_sv_reg} on the (possibly transformed) training table.
   *
   * <p>The method stores {@code parameter} in the {@code para} field, configures the data source
   * info from the data set's connection, converts the data set with
   * {@code TransformCategoryToNumeric_new}, runs a single query that returns the fitted model
   * components, and hands the result set to {@code setModel}. When the transformer reports
   * {@code isTransform()}, the transformed table is dropped after the model has been read.
   *
   * @param dataSet training data; its table must be a {@code DBTable}
   * @param parameter SVM settings; must be an {@code SVMRegressionParameter} because
   *     {@code getSlambda()} is read through a cast
   * @return the populated {@code SVMRegressionModel}
   * @throws OperatorException if creating the statement or running the training query fails;
   *     the message is the localized message of the underlying {@code SQLException}
   */
  public Model train(DataSet dataSet, SVMParameter parameter) throws OperatorException {
    para = parameter;
    DatabaseConnection databaseConnection =
        ((DBTable) dataSet.getDBTable()).getDatabaseConnection();
    setDataSourceInfo(
        DataSourceInfoFactory.createConnectionInfo(databaseConnection.getProperties().getName()));
    Column label = dataSet.getColumns().getLabel();
    String labelString = StringHandler.doubleQ(label.getName());

    DataSet newDataSet = getTransformer().TransformCategoryToNumeric_new(dataSet);
    String newTableName = ((DBTable) newDataSet.getDBTable()).getTableName();

    Statement st = null;
    ResultSet rs = null;
    try {
      st = databaseConnection.createStatement(false);
    } catch (SQLException e) {
      e.printStackTrace();
      throw new OperatorException(e.getLocalizedMessage());
    }

    // Everything from here on runs with an open statement, so it is wrapped in try/finally:
    // any failure (not just SQLException) must still release the JDBC resources.
    try {
      StringBuffer ind = getColumnArray(newDataSet);
      StringBuffer where = getColumnWhere(newDataSet);
      // Rows without a label value cannot be used for training.
      where.append(" and ").append(labelString).append(" is not null ");

      SVMRegressionModel model = new SVMRegressionModel(dataSet, newDataSet);
      if (!newDataSet.equals(dataSet)) {
        // A transformation took place; the model receives the transformer's value/key map.
        model.setAllTransformMap_valueKey(getTransformer().getAllTransformMap_valueKey());
      }
      model.setKernelType(para.getKernelType());
      model.setDegree(para.getDegree());
      model.setGamma(para.getGamma());

      String sql =
          "select (model).inds, (model).cum_err, (model).epsilon, (model).rho, (model).b, (model).nsvs, (model).ind_dim, (model).weights, (model).individuals from (select alpine_miner_online_sv_reg('"
              + newTableName
              + "','"
              + ind
              + "','"
              + labelString
              + "','"
              + where
              + "',"
              + para.getKernelType()
              + ","
              + para.getDegree()
              + ","
              + para.getGamma()
              + ","
              + para.getEta()
              + ","
              + ((SVMRegressionParameter) para).getSlambda()
              + ","
              + para.getNu()
              + ") as model ";
      // Oracle needs a FROM clause even for a pure function call.
      if (getDataSourceInfo().getDBType().equals(DataSourceInfoOracle.dBType)) {
        sql += " from dual ";
      }
      sql += ") a";

      try {
        itsLogger.debug("SVMRegression.train():sql=" + sql);
        rs = st.executeQuery(sql);
        setModel(rs, model);
        if (getTransformer().isTransform()) {
          dropTable(st, newTableName);
        }
        // On the success path a close failure is still reported to the caller, as before.
        rs.close();
        rs = null;
        st.close();
        st = null;
      } catch (SQLException e) {
        e.printStackTrace();
        throw new OperatorException(e.getLocalizedMessage());
      }
      return model;
    } finally {
      // Best-effort cleanup for the failure paths; must not mask the exception in flight.
      if (rs != null) {
        try {
          rs.close();
        } catch (SQLException e) {
          itsLogger.debug("SVMRegression.train(): failed to close result set: " + e);
        }
      }
      if (st != null) {
        try {
          st.close();
        } catch (SQLException e) {
          itsLogger.debug("SVMRegression.train(): failed to close statement: " + e);
        }
      }
    }
  }
Пример #2
0
  /**
   * Trains an EM clustering model on the database table behind {@code source}.
   *
   * <p>The method resolves the database type from the source, loads the data set, passes the
   * configured column names to {@code filerColumens}, computes column statistics, converts the
   * data set with {@code ColumnTypeTransformer.TransformCategoryToNumeric_new}, and then runs
   * {@code emTrain} on the resulting table. The raw training result is turned into the final
   * model by {@code generateEMModel}.
   *
   * @param source the analytic source; it is cast to {@code DataBaseAnalyticSource} and its
   *     config to {@code EMConfig}
   * @return the trained EM model
   * @throws AnalysisException if training fails with anything other than a
   *     {@code WrongUsedException} or {@code AnalysisError} (those are rethrown as
   *     {@code AnalysisError}), or if closing the statement fails after successful training
   * @see com.alpine.datamining.api.impl.db.AbstractDBModelTrainer#train(com.alpine.datamining.api.AnalyticSource)
   */
  @Override
  protected Model train(AnalyticSource source) throws AnalysisException {
    Statement st = null;
    EMModel trainModel = null;
    // Tracks whether the try block ran to completion, so cleanup failures never replace
    // an exception that is already propagating.
    boolean succeeded = false;
    try {

      IDataSourceInfo dataSourceInfo =
          DataSourceInfoFactory.createConnectionInfo(source.getDataSourceType());
      dbtype = dataSourceInfo.getDBType();

      EMConfig config = (EMConfig) source.getAnalyticConfig();

      // The analysis columns arrive as one comma-separated string.
      String anaColumns = config.getColumnNames();
      String[] columnsArray = anaColumns.split(",");
      List<String> transformColumns = new ArrayList<String>();
      for (String columnName : columnsArray) {
        transformColumns.add(columnName);
      }
      DataSet dataSet = getDataSet((DataBaseAnalyticSource) source, config);
      filerColumens(dataSet, transformColumns);
      dataSet.computeAllColumnStatistics();
      ColumnTypeTransformer transformer = new ColumnTypeTransformer();
      DataSet newDataSet = transformer.TransformCategoryToNumeric_new(dataSet);
      String tableName = ((DBTable) newDataSet.getDBTable()).getTableName();
      Columns columns = newDataSet.getColumns();
      List<String> newTransformColumns = new ArrayList<String>();

      // Flatten the per-column maps into a single inverted map (map value -> map key).
      HashMap<String, String> transformMap = new HashMap<String, String>();
      for (String key : transformer.getAllTransformMap_valueKey().keySet()) {
        HashMap<String, String> values = (transformer.getAllTransformMap_valueKey()).get(key);
        for (String lowKey : values.keySet()) {
          transformMap.put(values.get(lowKey), lowKey);
        }
      }

      // Training runs over every column of the transformed data set.
      Iterator<Column> attributeIter = columns.iterator();
      while (attributeIter.hasNext()) {
        Column column = attributeIter.next();
        newTransformColumns.add(column.getName());
      }

      int maxIterationNumber = Integer.parseInt(config.getMaxIterationNumber());
      int clusterNumber = Integer.parseInt(config.getClusterNumber());
      double epsilon = Double.parseDouble(config.getEpsilon());
      int initClusterSize = 10;
      if (config.getInitClusterSize() != null) {
        initClusterSize = Integer.parseInt(config.getInitClusterSize());
      }
      // Shrink the initial cluster size when the data set is too small to supply
      // initClusterSize rows for each cluster.
      if (newDataSet.size() < initClusterSize * clusterNumber) {
        initClusterSize = (int) (newDataSet.size() / clusterNumber + 1);
      } // TODO  get it from config and make sure it will not be too large
      EMClusterImpl emImpl = EMClusterFactory.createEMAnalyzer(dbtype);
      trainModel = EMClusterFactory.createEMModel(dbtype, newDataSet);
      Connection connection = ((DataBaseAnalyticSource) source).getConnection();

      st = connection.createStatement();

      ArrayList<Double> tempResult =
          emImpl.emTrain(
              connection,
              st,
              tableName,
              maxIterationNumber,
              epsilon,
              clusterNumber,
              newTransformColumns,
              initClusterSize,
              trainModel);
      trainModel = generateEMModel(trainModel, newTransformColumns, clusterNumber, tempResult);
      // NOTE(review): this compares against the this.dataSet field, not the local dataSet
      // loaded above — confirm that getDataSet() keeps the two in sync.
      if (!newDataSet.equals(this.dataSet)) {
        trainModel.setAllTransformMap_valueKey(transformMap);
      }
      succeeded = true;
    } catch (Exception e) {
      logger.error(e);
      if (e instanceof WrongUsedException) {
        throw new AnalysisError(this, (WrongUsedException) e);
      } else if (e instanceof AnalysisError) {
        throw (AnalysisError) e;
      } else {
        throw new AnalysisException(e);
      }
    } finally {
      if (st != null) {
        try {
          st.close();
        } catch (SQLException e) {
          logger.debug(e.toString());
          // Throwing from finally discards any exception already in flight, so a close
          // failure is only reported when training itself succeeded.
          if (succeeded) {
            throw new AnalysisException(e.getLocalizedMessage());
          }
        }
      }
    }
    return trainModel;
  }