/**
 * Trains an SVM regression model in-database by invoking the
 * {@code alpine_miner_online_sv_reg} aggregate over the (category-to-numeric
 * transformed) training table and reading the resulting model columns back.
 *
 * @param dataSet   the training data set; its label column must be set
 * @param parameter SVM parameters (kernel type, degree, gamma, eta, slambda, nu);
 *                  must be an {@code SVMRegressionParameter}
 * @return the populated {@code SVMRegressionModel}
 * @throws OperatorException if statement creation or query execution fails
 */
public Model train(DataSet dataSet, SVMParameter parameter) throws OperatorException {
    para = parameter;
    setDataSourceInfo(DataSourceInfoFactory.createConnectionInfo(
            ((DBTable) dataSet.getDBTable()).getDatabaseConnection().getProperties().getName()));
    DatabaseConnection databaseConnection =
            ((DBTable) dataSet.getDBTable()).getDatabaseConnection();
    Column label = dataSet.getColumns().getLabel();
    String labelString = StringHandler.doubleQ(label.getName());

    // Categorical columns are re-encoded as numeric first; training runs
    // against the (possibly new) transformed table.
    DataSet newDataSet = getTransformer().TransformCategoryToNumeric_new(dataSet);
    String newTableName = ((DBTable) newDataSet.getDBTable()).getTableName();

    Statement st = null;
    ResultSet rs = null;
    try {
        st = databaseConnection.createStatement(false);

        StringBuffer ind = getColumnArray(newDataSet);
        StringBuffer where = getColumnWhere(newDataSet);
        // Rows with a NULL label cannot contribute to regression training.
        where.append(" and ").append(labelString).append(" is not null ");

        SVMRegressionModel model = new SVMRegressionModel(dataSet, newDataSet);
        if (!newDataSet.equals(dataSet)) {
            // Keep the category->numeric mapping so scoring can decode inputs.
            model.setAllTransformMap_valueKey(getTransformer().getAllTransformMap_valueKey());
        }
        model.setKernelType(para.getKernelType());
        model.setDegree(para.getDegree());
        model.setGamma(para.getGamma());

        // NOTE(review): table/column identifiers and numeric parameters are
        // concatenated into the SQL text. Identifiers cannot be bound as JDBC
        // parameters, but confirm upstream validation/quoting of these values
        // (newTableName, ind, labelString) to rule out SQL injection.
        String sql = "select (model).inds, (model).cum_err, (model).epsilon, (model).rho,"
                + " (model).b, (model).nsvs, (model).ind_dim, (model).weights,"
                + " (model).individuals from (select alpine_miner_online_sv_reg('"
                + newTableName + "','" + ind + "','" + labelString + "','" + where + "',"
                + para.getKernelType() + "," + para.getDegree() + "," + para.getGamma() + ","
                + para.getEta() + "," + ((SVMRegressionParameter) para).getSlambda() + ","
                + para.getNu() + ") as model ";
        if (getDataSourceInfo().getDBType().equals(DataSourceInfoOracle.dBType)) {
            sql += " from dual "; // Oracle requires a FROM clause for scalar selects
        }
        sql += ") a";

        itsLogger.debug("SVMRegression.train():sql=" + sql);
        rs = st.executeQuery(sql);
        setModel(rs, model);
        if (getTransformer().isTransform()) {
            // The transformed table is an intermediate; drop it once read.
            dropTable(st, newTableName);
        }
        return model;
    } catch (SQLException e) {
        itsLogger.error("SVMRegression.train() failed: " + e.getMessage(), e);
        // NOTE(review): OperatorException appears to take only a message, so
        // the cause is not chained — confirm whether a (String, Throwable)
        // constructor exists.
        throw new OperatorException(e.getLocalizedMessage());
    } finally {
        // Close in reverse order of creation; failures here must not mask
        // the real outcome (previously rs/st leaked on any SQLException).
        if (rs != null) {
            try {
                rs.close();
            } catch (SQLException ignored) {
                // best effort — nothing actionable at this point
            }
        }
        if (st != null) {
            try {
                st.close();
            } catch (SQLException ignored) {
                // best effort — nothing actionable at this point
            }
        }
    }
}
/* (non-Javadoc) * @see com.alpine.datamining.api.impl.db.AbstractDBModelTrainer#train(com.alpine.datamining.api.AnalyticSource) */ @Override protected Model train(AnalyticSource source) throws AnalysisException { ResultSet rs = null; Statement st = null; EMModel trainModel = null; try { IDataSourceInfo dataSourceInfo = DataSourceInfoFactory.createConnectionInfo(source.getDataSourceType()); dbtype = dataSourceInfo.getDBType(); EMConfig config = (EMConfig) source.getAnalyticConfig(); String anaColumns = config.getColumnNames(); String[] columnsArray = anaColumns.split(","); List<String> transformColumns = new ArrayList<String>(); for (int i = 0; i < columnsArray.length; i++) { transformColumns.add(columnsArray[i]); } DataSet dataSet = getDataSet((DataBaseAnalyticSource) source, config); filerColumens(dataSet, transformColumns); dataSet.computeAllColumnStatistics(); ColumnTypeTransformer transformer = new ColumnTypeTransformer(); DataSet newDataSet = transformer.TransformCategoryToNumeric_new(dataSet); String tableName = ((DBTable) newDataSet.getDBTable()).getTableName(); Columns columns = newDataSet.getColumns(); List<String> newTransformColumns = new ArrayList<String>(); HashMap<String, String> transformMap = new HashMap<String, String>(); for (String key : transformer.getAllTransformMap_valueKey().keySet()) { HashMap<String, String> values = (transformer.getAllTransformMap_valueKey()).get(key); for (String lowKey : values.keySet()) { transformMap.put(values.get(lowKey), lowKey); } } Iterator<Column> attributeIter = columns.iterator(); while (attributeIter.hasNext()) { Column column = attributeIter.next(); newTransformColumns.add(column.getName()); } int maxIterationNumber = Integer.parseInt(config.getMaxIterationNumber()); int clusterNumber = Integer.parseInt(config.getClusterNumber()); double epsilon = Double.parseDouble(config.getEpsilon()); int initClusterSize = 10; if (config.getInitClusterSize() != null) { initClusterSize = 
Integer.parseInt(config.getInitClusterSize()); } if (newDataSet.size() < initClusterSize * clusterNumber) { initClusterSize = (int) (newDataSet.size() / clusterNumber + 1); } // TODO get it from config and make sure it will not be too large EMClusterImpl emImpl = EMClusterFactory.createEMAnalyzer(dbtype); trainModel = EMClusterFactory.createEMModel(dbtype, newDataSet); Connection connection = null; connection = ((DataBaseAnalyticSource) source).getConnection(); st = connection.createStatement(); ArrayList<Double> tempResult = emImpl.emTrain( connection, st, tableName, maxIterationNumber, epsilon, clusterNumber, newTransformColumns, initClusterSize, trainModel); trainModel = generateEMModel(trainModel, newTransformColumns, clusterNumber, tempResult); if (!newDataSet.equals(this.dataSet)) { trainModel.setAllTransformMap_valueKey(transformMap); } } catch (Exception e) { logger.error(e); if (e instanceof WrongUsedException) { throw new AnalysisError(this, (WrongUsedException) e); } else if (e instanceof AnalysisError) { throw (AnalysisError) e; } else { throw new AnalysisException(e); } } finally { try { if (st != null) { st.close(); } if (rs != null) { rs.close(); } } catch (SQLException e) { logger.debug(e.toString()); throw new AnalysisException(e.getLocalizedMessage()); } } return trainModel; }