/**
 * Creates the meta information attached to this node's output, carrying the
 * localized name and description of the lift algorithm.
 *
 * @param locale locale used to resolve the {@code LIFT_NAME} and
 *     {@code LIFT_DESCRIPTION} messages
 * @return a new {@code AnalyticNodeMetaInfo} populated with both messages
 */
protected AnalyticNodeMetaInfo createNodeMetaInfo(Locale locale) {
    AnalyticNodeMetaInfo metaInfo = new AnalyticNodeMetaInfo();

    String algorithmName = SDKLanguagePack.getMessage(SDKLanguagePack.LIFT_NAME, locale);
    metaInfo.setAlgorithmName(algorithmName);

    String algorithmDescription =
            SDKLanguagePack.getMessage(SDKLanguagePack.LIFT_DESCRIPTION, locale);
    metaInfo.setAlgorithmDescription(algorithmDescription);

    return metaInfo;
}
/**
 * Creates the meta information for the random forest training node.
 *
 * @param locale locale used to resolve the {@code RANDOME_FOREST_TRAIN_NAME}
 *     and {@code RANDOME_FOREST_TRAIN_DESCRIPTION} messages
 * @return a new {@code AnalyticNodeMetaInfo} holding the localized algorithm
 *     name and description
 */
@Override
protected AnalyticNodeMetaInfo createNodeMetaInfo(Locale locale) {
    final AnalyticNodeMetaInfo metaInfo = new AnalyticNodeMetaInfo();
    final String name =
            SDKLanguagePack.getMessage(SDKLanguagePack.RANDOME_FOREST_TRAIN_NAME, locale);
    metaInfo.setAlgorithmName(name);
    final String description =
            SDKLanguagePack.getMessage(SDKLanguagePack.RANDOME_FOREST_TRAIN_DESCRIPTION, locale);
    metaInfo.setAlgorithmDescription(description);
    return metaInfo;
}
/**
 * Creates the meta information for the naive Bayes prediction node.
 *
 * @param locale locale used to resolve the {@code NAIVE_BAYES_PREDICT_NAME}
 *     and {@code NAIVE_BAYES_PREDICT_DESCRIPTION} messages
 * @return a new {@code AnalyticNodeMetaInfo} holding the localized algorithm
 *     name and description
 */
private AnalyticNodeMetaInfo createNodeMetaInfo(Locale locale) {
    AnalyticNodeMetaInfo info = new AnalyticNodeMetaInfo();

    // Name first, then description: same lookup order as before.
    String localizedName =
            SDKLanguagePack.getMessage(SDKLanguagePack.NAIVE_BAYES_PREDICT_NAME, locale);
    info.setAlgorithmName(localizedName);

    String localizedDescription =
            SDKLanguagePack.getMessage(SDKLanguagePack.NAIVE_BAYES_PREDICT_DESCRIPTION, locale);
    info.setAlgorithmDescription(localizedDescription);

    return info;
}
/* (non-Javadoc) * @see com.alpine.datamining.api.impl.db.AbstractDBModelTrainer#createNodeMetaInfo(java.util.Locale) */ @Override protected AnalyticNodeMetaInfo createNodeMetaInfo(Locale locale) { // TODO AnalyticNodeMetaInfo nodeMetaInfo = new AnalyticNodeMetaInfo(); nodeMetaInfo.setAlgorithmName( SDKLanguagePack.getMessage(SDKLanguagePack.EM_TRAIN_NAME, locale)); nodeMetaInfo.setAlgorithmDescription( SDKLanguagePack.getMessage(SDKLanguagePack.EM_TRAIN_DESCRIPTION, locale)); return nodeMetaInfo; }
/**
 * Runs the Hadoop prediction configured on {@code source} and tags the result
 * with this node's meta information.
 *
 * <p>When the configuration is a {@code TimeSeriesHadoopPredictorConfig}, the
 * Hadoop info of the source is copied onto the configuration and
 * {@code doPredict(config)} is used. Otherwise
 * {@code doPredict(source, config)} is used, the bad-data counter of
 * {@code hadoopRunner} is reported, and a "local mode" message is attached to
 * the result when the runner reports local mode.
 *
 * @param source analytic source; its configuration must be a
 *     {@code HadoopPredictorConfig}
 * @return the prediction output with its node meta information set
 * @throws AnalysisException if prediction fails. An {@code AnalysisException}
 *     raised inside is rethrown unchanged; any other exception is wrapped.
 */
public AnalyticOutPut doAnalysis(AnalyticSource source) throws AnalysisException {
    HadoopPredictorConfig config = (HadoopPredictorConfig) source.getAnalyticConfig();
    try {
        AnalyticOutPut result;
        if (config instanceof TimeSeriesHadoopPredictorConfig) {
            ((TimeSeriesHadoopPredictorConfig) config)
                    .setHadoopInfo(((HadoopAnalyticSource) source).getHadoopInfo());
            result = doPredict(config);
        } else {
            result = doPredict((HadoopAnalyticSource) source, config);
            super.reportBadDataCount(
                    hadoopRunner.getBadCounter(),
                    HadoopConstants.Flow_Call_Back_URL,
                    getName(),
                    getFlowRunUUID());
            if (hadoopRunner.isLocalMode()) {
                result.setExtraLogMessage(
                        SDKLanguagePack.getMessage(SDKLanguagePack.LOCAL_MODE, config.getLocale()));
            }
        }
        result.setAnalyticNodeMetaInfo(createNodeMetaInfo(config.getLocale()));
        return result;
    } catch (Exception e) {
        // Do not bury an already meaningful AnalysisException inside a second one.
        if (e instanceof AnalysisException) {
            throw (AnalysisException) e;
        }
        throw new AnalysisException(e);
    }
}
public AnalyticOutPut doAnalysis(AnalyticSource source) throws AnalysisException { AbstractModelTrainerConfig config = (AbstractModelTrainerConfig) source.getAnalyticConfig(); Model model = null; if (config.getTrainedModel() == null || config.getForceRetrain().equals("Yes")) { try { model = train(source); } catch (Error e) { logger.error(e); if (e instanceof OutOfMemoryError) { throw new AnalysisException( SDKLanguagePack.getMessage( SDKLanguagePack.ADABOOST_TOO_MANY_TRAINER, config.getLocale())); } } AnalyzerOutPutTrainModel analyzerOutPutModel = new AnalyzerOutPutTrainModel(model); String modelName = getName(); analyzerOutPutModel.getEngineModel().setName(modelName); analyzerOutPutModel.setAnalyticNodeMetaInfo(createNodeMetaInfo(config.getLocale())); // analyzerOutPutModel.setDataAnalyzerMap(dataAnalyzerMap); analyzerOutPutModel.setComeFromRetrain(true); return analyzerOutPutModel; } else { // need not train the model agian, UI have the reused model AnalyzerOutPutTrainModel analyzerOutPutModel = new AnalyzerOutPutTrainModel(config.getTrainedModel().getModel()); analyzerOutPutModel.getEngineModel().setName(getName()); analyzerOutPutModel.setAnalyticNodeMetaInfo(createNodeMetaInfo(config.getLocale())); // analyzerOutPutModel.setDataAnalyzerMap(dataAnalyzerMap); analyzerOutPutModel.setComeFromRetrain(false); return analyzerOutPutModel; } }
/**
 * Repeatedly draws a training sample until the sample table contains more than
 * one distinct value of the dependent column.
 *
 * <p>Each attempt calls {@code adaboostTrainSampleOnce} and then counts the
 * distinct dependent values in {@code sampleTable}. The number of attempts is
 * bounded by {@code AlpineMinerConfig.ADABOOST_SAMPLE}.
 *
 * @param inputSchema schema that holds the sample table
 * @param timeStamp value forwarded to {@code adaboostTrainSampleOnce}
 * @param dependentColumn name of the dependent column (quoted before use)
 * @param st statement used to run the sampling and counting queries
 * @param rs unused; kept for signature compatibility. Result sets opened here
 *     are managed and closed locally.
 * @param pnewTable value forwarded to {@code adaboostTrainSampleOnce}
 * @param locale locale for the failure message
 * @throws SQLException declared for compatibility; SQL failures raised inside
 *     are logged and rethrown as {@code AnalysisException}
 * @throws AnalysisException if no attempt yields more than one distinct
 *     dependent value, or if a SQL error occurs
 */
public void adaboostTrainSample(
        String inputSchema,
        long timeStamp,
        String dependentColumn,
        Statement st,
        ResultSet rs,
        String pnewTable,
        Locale locale)
        throws SQLException, AnalysisException {
    try {
        boolean hasSeveralClasses = false;
        int attemptsLeft = AlpineMinerConfig.ADABOOST_SAMPLE;
        while (!hasSeveralClasses && attemptsLeft != 0) {
            adaboostTrainSampleOnce(inputSchema, timeStamp, st, pnewTable);

            StringBuilder sql = new StringBuilder();
            sql.append("select count(distinct ");
            sql.append(StringHandler.doubleQ(dependentColumn));
            sql.append(") from ");
            sql.append(StringHandler.doubleQ(inputSchema)).append(".");
            sql.append(StringHandler.doubleQ(sampleTable));
            sql.append(" ");
            logger.debug(sql.toString());

            // Close every result set we open: assigning to the rs parameter never
            // reached the caller, so these cursors used to leak.
            ResultSet countResult = st.executeQuery(sql.toString());
            try {
                while (countResult.next()) {
                    if (countResult.getInt(1) > 1) {
                        hasSeveralClasses = true;
                    }
                }
            } finally {
                countResult.close();
            }
            attemptsLeft--;
        }
        if (!hasSeveralClasses) {
            String message =
                    SDKLanguagePack.getMessage(SDKLanguagePack.ADABOOST_SAMPLE_FAIL, locale);
            throw new AnalysisException(message);
        }
    } catch (SQLException e) {
        logger.error(e.getMessage(), e);
        throw new AnalysisException(e);
    }
}
@Override protected Model train(AnalyticSource analyticSource) throws AnalysisException { ResultSet rs = null; Statement st = null; try { IDataSourceInfo dataSourceInfo = DataSourceInfoFactory.createConnectionInfo(analyticSource.getDataSourceType()); dbtype = dataSourceInfo.getDBType(); RandomForestModel lastResult = null; RandomForestIMP randomForestImpl = null; // if (dbtype.equalsIgnoreCase(DataSourceInfoOracle.dBType)) { // randomForestTrainer = new AdaboostOracle(); // // } else if (dbtype.equalsIgnoreCase(DataSourceInfoGreenplum.dBType) || dbtype.equalsIgnoreCase(DataSourceInfoPostgres.dBType)) { randomForestImpl = new RandomForestGreenplum(); } else if (dbtype.equalsIgnoreCase(DataSourceInfoOracle.dBType)) { randomForestImpl = new RandomForestOracle(); } else if (dbtype.equalsIgnoreCase(DataSourceInfoDB2.dBType)) { randomForestImpl = new RandomForestDB2(); ((RandomForestDB2) randomForestImpl) .setConnection(((DataBaseAnalyticSource) analyticSource).getConnection()); } else if (dbtype.equalsIgnoreCase(DataSourceInfoNZ.dBType)) { randomForestImpl = new RandomForestNZ(); } else { throw new AnalysisException("Databse type is not supported for Random Forest:" + dbtype); // return null; } try { dataSet = getDataSet((DataBaseAnalyticSource) analyticSource, analyticSource.getAnalyticConfig()); } catch (OperatorException e1) { logger.error(e1); throw new OperatorException(e1.getLocalizedMessage()); } setSpecifyColumn(dataSet, analyticSource.getAnalyticConfig()); dataSet.computeAllColumnStatistics(); RandomForestConfig rfConfig = (RandomForestConfig) analyticSource.getAnalyticConfig(); String dbSystem = ((DataBaseAnalyticSource) analyticSource).getDataBaseInfo().getSystem(); String url = ((DataBaseAnalyticSource) analyticSource).getDataBaseInfo().getUrl(); String userName = ((DataBaseAnalyticSource) analyticSource).getDataBaseInfo().getUserName(); String password = ((DataBaseAnalyticSource) analyticSource).getDataBaseInfo().getPassword(); String inputSchema = 
((DataBaseAnalyticSource) analyticSource).getTableInfo().getSchema(); String tableName = ((DataBaseAnalyticSource) analyticSource).getTableInfo().getTableName(); String useSSL = ((DataBaseAnalyticSource) analyticSource).getDataBaseInfo().getUseSSL(); String sampleWithReplacement = rfConfig.getSampleWithReplacement(); long timeStamp = System.currentTimeMillis(); pnewTable = "pnew" + timeStamp; sampleTable = "s" + timeStamp; String dependentColumn = rfConfig.getDependentColumn(); String columnNames = rfConfig.getColumnNames(); String[] totalColumns = columnNames.split(","); int subSize = Integer.parseInt(rfConfig.getNodeColumnNumber()); int forestSize = Integer.parseInt(rfConfig.getForestSize()); Connection conncetion = null; if (dbtype.equalsIgnoreCase(DataSourceInfoGreenplum.dBType) || dbtype.equalsIgnoreCase(DataSourceInfoPostgres.dBType)) { lastResult = new RandomForestModelGreenplum(dataSet); } else if (dbtype.equalsIgnoreCase(DataSourceInfoOracle.dBType)) { lastResult = new RandomForestModelOracle(dataSet); } else if (dbtype.equalsIgnoreCase(DataSourceInfoDB2.dBType)) { lastResult = new RandomForestModelDB2(dataSet); } else if (dbtype.equalsIgnoreCase(DataSourceInfoNZ.dBType)) { lastResult = new RandomForestModelNZ(dataSet); } lastResult.setColumnNames(columnNames); lastResult.setDependColumn(dependentColumn); lastResult.setTableName(tableName); conncetion = ((DataBaseAnalyticSource) analyticSource).getConnection(); Model result = null; try { st = conncetion.createStatement(); } catch (SQLException e) { logger.error(e); throw new AnalysisException(e); } // Iterator<String> dependvalueIterator = dataSet.getColumns() // .getLabel().getMapping().getValues().iterator(); if (dataSet.getColumns().getLabel() instanceof NominalColumn) { if (dataSet.getColumns().getLabel().getMapping().getValues().size() <= 1) { String e = SDKLanguagePack.getMessage( SDKLanguagePack.ADABOOST_SAMPLE_ERRINFO, rfConfig.getLocale()); logger.error(e); throw new AnalysisException(e); } if 
(dataSet.getColumns().getLabel().getMapping().getValues().size() > AlpineMinerConfig.ADABOOST_MAX_DEPENDENT_COUNT) { String e = SDKLanguagePack.getMessage( SDKLanguagePack.ADABOOST_MAX_DEPENDENT_COUNT_ERRINFO, rfConfig.getLocale()); logger.error(e); throw new AnalysisException(e); } } try { randomForestImpl.randomForestTrainInit( inputSchema, tableName, timeStamp, dependentColumn, st, dataSet); } catch (SQLException e) { logger.error(e); throw new AnalysisException(e); } CartConfig config = new CartConfig(); config.setDependentColumn(dependentColumn); config.setConfidence(rfConfig.getConfidence()); config.setMaximal_depth(rfConfig.getMaximal_depth()); config.setMinimal_leaf_size(rfConfig.getMinimal_leaf_size()); config.setMinimal_size_for_split(rfConfig.getMinimal_size_for_split()); config.setNo_pre_pruning("true"); config.setNo_pruning("true"); for (int i = 0; i < forestSize; i++) { CartTrainer analyzer = new CartTrainer(); if (sampleWithReplacement == Resources.TrueOpt) { randomForestImpl.randomForestSample( inputSchema, timeStamp + "" + i, dependentColumn, st, rs, pnewTable, sampleTable + i, rfConfig.getLocale()); } else { randomForestImpl.randomForestSampleNoReplace( inputSchema, timeStamp + "" + i, dependentColumn, st, rs, pnewTable, sampleTable + i, rfConfig.getLocale(), dataSet.size()); } String subColumns = getSubColumns(totalColumns, subSize); config.setColumnNames(subColumns); DataBaseAnalyticSource tempsource = new DataBaseAnalyticSource( dbSystem, url, userName, password, inputSchema, sampleTable + i, useSSL); tempsource.setAnalyticConfiguration(config); tempsource.setConenction(conncetion); result = ((AnalyzerOutPutTrainModel) analyzer.doAnalysis(tempsource)) .getEngineModel() .getModel(); String OOBTable = "OOB" + sampleTable + i; randomForestImpl.generateOOBTable( inputSchema, OOBTable, pnewTable, sampleTable + i, st, rs); DataBaseAnalyticSource tempPredictSource = new DataBaseAnalyticSource( dbSystem, url, userName, password, inputSchema, OOBTable, 
useSSL); String predictOutTable = "OOBPredict" + sampleTable; EngineModel em = new EngineModel(); em.setModel(result); PredictorConfig tempconfig = new PredictorConfig(em); tempconfig.setDropIfExist(dropIfExists); tempconfig.setOutputSchema(inputSchema); tempconfig.setOutputTable(predictOutTable); tempPredictSource.setAnalyticConfiguration(tempconfig); tempPredictSource.setConenction(conncetion); AbstractDBModelPredictor predictor = new CartPredictor(); predictor.doAnalysis(tempPredictSource); // use the weak alg , do double OOBError = 0.0; if (result instanceof DecisionTreeModel) { OOBError = randomForestImpl.getOOBError( tempPredictSource, dependentColumn, "P(" + dependentColumn + ")"); lastResult.getOobEstimateError().add(OOBError); } else if (result instanceof RegressionTreeModel) { OOBError = randomForestImpl.getMSE(tempPredictSource, "P(" + dependentColumn + ")"); lastResult.getOobLoss().add(OOBError); double OOBMape = randomForestImpl.getMAPE( tempPredictSource, dependentColumn, "P(" + dependentColumn + ")"); lastResult.getOobMape().add(OOBMape); } else { OOBError = Double.NaN; lastResult.getOobLoss().add(OOBError); } lastResult.addModel((SingleModel) result); randomForestImpl.clearTrainResult(inputSchema, sampleTable + i); randomForestImpl.clearTrainResult(inputSchema, predictOutTable); randomForestImpl.clearTrainResult(inputSchema, OOBTable); } return lastResult; } catch (Exception e) { logger.error(e); if (e instanceof WrongUsedException) { throw new AnalysisError(this, (WrongUsedException) e); } else if (e instanceof AnalysisError) { throw (AnalysisError) e; } else { throw new AnalysisException(e); } } finally { try { if (st != null) { st.close(); } if (rs != null) { rs.close(); } } catch (SQLException e) { logger.error(e); throw new AnalysisException(e.getLocalizedMessage()); } } }
private void performOperation( DatabaseConnection databaseConnection, DataSet dataSet, Locale locale) throws AnalysisError, OperatorException { String outputTableName = getQuotaedTableName(getOutputSchema(), getOutputTable()); String inputTableName = getQuotaedTableName(getInputSchema(), getInputTable()); Columns atts = dataSet.getColumns(); String dbType = databaseConnection.getProperties().getName(); IDataSourceInfo dataSourceInfo = DataSourceInfoFactory.createConnectionInfo(dbType); IMultiDBUtility multiDBUtility = MultiDBUtilityFactory.createConnectionInfo(dbType); ISqlGeneratorMultiDB sqlGenerator = SqlGeneratorMultiDBFactory.createConnectionInfo(dbType); dropIfExist(dataSet); DatabaseUtil.alterParallel(databaseConnection, getOutputType()); // for oracle StringBuilder sb_create = new StringBuilder("create "); StringBuilder insertTable = new StringBuilder(); if (getOutputType().equalsIgnoreCase("table")) { sb_create.append(" table "); } else { sb_create.append(" view "); } sb_create.append(outputTableName); sb_create.append( getOutputType().equalsIgnoreCase(Resources.TableType) ? 
getAppendOnlyString() : ""); sb_create.append(DatabaseUtil.addParallel(databaseConnection, getOutputType())).append(" as ("); StringBuilder selectSql = new StringBuilder(" select "); selectSql.append(StringHandler.doubleQ(groupColumn)).append(","); Column att = atts.get(columnNames); dataSet.computeColumnStatistics(att); if (att.isNumerical()) { logger.error("PivotTableAnalyzer cannot accept numeric type column"); throw new AnalysisError( this, AnalysisErrorName.Not_numeric, locale, SDKLanguagePack.getMessage(SDKLanguagePack.PIVOT_NAME, locale)); } String attName = StringHandler.doubleQ(att.getName()); List<String> valueList = att.getMapping().getValues(); if (!useArray && valueList.size() > Integer.parseInt(AlpineMinerConfig.PIVOT_DISTINCTVALUE_THRESHOLD)) { logger.error("Too many distinct value for column " + StringHandler.doubleQ(columnNames)); throw new AnalysisError( this, AnalysisErrorName.Too_Many_Distinct_value, locale, StringHandler.doubleQ(columnNames), AlpineMinerConfig.PIVOT_DISTINCTVALUE_THRESHOLD); } if (valueList.size() <= 0) { logger.error("Empty table"); throw new AnalysisError(this, AnalysisErrorName.Empty_table, locale); } String aggColumnName; if (!StringUtil.isEmpty(aggColumn)) { aggColumnName = StringHandler.doubleQ(aggColumn); } else { aggColumnName = "1"; } Iterator<String> valueList_i = valueList.iterator(); if (useArray) { if (dataSourceInfo.getDBType().equals(DataSourceInfoOracle.dBType)) { ArrayList<String> array = new ArrayList<String>(); while (valueList_i.hasNext()) { String value = StringHandler.escQ(valueList_i.next()); String newValue = "alpine_miner_null_to_0(" + aggrType + " (case when " + attName + "=" + CommonUtility.quoteValue(dbType, att, value) + " then " + aggColumnName + " end )) "; array.add(newValue); } selectSql.append( CommonUtility.array2OracleArray(array, CommonUtility.OracleDataType.Float)); } else { selectSql.append(multiDBUtility.floatArrayHead()); while (valueList_i.hasNext()) { String value = valueList_i.next(); 
selectSql.append("alpine_miner_null_to_0(").append(aggrType); selectSql.append(" (case when ").append(attName).append("="); value = StringHandler.escQ(value); selectSql .append(CommonUtility.quoteValue(dbType, att, value)) .append(" then ") .append(aggColumnName) .append(" end )) "); // else 0 selectSql.append(","); } selectSql = selectSql.deleteCharAt(selectSql.length() - 1); selectSql.append(multiDBUtility.floatArrayTail()); } selectSql.append(" " + StringHandler.doubleQ(att.getName())); } else { if (((DBTable) dataSet.getDBTable()) .getDatabaseConnection() .getProperties() .getName() .equals(DataSourceInfoNZ.dBType)) { while (valueList_i.hasNext()) { String value = valueList_i.next(); selectSql.append("(").append(aggrType); selectSql.append(" (case when ").append(attName).append("="); value = StringHandler.escQ(value); selectSql .append(CommonUtility.quoteValue(dbType, att, value)) .append(" then ") .append(aggColumnName) .append(" end )) "); // else 0 String colName = StringHandler.doubleQ(att.getName() + "_" + value); selectSql.append(colName); selectSql.append(","); } selectSql = selectSql.deleteCharAt(selectSql.length() - 1); } else if (((DBTable) dataSet.getDBTable()) .getDatabaseConnection() .getProperties() .getName() .equals(DataSourceInfoDB2.dBType)) { while (valueList_i.hasNext()) { String value = valueList_i.next(); selectSql.append("alpine_miner_null_to_0(").append(aggrType); selectSql.append(" (double(case when ").append(attName).append("="); value = StringHandler.escQ(value); selectSql .append(CommonUtility.quoteValue(dbType, att, value)) .append(" then ") .append(aggColumnName) .append(" end ))) "); // else 0 String colName = StringHandler.doubleQ(att.getName() + "_" + value); selectSql.append(colName); selectSql.append(","); } selectSql = selectSql.deleteCharAt(selectSql.length() - 1); } else { while (valueList_i.hasNext()) { String value = valueList_i.next(); selectSql.append("alpine_miner_null_to_0(").append(aggrType); selectSql.append(" (case 
when ").append(attName).append("="); value = StringHandler.escQ(value); selectSql .append(CommonUtility.quoteValue(dbType, att, value)) .append(" then ") .append(aggColumnName) .append(" end )) "); // else 0 String colName = StringHandler.doubleQ(att.getName() + "_" + value); selectSql.append(colName); selectSql.append(","); } selectSql = selectSql.deleteCharAt(selectSql.length() - 1); } } selectSql.append(" from ").append(inputTableName).append(" foo group by "); selectSql.append(StringHandler.doubleQ(groupColumn)); if (((DBTable) dataSet.getDBTable()) .getDatabaseConnection() .getProperties() .getName() .equals(DataSourceInfoNZ.dBType)) { StringBuilder sb = new StringBuilder(); sb.append("select ").append(StringHandler.doubleQ(groupColumn)).append(","); Iterator<String> valueList_new = valueList.iterator(); while (valueList_new.hasNext()) { String value = valueList_new.next(); String colName = StringHandler.doubleQ(att.getName() + "_" + value); sb.append("case when ").append(colName).append(" is null then 0 else "); sb.append(colName).append(" end ").append(colName).append(","); } sb = sb.deleteCharAt(sb.length() - 1); sb.append(" from (").append(selectSql).append(") foo "); selectSql = sb; } sb_create.append(selectSql).append(" )"); if (getOutputType().equalsIgnoreCase("table")) { sb_create.append(getEndingString()); insertTable.append(sqlGenerator.insertTable(selectSql.toString(), outputTableName)); } try { Statement st = databaseConnection.createStatement(false); logger.debug("PivotTableAnalyzer.performOperation():sql=" + sb_create); st.execute(sb_create.toString()); if (insertTable.length() > 0) { st.execute(insertTable.toString()); logger.debug("PivotTableAnalyzer.performOperation():insertTableSql=" + insertTable); } } catch (SQLException e) { logger.error(e); if (e.getMessage().startsWith("ORA-03001") || e.getMessage().startsWith("ERROR: invalid identifier")) { throw new AnalysisError(this, AnalysisErrorName.Invalid_Identifier, locale); } else { throw new 
OperatorException(e.getLocalizedMessage()); } } }