/**
 * Checks that a {@code schema.table} parameter value refers to one of this operator's input
 * tables, and records the parameter as invalid when it does not.
 *
 * <p>Variable placeholders in the parameter value and in every input table's schema and table
 * name are resolved with {@code VariableModelUtility.getReplaceValue} before comparing. Nothing
 * is done when the parameter is already listed as invalid or when its resolved value is empty.
 *
 * @param invalidParameterList accumulator of invalid parameter names; {@code paraName} is
 *     appended when validation fails
 * @param paraName name of the parameter being validated
 * @param paraValue raw parameter value, expected in {@code schema.table} form
 * @param variableModel variable definitions used to resolve placeholders
 */
private void validateSelectedTable(
    List<String> invalidParameterList,
    String paraName,
    String paraValue,
    VariableModel variableModel) {
  if (invalidParameterList.contains(paraName)) {
    return;
  }
  paraValue = VariableModelUtility.getReplaceValue(variableModel, paraValue);
  if (StringUtil.isEmpty(paraValue)) {
    return;
  }

  // Only the first two dot-separated parts are considered, as before.
  String[] parts = paraValue.split("\\.");
  if (parts.length < 2) {
    // No "schema.table" shape at all: report it instead of failing with an
    // ArrayIndexOutOfBoundsException.
    invalidParameterList.add(paraName);
    return;
  }

  for (Object input : getOperatorInputList()) {
    if (!(input instanceof OperatorInputTableInfo)) {
      continue;
    }
    OperatorInputTableInfo tableInfo = (OperatorInputTableInfo) input;
    String schema = VariableModelUtility.getReplaceValue(variableModel, tableInfo.getSchema());
    String table = VariableModelUtility.getReplaceValue(variableModel, tableInfo.getTable());
    // Schema and table must match on the SAME input; previously only the last input's schema
    // was compared, so inputs coming from different schemas were validated incorrectly.
    if (parts[0].equals(schema) && parts[1].equals(table)) {
      return;
    }
  }
  invalidParameterList.add(paraName);
}
/**
 * Compares this model with another object.
 *
 * <p>Two models are considered equal when their Pig scripts match according to {@code
 * StringUtil.safeEquals} and their input map items match according to {@code
 * ListUtility.equalsIgnoreOrder}.
 *
 * @param obj the object to compare with; may be {@code null} or of any type
 * @return {@code true} if {@code obj} is an {@code AnalysisPigExecutableModel} with an equal
 *     script and equal input map items, {@code false} otherwise
 */
@Override
public boolean equals(Object obj) {
  if (this == obj) {
    return true;
  }
  // Guard against null and foreign types: equals must return false, not throw.
  if (!(obj instanceof AnalysisPigExecutableModel)) {
    return false;
  }
  AnalysisPigExecutableModel other = (AnalysisPigExecutableModel) obj;
  return StringUtil.safeEquals(pigScript, other.getPigScript())
      && ListUtility.equalsIgnoreOrder(pigInputMapItems, other.getPigInputMapItems());
}
/**
 * Trains the grouped regression model on {@code dataSet} and returns it.
 *
 * <p>Steps, in order: the transformer converts the data set (using {@code columnNames} and the
 * interaction model from {@code para}); column statistics and null-column lists are computed;
 * {@code getCoefficientAndR2Group} is run; degrees of freedom are derived per group from {@code
 * groupCount}; a per-group value is read from the SQL built by {@code createSSQLLGroup}; the
 * per-group variance-covariance matrices are fetched; and {@code caculateStatistics} is invoked.
 * Groups whose matrix is missing, or whose columns contain nulls, get an error string on their
 * sub-model. The transformed table is dropped afterwards when the transformer created one.
 *
 * @param dataSet the input data set
 * @param para regression parameters; only its interaction model is read here
 * @param columnNames comma-separated column names, may be {@code null} or blank
 * @return the trained model, with its group-by column set
 * @throws OperatorException if a statement cannot be created or any step of the training fails
 */
public Model learn(DataSet dataSet, LinearRegressionParameter para, String columnNames)
    throws OperatorException {
  this.dataSet = dataSet;
  ArrayList<String> columnNamesList = new ArrayList<String>();
  if (columnNames != null && !StringUtil.isEmpty(columnNames.trim())) {
    String[] columnNamesArray = columnNames.split(",");
    for (String s : columnNamesArray) {
      columnNamesList.add(s);
    }
  }
  transformer.setColumnNames(columnNamesList);
  transformer.setAnalysisInterActionModel(para.getAnalysisInterActionModel());
  newDataSet = transformer.TransformCategoryToNumeric_new(dataSet, groupbyColumn);
  DatabaseConnection databaseConnection =
      ((DBTable) newDataSet.getDBTable()).getDatabaseConnection();
  Column label = newDataSet.getColumns().getLabel();
  String labelName = StringHandler.doubleQ(label.getName());
  String tableName = ((DBTable) dataSet.getDBTable()).getTableName();
  String newTableName = ((DBTable) newDataSet.getDBTable()).getTableName();
  try {
    st = databaseConnection.createStatement(false);
  } catch (SQLException e) {
    itsLogger.error(e.getMessage(), e);
    throw new OperatorException(e.getLocalizedMessage());
  }
  try {
    newDataSet.computeAllColumnStatistics();
    Columns atts = newDataSet.getColumns();
    Iterator<Column> atts_i = atts.iterator();
    int count = 0;
    String[] columnNamesArray = new String[atts.size()];
    while (atts_i.hasNext()) {
      Column att = atts_i.next();
      columnNamesArray[count] = att.getName();
      count++;
    }
    null_list = calculateNull(dataSet);
    null_list_group = calculateNullGroup(newDataSet, atts);
    StringBuilder sb_notNull = getWhere(atts);
    getCoefficientAndR2Group(
        columnNames,
        dataSet,
        labelName,
        tableName,
        newTableName,
        atts,
        columnNamesArray,
        sb_notNull);

    // Degrees of freedom per group: rows - predictors - 1. Non-positive values mean the
    // group cannot produce a meaningful S, so it is flagged as NaN.
    HashMap<String, Long> degreeOfFreedom = new HashMap<String, Long>();
    for (String groupValue : groupCount.keySet()) {
      long tempDof = groupCount.get(groupValue) - columnNamesArray.length - 1;
      if (tempDof <= 0) {
        model.getOneModel(groupValue).setS(Double.NaN);
      }
      degreeOfFreedom.put(groupValue, tempDof);
    }

    StringBuffer sSQL =
        createSSQLLGroup(
            newDataSet, newTableName, label, columnNamesArray, coefficients, sb_notNull);
    HashMap<String, Double> sValueMap = new HashMap<String, Double>();
    try {
      itsLogger.debug(classLogInfo + ".learn():sql=" + sSQL);
      rs = st.executeQuery(sSQL.toString());
      while (rs.next()) {
        String groupValue = rs.getString(2);
        if (groupValue == null) {
          continue;
        }
        if (dataErrorList.contains(groupValue)) {
          sValueMap.put(groupValue, Double.NaN);
        } else {
          sValueMap.put(groupValue, rs.getDouble(1));
        }
      }
      rs.close();
    } catch (SQLException e) {
      itsLogger.error(e.getMessage(), e);
      throw new OperatorException(e.getLocalizedMessage());
    }

    HashMap<String, Matrix> varianceCovarianceMatrix =
        getVarianceCovarianceMatrixGroup(newTableName, columnNamesArray, st);
    for (String groupValue : varianceCovarianceMatrix.keySet()) {
      // Check the matrix of THIS group. The previous code tested the map itself, which can
      // never be null inside a loop over its own keys, so the error was never reported.
      if (varianceCovarianceMatrix.get(groupValue) == null) {
        model
            .getOneModel(groupValue)
            .setErrorString(
                AlpineDataAnalysisLanguagePack.getMessage(
                        AlpineDataAnalysisLanguagePack.MATRIX_IS_SIGULAR,
                        AlpineThreadLocal.getLocale())
                    + Tools.getLineSeparator());
      }
    }
    caculateStatistics(
        columnNamesArray,
        coefficients,
        model,
        sValueMap,
        varianceCovarianceMatrix,
        degreeOfFreedom);

    // Report, per group, the columns that contain nulls.
    for (String groupValue : null_list_group.keySet()) {
      if (null_list_group.get(groupValue).size() != 0) {
        StringBuilder sb_null = new StringBuilder();
        for (int i = 0; i < null_list_group.get(groupValue).size(); i++) {
          sb_null
              .append(StringHandler.doubleQ(null_list_group.get(groupValue).get(i)))
              .append(",");
        }
        sb_null = sb_null.deleteCharAt(sb_null.length() - 1);
        String table_exist_null =
            AlpineDataAnalysisLanguagePack.getMessage(
                AlpineDataAnalysisLanguagePack.TABLE_EXIST_NULL, AlpineThreadLocal.getLocale());
        // The localized message is split on ';' and the column list is placed between the
        // two halves.
        String[] temp = table_exist_null.split(";");
        model
            .getOneModel(groupValue)
            .setErrorString(temp[0] + sb_null.toString() + temp[1] + Tools.getLineSeparator());
      }
    }
    if (transformer.isTransform()) {
      dropTable(newTableName);
    }
    itsLogger.debug(LogUtils.exit(classLogInfo, "learn", model.toString()));
    model.setGroupByColumn(groupbyColumn);
    return model;
  } catch (Exception e) {
    itsLogger.error(e.getMessage(), e);
    throw new OperatorException(e.getLocalizedMessage());
  } finally {
    // Release JDBC resources on every path; previously they leaked whenever training failed.
    if (rs != null) {
      try {
        rs.close();
      } catch (SQLException closeError) {
        itsLogger.error(closeError.getMessage(), closeError);
      }
    }
    if (st != null) {
      try {
        st.close();
      } catch (SQLException closeError) {
        itsLogger.error(closeError.getMessage(), closeError);
      }
    }
  }
}
@Override public VisualizationOutPut generateOutPut(AnalyticOutPut analyzerOutPut) { EngineModel emodel = null; ARIMAModel model = null; if (!(analyzerOutPut instanceof AnalyzerOutPutTrainModel)) return null; emodel = (EngineModel) ((AnalyzerOutPutTrainModel) analyzerOutPut).getEngineModel(); if (emodel == null || !(emodel.getModel() instanceof ARIMAModel)) return null; model = (ARIMAModel) emodel.getModel(); List<TextAndTableListEntity> textAndTableListEntityList = new ArrayList<TextAndTableListEntity>(); List<DataTextAndTableListVisualizationOutPut> textAndTableListOutput = new ArrayList<DataTextAndTableListVisualizationOutPut>(); Map<String, TextAndTableListEntity> nameEntityMap = new HashMap<String, TextAndTableListEntity>(); VisualizationOutPut outputTable = null; List<SingleARIMAModel> modelList = model.getModels(); if (StringUtil.isEmpty(model.getGroupColumnName())) { // no groupBY TextTable table = getVTextTable(modelList.get(0)); TableEntity te = new TableEntity(); generateTableEntity(table, te); TextAndTableListEntity textAndTableListEntity = new TextAndTableListEntity(); textAndTableListEntity.addTableEntity(te); textAndTableListEntity.setText(getVTextText(modelList.get(0))); outputTable = new DataTextAndTableListVisualizationOutPut(textAndTableListEntity); outputTable.setName(analyzerOutPut.getAnalyticNode().getName()); } else { String[] availableValue = new String[modelList.size()]; int k = 0; for (SingleARIMAModel singleModel : modelList) { TextTable table = getVTextTable(singleModel); // default value is the first TableEntity te = new TableEntity(); generateTableEntity(table, te); TextAndTableListEntity textAndTableListEntity = new TextAndTableListEntity(); textAndTableListEntity.addTableEntity(te); textAndTableListEntity.setText(getVTextText(singleModel)); textAndTableListEntityList.add(textAndTableListEntity); DataTextAndTableListVisualizationOutPut output = new DataTextAndTableListVisualizationOutPut(textAndTableListEntity); 
textAndTableListOutput.add(output); availableValue[k] = singleModel.getGroupColumnValue(); nameEntityMap.put(availableValue[k], textAndTableListEntity); k++; } MultiTextAndTableListEntity tableList = new MultiTextAndTableListEntity(); tableList.setTextAndTableListEntityList(textAndTableListEntityList); tableList.setAvaiableValue(availableValue); tableList.setNameEntityMap(nameEntityMap); outputTable = new MultiDataTextAndTableListVisualizationOutPut(tableList); outputTable.setName(analyzerOutPut.getAnalyticNode().getName()); ((MultiDataTextAndTableListVisualizationOutPut) outputTable) .setTextAndTableListOutput(textAndTableListOutput); } return outputTable; }
private void performOperation( DatabaseConnection databaseConnection, DataSet dataSet, Locale locale) throws AnalysisError, OperatorException { String outputTableName = getQuotaedTableName(getOutputSchema(), getOutputTable()); String inputTableName = getQuotaedTableName(getInputSchema(), getInputTable()); Columns atts = dataSet.getColumns(); String dbType = databaseConnection.getProperties().getName(); IDataSourceInfo dataSourceInfo = DataSourceInfoFactory.createConnectionInfo(dbType); IMultiDBUtility multiDBUtility = MultiDBUtilityFactory.createConnectionInfo(dbType); ISqlGeneratorMultiDB sqlGenerator = SqlGeneratorMultiDBFactory.createConnectionInfo(dbType); dropIfExist(dataSet); DatabaseUtil.alterParallel(databaseConnection, getOutputType()); // for oracle StringBuilder sb_create = new StringBuilder("create "); StringBuilder insertTable = new StringBuilder(); if (getOutputType().equalsIgnoreCase("table")) { sb_create.append(" table "); } else { sb_create.append(" view "); } sb_create.append(outputTableName); sb_create.append( getOutputType().equalsIgnoreCase(Resources.TableType) ? 
getAppendOnlyString() : ""); sb_create.append(DatabaseUtil.addParallel(databaseConnection, getOutputType())).append(" as ("); StringBuilder selectSql = new StringBuilder(" select "); selectSql.append(StringHandler.doubleQ(groupColumn)).append(","); Column att = atts.get(columnNames); dataSet.computeColumnStatistics(att); if (att.isNumerical()) { logger.error("PivotTableAnalyzer cannot accept numeric type column"); throw new AnalysisError( this, AnalysisErrorName.Not_numeric, locale, SDKLanguagePack.getMessage(SDKLanguagePack.PIVOT_NAME, locale)); } String attName = StringHandler.doubleQ(att.getName()); List<String> valueList = att.getMapping().getValues(); if (!useArray && valueList.size() > Integer.parseInt(AlpineMinerConfig.PIVOT_DISTINCTVALUE_THRESHOLD)) { logger.error("Too many distinct value for column " + StringHandler.doubleQ(columnNames)); throw new AnalysisError( this, AnalysisErrorName.Too_Many_Distinct_value, locale, StringHandler.doubleQ(columnNames), AlpineMinerConfig.PIVOT_DISTINCTVALUE_THRESHOLD); } if (valueList.size() <= 0) { logger.error("Empty table"); throw new AnalysisError(this, AnalysisErrorName.Empty_table, locale); } String aggColumnName; if (!StringUtil.isEmpty(aggColumn)) { aggColumnName = StringHandler.doubleQ(aggColumn); } else { aggColumnName = "1"; } Iterator<String> valueList_i = valueList.iterator(); if (useArray) { if (dataSourceInfo.getDBType().equals(DataSourceInfoOracle.dBType)) { ArrayList<String> array = new ArrayList<String>(); while (valueList_i.hasNext()) { String value = StringHandler.escQ(valueList_i.next()); String newValue = "alpine_miner_null_to_0(" + aggrType + " (case when " + attName + "=" + CommonUtility.quoteValue(dbType, att, value) + " then " + aggColumnName + " end )) "; array.add(newValue); } selectSql.append( CommonUtility.array2OracleArray(array, CommonUtility.OracleDataType.Float)); } else { selectSql.append(multiDBUtility.floatArrayHead()); while (valueList_i.hasNext()) { String value = valueList_i.next(); 
selectSql.append("alpine_miner_null_to_0(").append(aggrType); selectSql.append(" (case when ").append(attName).append("="); value = StringHandler.escQ(value); selectSql .append(CommonUtility.quoteValue(dbType, att, value)) .append(" then ") .append(aggColumnName) .append(" end )) "); // else 0 selectSql.append(","); } selectSql = selectSql.deleteCharAt(selectSql.length() - 1); selectSql.append(multiDBUtility.floatArrayTail()); } selectSql.append(" " + StringHandler.doubleQ(att.getName())); } else { if (((DBTable) dataSet.getDBTable()) .getDatabaseConnection() .getProperties() .getName() .equals(DataSourceInfoNZ.dBType)) { while (valueList_i.hasNext()) { String value = valueList_i.next(); selectSql.append("(").append(aggrType); selectSql.append(" (case when ").append(attName).append("="); value = StringHandler.escQ(value); selectSql .append(CommonUtility.quoteValue(dbType, att, value)) .append(" then ") .append(aggColumnName) .append(" end )) "); // else 0 String colName = StringHandler.doubleQ(att.getName() + "_" + value); selectSql.append(colName); selectSql.append(","); } selectSql = selectSql.deleteCharAt(selectSql.length() - 1); } else if (((DBTable) dataSet.getDBTable()) .getDatabaseConnection() .getProperties() .getName() .equals(DataSourceInfoDB2.dBType)) { while (valueList_i.hasNext()) { String value = valueList_i.next(); selectSql.append("alpine_miner_null_to_0(").append(aggrType); selectSql.append(" (double(case when ").append(attName).append("="); value = StringHandler.escQ(value); selectSql .append(CommonUtility.quoteValue(dbType, att, value)) .append(" then ") .append(aggColumnName) .append(" end ))) "); // else 0 String colName = StringHandler.doubleQ(att.getName() + "_" + value); selectSql.append(colName); selectSql.append(","); } selectSql = selectSql.deleteCharAt(selectSql.length() - 1); } else { while (valueList_i.hasNext()) { String value = valueList_i.next(); selectSql.append("alpine_miner_null_to_0(").append(aggrType); selectSql.append(" (case 
when ").append(attName).append("="); value = StringHandler.escQ(value); selectSql .append(CommonUtility.quoteValue(dbType, att, value)) .append(" then ") .append(aggColumnName) .append(" end )) "); // else 0 String colName = StringHandler.doubleQ(att.getName() + "_" + value); selectSql.append(colName); selectSql.append(","); } selectSql = selectSql.deleteCharAt(selectSql.length() - 1); } } selectSql.append(" from ").append(inputTableName).append(" foo group by "); selectSql.append(StringHandler.doubleQ(groupColumn)); if (((DBTable) dataSet.getDBTable()) .getDatabaseConnection() .getProperties() .getName() .equals(DataSourceInfoNZ.dBType)) { StringBuilder sb = new StringBuilder(); sb.append("select ").append(StringHandler.doubleQ(groupColumn)).append(","); Iterator<String> valueList_new = valueList.iterator(); while (valueList_new.hasNext()) { String value = valueList_new.next(); String colName = StringHandler.doubleQ(att.getName() + "_" + value); sb.append("case when ").append(colName).append(" is null then 0 else "); sb.append(colName).append(" end ").append(colName).append(","); } sb = sb.deleteCharAt(sb.length() - 1); sb.append(" from (").append(selectSql).append(") foo "); selectSql = sb; } sb_create.append(selectSql).append(" )"); if (getOutputType().equalsIgnoreCase("table")) { sb_create.append(getEndingString()); insertTable.append(sqlGenerator.insertTable(selectSql.toString(), outputTableName)); } try { Statement st = databaseConnection.createStatement(false); logger.debug("PivotTableAnalyzer.performOperation():sql=" + sb_create); st.execute(sb_create.toString()); if (insertTable.length() > 0) { st.execute(insertTable.toString()); logger.debug("PivotTableAnalyzer.performOperation():insertTableSql=" + insertTable); } } catch (SQLException e) { logger.error(e); if (e.getMessage().startsWith("ORA-03001") || e.getMessage().startsWith("ERROR: invalid identifier")) { throw new AnalysisError(this, AnalysisErrorName.Invalid_Identifier, locale); } else { throw new 
OperatorException(e.getLocalizedMessage()); } } }